In both protein.pl and homolog.pl, add the following code
immediately after the line that assigns $type = $label
#########################################################################
# Backslash-escape the pipes of a Swiss-Prot style label so that $type can be
# used as a regex pattern: "sp|ACC|" becomes "sp\|ACC\|". Because of /i the tag
# is matched in either case but always rewritten in lower case.
$type =~ s/(sp\|)([\w]*)(\|)/sp\\\|$2\\\|/gi;
# Same for a TrEMBL style label; note that only the pipe directly after "tr"
# is escaped here ("tr|ACC" becomes "tr\|ACC"), any later pipe is left as is.
$type =~ s/(tr\|)([\w]*)/tr\\\|$2/gi;
########################################################################
In both protein.pl and homolog.pl, change
#####################################################################
if(/\<protein/is && /"$type"/is) to if(/\<protein/is && /$type/is)
#####################################################################
In homolog.pl, add to the first while loop
##########################################
# Remove the trailing line terminator from $id.
chomp($id);
##########################################
In protein.pl, add the following code to the GetHref subroutine
####################################################################
# Route Swiss-Prot ("sp|") and TrEMBL ("tr|") labels to the ExPASy link builder.
# The tag is matched as an alternation: a character class such as [sp|tr]
# would accept any single one of s, p, |, t, r in front of the pipe and so
# would also catch unrelated labels (for example "pir||A12345").
if(/(?:sp|tr)\|/) {
    return GetHrefSprot($l);
}
#####################################################################
In protein.pl add the following subroutine
#####################################################################
# GetHrefSprot($l)
# Builds the ExPASy entry URL for a Swiss-Prot ("sp|") or TrEMBL ("tr|") label.
#   $l      - protein label, e.g. "sp|P12345|" or "tr|Q12345"
#   returns - "http://au.expasy.org/cgi-bin/get-sprot-entry?" followed by the
#             accession taken from the label
sub GetHrefSprot
{
    my ($l) = @_;
    my $sp = defined($l) ? $l : "";
    if($sp =~ /(?:sp|tr)\|([^|\s]+)/) {
        # Take only the field that follows the tag, so a trailing entry name
        # ("sp|P12345|ALBU_HUMAN") is not glued onto the accession and no
        # "sp"/"tr" inside the accession itself is removed.
        $sp = $1;
    }
    else {
        # No recognizable tag: just drop the pipes.
        $sp =~ s/\|//g;
    }
    return "http://au.expasy.org/cgi-bin/get-sprot-entry?$sp";
}
#####################################################################
In plist.pl, add the following code to the get_label and get_expect subroutines
#####################################################################
# Cut $s at its first whitespace character (the cut runs to the end of that line).
$s =~ s/\s.*//;
# For a label ending in "sp|ACC|NAME", drop the trailing NAME and keep "sp|ACC|".
$s =~ s/(sp\|\w+\|)\w+$/$1/i;
######################################################################
In plist.pl add the following code to the GetInfo subroutine
###############################################################################
elsif(/(?:sp|tr)\|/i){
    # Swiss-Prot ("sp|") or TrEMBL ("tr|") label held in $_.
    # The tag is matched as an alternation: a character class such as [sp|tr]
    # would accept any single one of s, p, |, t, r in front of the pipe and so
    # would also catch unrelated labels (for example "pir||A12345").

    # Reduce the label to the bare accession used in the cache file names.
    s/sp//;
    s/tr//;
    s/\|//g;
    $l = $_;

    # First choice: the already-extracted description in "<acc>.sprot.b".
    # NOTE(review): $cpath is appended to here, so it is presumably set to the
    # cache directory by the enclosing subroutine - confirm.
    $cpath .= "$l.sprot.b";
    $html = GetCache($cpath);
    if(length($html) > 1) {
        return $html;
    }

    # Otherwise derive the description from the cached "<acc>.sprot" report.
    $cpath = $cache . "$l.sprot";
    $html = GetCache($cpath);
    if(!($html =~ /.*DE\s*?/si)) {
        # No "DE" marker in the report: fall back to a blank description.
        $html = "\n\n";
    }
    # Keep only the text between the first "DE" marker and the next one,
    # cut it at the first "(" and strip markup tags whose name starts with
    # b-z (tags starting with "a", i.e. anchors, are left in place).
    $html =~ s/.*?DE\s*?//si;
    $html =~ s/\s*?DE.*//si;
    $html =~ s/\(.*//si;
    $html =~ s/<([b-zB-Z]).*?>//gsi;
    $html =~ s/<\/([b-zB-Z]).*?>//gsi;

    # Remember the extracted description for the next lookup.
    $cpath = $cache . "$l.sprot.b";
    WriteCache($cpath,$html);
    return $html;
}
###############################################################################
In get_info.pl and protein.pl add the following code to the DoSearch subroutine
#################################################################
elsif(/(?:sp|tr)\|/) {
    # Swiss-Prot ("sp|") or TrEMBL ("tr|") label: append the Swiss-Prot report.
    # The tag is matched as an alternation: a character class such as [sp|tr]
    # would accept any single one of s, p, |, t, r in front of the pipe and so
    # would also catch unrelated labels (for example "pir||A12345").
    $html .= PostSprot($l);
    return $html;
}
#################################################################
In get_info.pl and protein.pl add the following subroutine
#############################################################################
# PostSprot($l)
# Returns the Swiss-Prot report HTML for a protein label.
#   $l      - protein label, e.g. "sp|P12345|" or "tr|Q12345"
#   returns - the report text; taken from ../cache/<acc>.sprot when that file
#             is readable and the global $refresh is not "yes", otherwise
#             fetched from ExPASy, trimmed, written to the cache and returned.
#             If the response does not look like a usable page, a short
#             "unavailable" message is returned and nothing is cached.
# NOTE(review): the cache path is built directly from the label; if labels can
# come from untrusted input they should be validated before use as a file name.
sub PostSprot
{
    my ($l) = @_;
    my $html = "";

    # Reduce the label to the bare accession. All remaining pipes are removed
    # so the cache file name agrees with the pipe-free name that the other
    # Swiss-Prot helpers derive from the same label.
    my $sp = $l;
    $sp =~ s/sp\|//s;
    $sp =~ s/tr\|//s;
    $sp =~ s/\|//gs;

    my $cache = "../cache/$sp.sprot";

    # Serve the cached report unless a refresh was requested. The file is only
    # opened when it will actually be read, so no handle is left open.
    if(not($refresh eq "yes")) {
        if(open(my $in, '<', $cache)) {
            $html = join("", <$in>);
            close($in);
            return $html;
        }
    }

    # The derived ".b" cache file is stale once the report is re-fetched.
    unlink($cache . ".b");

    my @OutValue;
    my $url = "http://au.expasy.org/cgi-bin/get-sprot-entry?" . $sp;
    my $agent = LWP::UserAgent->new();
    $agent->agent('Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)');
    $agent->timeout(10);
    # NOTE(review): the trailing 'Content' key is passed without a value;
    # confirm this is intended before changing how the request is built.
    my $req = POST($url,\@OutValue,'Content_Type','form-data','Content');
    $html = $agent->request($req)->as_string();
    if(!($html =~ /<body/si) || ($html =~ /WWW\sError/si)) {
        return "<BR><BR>Protein information unavailable:\ncannot connect to Swiss-Prot.<BR>";
    }

    # Keep only the <pre>...</pre> section of the response, then apply the
    # remaining page-level clean-ups.
    $html =~ s/.*<pre>/<pre>\n/si;
    $html =~ s/<\/pre>.*/<\/pre>\n/si;
    $html =~ s/.*<html\>/<html\>/gs;
    $html =~ s/<body.*\>.*<\/script>/<\/body>/gs;
    $html =~ s/<h1>.*<\/h1>/<\//gsi;

    my $value = "<hr><h3>Swiss-Prot Report:</h3>" . $html;

    # Caching is best effort: if the cache file cannot be written the report
    # is still returned to the caller.
    if(open(my $out, '>', $cache)) {
        print $out $value;
        close($out);
    }
    return $value;
}
###############################################################################
|
This code was contributed by Colton A Smith at The University of Tennessee - Knoxville TN.
It uses MPI (Message Passing Interface) to run multiple instances
of tandem on a Sun Solaris cluster.
It consists of two C files - t_batch.c
and wrap.c.
Wrap.c counts how many xml input files there are and then calls
t_batch with the appropriate number of machines (one per file).
Neither program has any error checking and Colton has suggested
they be combined into one program.
To compile these programs, you need to have MPI installed
on your machine. A version can be downloaded
here.
To compile t_batch.c, type: mpicc t_batch.c -o t_batch.
Thanks Colton!
|