#!/usr/local/bin/perl56 use CGI qw/:all/; my $page = new CGI; $HALT = 0; # Set to 1 to deactivate blast $DEBUG = 0; # print debugging information $ENV{'BLASTMAT'}="/home/pubseq/BioSw/wuBLAST2/matrix"; #set up the blast matrix location $ENV{'BLASTFILTER'}="/usr/local/bin/"; #set up the filtering $ENV{'BLASTDB'}="/home/rocky/databases"; $blast_home = "/home/rocky/www/blastserver"; # "root" for blasterver files #$blastqueue = "fastblastq"; # default batch queue $rm_binary = "/nfs/disk100/humpub/scripts/RepeatMasker"; $seq_type = ""; # Either DNA or protein (D/P) $fasta_name = ""; $form=new CGI; # OK, these are the guts of it... &get_web_form(); &print_HTML_header($page); &determine_input(); # return data by email or to browser $seq_type = &check_and_clean_sequence(); # remove illegal characters &trap_errors($seq_type); &process_options(); # build blast options &make_temp_file(); # create temp storage if ($repeatmasker eq "yes") { &make_mask_file(); } if ($outputtype eq "HTML") { &send_results_to_browser(); } else{ &send_results_to_email(); } &write_log(); (1); # we end - phew! ######################################################################## ######################################################################## ######################################################################## #SUBROUTINES ######################################################################## sub get_web_form { $blast_selection= $form->param('blast_selection'); #$blast_type = $form->param('blast_type'); # executable to use $sequence = $form->param('sequence'); # target sequence $address = $form->param('address'); # email address #$database = $form->param('database'); $align = $form->param('align'); $filter_off = $form->param('filter_off'); $file = $form->param('uploadfile'); $repeatmasker = $form->param('repeatmasker'); $outputtype = $form->param('return'); ($blast_type, $database)=split(/:/,$blast_selection); $address =~ s/^\s+//g; # remove leading whitespace $sequence =~ s/^\s+//; # remove leading whitespace } #end of sub ############################################################# ## Write the HTML header ############################################################# sub print_HTML_header { my $page=shift; select((select(STDOUT),$|=1)[0]); # flush STDOUT buffer immediately... if($HALT == 1){ print "
"; print "Browser = $ENV{'HTTP_USER_AGENT'}\n"; print "Binary = $blast_type\n"; print "Sequence = $sequence\n"; print "Email address = \"$address\"\n"; print "Alignments = $align\n"; print "Filter_off = $filter_off\n"; print "Uploadfile = $file\n"; print "Repeatmasker = $repeatmasker\n"; print "Database = $database\n"; print "Return = $outputtype\n"; print ""; } } # end of sub ############################################################### #Resolve which input to use ############################################################### sub determine_input { if ($file =~/\w/ && $sequence=~/\w/) { if($ENV{'HTTP_USER_AGENT'} =~ /Mac/){ print "Note: Your Macintosh browser may have a bug that prevents correct functioning of the \"file browse\" box.
";
}
else{
print "Warning: ambiguous sequence source (both file and text box contain data).
";
print "Assuming you want to use the sequence in the box.
"; #&output_footer; #exit; } } if ($sequence eq "" && $file ne "") { $sequence=$file; print "Input sequence taken from uploaded file<\B>
"; } } # end of sub ################################################################ # Work out the sequence type and get the length of the sequence ################################################################ sub check_and_clean_sequence { my ($tempseq, $templength, $A,$C,$G,$T,$total); $fasta_name = $sequence; $fasta_name =~ />(\w+)/; # save the FASTA name $fasta_name=$1; if($fasta_name eq ""){$fasta_name = "UNKNOWN-QUERY";} # assign default name to non-FASTA seqs $tempseq = $sequence; # create a temp sequence $tempseq =~ s/>.+\n//; # remove the title of a FASTA sequence if($DEBUG==1){ $templength = length($tempseq); print "Sequence pre cleanup = $tempseq ($templength)\n
"; } $tempseq = uc($tempseq); $_ = $tempseq; $A = tr/A/a/; $C = tr/C/c/; $G = tr/G/g/; $T = tr/T/t/; $total = $A+$C+$G+$T; $templength = length($tempseq); if ($templength <= 0){ print "
Sequence length is zero. Search aborted!
"; &output_footer(); exit(1); } if(($total/$templength) >= 0.70){ # its DNA $tempseq =~ s/[^ACGTUNRYKMBDHVSWNX]//ig; if ($DEBUG==1){ print "Sequence is DNA
";
print "BLAST for nulceotides (removing illegals)
";
$templength = length($tempseq);
print "Sequence post cleanup = $tempseq ($templength)\n
"; } $sequence = $tempseq; return("D"); } else{ # its protein $tempseq =~ s/[^ARNDCEQGHILKMFPSTWYVXZ]//ig; if($DEBUG==1){ print "Sequence is a protein
";
print "BLAST for proteins (removing illegals)
";
$templength = length($tempseq);
print "Sequence post cleanup = $tempseq ($templength)\n
"; } $sequence = $tempseq; return("P"); } } # end of sub ############################################################### #trap errors ############################################################### sub trap_errors { my ($sequence_type) = @_; if ($blast_type eq "") { print "
"; print "You appear to have submitted a DNA sequence"; &output_footer(); exit; } if (($blast_type eq "wublastn" && $sequence_type eq "P") || ($blast_type eq "wublastx" && $sequence_type eq "P")){ print "
"; print "but you cannot perform this type of search ($blast_type) using a DNA sequence"; print "
"; print "You appear to have submitted a protein sequence
"; print "but you cannot perform this type of search ($blast_type) using a protein sequence"; &output_footer(); exit; } if (($blast_type eq "wutblastx" && $sequence_type eq "P")){ print "
"; print "You appear to have submitted a protein sequence
"; print "but you cannot perform this type of search ($blast_type) using a protein sequence"; &output_footer(); exit; } } # end of sub ############################################################### #handle the options ############################################################### sub process_options { my $seqlength = length($sequence); #Decrease the cut-off if the sequence is very small if ($seqlength < 30 ) { $OPTIONS=$OPTIONS . " S=10 "; } if ($align eq "") { $align=100; } $OPTIONS=$OPTIONS . " B=$align "; # number of results to display #if ($blast_type ne "blastn" && $blast_type ne "wublastn" && $filter_off ne "yes") { # $OPTIONS=$OPTIONS . " -filter=seg "; #} if ($filter_off eq "yes") { print "
Low complexity filtering is switched off
"; } if ($repeatmasker eq "no") { print "
REPEATMASKER is switched off
"; } if ($DEBUG==1){ print "Options: $OPTIONS
"; } } #end of sub ################################################################# #make a temp file to hold the sequence ################################################################# sub make_temp_file { open (TEMP, ">$blast_home/Blast.$$.tmp") || print "
Error: Couldn't create temporary file, try again later
";; select((select(TEMP),$|=1)[0]); print TEMP ">$fasta_name\n"; print TEMP "$sequence\n"; close (TEMP); } # end of sub ##################################################################### #Make a repeat masked file sequence if required ##################################################################### sub make_mask_file { print "REPEATMASKER is being used"; system("$rm_binary $blast_home/Blast.$$.tmp"); ## remove unwanted files unlink("$blast_home/Blast.$$.tmp.RepMask") or print "Cannot delete RepMask file: $!"; unlink("$blast_home/Blast.$$.tmp.RepMask.cat") or print "Cannot delete RepMask.cat file: $!"; unlink("$blast_home/Blast.$$.tmp.masked.log") or print "Cannot delete masked log file: $!"; my $maskfile="Blast.$$.tmp.masked"; open(MASK,"$blast_home/$maskfile") or print "Cannot open mask file:$!"; open(TEMPFILE,">$blast_home/Blast.$$.tmp") or print "Cannot create temp file: $!"; while (
) { print TEMPFILE; } close (MASK); close (TEMPFILE); unlink ("$blast_home/$maskfile") or print "Cannot delete mask file: $!"; } # end of sub ##################################################################### #Spawn the blast job if email address specified ##################################################################### sub send_results_to_email { print "Blast job submitted "; print "
Running blast....
"; open(BLAST,"bsub -I -q $blastqueue \"$blast_type $database $blast_home/Blast.$$.tmp -P6 $OPTIONS\"|"); @mail=; close (BLAST); print " Sending mail to $address...."; open (MAIL, "| Mail -s $fasta_name $address"); foreach $line (@mail){ #$line =~ s/\/nfs\/.*\/(.*)/$1/; # remove references to NFS locations $line =~ s/.*\/(.*)/$1/; # remove references to NFS locations print MAIL $line; } close(MAIL); print "done.
"; unlink ("$blast_home/Blast.$$.tmp") or print "Cannot delete temp file: $!"; ## remove the temp file print ""; #print "Options=$OPTIONS
"; print "Please note that sending a number of searches in a rapid succession "; print "will have the effect of making the total time taken for the searches "; print "a lot longer. It is probably best to wait a couple of minutes before "; print "requesting any more searches
"; } # end of sub ##################################################################### # print html marked up output ##################################################################### sub send_results_to_browser { #print "
\n"; #print "Blast Job: $$"; print "
"; open(BLASTOUT,"/usr/local/bin/$blast_type $database $blast_home/Blast.$$.tmp -P6 $OPTIONS |"); while () {print "    $_";} #&default_parse(\*BLASTOUT); close (BLASTOUT); unlink ("$blast_home/Blast.$$.tmp") or print "Cannot delete temp file: $!"; ## remove the temp file } # end of sub ##################################################################### #Log output to file ##################################################################### sub write_log { my ($logsequence,$source) = ""; $logsequence = $in{'sequence'}; $logsequence =~ s/[\n\r\t ]//ig; # make sequence a single line if ($file ne "") { $source = "upload"; } else{ $source = "box"; } # open as briefly as possible.... open (LOG, ">>$blast_home/blastserver_log") || open (LOG, ">$blast_home/blast.$$.failed_write_log"); print LOG "\nPID=$$:Seq=$logsequence:SeqSrc=$source:"; print LOG "Tmpfile=$blast_home/Blast.$$.tmp:Opts=$OPTIONS:Addr=$address:"; print LOG "Type=$blast_type:Alnmts=$align:Db=$database:"; print LOG "Q=$blastqueue:Req=$ENV{'REMOTE_HOST'}:Time=$cpu_info[6]"; close LOG; } # end of sub ##################################################################### #mark up the blast output ##################################################################### sub default_parse { my $start; my $blastfile= shift; #read the acedb_info.map file to see whether these objects can be marked up to webace #open(ACEDB,"$blast_home/acedb_info.map") or die "Cannot open Ace map info: $!"; #while( ) { # if (/^#/) {next;} # if (/(\S+)\s+(\S+)\s+(\S+)/) { # $webaceprefix{$1}=$2; # $webacesuffix{$1}=$3; # } #} #close (ACEDB); while (<$blastfile>) { $_ =~ s/Job <.*>.*//; # remove references to LSF job number #$_ =~ s/\/nfs\/.*\/(.*)/$1/; # remove references to NFS locations $_ =~ s/Database:.*\/(.*)/Database: $1/; # remove references to NFS locations $_ =~ s/Title:.*\/(.*)/Title: $1/; # remove references to NFS locations #print out the blast header if (/Sequences producing High/i) { $start="yes"; print; next; } if (/\*\*\*\s+none\s+\*\*\*/i) { $start=""; print "\n"; } if ($start eq "") { print; next; } push(@blastresults,$_); if (/^(\S+)\s+/) { $sequence=$1; } if (/^>(\S+)/) { $sequence=$1; } if (/acc=(\S+)/) { $acc{$sequence}=$1; } } #mark up the results foreach $line (@blastresults) { #change the accession to a more meaningful comment $line=~s/acc=/EMBL:/; if ($line=~/^>/) {$start="no";print "\n";} #mark up the summary info if ($start eq "yes" && $line=~/^(\S+)(\s+.+)\s+(\S+)(\s+\S+\s+\S+)$/ && $acc{$1} ne ""){ print "$1<\/A>$2$3$4\n"; } if ($start eq "yes" && $line=~/^(\S+)(\s+.+)\s+(\S+)(\s+\S+\s+\S+)$/ && $acc{$1} eq ""){ print "$1$2$3$4\n"; } if ($line =~ /Total cpu time/){ @cpu_info = split(/\s+/,$line); $cpu_info[6] =~ s/[stu]//; # #print "CPU was: $cpu_info[6] seconds!"; &count_hits($cpu_info[6]); } $line =~ s/\/nfs\/.*\/(.*)/$1/; # remove references to NFS locations #mark up the alignments if ($start eq "no") { my $db_link = $database; #$db_link =~ s/\/nfs\/.*\/(.*)/$1/; # remove references to NFS locations $db_link =~ s/.*\/(.*)/$1/; # remove references to NFS locations #print if the acc no exists if ($line=~/>(\S+)(\s+.+)/ || $line=~/>(\S+)$/) { #print the marked up sequence name print ">$1<\/A>"; #add a link to retrieve the full sequence... print " [Full Sequence] "; #if we have an accession mark this up if ($acc{$1} ne "") { print " [EMBL:$acc{$1}]"; } #if this file has a acedb_info.map emtry then mark this up if ($webaceprefix{$database} ne "") { print " [Webace] "; } #this is the dotter line which is not used at present #[DotPlot]$2\n";} #print the rest of the fasta header print "$2\n"; } #print the alignments if ($line=~/^>/) {} else {print $line;} } } } # end of sub ##################################################################### #write the NPH header ##################################################################### sub output_no_content_header { if ($cgititle eq "") {$cgititle=""} if (!$cgiicon) {$cgiicon="hgp.gif"} open (HEAD, "/nfs/WWW/cgi-bin/header_no_content_type -icon $cgiicon -title \"$cgititle\" |") or die "Cannot write header: $!"; while () {print} close (HEAD); } # end of sub ##################################################################### #record the CPU cost of this job ##################################################################### sub count_hits { my ($cpu) = @_; $home = "/nfs/WWW/admin_scripts/blast/"; my $now = `date '+%d-%b-%C%y'`; chomp ($now); my $found =0; open (RECORD,"$home/blast_use.log") or die "Cannot read record log file: $!"; @lines = ; close(RECORD); open (RECORD,">$home/blast_use.log") or die "Cannot update record log file: $!"; foreach $line (@lines){ chomp($line); my ($date, $count, $time) = split(/:/,$line,3); if ($date eq "$now"){ $count++; $time += $cpu; $line = join(":",($date, $count, $time)); print RECORD "$line\n"; $found = 1; } else{ print RECORD "$line\n"; } } if ($found ==0){ $line = join(":",($now, 1, $cpu)); #append a new days record print RECORD "$line\n"; } close(RECORD); } # end of sub