#!/usr/local/bin/perl56 use CGI qw/:all/; my $page = new CGI; $HALT = 0; # Set to 1 to deactivate blast $DEBUG = 0; # print debugging information $ENV{'BLASTMAT'}="/home/pubseq/BioSw/wuBLAST2/matrix"; #set up the blast matrix location $ENV{'BLASTFILTER'}="/usr/local/bin/"; #set up the filtering $ENV{'BLASTDB'}="/home/rocky/databases"; $blast_home = "/home/rocky/www/blastserver"; # "root" for blasterver files #$blastqueue = "fastblastq"; # default batch queue $rm_binary = "/nfs/disk100/humpub/scripts/RepeatMasker"; $seq_type = ""; # Either DNA or protein (D/P) $fasta_name = ""; $form=new CGI; # OK, these are the guts of it... &get_web_form(); &print_HTML_header($page); &determine_input(); # return data by email or to browser $seq_type = &check_and_clean_sequence(); # remove illegal characters &trap_errors($seq_type); &process_options(); # build blast options &make_temp_file(); # create temp storage if ($repeatmasker eq "yes") { &make_mask_file(); } if ($outputtype eq "HTML") { &send_results_to_browser(); } else{ &send_results_to_email(); } &write_log(); (1); # we end - phew! ######################################################################## ######################################################################## ######################################################################## #SUBROUTINES ######################################################################## sub get_web_form { $blast_selection= $form->param('blast_selection'); #$blast_type = $form->param('blast_type'); # executable to use $sequence = $form->param('sequence'); # target sequence $address = $form->param('address'); # email address #$database = $form->param('database'); $align = $form->param('align'); $filter_off = $form->param('filter_off'); $file = $form->param('uploadfile'); $repeatmasker = $form->param('repeatmasker'); $outputtype = $form->param('return'); ($blast_type, $database)=split(/:/,$blast_selection); $address =~ s/^\s+//g; # remove leading whitespace $sequence =~ s/^\s+//; # remove leading whitespace } #end of sub ############################################################# ## Write the HTML header ############################################################# sub print_HTML_header { my $page=shift; select((select(STDOUT),$|=1)[0]); # flush STDOUT buffer immediately... if($HALT == 1){ print "

Sorry Blast servers are temporarily out of service (please see our home page for details)

"; exit; } if ($DEBUG ==1){ print "
";
		print "Browser = $ENV{'HTTP_USER_AGENT'}\n";
		print "Binary = $blast_type\n";
		print "Sequence = $sequence\n";
		print "Email address = \"$address\"\n";
		print "Alignments = $align\n";
		print "Filter_off = $filter_off\n";
		print "Uploadfile = $file\n";
		print "Repeatmasker = $repeatmasker\n";
		print "Database = $database\n";
		print "Return = $outputtype\n";
		print "
"; } } # end of sub ############################################################### #Resolve which input to use ############################################################### sub determine_input { if ($file =~/\w/ && $sequence=~/\w/) { if($ENV{'HTTP_USER_AGENT'} =~ /Mac/){ print "Note: Your Macintosh browser may have a bug that prevents correct functioning of the \"file browse\" box.
"; print "Assuming you want to use the sequence in the box.

"; } else{ print "Warning: ambiguous sequence source (both file and text box contain data).
"; print "Assuming you want to use the sequence in the box.

"; #&output_footer; #exit; } } if ($sequence eq "" && $file ne "") { $sequence=$file; print "Input sequence taken from uploaded file<\B>

"; } } # end of sub ################################################################ # Work out the sequence type and get the length of the sequence ################################################################ sub check_and_clean_sequence { my ($tempseq, $templength, $A,$C,$G,$T,$total); $fasta_name = $sequence; $fasta_name =~ />(\w+)/; # save the FASTA name $fasta_name=$1; if($fasta_name eq ""){$fasta_name = "UNKNOWN-QUERY";} # assign default name to non-FASTA seqs $tempseq = $sequence; # create a temp sequence $tempseq =~ s/>.+\n//; # remove the title of a FASTA sequence if($DEBUG==1){ $templength = length($tempseq); print "Sequence pre cleanup = $tempseq ($templength)\n

"; } $tempseq = uc($tempseq); $_ = $tempseq; $A = tr/A/a/; $C = tr/C/c/; $G = tr/G/g/; $T = tr/T/t/; $total = $A+$C+$G+$T; $templength = length($tempseq); if ($templength <= 0){ print "

Sequence length is zero. Search aborted!

"; &output_footer(); exit(1); } if(($total/$templength) >= 0.70){ # its DNA $tempseq =~ s/[^ACGTUNRYKMBDHVSWNX]//ig; if ($DEBUG==1){ print "Sequence is DNA

"; print "BLAST for nulceotides (removing illegals)
"; $templength = length($tempseq); print "Sequence post cleanup = $tempseq ($templength)\n

"; } $sequence = $tempseq; return("D"); } else{ # its protein $tempseq =~ s/[^ARNDCEQGHILKMFPSTWYVXZ]//ig; if($DEBUG==1){ print "Sequence is a protein

"; print "BLAST for proteins (removing illegals)
"; $templength = length($tempseq); print "Sequence post cleanup = $tempseq ($templength)\n

"; } $sequence = $tempseq; return("P"); } } # end of sub ############################################################### #trap errors ############################################################### sub trap_errors { my ($sequence_type) = @_; if ($blast_type eq "") { print "

Error: No Blast executable specified!<\H2>"; &output_footer; exit; } if ($sequence eq "") { print "

Error: No Sequence given!<\H2>"; &output_footer; exit; } unless ($address =~/\@/ or $address eq "") { print "

Error: Invalid e-mail address specified"; &output_footer; exit; } if (($blast_type eq "wutblastn" && $sequence_type eq "D") || ($blast_type eq "wublastp" && $sequence_type eq "D")){ print "
";
	print "You appear to have submitted a DNA sequence
"; print "but you cannot perform this type of search ($blast_type) using a DNA sequence

"; print "

"; &output_footer(); exit; } if (($blast_type eq "wublastn" && $sequence_type eq "P") || ($blast_type eq "wublastx" && $sequence_type eq "P")){ print "
";
	print "You appear to have submitted a protein sequence
"; print "but you cannot perform this type of search ($blast_type) using a protein sequence

"; &output_footer(); exit; } if (($blast_type eq "wutblastx" && $sequence_type eq "P")){ print "

";
	print "You appear to have submitted a protein sequence
"; print "but you cannot perform this type of search ($blast_type) using a protein sequence

"; &output_footer(); exit; } } # end of sub ############################################################### #handle the options ############################################################### sub process_options { my $seqlength = length($sequence); #Decrease the cut-off if the sequence is very small if ($seqlength < 30 ) { $OPTIONS=$OPTIONS . " S=10 "; } if ($align eq "") { $align=100; } $OPTIONS=$OPTIONS . " B=$align "; # number of results to display #if ($blast_type ne "blastn" && $blast_type ne "wublastn" && $filter_off ne "yes") { # $OPTIONS=$OPTIONS . " -filter=seg "; #} if ($filter_off eq "yes") { print "

Low complexity filtering is switched off

"; } if ($repeatmasker eq "no") { print "

REPEATMASKER is switched off

"; } if ($DEBUG==1){ print "Options: $OPTIONS

"; } } #end of sub ################################################################# #make a temp file to hold the sequence ################################################################# sub make_temp_file { open (TEMP, ">$blast_home/Blast.$$.tmp") || print "

Error: Couldn't create temporary file, try again later

";; select((select(TEMP),$|=1)[0]); print TEMP ">$fasta_name\n"; print TEMP "$sequence\n"; close (TEMP); } # end of sub ##################################################################### #Make a repeat masked file sequence if required ##################################################################### sub make_mask_file { print "REPEATMASKER is being used

"; system("$rm_binary $blast_home/Blast.$$.tmp"); ## remove unwanted files unlink("$blast_home/Blast.$$.tmp.RepMask") or print "Cannot delete RepMask file: $!"; unlink("$blast_home/Blast.$$.tmp.RepMask.cat") or print "Cannot delete RepMask.cat file: $!"; unlink("$blast_home/Blast.$$.tmp.masked.log") or print "Cannot delete masked log file: $!"; my $maskfile="Blast.$$.tmp.masked"; open(MASK,"$blast_home/$maskfile") or print "Cannot open mask file:$!"; open(TEMPFILE,">$blast_home/Blast.$$.tmp") or print "Cannot create temp file: $!"; while () { print TEMPFILE; } close (MASK); close (TEMPFILE); unlink ("$blast_home/$maskfile") or print "Cannot delete mask file: $!"; } # end of sub ##################################################################### #Spawn the blast job if email address specified ##################################################################### sub send_results_to_email { print "Blast job submitted

"; print "

Running blast....
"; open(BLAST,"bsub -I -q $blastqueue \"$blast_type $database $blast_home/Blast.$$.tmp -P6 $OPTIONS\"|"); @mail=; close (BLAST); print "

Sending mail to $address...."; open (MAIL, "| Mail -s $fasta_name $address"); foreach $line (@mail){ #$line =~ s/\/nfs\/.*\/(.*)/$1/; # remove references to NFS locations $line =~ s/.*\/(.*)/$1/; # remove references to NFS locations print MAIL $line; } close(MAIL); print "done.
"; unlink ("$blast_home/Blast.$$.tmp") or print "Cannot delete temp file: $!"; ## remove the temp file print "

"; #print "Options=$OPTIONS

"; print "Please note that sending a number of searches in a rapid succession "; print "will have the effect of making the total time taken for the searches "; print "a lot longer. It is probably best to wait a couple of minutes before "; print "requesting any more searches

"; } # end of sub ##################################################################### # print html marked up output ##################################################################### sub send_results_to_browser { #print "

\n";
	#print "Blast Job: $$

"; print "

";
	open(BLASTOUT,"/usr/local/bin/$blast_type $database $blast_home/Blast.$$.tmp -P6 $OPTIONS  |");
	while () {print "    $_";}
	#&default_parse(\*BLASTOUT);
	close (BLASTOUT);
	unlink ("$blast_home/Blast.$$.tmp") or print "Cannot delete temp file: $!";	## remove the temp file

} # end of sub

#####################################################################
#Log output to file
#####################################################################
sub write_log {

	my ($logsequence,$source) = "";

	$logsequence = $in{'sequence'};
	$logsequence =~ s/[\n\r\t ]//ig;		# make sequence a single line
	if ($file ne "") {
		$source = "upload";
	}
	else{
		$source = "box";
	}

	# open as briefly as possible....
	open (LOG, ">>$blast_home/blastserver_log") || open (LOG, ">$blast_home/blast.$$.failed_write_log");
	print LOG "\nPID=$$:Seq=$logsequence:SeqSrc=$source:";
	print LOG "Tmpfile=$blast_home/Blast.$$.tmp:Opts=$OPTIONS:Addr=$address:";
	print LOG "Type=$blast_type:Alnmts=$align:Db=$database:";
	print LOG "Q=$blastqueue:Req=$ENV{'REMOTE_HOST'}:Time=$cpu_info[6]";
	close LOG; 

} # end of sub

#####################################################################
#mark up the blast output
#####################################################################
sub default_parse {

my $start;
my $blastfile= shift;

#read the acedb_info.map file to see whether these objects can be marked up to webace
#open(ACEDB,"$blast_home/acedb_info.map") or die "Cannot open Ace map info: $!";
#while() {
#	if (/^#/) {next;}
#	if (/(\S+)\s+(\S+)\s+(\S+)/) {
#		$webaceprefix{$1}=$2;
#		$webacesuffix{$1}=$3;
#	}
#}
#close (ACEDB);

while (<$blastfile>) {

        $_ =~ s/Job <.*>.*//; 				# remove references to LSF job number
        #$_ =~ s/\/nfs\/.*\/(.*)/$1/; 			# remove references to NFS locations
	$_ =~ s/Database:.*\/(.*)/Database: $1/;    	# remove references to NFS locations
	$_ =~ s/Title:.*\/(.*)/Title: $1/;    		# remove references to NFS locations

	#print out the blast header
	if (/Sequences producing High/i) {
		$start="yes";
		print;
		next;
	}
	if (/\*\*\*\s+none\s+\*\*\*/i) {
		$start="";
		print "\n";
	}

	if ($start eq "") {
		print;
		next;
	}
	push(@blastresults,$_);

	if (/^(\S+)\s+/) {
		$sequence=$1;
	}
	if (/^>(\S+)/) {
		$sequence=$1;
	}
	if (/acc=(\S+)/) {
		$acc{$sequence}=$1;
	}
}

#mark up the results

foreach $line (@blastresults) {

       	#change the accession to a more meaningful comment
       	$line=~s/acc=/EMBL:/; 

       	if ($line=~/^>/) {$start="no";print "\n";}

       	#mark up the summary info
       	if ($start eq "yes" && $line=~/^(\S+)(\s+.+)\s+(\S+)(\s+\S+\s+\S+)$/ && $acc{$1} ne ""){
		print "$1<\/A>$2$3$4\n";
	}
       	if ($start eq "yes" && $line=~/^(\S+)(\s+.+)\s+(\S+)(\s+\S+\s+\S+)$/ && $acc{$1} eq ""){
		print "$1$2$3$4\n";
	}
			      
       if ($line =~ /Total cpu time/){
		@cpu_info = split(/\s+/,$line);
		$cpu_info[6] =~ s/[stu]//;
#		#print "CPU was: $cpu_info[6] seconds!
"; &count_hits($cpu_info[6]); } $line =~ s/\/nfs\/.*\/(.*)/$1/; # remove references to NFS locations #mark up the alignments if ($start eq "no") { my $db_link = $database; #$db_link =~ s/\/nfs\/.*\/(.*)/$1/; # remove references to NFS locations $db_link =~ s/.*\/(.*)/$1/; # remove references to NFS locations #print if the acc no exists if ($line=~/>(\S+)(\s+.+)/ || $line=~/>(\S+)$/) { #print the marked up sequence name print ">$1<\/A>"; #add a link to retrieve the full sequence... print " [Full Sequence] "; #if we have an accession mark this up if ($acc{$1} ne "") { print " [EMBL:$acc{$1}]"; } #if this file has a acedb_info.map emtry then mark this up if ($webaceprefix{$database} ne "") { print " [Webace] "; } #this is the dotter line which is not used at present #[DotPlot]$2\n";} #print the rest of the fasta header print "$2\n"; } #print the alignments if ($line=~/^>/) {} else {print $line;} } } } # end of sub ##################################################################### #write the NPH header ##################################################################### sub output_no_content_header { if ($cgititle eq "") {$cgititle=""} if (!$cgiicon) {$cgiicon="hgp.gif"} open (HEAD, "/nfs/WWW/cgi-bin/header_no_content_type -icon $cgiicon -title \"$cgititle\" |") or die "Cannot write header: $!"; while () {print} close (HEAD); } # end of sub ##################################################################### #record the CPU cost of this job ##################################################################### sub count_hits { my ($cpu) = @_; $home = "/nfs/WWW/admin_scripts/blast/"; my $now = `date '+%d-%b-%C%y'`; chomp ($now); my $found =0; open (RECORD,"$home/blast_use.log") or die "Cannot read record log file: $!"; @lines = ; close(RECORD); open (RECORD,">$home/blast_use.log") or die "Cannot update record log file: $!"; foreach $line (@lines){ chomp($line); my ($date, $count, $time) = split(/:/,$line,3); if ($date eq "$now"){ $count++; $time += $cpu; $line = join(":",($date, $count, $time)); print RECORD "$line\n"; $found = 1; } else{ print RECORD "$line\n"; } } if ($found ==0){ $line = join(":",($now, 1, $cpu)); #append a new days record print RECORD "$line\n"; } close(RECORD); } # end of sub