use strict;
use warnings;

use LWP 5.64;
use URI;
use LWP::Simple;

# Directory prefix (including the trailing slash) for the saved result pages.
our $dirname = "output/";

get_PDB_protein_names("input/RS126_PDB_temp.txt");    # call subroutine

# get_PDB_protein_names($inputfile)
#
# Reads $inputfile three lines at a time. The first line of each group is
# used as the record name, the second is submitted to NCBI BLAST as the
# query, and the third is read but not used (presumably the name, the
# sequence and the secondary structure -- confirm against the data file).
#
# For every record the sub:
#   1. submits the query (CMD=Put) and extracts the request id (RID),
#   2. polls the result URL (CMD=Get) every 3 seconds until the reported
#      status is READY (or a terminal FAILED / UNKNOWN status),
#   3. writes the last page fetched to
#      "${dirname}temp<first 4 chars of the name>.htm" for checking.
#
# Parameters:
#   $inputfile - path of the input file.
#
# Returns nothing useful. Exits the program (status 1) when the input file
# cannot be opened, and dies when a polling request cannot be fetched.
sub get_PDB_protein_names {
    my ($inputfile) = @_;

    my $blast_cgi = "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi";

    # open data file
    my $in;
    if (open($in, '<', $inputfile)) {
        print "InputFile '", $inputfile, "' opened OK \n";
    }
    else {
        print "Cannot open InputFile '", $inputfile, "'\n";
        exit 1;
    }

    while (defined(my $lineName = <$in>)) {
        chomp $lineName;

        my $lineSeq     = <$in>;
        my $lineSecStru = <$in>;    # consumed only to advance to the next record

        # A record without a query line means the file ended mid-record.
        if (!defined $lineSeq) {
            warn "Incomplete record for '$lineName' in '$inputfile', skipping\n";
            last;
        }
        chomp $lineSeq;

        print "Step Blast for " . $lineName . ": \n";

        # Build the submission URL with URI so the query value is escaped.
        my $put_uri = URI->new($blast_cgi);
        $put_uri->query_form(
            CMD          => 'Put',
            QUERY        => $lineSeq,
            DATABASE     => 'nr',
            PROGRAMS     => 'psiBlast',
            SERVICE      => 'plain',
            PAGE         => 'Proteins',
            ALIGNMENTS   => 2000,
            DESCRIPTIONS => 2000,
        );
        my $content = get($put_uri->as_string);

        # The RID is reset for every record so that a failed submission can
        # never reuse the request id of the previous record.
        my $step10RID = "";
        if (!defined $content) {
            # check if any problem, mainly due to protein is too long
            warn "Couldn't submit the query for '$lineName'\n";
        }
        elsif ($content =~ /RID = (\w{11})/) {
            $step10RID = $1;
            print $step10RID . "\n";
        }
        else {
            print "They do not have RID information.\n";
        }

        if ($step10RID ne "") {
            my $step10url_2 = $blast_cgi . "?RID=" . $step10RID
                . "&CMD=Get&ALIGNMENTS=2000&DESCRIPTIONS=2000";
            print $step10url_2 . "\n";

            my $step10Status = "";
            while ($step10Status ne "READY") {
                sleep 3;    # pause before every poll of the server

                $content = get($step10url_2);
                die "Couldn't get $step10url_2" unless defined $content;

                if ($content =~ /Status=(\w+)/) {
                    $step10Status = $1;
                    print "Status=" . $step10Status . "...\n";

                    # FAILED and UNKNOWN never turn into READY; stop polling
                    # instead of looping forever.
                    if ($step10Status eq "FAILED" or $step10Status eq "UNKNOWN") {
                        warn "Search $step10RID for '$lineName' ended with status $step10Status\n";
                        last;
                    }
                }
            }
            print "\n";
        }

        # write $content to a temporary file for checking
        my $outfile = $dirname . "temp" . substr($lineName, 0, 4) . ".htm";
        my $page    = defined $content ? $content : "";
        my $out;
        if (!open($out, '>', $outfile)) {
            warn "Cannot open OutputFile '$outfile': $!\n";
            next;
        }
        print {$out} $page;
        close($out) or warn "Cannot close OutputFile '$outfile': $!\n";
    }

    close($in);
    return;
}