Perl code - 130 lines

Link:

Perl, pasted on Feb 2:

#! /export/shared_apps/perl/bin/perl

use Bio::SeqIO;
use Bio::SearchIO;
use Bio::DB::GenPept;
use Bio::Structure::IO;
use Bio::SeqUtils;
use Bio::Seq;
use Bio::Tools::pSW;
use Bio::AlignIO;
use DBI;
use Class::Struct;
use CGI;

# Struct for creating the alignment map of the true physical positions, since the BLAST alignment does not 
# conserve the given amino acid positions
struct Map => {
    ncbiid => '$',
    pdbid => '$',
    pdbchain => '$',
    ncbipos => '@',
    pdbpos => '@',
};

# Hash for matching amino acids, and its reverse hash
my %aa1= qw (ALA A ARG R ASN N ASP D CYS C GLN Q GLU E GLY G HIS H ILE I LEU L LYS K MET M PHE F PRO P SER S THR T TRP W TYR Y VAL V);
my %aa2=reverse (%aa1);

# function to perform the actual BLAST alignments to find matches for the protein sequences taken from MutDB
sub parseBlastFile {

        my $blastoutfname = $_[0];
        my $pos = $_[1];

    # Open blast output file
    my $blastres = new Bio::SearchIO(-format => 'blast',
                                     -file => $blastoutfname);

    # Now get identical hits store as an array containing a list of alternating names and $hsp data
    # @hits = ( $name1, $hsp1, $name2, $hsp2, .... ) $hspi == a SearchIO hsp object

    # blast hits
    my @hits = ();

    # cycle through all of the results
    while ( my $res = $blastres->next_result ) {

        # cycle through all of the hits within the result
        while( my $hit = $res->next_hit ) {

            # cycle through each hsp object
            while ( my $hsp = $hit->next_hsp ) {

                # if the identity percentage between the query sequence and the resulting match is 100%, continue
                if ($hsp->frac_identical==1.0){

                    # if the amino acid position of the mutation is within the sequence that matches between
                    # the original query and the result, continue
                    if($hsp->start('query') <= $pos && $hsp->end('query') >= $pos) {
                        print $hit->name, " ";
                        print $hsp->frac_identical(), " ";
                       print OUTFILE $hit->name, " ";
                        print OUTFILE $hsp->frac_identical(), " ";
                        print OUTFILE "\n";
                        print "\n";

                        # if the total length of the matched sequence section is more than 15 amino acids, keep it
                        # all of these special cases eliminates short strings of repeated characters and other such
                        # matches

                        if($hsp->length('total') > 15){
                            push( @hits, $hit->name, $hsp );
                        }
                    }
                }
            }
        }
    }

    # return all of the results found, if any
    return @hits;
}

###################################################################################################################################################################################
# end OF SUB routine and beginning of main block                                                                                                                                  #
###################################################################################################################################################################################


# now we start the main bulk of the script

# open the output logfile
$error = open(OUTFILE, ">blastStatus.txt") or die "Can't create smdbsnplog.txt: $!";

my $blastfile = $ARGV[0];
my $pos = $ARGV[1];

                @hits = parseBlastFile( $blastfile, $pos);

                # get the number of results
                my $size = scalar @hits;
                print "\nblast results: ", $size, "\n";
                print OUTFILE "Sequence: ", $seq_str, "\n";
                print OUTFILE "blast results: ", $size, "\n";

                # if we got any blast results, continue
                if ($size!=0) {

                    # the total number of pdb records kept
                    my $pdbcnt = 0;

                    # cycle through the blast results
                    for (my $x=0; $x<@hits; $x+=2) {

                        # keep the hit
                        my $hsphit = $hits[$x+1];

                        # save the middle two letters of the PDB structure identifier
                        #my $pdbslice = substr($hits[$x], 1, 2);
                        my @hits_substr = split('|',$hits[$x]);
                 print "\n K1000 Complete hit variable: $hits[$x]";
                        my $pdbslice = lc($hits_substr[5].$hits_substr[6]);
                        print "\nK1001 Save the middle two letters of PDB structure identifier: $pdbslice";

                        # save the entire PDB structure identifier
                        my $pdbid = lc($hits_substr[4].$hits_substr[5].$hits_substr[6].$hits_substr[7]);
                        print "\nK1002 Save the entire PDB structure identifier: $pdbid";
                }
                }

Output:

1
2

Can't locate Bio/SeqIO.pm in @INC (@INC contains: /usr/lib/perl5/5.8.0/i486-linux /usr/lib/perl5/5.8.0 /usr/lib/perl5/site_perl/5.8.0/i486-linux /usr/lib/perl5/site_perl/5.8.0 /usr/lib/perl5/site_perl .) at line 3.
BEGIN failed--compilation aborted at line 3.

Create a new paste based on this one

Comments: