[Bio] / FigKernelScripts / p3-blast.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/p3-blast.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Mon Jul 17 22:42:43 2017 UTC revision 1.3, Wed Mar 14 23:38:15 2018 UTC
# Line 19  Line 19 
19    
20  use strict;  use strict;
21  use warnings;  use warnings;
22  use ServicesUtils;  use P3Utils;
23  use gjoseqlib;  use gjoseqlib;
24  use BlastInterface;  use BlastInterface;
25  use Data::Dumper;  use Data::Dumper;
26  use P3DataAPI;  use P3DataAPI;
27    use Hsp;
28    
29  =head1 Blast FASTA Data  =head1 Blast FASTA Data
30    
# Line 34  Line 35 
35    
36  =head2 Parameters  =head2 Parameters
37    
 See L<ServicesUtils> for more information about common command-line options.  
   
38  The positional parameters are the name of the blast program (C<blastn>, C<blastp>, C<blastx>, or C<tblastn>)  The positional parameters are the name of the blast program (C<blastn>, C<blastp>, C<blastx>, or C<tblastn>)
39  followed by the file name of the blast database. If the blast database is not pre-built, it will be built in  followed by the file name of the blast database. If the blast database is not pre-built, it will be built in
40  place. If the database is not found, it is presumed to be a genome ID. If the database name is omitted, the  place. If the database is not found, it is presumed to be a genome ID. If the database name is omitted, the
41  input will be blasted against itself.  input will be blasted against itself.
42    
43    The options in L<P3Utils/ih_options> can be used to override the standard input.
44    
45  The additional command-line options are as follows.  The additional command-line options are as follows.
46    
47  =over 4  =over 4
48    
49  =item hsp  =item hsp
50    
51  If specified, then the output is in the form of HSP data (see L<Hsp>). This is the default, and is mutually exclusive with C<sim>.  If specified, then the output is in the form of HSP data (see L<Hsp>). This is the default, and is mutually exclusive with C<sim> and C<tbl>.
52    
53  =item sim  =item sim
54    
55  If specified, then the output is in the form of similarity data (see L<Sim>). This parameter is mutually exclusive with C<hsp>.  If specified, then the output is in the form of similarity data (see L<Sim>). This parameter is mutually exclusive with C<hsp> and C<tbl>.
56    
57    =item tbl
58    
59    If specified, then the output is in the form of a six-column table: query ID, query description, subject ID, subject description, percent identity, and e-value.
60    
61    =item best
62    
63    If specified, then only the best match for each query sequence will be output.
64    
65  =item BLAST Parameters  =item BLAST Parameters
66    
# Line 88  Line 97 
97  # map each blast tool name to the type of blast database required  # map each blast tool name to the type of blast database required
98  use constant BLAST_TOOL => { blastp => 'prot', blastn => 'dna', blastx => 'prot', tblastn => 'dna' };  use constant BLAST_TOOL => { blastp => 'prot', blastn => 'dna', blastx => 'prot', tblastn => 'dna' };
99    
100    $| = 1;
101  # Get the command-line parameters.  # Get the command-line parameters.
102  my ($opt, $helper) = ServicesUtils::get_options('type blastdb',  my $opt = P3Utils::script_opts('type blastdb',
103          ['output' => hidden => { one_of => [ [ 'hsp' => 'produce HSP output'], ['sim' => 'produce similarity output'] ]}],          P3Utils::ih_options(),
104            ['output' => hidden => { one_of => [ [ 'hsp' => 'produce HSP output'], ['sim' => 'produce similarity output'], ['tbl' => 'produce table output']]}],
105          ['maxE|e=f', 'maximum e-value', { default => 1e-10 }],          ['maxE|e=f', 'maximum e-value', { default => 1e-10 }],
106          ['maxHSP|b', 'if specified, the maximum number of returned results (before filtering)'],          ['maxHSP|b', 'if specified, the maximum number of returned results (before filtering)'],
107          ['minScr=f', 'if specified, the minimum permissible bit score'],          ['minScr=f', 'if specified, the minimum permissible bit score'],
108          ['percIdentity=f', 'if specified, the minimum permissible percent identity'],          ['percIdentity=f', 'if specified, the minimum permissible percent identity'],
109          ['minLen|l=i', 'if specified, the minimum permissible match lengt (for filtering)'],          ['minLen|l=i', 'if specified, the minimum permissible match lengt (for filtering)'],
110          { input => 'whole' });          ['best', 'only output best match for each query sequence']
111            );
112  # Open the input file.  # Open the input file.
113  my $ih = ServicesUtils::ih($opt);  my $ih = P3Utils::ih($opt);
114  # Get the positional parameters.  # Get the positional parameters.
115  my ($blastProg, $blastdb) = @ARGV;  my ($blastProg, $blastdb) = @ARGV;
116  if (! $blastProg) {  if (! $blastProg) {
# Line 108  Line 120 
120  if (! $blastDbType) {  if (! $blastDbType) {
121      die "Invalid blast tool specified.";      die "Invalid blast tool specified.";
122  }  }
123    # Determine the output type.
124    my $outForm = $opt->output // 'hsp';
125  # This hash contains the BLAST parameters.  # This hash contains the BLAST parameters.
126  my %blast;  my %blast;
127  $blast{outForm} = $opt->output // 'hsp';  $blast{outForm} = ($outForm eq 'sim' ? 'sim' : 'hsp');
128  $blast{maxE} = $opt->maxe;  $blast{maxE} = $opt->maxe;
129  $blast{maxHSP} = $opt->maxhsp // 0;  $blast{maxHSP} = $opt->maxhsp // 0;
130  $blast{minIden} = $opt->percidentity // 0;  $blast{minIden} = $opt->percidentity // 0;
131  $blast{minLen} = $opt->minlen // 0;  $blast{minLen} = $opt->minlen // 0;
132    # Save the best-only option.
133    my $best = $opt->best;
134    # Print the output headers.
135    if ($outForm eq 'hsp') {
136        P3Utils::print_cols([qw(qid qdef qlen sid sdef slen score e-val pN p-val match-len identity positive gaps dir q-start q-end q-sequence s-start s-end s-sequence)]);
137    } elsif ($outForm eq 'sim') {
138        P3Utils::print_cols([qw(qid sid pct-identity match-len mismatches gaps q-start q-end s-start s-end e-val score q-len s-len tool)]);
139    } elsif ($outForm eq 'tbl') {
140        P3Utils::print_cols([qw(qid qdef sid sdef pct-identity e-val)]);
141    }
142  # Get the input triples. These are the query sequences.  # Get the input triples. These are the query sequences.
143  my @query = gjoseqlib::read_fasta($ih);  my @query = gjoseqlib::read_fasta($ih);
144  # Now we need to determine the BLAST database.  # Now we need to determine the BLAST database.
# Line 127  Line 151 
151      $blastDatabase = $blastdb;      $blastDatabase = $blastdb;
152  } else {  } else {
153      # Not a file name, so we assume it is a genome.      # Not a file name, so we assume it is a genome.
     #my $gHash = $helper->genome_fasta([$blastdb], $blastDbType);  
154      p3_genome_fasta($blastdb, $blastDbType);      p3_genome_fasta($blastdb, $blastDbType);
155      $blastDatabase = $blastdb;      $blastDatabase = $blastdb;
156      if (! $blastDatabase) {      if (! $blastDatabase) {
# Line 136  Line 159 
159  }  }
160  # Now run the BLAST.  # Now run the BLAST.
161  my $matches = BlastInterface::blast(\@query, $blastDatabase, $blastProg, \%blast);  my $matches = BlastInterface::blast(\@query, $blastDatabase, $blastProg, \%blast);
162    my $lastQuery = '';
163  # Format the output.  # Format the output.
164  for my $match (@$matches) {  for my $match (@$matches) {
165      print join("\t", @$match) . "\n";      my $qid = $match->[0];
166        # The best match for a query sequence is always the first encountered.
167        if (! $best || $qid ne $lastQuery) {
168            if ($outForm eq 'tbl') {
169                # Must convert from HSP to table.
170                my $hsp = $match;
171                $match = [$hsp->qid(), $hsp->qdef(), $hsp->sid(), $hsp->sdef(), $hsp->pct(), $hsp->e_val()];
172            }
173            P3Utils::print_cols($match);
174            $lastQuery = $qid;
175        }
176  }  }
177    
178    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3