[Bio] / FigKernelScripts / p3-blast.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/p3-blast.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/env perl
2 :     #
3 :     # Copyright (c) 2003-2015 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 :    
20 :     use strict;
21 :     use warnings;
22 :     use ServicesUtils;
23 :     use gjoseqlib;
24 :     use BlastInterface;
25 :     use Data::Dumper;
26 :     use P3DataAPI;
27 :    
28 :     =head1 Blast FASTA Data
29 :    
30 :     svc_blast.pl [ options ] type blastdb
31 :    
32 :     Blast the input against a specified blast database. The input should be a FASTA file. The blast database
33 :     can also be a FASTA file, the input itself, or it can be a genome ID.
34 :    
35 :     =head2 Parameters
36 :    
37 :     See L<ServicesUtils> for more information about common command-line options.
38 :    
39 :     The positional parameters are the name of the blast program (C<blastn>, C<blastp>, C<blastx>, or C<tblastn>)
40 :     followed by the file name of the blast database. If the blast database is not pre-built, it will be built in
41 :     place. If the database is not found, it is presumed to be a genome ID. If the database name is omitted, the
42 :     input will be blasted against itself.
43 :    
44 :     The additional command-line options are as follows.
45 :    
46 :     =over 4
47 :    
48 :     =item hsp
49 :    
50 :     If specified, then the output is in the form of HSP data (see L<Hsp>). This is the default, and is mutually exclusive with C<sim>.
51 :    
52 :     =item sim
53 :    
54 :     If specified, then the output is in the form of similarity data (see L<Sim>). This parameter is mutually exclusive with C<hsp>.
55 :    
56 :     =item BLAST Parameters
57 :    
58 :     The following may be specified as BLAST parameters
59 :    
60 :     =over 8
61 :    
62 :     =item maxE
63 :    
64 :     Maximum E-value (default C<1e-10>).
65 :    
66 :     =item maxHSP
67 :    
68 :     Maximum number of returned results (before filtering). The default is to return all results.
69 :    
70 :     =item minScr
71 :    
72 :     Minimum required bit-score. The default is no minimum.
73 :    
74 :     =item percIdentity
75 :    
76 :     Minimum percent identity. The default is no minimum.
77 :    
78 :     =item minLen
79 :    
80 :     Minimum permissible match length (used to filter the results). The default is no filtering.
81 :    
82 :     =back
83 :    
84 :     =back
85 :    
86 :     =cut
87 :    
88 :     # map each blast tool name to the type of blast database required
89 :     use constant BLAST_TOOL => { blastp => 'prot', blastn => 'dna', blastx => 'prot', tblastn => 'dna' };
90 :    
91 :     # Get the command-line parameters.
92 :     my ($opt, $helper) = ServicesUtils::get_options('type blastdb',
93 :     ['output' => hidden => { one_of => [ [ 'hsp' => 'produce HSP output'], ['sim' => 'produce similarity output'] ]}],
94 :     ['maxE|e=f', 'maximum e-value', { default => 1e-10 }],
95 :     ['maxHSP|b', 'if specified, the maximum number of returned results (before filtering)'],
96 :     ['minScr=f', 'if specified, the minimum permissible bit score'],
97 :     ['percIdentity=f', 'if specified, the minimum permissible percent identity'],
98 :     ['minLen|l=i', 'if specified, the minimum permissible match lengt (for filtering)'],
99 :     { input => 'whole' });
100 :     # Open the input file.
101 :     my $ih = ServicesUtils::ih($opt);
102 :     # Get the positional parameters.
103 :     my ($blastProg, $blastdb) = @ARGV;
104 :     if (! $blastProg) {
105 :     die "You must specify the blast tool.";
106 :     }
107 :     my $blastDbType = BLAST_TOOL->{$blastProg};
108 :     if (! $blastDbType) {
109 :     die "Invalid blast tool specified.";
110 :     }
111 :     # This hash contains the BLAST parameters.
112 :     my %blast;
113 :     $blast{outForm} = $opt->output // 'hsp';
114 :     $blast{maxE} = $opt->maxe;
115 :     $blast{maxHSP} = $opt->maxhsp // 0;
116 :     $blast{minIden} = $opt->percidentity // 0;
117 :     $blast{minLen} = $opt->minlen // 0;
118 :     # Get the input triples. These are the query sequences.
119 :     my @query = gjoseqlib::read_fasta($ih);
120 :     # Now we need to determine the BLAST database.
121 :     my $blastDatabase;
122 :     if (! $blastdb) {
123 :     # Use the query.
124 :     $blastDatabase = \@query;
125 :     } elsif (-s $blastdb) {
126 :     # Here the user specified a file name.
127 :     $blastDatabase = $blastdb;
128 :     } else {
129 :     # Not a file name, so we assume it is a genome.
130 :     #my $gHash = $helper->genome_fasta([$blastdb], $blastDbType);
131 :     p3_genome_fasta($blastdb, $blastDbType);
132 :     $blastDatabase = $blastdb;
133 :     if (! $blastDatabase) {
134 :     die "$blastdb is not a file or a genome ID."
135 :     }
136 :     }
137 :     # Now run the BLAST.
138 :     my $matches = BlastInterface::blast(\@query, $blastDatabase, $blastProg, \%blast);
139 :     # Format the output.
140 :     for my $match (@$matches) {
141 :     print join("\t", @$match) . "\n";
142 :     }
143 :    
144 :    
145 :     sub p3_genome_fasta
146 :     {
147 :     my ($genome_id, $blastDbType) = @_;
148 :     my $d = P3DataAPI->new();
149 :    
150 :     my $gto = $d->gto_of($genome_id);
151 :     if ($blastDbType eq "dna") {
152 :     $gto->write_contigs_to_file("$genome_id");
153 :     } else {
154 :     $gto->write_protein_translations_to_file("$genome_id");
155 :     }
156 :     }
157 :    
158 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3