[Bio] / FigKernelScripts / svr_find_fused_genes.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_find_fused_genes.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : fangfang 1.1 #
2 :     # This is a SAS Component
3 :     #
4 :    
5 :     #
6 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
7 :     # for Interpretations of Genomes. All Rights Reserved.
8 :     #
9 :     # This file is part of the SEED Toolkit.
10 :     #
11 :     # The SEED Toolkit is free software. You can redistribute
12 :     # it and/or modify it under the terms of the SEED Toolkit
13 :     # Public License.
14 :     #
15 :     # You should have received a copy of the SEED Toolkit Public License
16 :     # along with this program; if not write to the University of Chicago
17 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
18 :     # Genomes at veronika@thefig.info or download a copy from
19 :     # http://www.theseed.org/LICENSE.TXT.
20 :     #
21 :    
22 :     use strict;
23 :     use Data::Dumper;
24 :     use Carp;
25 :     use Getopt::Long;
26 :    
27 :    
28 :     use AlignTree;
29 :     use ATserver;
30 :     use SeedAware;
31 :     use SeedUtils;
32 :    
33 :     use ffxtree;
34 :     use gjoalignment;
35 :     use gjoseqlib;
36 :    
37 :     my $usage = <<"End_of_Usage";
38 :    
39 :     usage: svr_find_fused_genes [options] [<seqs.fa] >fusion.tbl
40 :    
41 :     -a n_processor - number of processors to use (D = 4)
42 :     -b database - database to search against: SEED (D), PSEED, PPSEED, FASTA file name, FIG genome ID
43 :     -i min_ident - minimum fraction identity (D = 0.12)
44 :     -u max_q_uncov - maximum unmatched query (D = 80)
45 :     -r report_file - output file of psiblast report
46 :     -o output - output psiblast search hits in fasta
47 :     -t html_tree - output html tree painted with fusion genes
48 :     -l - run search locally if a local database is specified
49 :     -peg fid - query gene ID
50 :    
51 :     End_of_Usage
52 :    
53 :     my ($help, $local, $db, $nthread, $uncov, $min_ident, $report_file, $html_tree, $output, $query_peg);
54 :    
55 :     GetOptions("h|help" => \$help,
56 :     "l|local" => \$local,
57 :     "a=i" => \$nthread,
58 :     "b|db=s" => \$db,
59 :     "i=f" => \$min_ident,
60 :     "u=i" => \$uncov,
61 :     "o=s" => \$output,
62 :     "r=s" => \$report_file,
63 :     "t=s" => \$html_tree,
64 :     "peg=s" => \$query_peg);
65 :    
66 :     $help and die $usage;
67 :    
68 :     my $opts;
69 :    
70 :     $opts->{nthread} = $nthread || ($local ? 2 : 4);
71 :     $opts->{db} = gjoseqlib::read_fasta($db) if -s $db;
72 :     $opts->{db} ||= $ENV{SAS_SERVER} || 'SEED';
73 :     $opts->{max_q_uncov} = $uncov || 100;
74 :     $opts->{min_ident} = $min_ident || 0.1;
75 :    
76 :     $opts->{incremental} = 1;
77 :     $opts->{fast} = 1;
78 :     $opts->{max_query_nseq} = 200;
79 :    
80 :     if ($query_peg) {
81 :     my $data = SeedAware::run_gathering_output("echo '$query_peg' | svr_fasta -protein -fasta") or die "Abort: no query sequences.\n";
82 :     $opts->{profile} = gjoseqlib::read_fasta(\$data);
83 :     } else {
84 :     $opts->{profile} = gjoseqlib::read_fasta();
85 :     }
86 :    
87 :     my ($AT, $hits, $report, $ret);
88 :    
89 :     if ($local) {
90 :     ($hits, $report) = AlignTree::psiblast_search($opts);
91 :     } else {
92 :     $AT = ATserver->new();
93 :     $ret = $AT->psiblast_search($opts);
94 :     $hits = $ret->{rv};
95 :     $report = $ret->{report};
96 :     }
97 :    
98 :     if ($report_file) {
99 :     my $report_string = join("\n", map { join "\t", @$_ } sort { $b->[1] <=> $a->[1] } values %$report) . "\n";
100 :     AlignTree::print_string($report_file, $report_string);
101 :     }
102 :    
103 :     gjoseqlib::print_alignment_as_fasta($output, $hits) if $output;
104 :    
105 :     my @incl = grep { $report->{$_}->[4] =~ /included/i } keys %$report or die "No sequences found.\n";
106 :     my @uncov1 = sort { $a <=> $b } map { $report->{$_}->[9] } @incl;
107 :     my @uncov2 = sort { $a <=> $b } map { $report->{$_}->[10] } @incl;
108 :     my $mid1 = $uncov1[int(@uncov1/2)];
109 :     my $mid2 = $uncov2[int(@uncov2/2)];
110 :    
111 :     my @fusions;
112 :     for (@incl) {
113 :     my ($u1, $u2) = @{$report->{$_}}[9, 10];
114 :     my $fu1 = $u1 if $u1 > max($mid1 + 50, 100);
115 :     my $fu2 = $u2 if $u2 > max($mid2 + 50, 100);
116 :     if ($fu1 || $fu2) {
117 :     print join("\t", $_, $u1, $u2) . "\n";
118 :     my $str = join("\t", $_, 'Fusion:');
119 :     $str .= " beg=$fu1" if $fu1;
120 :     $str .= " end=$fu2" if $fu2;
121 :     push @fusions, $str;
122 :     }
123 :     }
124 :    
125 :     if ($html_tree) {
126 :     my $tmpdir = SeedAware::location_of_tmp();
127 :     my $tmpin1 = SeedAware::new_file_name("$tmpdir/psiblast_hits", 'fa');
128 :     my $tmpin2 = SeedAware::new_file_name("$tmpdir/fusion_ends", 'txt');
129 :     my $fusion = join("", map { $_."\n" } @fusions);
130 :     gjoseqlib::print_alignment_as_fasta($tmpin1, $hits);
131 :     AlignTree::print_string($tmpin2, $fusion);
132 :     SeedUtils::run("svr_align_seqs -mafft <$tmpin1 | svr_tree | svr_tree_to_html -c role -nc 20 -anno -p none -f $tmpin2 -d $tmpin2 >$html_tree");
133 :     for ($tmpin1, $tmpin2) { unlink $_ if -e $_ };
134 :     }
135 :    
136 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3