[Bio] / FigKernelScripts / find_genes_in_families.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/find_genes_in_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (view) (download) (as text)

1 : overbeek 1.1 # -*- perl -*-
2 : gdpusch 1.17 ########################################################################
3 : overbeek 1.8 # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 : gdpusch 1.17 ########################################################################
18 : overbeek 1.1
19 : gdpusch 1.20 use strict;
20 :     use warnings;
21 :    
22 :     $0 =~ m/([^\/]+)$/o;
23 :     my $self = $1;
24 : overbeek 1.9 my $usage = "find_genes_in_families FigfamsData ToCallDir FoundFamilies [old] 2> missed.fasta";
25 : overbeek 1.1
26 : overbeek 1.6 if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
27 :     print STDERR "\n $usage\n\n";
28 :     exit (0);
29 :     }
30 : overbeek 1.1
31 :     use FIG;
32 :     my $fig = new FIG;
33 :    
34 : overbeek 1.10 my ($fam_data, $N, $i);
35 : overbeek 1.9
36 :     # First, let's determine which FigfamsData directory to use
37 : olson 1.19
38 : overbeek 1.9 for ($i=0; ($i < @ARGV) && ($ARGV[$i] !~ /^-fams=/); $i++) {}
39 : gdpusch 1.17 if ($i < @ARGV) {
40 :     if ($ARGV[$i] =~ /^-fams=(\S+)/) {
41 : overbeek 1.6 if (-d $1) {
42 :     $fam_data = $1;
43 :     }
44 :     else {
45 : overbeek 1.9 die "Nonexistent FIGfams directory $1\n";
46 : overbeek 1.6 }
47 : overbeek 1.9 splice(@ARGV,$i,1);
48 : overbeek 1.6 }
49 :     }
50 : overbeek 1.9
51 : overbeek 1.1 use FigFam;
52 :     use FigFams;
53 :     my $figfams = new FigFams($fig,$fam_data);
54 :    
55 :     use NewGenome;
56 : overbeek 1.3 use ToCall;
57 :    
58 : overbeek 1.6 my ($to_call_dir, $foundF);
59 :     (
60 : overbeek 1.7 ($to_call_dir = shift @ARGV) && ((-d $to_call_dir) || warn("Directory $to_call_dir does not exist")) &&
61 : overbeek 1.6 ($foundF = shift @ARGV)
62 :     )
63 :     || die "\n usage: $usage\n\n";
64 :    
65 : overbeek 1.16 my($to_call,$use_old);
66 :     $use_old = ((@ARGV > 0) && ($ARGV[0] =~ /^old/i));
67 :    
68 : gdpusch 1.17 if ($use_old) {
69 : overbeek 1.9 $to_call = new ToCall($to_call_dir);
70 : overbeek 1.3 }
71 : gdpusch 1.17 else {
72 : overbeek 1.9 $to_call = new NewGenome($to_call_dir);
73 : overbeek 1.3 }
74 :    
75 : gdpusch 1.20 my $called_by_file = "$to_call_dir/called_by";
76 :     open(CALLED_BY, ">>$called_by_file")
77 :     || die "Could not append-open $called_by_file";
78 :    
79 :     open(FOUNDFAMS,">>$foundF") || die "could not append-open $foundF";
80 : overbeek 1.4
81 : overbeek 1.2 my @orfs = sort { $b->[2] <=> $a->[2] }
82 : gdpusch 1.20 map { my $id = $_;
83 :     my $seq = $to_call->get_feature_sequence($id);
84 :     my $ln = length($seq);
85 :     [$id, $seq, $ln]
86 :     } $to_call->get_fids_for_type('orf');
87 : overbeek 1.2
88 : gdpusch 1.17 foreach $_ (@orfs) {
89 : gdpusch 1.20 my ($orf_id, $seq) = @$_;
90 :     print STDERR "looking for a family for $orf_id\n";
91 :    
92 : overbeek 1.16 my $orf;
93 : gdpusch 1.17 if ($use_old) {
94 : gdpusch 1.20 $orf = &ToCall::PEG::new('ToCall::PEG',$orf_id,$seq);
95 : overbeek 1.16 }
96 : gdpusch 1.17 else {
97 : gdpusch 1.20 $orf = &NewGenome::ORF::new('NewGenome::ORF',$to_call,$orf_id);
98 : overbeek 1.16 }
99 : gdpusch 1.20
100 : gdpusch 1.17 my ($fam, $sims) = $figfams->place_in_family($seq);
101 : overbeek 1.10
102 : overbeek 1.11 if (defined($sims)) {
103 : gdpusch 1.20 print STDERR "ORF $orf_id has ", (scalar @$sims), " sims against family $fam\n" if $ENV{VERBOSE};
104 : overbeek 1.10 $orf->call_start($sims);
105 :     }
106 : overbeek 1.12 else {
107 : gdpusch 1.20 print STDERR "ORF $orf_id had no sims against a family\n" if $ENV{VERBOSE};
108 : overbeek 1.12 }
109 : gdpusch 1.17
110 :     if ($fam) {
111 : overbeek 1.2 my $fam_id = $fam->family_id;
112 : gdpusch 1.20 my $func = $fam->family_function;
113 :     my $annot = [ qq(RAST),
114 :     qq($func\nCalled by "$self" using FIGfam $fam_id.)
115 :     ];
116 :     my $fid = $orf->promote_to_peg($sims, $func, $annot);
117 :     if ($fid) {
118 :     print FOUNDFAMS "$fid\t$fam_id\t$func\n";
119 :     print CALLED_BY "$fid\t$self\n";
120 :     print STDERR "Succeeded:\t$orf_id --> $fid\n\n";
121 :     }
122 :     else {
123 :     print STDERR "Could not promote:\t$orf_id\n\n";
124 :     }
125 :     }
126 :     else {
127 :     print STDERR "Could not place:\t$orf_id\n";
128 : overbeek 1.2 }
129 : overbeek 1.1 }
130 : gdpusch 1.17
131 : gdpusch 1.20 close(FOUNDFAMS);
132 :     close(CALLED_BY);
133 :    
134 :     $to_call->export_features();
135 : gdpusch 1.17
136 :     exit(0);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3