[Bio] / FigKernelScripts / FFB2_make_subsys_based_families.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/FFB2_make_subsys_based_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : overbeek 1.1 ########################################################################
2 :     #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :     ########################################################################
19 :    
20 :    
21 :     use FIG;
22 : olson 1.2 use strict;
23 : olson 1.3 use Getopt::Long;
24 :    
25 :     # usage: FFB2_make_subsys_based_families [-functions function-overrides] > subsys.based.families
26 : olson 1.2
# Handle on the FIG installation; the rest of the script uses it to reach
# the database (db_handle) and to split a function into roles
# (roles_of_function).
my $fig = FIG->new;

# Optional path given with -functions: a tab-separated file of
# "id<TAB>function" lines.  When an id matches a protein returned by the
# subsystem query, the function from this file is used in place of the
# assigned function stored in the database.
my $function_override_file;

my $rc = GetOptions("functions=s" => \$function_override_file);

# An option-parsing failure or any leftover positional argument is a usage
# error; the family list itself is always written to STDOUT.
($rc && @ARGV == 0) ||
    die "Usage: FFB2_make_subsys_based_families [-functions function-overrides] > subsys.based.families\n";

# id => replacement function.  Stays empty when no override file was given.
my %fn_override;
if (defined($function_override_file))
{
    open(my $override_fh, "<", $function_override_file)
        or die "Cannot open $function_override_file: $!";

    while (my $line = <$override_fh>)
    {
        chomp $line;

        # Only the first two tab-separated columns are used.
        my($id, $fn) = split(/\t/, $line);

        # Blank lines and lines with no function column carry no usable
        # override; skip them instead of storing an undefined entry.
        next unless defined($id) && $id ne '' && defined($fn);

        $fn_override{$id} = $fn;
    }
    close($override_fh);
}
49 :    
50 : overbeek 1.1
# Scratch file that receives the sorted, de-duplicated "function<TAB>peg"
# pairs.  The process id ($$) in the name keeps concurrent runs apart.
my $pairsF = "$FIG_Config::temp/peg-func.$$";

# The pairs are streamed through sort(1) instead of being held in memory.
# LC_ALL=C forces plain byte-wise comparison: the grouping pass that reads
# this file back relies on identical function strings being adjacent, and
# locale-aware collation (and -u under such collation) can interleave or
# merge strings that differ only in punctuation or spacing.
open(my $pairs_out, '|-', "LC_ALL=C sort -S 3G -u > $pairsF")
    or die "could not open $pairsF: $!";

# Each row is a triple (role, protein id, assigned function) for a protein
# that sits in a subsystem cell, restricted by the WHERE clause below:
#   - the protein is not listed in deleted_fids,
#   - the role is not listed in aux_roles for that subsystem,
#   - the subsystem has a non-empty class_1 that does not match
#     'experimental%' or '%delete%',
#   - the variant code is neither '0' nor '-1'.
# mysql_use_result asks the driver to stream rows rather than buffer the
# whole result set on the client.
# NOTE(review): assumes {_dbh} is a DBI database handle (DBD::mysql) - the
# method names and the mysql_use_result attribute suggest so; confirm.
my $dbh = $fig->db_handle->{_dbh};
my $sth = $dbh->prepare(qq(SELECT si.role, si.protein, f.assigned_function
                           FROM subsystem_index si
                           LEFT JOIN aux_roles ar ON si.subsystem = ar.subsystem AND si.role = ar.role
                           JOIN subsystem_metadata m ON si.subsystem = m.subsystem
                           JOIN assigned_functions f ON f.prot = si.protein
                           LEFT JOIN deleted_fids df ON si.protein = df.fid
                           WHERE df.fid IS NULL AND
                           ar.role IS NULL AND
                           m.class_1 <> '' AND
                           m.class_1 NOT LIKE 'experimental%' COLLATE latin1_swedish_ci AND
                           m.class_1 NOT LIKE '%delete%' COLLATE latin1_swedish_ci AND
                           si.variant != '0' AND
                           si.variant != '-1'),
                        { mysql_use_result => 1 })
    or die "could not prepare subsystem query: " . $dbh->errstr;

$sth->execute()
    or die "could not execute subsystem query: " . $sth->errstr;

while (my $row = $sth->fetchrow_arrayref())
{
    my($role, $peg, $func) = @$row;

    # A function supplied via -functions wins over the stored assignment.
    $func = $fn_override{$peg} if defined($fn_override{$peg});

    # Only protein-encoding genes with a usable (2+ character) function.
    next if $peg !~ /\.peg\./;
    next if ((! $func) || (length($func) < 2));

    # Keep the pair only when the subsystem role is one of the roles the
    # (possibly overridden) function breaks down into.
    my @roles = $fig->roles_of_function($func);
    next unless grep { $_ eq $role } @roles;

    # Drop proteins whose function comment flags them as truncated,
    # frameshifted or fragmentary.
    next if $func =~ /\#.*((trunca)|(framesh)|(fragment))/;

    # Strip any remaining trailing "# comment" so that functions differing
    # only in their comment fall into the same family.
    $func =~ s/\s*\#.*$//;
    print {$pairs_out} "$func\t$peg\n";
}

# fetchrow_arrayref returns undef both at end-of-data and on error; with a
# streamed result an error can occur mid-way, so tell the two apart here.
die "error while reading subsystem query results: " . $sth->errstr
    if $sth->err;

# Closing a pipe waits for the child; a false return means the write failed
# ($! set) or sort exited non-zero ($? set).  Either way the scratch file
# cannot be trusted.
close($pairs_out)
    or die "sort into $pairsF failed: "
         . ($! ? "$!" : "exit status " . ($? >> 8)) . "\n";
109 :    
# Second pass: read the sorted "function<TAB>peg" pairs back and print one
# "family-number<TAB>peg" line per member for every function that has more
# than one peg.  Family numbers start at 1 and only advance when a family
# is actually printed.
my $n = 1;

open(my $pairs_in, "<", $pairsF) or die "could not open $pairsF: $!";

my $family_func;        # function shared by the pegs collected so far
my @family_pegs;        # pegs seen for $family_func

# Print the collected family if it has at least two members, then reset.
my $emit_family = sub {
    if (@family_pegs > 1)
    {
        print "$n\t$_\n" foreach @family_pegs;
        $n++;
    }
    @family_pegs = ();
};

while (defined(my $line = <$pairs_in>))
{
    # A line that does not look like "function<TAB>fig|G.V.peg.N" is
    # reported and skipped; stopping here would silently discard every
    # family that sorts after it.
    unless ($line =~ /(\S.*\S)\t(fig\|\d+\.\d+\.peg\.\d+)/)
    {
        chomp(my $shown = $line);
        warn "skipping unparsable line $. of $pairsF: '$shown'\n";
        next;
    }
    my($func, $peg) = ($1, $2);

    # The input is sorted, so a change of function closes the current
    # family and starts the next one.
    if (!defined($family_func) || ($func ne $family_func))
    {
        $emit_family->();
        $family_func = $func;
    }
    push(@family_pegs, $peg);
}

# Flush the family that was still open when the input ran out.
$emit_family->();

close($pairs_in);

# Removal of the scratch file is disabled, so $pairsF stays on disk after
# the run.
# NOTE(review): presumably left off for debugging - confirm before
# re-enabling.
#unlink($pairsF);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3