[Bio] / FigKernelScripts / merge_families.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/merge_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : overbeek 1.1 ########################################################################
2 :     #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 :    
20 :     use FIG;
21 :     my $fig = new FIG;
22 :    
23 :     use Subsystem;
24 :    
25 :     $usage = "usage: merge_families FamDir membership-table1";
26 :    
27 :     (
28 :     ($fam_dir = shift @ARGV) &&
29 :     ($members = shift @ARGV)
30 :     )
31 :     || die $usage;
32 :    
33 :     if (-e $fam_dir)
34 :     {
35 :     die "$fam_dir already exists. You should probably delete or save it and rerun";
36 :     }
37 :    
38 :     &FIG::verify_dir("$fam_dir");
39 :     $dref = {};
40 :     open(LOG,">$fam_dir/log") || die "could not open $fam_dir/log";
41 : overbeek 1.3 open(MEMS,"sort -k2 -k1 $members |") || die "could not open $members";
42 : overbeek 1.1 $last = <MEMS>;
43 :     while ($last && ($last =~ /^(\S+)\t(\S+)/))
44 :     {
45 :     $peg = $2;
46 :     @fams = ();
47 :     while ($last && ($last =~ /^(\S+)\t(\S+)/) && ($peg eq $2))
48 :     {
49 :     push(@fams,$1);
50 :     $last = <MEMS>;
51 :     }
52 :    
53 :     if (@fams > 1)
54 :     {
55 :     @fams = sort @fams;
56 :     &merge($dref,\@fams,$peg,\*LOG);
57 :     }
58 :     }
59 :     close(MEM);
60 :    
61 :     $mergedF = "$FIG_Config::temp/merged$$";
62 :     open(MERGED,"| sort -u > $mergedF")
63 :     || die "could not open $FIG_Config::temp/merged$$";
64 :     open(MEMS,"<$members") || die "could not open $members";
65 :     while (defined($_ = <MEMS>))
66 :     {
67 :     if ($_ =~ /^(\S+)\t(\S+)/)
68 :     {
69 :     $peg = $2;
70 :     $fam = $1;
71 :     $fam = &dereference($fam,$dref);
72 :     print MERGED "$fam\t$peg\n";
73 :     }
74 :     }
75 :     close(MEM);
76 :     close(MERGED);
77 :     open(MERGED,"<$mergedF")
78 :     || die "could not open $mergedF";
79 :     open(ALL,">$fam_dir/families") || die "could not open $fam_dir";
80 :    
81 : overbeek 1.2 $_ = <MERGED>;
82 :     while ($_ && ($_ =~ /^(\S+)/))
83 : overbeek 1.1 {
84 : overbeek 1.2 $fam = $1;
85 :     @set = ();
86 :     while ($_ && ($_ =~ /^(\S+)/) && ($fam eq $1))
87 :     {
88 :     push(@set,$_);
89 :     $_ = <MERGED>;
90 :     }
91 :    
92 :     if (@set > 1)
93 :     {
94 :     print ALL join("",@set);
95 :     }
96 : overbeek 1.1 }
97 :     close(MERGED);
98 : overbeek 1.2 unlink($mergedF);
99 : overbeek 1.1
100 :     sub dereference {
101 :     my($fam,$dref) = @_;
102 :    
103 :     while (my $to = $dref->{$fam})
104 :     {
105 :     $fam = $to;
106 :     }
107 :     return $fam;
108 :     }
109 :    
110 :     sub merge {
111 :     my($dref,$fams,$peg,$log_fh) = @_;
112 :    
113 :     my(%derefed,$fam,@whats_left,$i);
114 :     my $in = join(",",@$fams);
115 :     foreach $fam (@$fams)
116 :     {
117 :     $derefed{&dereference($fam,$dref)} = 1;
118 :     }
119 :     @whats_left = sort keys(%derefed);
120 :     for ($i=1; ($i < @whats_left); $i++)
121 :     {
122 :     $dref->{$whats_left[$i]} = $whats_left[0];
123 :     print $log_fh "$whats_left[$i]\t$whats_left[0]\t$peg\t$in\n";
124 :     }
125 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3