[Bio] / FigKernelScripts / load_coupling.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/load_coupling.pl

Parent Directory | Revision Log | View Patch

revision 1.3.2.1, Mon Jun 27 15:26:44 2005 UTC | revision 1.9, Tue Feb 21 14:37:25 2006 UTC
# Line 1  Line 1 
1    #
2    # Copyright (c) 2003-2006 University of Chicago and Fellowship
3    # for Interpretation of Genomes. All Rights Reserved.
4    #
5    # This file is part of the SEED Toolkit.
6    #
7    # The SEED Toolkit is free software. You can redistribute
8    # it and/or modify it under the terms of the SEED Toolkit
9    # Public License.
10    #
11    # You should have received a copy of the SEED Toolkit Public License
12    # along with this program; if not write to the University of Chicago
13    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14    # Genomes at veronika@thefig.info or download a copy from
15    # http://www.theseed.org/LICENSE.TXT.
16    #
17    
18    
19  # -*- perl -*-  # -*- perl -*-
20    
21  use FIG;  use FIG;
22  use Tracer;  use Tracer;
23    use strict;
24    
25  my $fig = new FIG;  my $fig = new FIG;
26    
27  # usage: load_coupling  # usage: load_coupling [G1 G2 ...]
28    
29  $pchD   = "$FIG_Config::data/CouplingData/PCHs";  my $pchD   = "$FIG_Config::data/CouplingData/PCHs";
30  $scores = "$FIG_Config::data/CouplingData/scores";  my $scores = "$FIG_Config::data/CouplingData/scores";
31    
32  use DBrtns;  use DBrtns;
33    
34    my @genomes;
35    if (@ARGV > 0)
36    {
37        for my $g (@ARGV)
38        {
39            if ($g =~ /^\d+\.\d+$/)
40            {
41                push(@genomes, $g);
42            }
43            else
44            {
45                die "Invalid genome '$g' in argument list\n";
46            }
47        }
48    }
49    
50  if (!(-d $pchD)) {  if (!(-d $pchD)) {
51      Trace("Coupling directory $pchD not found.") if T(1);      Trace("Coupling directory $pchD not found.") if T(1);
52      exit;      exit;
# Line 20  Line 54 
54      Trace("Coupling data file $scores not found.") if T(1);      Trace("Coupling data file $scores not found.") if T(1);
55      exit;      exit;
56  }  }
57  Trace("Re-creating coupling table.") if T(2);  
58  my $dbf = $fig->db_handle;  my $dbf = $fig->db_handle;
59    
60    Trace("Re-creating coupling table.") if T(2);
61    
62    my %genomes = map { $_ => 1 } @genomes;
63    
64    if (@genomes)
65    {
66        my $tmp = "$FIG_Config::temp/lc_tmp.$$";
67        open(TMP, ">$tmp") or die "Cannot open $tmp for writing: $!\n";
68    
69        #
70        # Extract the scores for our genomes to $tmp.
71        #
72    
73        open(S, "<$scores") or die "Cannot open $scores: $!\n";
74        while (<S>)
75        {
76            if (/^fig\|(\d+\.\d+)\.peg/ and $genomes{$1})
77            {
78                print TMP $_;
79            }
80        }
81        close(TMP);
82        close(S);
83    
84        #
85        # Need to drop any existing entries for our genomes.
86        #
87    
88        my $cond = join(" or ", map { "peg1 like 'fig|$_.peg%'" } @genomes);
89        my $where = "($cond)";
90        my $res = $dbf->SQL("delete from fc_pegs where $where");
91    
92        #
93        # Now insert.
94        #
95    
96        $dbf->load_table(tbl => 'fc_pegs', file => $tmp);
97    }
98    else
99    {
100  $dbf->reload_table('all', 'fc_pegs',  $dbf->reload_table('all', 'fc_pegs',
101                     "peg1 varchar(32), peg2 varchar(32), score integer",                     "peg1 varchar(32), peg2 varchar(32), score integer",
102                     { fc_pegs_ix => "peg1, peg2" },                     { fc_pegs_ix => "peg1, peg2" },
103                     $scores                     $scores
104            );            );
105  Trace("Re-creating PCH table.") if T(2);  }
106    
107    #
108    # Now load the PCHs files.
109    #
110    
111    if (@genomes == 0)
112    {
113        #
114        # Reload all PCHs.
115        #
116    
117        Trace("Estimating size of PCH table.") if T(2);
118    
119        my @files = grep { (-s $_) } map { "$pchD/$_" }  OpenDir($pchD, 1);
120    
121        my($row_size, $max_rows) = $dbf->estimate_table_size(\@files);
122    
123        Trace("Re-creating PCH table with row_size=$row_size max_rows=$max_rows.") if T(2);
124    
125  $dbf->reload_table('all', "pchs",  $dbf->reload_table('all', "pchs",
126                     "peg1 varchar(32), peg2 varchar(32), peg3 varchar(32), peg4 varchar(32),                     "peg1 varchar(32), peg2 varchar(32), peg3 varchar(32), peg4 varchar(32),
127                         inden13 varchar(6), inden24 varchar(6), para3  integer, para4 integer, rep char(1)",                         inden13 varchar(6), inden24 varchar(6), para3  integer, para4 integer, rep char(1)",
128                      { pchs_ix => "peg1, peg2" }                     { pchs_ix => "peg1, peg2" }, undef, undef, undef, [$row_size, $max_rows]
129            );            );
130    
131  Trace("Reading PCH directory.") if T(2);  Trace("Reading PCH directory.") if T(2);
132  @files = grep { (-s "$pchD/$_") } OpenDir($pchD, 1);  
133  foreach $file (@files) {      foreach my $file (@files) {
134      Trace("Loading PCH data from $file.") if T(3);      Trace("Loading PCH data from $file.") if T(3);
135      $dbf->load_table( tbl => "pchs",      $dbf->load_table( tbl => "pchs",
136                        file => "$pchD/$file" );                           file => $file );
137  }  }
138  Trace("Finishing PCH load.") if T(2);  Trace("Finishing PCH load.") if T(2);
139  $dbf->finish_load('all', 'pchs');  $dbf->finish_load('all', 'pchs');
140    }
141    else
142    {
143        #
144        # Reload a subset.
145        #
146    
147        for my $g (@genomes)
148        {
149            my $pch_file = "$pchD/$g";
150            if (! -f $pch_file)
151            {
152                die "Cannot open PCH file $pch_file\n";
153            }
154    
155            $dbf->load_table(tbl => 'pchs', file => $pch_file);
156        }
157    }
158    
159  Trace("Couplings loaded.") if T(2);  Trace("Couplings loaded.") if T(2);
160    
161    

Legend:
Removed from v.1.3.2.1  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3