[Bio] / FigKernelScripts / load_coupling.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/load_coupling.pl

Parent Directory | Revision Log | View Patch

revision 1.3, Wed Jun 1 01:18:50 2005 UTC | revision 1.8, Sun Feb 19 04:15:44 2006 UTC
# Line 1  Line 1 
1    #
2    # Copyright (c) 2003-2006 University of Chicago and Fellowship
3    # for Interpretation of Genomes. All Rights Reserved.
4    #
5    # This file is part of the SEED Toolkit.
6    #
7    # The SEED Toolkit is free software. You can redistribute
8    # it and/or modify it under the terms of the SEED Toolkit
9    # Public License.
10    #
11    # You should have received a copy of the SEED Toolkit Public License
12    # along with this program; if not write to the University of Chicago
13    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14    # Genomes at veronika@thefig.info or download a copy from
15    # http://www.theseed.org/LICENSE.TXT.
16    #
17    
18    
19  # -*- perl -*-  # -*- perl -*-
20    
21  use FIG;  use FIG;
22    use Tracer;
23    use strict;
24    
25  my $fig = new FIG;  my $fig = new FIG;
26    
27  # usage: load_coupling  # usage: load_coupling [G1 G2 ...]
28    
29  $pchD   = "$FIG_Config::fig/CouplingData/PCHs";  my $pchD   = "$FIG_Config::data/CouplingData/PCHs";
30  $scores = "$FIG_Config::fig/CouplingData/scores";  my $scores = "$FIG_Config::data/CouplingData/scores";
31    
32  use DBrtns;  use DBrtns;
33    
34  if (! ((-d $pchD) && (-s $scores))) { exit }  my @genomes;
35    if (@ARGV > 0)
36    {
37        for my $g (@ARGV)
38        {
39            if ($g =~ /^\d+\.\d+$/)
40            {
41                push(@genomes, $g);
42            }
43            else
44            {
45                die "Invalid genome '$g' in argument list\n";
46            }
47        }
48    }
49    
50  my $dbf = $fig->{_dbf};  if (!(-d $pchD)) {
51        Trace("Coupling directory $pchD not found.") if T(1);
52        exit;
53    } elsif (!(-s $scores)) {
54        Trace("Coupling data file $scores not found.") if T(1);
55        exit;
56    }
57    
58  $dbf->drop_table( tbl => "fc_pegs" );  my $dbf = $fig->db_handle;
59  $dbf->create_table( tbl => 'fc_pegs',  
60                      flds => "peg1 varchar(32), peg2 varchar(32), score integer"  Trace("Re-creating coupling table.") if T(2);
61                    );  
62  $dbf->load_table( tbl => "fc_pegs",  my %genomes = map { $_ => 1 } @genomes;
63                    file => $scores );  
64  $dbf->create_index( idx  => "fc_pegs_ix",  if (@genomes)
65                      tbl  => "fc_pegs",  {
66                      type => "btree",      my $tmp = "$FIG_Config::temp/lc_tmp.$$";
67                      flds => "peg1,peg2" );      open(TMP, ">$tmp") or die "Cannot open $tmp for writing: $!\n";
68    
69  $dbf->vacuum_it("fc_pegs");      #
70        # Extract the scores for our genomes to $tmp.
71  $dbf->drop_table( tbl => "pchs" );      #
72  $dbf->create_table( tbl => 'pchs',  
73                      flds => "peg1 varchar(32), peg2 varchar(32), peg3 varchar(32), peg4 varchar(32),      open(S, "<$scores") or die "Cannot open $scores: $!\n";
74                               inden13 varchar(6), inden24 varchar(6), para3  integer, para4 integer, rep char(1)"      while (<S>)
75        {
76            if (/^fig\|(\d+\.\d+)\.peg/ and $genomes{$1})
77            {
78                print TMP $_;
79            }
80        }
81        close(TMP);
82        close(S);
83    
84        #
85        # Need to drop any entries with our genome.
86        #
87    
88        my $cond = join(" or ", map { "peg1 like 'fig|$_.peg%'" } @genomes);
89        my $where = "($cond)";
90        my $res = $dbf->SQL("delete from fc_pegs where $where");
91    
92        #
93        # Now insert.
94        #
95    
96        $dbf->load_table(tbl => 'fc_pegs', file => $tmp);
97    }
98    else
99    {
100        die;
101        $dbf->reload_table('all', 'fc_pegs',
102                           "peg1 varchar(32), peg2 varchar(32), score integer",
103                       { fc_pegs_ix => "peg1, peg2" },
104                           $scores
105                    );                    );
106    }
107    
108  opendir(PCHD,$pchD) || die "could not open $pchD";  #
109  @files = grep { ($_ !~ /^\./) && (-s "$pchD/$_") } readdir(PCHD);  # Now load the PCHs files.
110  foreach $file (@files)  #
111    
112    if (@genomes == 0)
113  {  {
114        #
115        # Reload all PCHs.
116        #
117    
118        Trace("Estimating size of PCH table.") if T(2);
119    
120        my @files = grep { (-s $_) } map { "$pchD/$_" }  OpenDir($pchD, 1);
121    
122        my($row_size, $max_rows) = $dbf->estimate_table_size(\@files);
123    
124        Trace("Re-creating PCH table with row_size=$row_size max_rows=$max_rows.") if T(2);
125    
126        $dbf->reload_table('all', "pchs",
127                           "peg1 varchar(32), peg2 varchar(32), peg3 varchar(32), peg4 varchar(32),
128                           inden13 varchar(6), inden24 varchar(6), para3  integer, para4 integer, rep char(1)",
129                       { pchs_ix => "peg1, peg2" }, undef, undef, undef, [$row_size, $max_rows]
130                          );
131    
132        Trace("Reading PCH directory.") if T(2);
133    
134        foreach my $file (@files) {
135            Trace("Loading PCH data from $file.") if T(3);
136      $dbf->load_table( tbl => "pchs",      $dbf->load_table( tbl => "pchs",
137                    file => "$pchD/$file" );                           file => $file );
138  }  }
139  $dbf->create_index( idx  => "pchs_ix",      Trace("Finishing PCH load.") if T(2);
140                      tbl  => "pchs",      $dbf->finish_load('all', 'pchs');
141                      type => "btree",  }
142                      flds => "peg1,peg2" );  else
143    {
144        #
145        # Reload a subset.
146        #
147    
148        die;
149    
150        for my $g (@genomes)
151        {
152            my $pch_file = "$pchD/$g";
153            if (! -f $pch_file)
154            {
155                die "Cannot open PCH file $pch_file\n";
156            }
157    
158            $dbf->load_table(tbl => 'pchs', file => $pch_file);
159        }
160    }
161    
162    Trace("Couplings loaded.") if T(2);
163    
 $dbf->vacuum_it("pchs");  
164    

Legend:
Removed from v.1.3  
Changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3