[Bio] / FigKernelScripts / svr_compare_feature_tables.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/svr_compare_feature_tables.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Wed Aug 18 20:35:45 2010 UTC revision 1.7, Wed Oct 6 19:46:52 2010 UTC
# Line 4  Line 4 
4  #       This is a SAS Component.  #       This is a SAS Component.
5  #  #
6    
7  #  ########################################################################
8  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
9  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
10  #  #
# Line 19  Line 19 
19  # at info@ci.uchicago.edu or the Fellowship for Interpretation of  # at info@ci.uchicago.edu or the Fellowship for Interpretation of
20  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
21  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
22  #  ########################################################################
23    
24    =head1 svr_compare_feature_tables
25    
26    usage: svr_compare_feature_tables  old_features.tab  new_fatures.tab  [summary.yaml] > comparison.tab  2> summary.txt
27    
28    Input .tab-file format:
29    
30    ID field2 field3 ... seed-format-location function
31    
32    =cut
33    
 # usage:  svr_compare_feature_tables  old_features.tab  new_fatures.tab  > comparison.tab  2> summary.txt  
34    
35  use strict;  use strict;
36  use warnings;  use warnings;
37    
38  use SeedUtils;  use SeedUtils;
39  use Data::Dumper;  use Data::Dumper;
40    use YAML::Any;
41  # use Carp;  # use Carp;
42    
43  $0 =~ m/([^\/]+)$/;  $0 =~ m/([^\/]+)$/;
44  my $self  = $1;  my $self  = $1;
45  my $usage = "$self  old_features.tab  new_fatures.tab  \> comparison.tab  2\> summary.txt";  my $usage = "$self  old_features.tab  new_fatures.tab [summary.yaml] \> comparison.tab  2\> summary.txt";
46    if (@ARGV && ($ARGV[0] =~ m/^-{1,2}help$/o)) {
47        print STDERR "   usage: $usage\n";
48        exit(0);
49    }
50    
 my $old_3col_file;  
 (($old_3col_file = shift) && (-f $old_3col_file))  
     || die "Could not find old_3col_file $old_3col_file\n\n\tusage: $usage\n\n";  
   
 my $new_3col_file;  
 (($new_3col_file = shift) && (-f $new_3col_file))  
     || die "Could not find new_3col_file $new_3col_file\n\n\tusage: $usage\n\n";  
51    
52  my ($old_tbl, $old_num_pegs) = &load_tbl($old_3col_file);  my $old_tab_file;
53  my ($new_tbl, $new_num_pegs) = &load_tbl($new_3col_file);  (($old_tab_file = shift) && (-f $old_tab_file))
54        || die "Could not find old_tab_file $old_tab_file\n\n\tusage: $usage\n\n";
55    
56    my $new_tab_file;
57    (($new_tab_file = shift) && (-f $new_tab_file))
58        || die "Could not find new_tab_file $new_tab_file\n\n\tusage: $usage\n\n";
59    
60    my $summary_yaml;
61    if (@ARGV) {
62        $summary_yaml = shift;
63    }
64    
65    my ($old_tbl, $old_num_pegs) = &load_tbl($old_tab_file);
66    my ($new_tbl, $new_num_pegs) = &load_tbl($new_tab_file);
67    
68    
69  use constant FID    =>  0;  use constant FID    =>  0;
# Line 56  Line 76 
76  use constant TYPE   =>  7;  use constant TYPE   =>  7;
77  use constant ENTRY  =>  8;  use constant ENTRY  =>  8;
78  use constant FUNC   =>  9;  use constant FUNC   =>  9;
79    use constant ALT_IDS => 10;
80    
81    
82  my $identical   = 0;  my $identical   = 0;
# Line 72  Line 93 
93      $keys{$key} = 1;      $keys{$key} = 1;
94  }  }
95  @keys = sort { &by_key($a,$b) } (keys %keys);  @keys = sort { &by_key($a,$b) } (keys %keys);
96    
97  print STDERR (q(Num keys = ), (scalar @keys), qq(\n\n)) if $ENV{VERBOSE};  print STDERR (q(Num keys = ), (scalar @keys), qq(\n\n)) if $ENV{VERBOSE};
98    
99    print STDOUT (q(#), join(qq(\t), qw(Comparison Old_ID New_ID Old_Length New_Length Length_Diff Old_Loc New_Loc Old_Function New_Function Old_Alt_IDs New_Alt_IDs)), qq(\n));
100    
 print STDOUT (q(#), join(qq(\t), qw(Comparison Old_ID New_ID Old_Length New_Length Length_Diff Old_Loc New_Loc Old_Function New_Function)), qq(\n));  
101  foreach my $key (sort { &by_key($a,$b) } @keys) {  foreach my $key (sort { &by_key($a,$b) } @keys) {
102      my $case      = q();      my $case      = q();
103    
# Line 83  Line 105 
105      my $old_func  = q();      my $old_func  = q();
106      my $old_loc   = q();      my $old_loc   = q();
107      my $old_len   = 0;      my $old_len   = 0;
108        my $old_alt   = q();
109      if (defined($old_tbl->{$key})) {      if (defined($old_tbl->{$key})) {
110          $old_fid   = $old_tbl->{$key}->[FID];          $old_fid   = $old_tbl->{$key}->[FID];
111          $old_func  = $old_tbl->{$key}->[FUNC];          $old_func  = $old_tbl->{$key}->[FUNC];
112          $old_loc   = $old_tbl->{$key}->[LOCUS];          $old_loc   = $old_tbl->{$key}->[LOCUS];
113          $old_len   = $old_tbl->{$key}->[LEN];          $old_len   = $old_tbl->{$key}->[LEN];
114            $old_alt   = $old_tbl->{$key}->[ALT_IDS];
115      }      }
116    
117      my $new_fid   = q();      my $new_fid   = q();
118      my $new_func  = q();      my $new_func  = q();
119      my $new_loc   = q();      my $new_loc   = q();
120      my $new_len   = 0;      my $new_len   = 0;
121        my $new_alt   = q();
122      if (defined($new_tbl->{$key})) {      if (defined($new_tbl->{$key})) {
123          $new_fid   = $new_tbl->{$key}->[FID];          $new_fid   = $new_tbl->{$key}->[FID];
124          $new_func  = $new_tbl->{$key}->[FUNC];          $new_func  = $new_tbl->{$key}->[FUNC];
125          $new_loc   = $new_tbl->{$key}->[LOCUS];          $new_loc   = $new_tbl->{$key}->[LOCUS];
126          $new_len   = $new_tbl->{$key}->[LEN];          $new_len   = $new_tbl->{$key}->[LEN];
127            $new_alt   = $new_tbl->{$key}->[ALT_IDS];
128      }      }
129    
130      if (defined($old_tbl->{$key})) {      if (defined($old_tbl->{$key})) {
# Line 141  Line 167 
167      }      }
168      my $diff = $new_len - $old_len;      my $diff = $new_len - $old_len;
169    
170      print STDOUT (join(qq(\t), ($case, $old_fid, $new_fid, $old_len, $new_len, $diff, $old_loc, $new_loc, $old_func, $new_func)), qq(\n));      print STDOUT (join(qq(\t), ($case, $old_fid, $new_fid, $old_len, $new_len, $diff, $old_loc, $new_loc, $old_func, $new_func, $old_alt, $new_alt)), qq(\n));
171  }  }
172    
173    if (defined($summary_yaml))
174    {
175        if (open(my $fh, ">", $summary_yaml))
176        {
177            &write_summary_yaml($fh, $old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);
178        }
179        else
180        {
181            die "Error opening $summary_yaml for writing: $!";
182        }
183    }
184    else
185    {
186  &write_summary($old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);  &write_summary($old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);
187    }
188    
189  exit(0);  exit(0);
190    
# Line 154  Line 194 
194  {  {
195      my ($file) = @_;      my ($file) = @_;
196      my ($tbl, $num_pegs);      my ($tbl, $num_pegs);
197      my ($key, $entry, $id, $locus, $func, $contig, $beg, $end, $len, $strand, $type);      my ($key, $entry, $id, $locus, $func, $rest, $contig, $beg, $end, $len, $strand, $type);
198    
199      open(TBL, "<$file") || die "Could not read-open \'$file\'";      open(TBL, "<$file") || die "Could not read-open \'$file\'";
200      while (defined($entry = <TBL>))      while (defined($entry = <TBL>))
201      {      {
202            next if ($entry =~ m/^\#/);
203    
204          chomp $entry;          chomp $entry;
205          ($id, $locus, $func) = split /\t/, $entry;          my @fields = split /\t/, $entry, -1;
206            $id    = shift @fields;
207            $func  = pop @fields;
208            $locus = pop @fields;
209    
210            $rest = join(q(,), (grep { $_ } @fields) );
211    
212          if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus))          if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus))
213             && defined($contig) && $contig             && defined($contig) && $contig
# Line 181  Line 228 
228  #           }  #           }
229    
230              if (not defined($tbl->{$key})) {              if (not defined($tbl->{$key})) {
231                  $tbl->{$key} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $type, $entry, ($func || q()), ];                  $tbl->{$key} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $type, $entry, ($func || q()), $rest ];
232              }              }
233              else {              else {
234                  warn "Skipping same-STOP TBL entry for $file, $key:\n"                  warn "Skipping same-STOP TBL entry for $file, $key:\n"
# Line 203  Line 250 
250          my ($contig, $left, $right, $dir) = SeedUtils::boundaries_of($locus);          my ($contig, $left, $right, $dir) = SeedUtils::boundaries_of($locus);
251          my ($beg, $end) = ($dir eq q(+)) ? ($left, $right) : ($right, $left);          my ($beg, $end) = ($dir eq q(+)) ? ($left, $right) : ($right, $left);
252    
253            if ($contig && $left && $right && $dir) {
254          return ($contig, $beg, $end, (1 + abs($right - $left)), $dir);          return ($contig, $beg, $end, (1 + abs($right - $left)), $dir);
255      }      }
256      else {      else {
257          die "Invalid locus $locus";          die "Invalid locus $locus";
258      }      }
259        }
260        else {
261            die "Missing locus";
262        }
263    
264      return ();      return ();
265  }  }
# Line 271  Line 323 
323    
324      return 1;      return 1;
325  }  }
326    
327    
328    sub write_summary_yaml {
329        my ($fh, $old_pegs, $new_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost) = @_;
330    
331        my $dat = {
332            old_num => $old_pegs,
333            new_num => $new_pegs,
334        };
335    
336        for my $what (qw(same_stop added lost identical differ short long))
337        {
338            my $val = eval "\$$what";
339            $dat->{$what} = $val;
340            $dat->{"${what}_pct_old"} = 100 * $val / $old_pegs;
341            $dat->{"${what}_pct_new"} = 100 * $val / $new_pegs;
342        }
343        for my $what (qw(identical differ short long))
344        {
345            my $val = eval "\$$what";
346            $dat->{"${what}_pct_common"} = 100 * $val / $same_stop;
347        }
348    
349        print $fh Dump($dat);
350        return 1;
351    }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3