[Bio] / FigKernelScripts / svr_compare_feature_tables.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/svr_compare_feature_tables.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Wed Aug 18 20:35:45 2010 UTC revision 1.5, Mon Aug 30 02:04:13 2010 UTC
# Line 21  Line 21 
21  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
22  #  #
23    
24  # usage:  svr_compare_feature_tables  old_features.tab  new_fatures.tab  > comparison.tab  2> summary.txt  #
25    # Input files:
26    #
27    # ID seed-format-location function
28    #
29    
30    # usage:  svr_compare_feature_tables  old_features.tab  new_fatures.tab  [summary.yaml] > comparison.tab  2> summary.txt
31    
32  use strict;  use strict;
33  use warnings;  use warnings;
34    
35  use SeedUtils;  use SeedUtils;
36  use Data::Dumper;  use Data::Dumper;
37    use YAML::Any;
38  # use Carp;  # use Carp;
39    
40  $0 =~ m/([^\/]+)$/;  $0 =~ m/([^\/]+)$/;
41  my $self  = $1;  my $self  = $1;
42  my $usage = "$self  old_features.tab  new_fatures.tab  \> comparison.tab  2\> summary.txt";  my $usage = "$self  old_features.tab  new_fatures.tab [summary.yaml] \> comparison.tab  2\> summary.txt";
43    
44  my $old_3col_file;  my $old_tab_file;
45  (($old_3col_file = shift) && (-f $old_3col_file))  (($old_tab_file = shift) && (-f $old_tab_file))
46      || die "Could not find old_3col_file $old_3col_file\n\n\tusage: $usage\n\n";      || die "Could not find old_tab_file $old_tab_file\n\n\tusage: $usage\n\n";
   
 my $new_3col_file;  
 (($new_3col_file = shift) && (-f $new_3col_file))  
     || die "Could not find new_3col_file $new_3col_file\n\n\tusage: $usage\n\n";  
47    
48  my ($old_tbl, $old_num_pegs) = &load_tbl($old_3col_file);  my $new_tab_file;
49  my ($new_tbl, $new_num_pegs) = &load_tbl($new_3col_file);  (($new_tab_file = shift) && (-f $new_tab_file))
50        || die "Could not find new_tab_file $new_tab_file\n\n\tusage: $usage\n\n";
51    
52    my $summary_yaml;
53    if (@ARGV) {
54        $summary_yaml = shift;
55    }
56    
57    my ($old_tbl, $old_num_pegs) = &load_tbl($old_tab_file);
58    my ($new_tbl, $new_num_pegs) = &load_tbl($new_tab_file);
59    
60    
61  use constant FID    =>  0;  use constant FID    =>  0;
# Line 56  Line 68 
68  use constant TYPE   =>  7;  use constant TYPE   =>  7;
69  use constant ENTRY  =>  8;  use constant ENTRY  =>  8;
70  use constant FUNC   =>  9;  use constant FUNC   =>  9;
71    use constant ALT_IDS => 10;
72    
73    
74  my $identical   = 0;  my $identical   = 0;
# Line 72  Line 85 
85      $keys{$key} = 1;      $keys{$key} = 1;
86  }  }
87  @keys = sort { &by_key($a,$b) } (keys %keys);  @keys = sort { &by_key($a,$b) } (keys %keys);
88    
89  print STDERR (q(Num keys = ), (scalar @keys), qq(\n\n)) if $ENV{VERBOSE};  print STDERR (q(Num keys = ), (scalar @keys), qq(\n\n)) if $ENV{VERBOSE};
90    
91    print STDOUT (q(#), join(qq(\t), qw(Comparison Old_ID New_ID Old_Length New_Length Length_Diff Old_Loc New_Loc Old_Function New_Function Old_Alt_IDs New_Alt_IDs)), qq(\n));
92    
 print STDOUT (q(#), join(qq(\t), qw(Comparison Old_ID New_ID Old_Length New_Length Length_Diff Old_Loc New_Loc Old_Function New_Function)), qq(\n));  
93  foreach my $key (sort { &by_key($a,$b) } @keys) {  foreach my $key (sort { &by_key($a,$b) } @keys) {
94      my $case      = q();      my $case      = q();
95    
# Line 83  Line 97 
97      my $old_func  = q();      my $old_func  = q();
98      my $old_loc   = q();      my $old_loc   = q();
99      my $old_len   = 0;      my $old_len   = 0;
100        my $old_alt   = q();
101      if (defined($old_tbl->{$key})) {      if (defined($old_tbl->{$key})) {
102          $old_fid   = $old_tbl->{$key}->[FID];          $old_fid   = $old_tbl->{$key}->[FID];
103          $old_func  = $old_tbl->{$key}->[FUNC];          $old_func  = $old_tbl->{$key}->[FUNC];
104          $old_loc   = $old_tbl->{$key}->[LOCUS];          $old_loc   = $old_tbl->{$key}->[LOCUS];
105          $old_len   = $old_tbl->{$key}->[LEN];          $old_len   = $old_tbl->{$key}->[LEN];
106            $old_alt   = $old_tbl->{$key}->[ALT_IDS];
107      }      }
108    
109      my $new_fid   = q();      my $new_fid   = q();
110      my $new_func  = q();      my $new_func  = q();
111      my $new_loc   = q();      my $new_loc   = q();
112      my $new_len   = 0;      my $new_len   = 0;
113        my $new_alt   = q();
114      if (defined($new_tbl->{$key})) {      if (defined($new_tbl->{$key})) {
115          $new_fid   = $new_tbl->{$key}->[FID];          $new_fid   = $new_tbl->{$key}->[FID];
116          $new_func  = $new_tbl->{$key}->[FUNC];          $new_func  = $new_tbl->{$key}->[FUNC];
117          $new_loc   = $new_tbl->{$key}->[LOCUS];          $new_loc   = $new_tbl->{$key}->[LOCUS];
118          $new_len   = $new_tbl->{$key}->[LEN];          $new_len   = $new_tbl->{$key}->[LEN];
119            $new_alt   = $new_tbl->{$key}->[ALT_IDS];
120      }      }
121    
122      if (defined($old_tbl->{$key})) {      if (defined($old_tbl->{$key})) {
# Line 141  Line 159 
159      }      }
160      my $diff = $new_len - $old_len;      my $diff = $new_len - $old_len;
161    
162      print STDOUT (join(qq(\t), ($case, $old_fid, $new_fid, $old_len, $new_len, $diff, $old_loc, $new_loc, $old_func, $new_func)), qq(\n));      print STDOUT (join(qq(\t), ($case, $old_fid, $new_fid, $old_len, $new_len, $diff, $old_loc, $new_loc, $old_func, $new_func, $old_alt, $new_alt)), qq(\n));
163  }  }
164    
165    if (defined($summary_yaml))
166    {
167        if (open(my $fh, ">", $summary_yaml))
168        {
169            &write_summary_yaml($fh, $old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);
170        }
171        else
172        {
173            die "Error opening $summary_yaml for writing: $!";
174        }
175    }
176    else
177    {
178  &write_summary($old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);  &write_summary($old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);
179    }
180    
181  exit(0);  exit(0);
182    
# Line 154  Line 186 
186  {  {
187      my ($file) = @_;      my ($file) = @_;
188      my ($tbl, $num_pegs);      my ($tbl, $num_pegs);
189      my ($key, $entry, $id, $locus, $func, $contig, $beg, $end, $len, $strand, $type);      my ($key, $entry, $id, $locus, $func, $rest, $contig, $beg, $end, $len, $strand, $type);
190    
191      open(TBL, "<$file") || die "Could not read-open \'$file\'";      open(TBL, "<$file") || die "Could not read-open \'$file\'";
192      while (defined($entry = <TBL>))      while (defined($entry = <TBL>))
193      {      {
194            next if ($entry =~ m/^\#/);
195    
196          chomp $entry;          chomp $entry;
197          ($id, $locus, $func) = split /\t/, $entry;          my @fields = split /\t/, $entry, -1;
198            $id    = shift @fields;
199            $func  = pop @fields;
200            $locus = pop @fields;
201    
202            $rest = join(q(,), (grep { $_ } @fields) );
203    
204          if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus))          if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus))
205             && defined($contig) && $contig             && defined($contig) && $contig
# Line 181  Line 220 
220  #           }  #           }
221    
222              if (not defined($tbl->{$key})) {              if (not defined($tbl->{$key})) {
223                  $tbl->{$key} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $type, $entry, ($func || q()), ];                  $tbl->{$key} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $type, $entry, ($func || q()), $rest ];
224              }              }
225              else {              else {
226                  warn "Skipping same-STOP TBL entry for $file, $key:\n"                  warn "Skipping same-STOP TBL entry for $file, $key:\n"
# Line 203  Line 242 
242          my ($contig, $left, $right, $dir) = SeedUtils::boundaries_of($locus);          my ($contig, $left, $right, $dir) = SeedUtils::boundaries_of($locus);
243          my ($beg, $end) = ($dir eq q(+)) ? ($left, $right) : ($right, $left);          my ($beg, $end) = ($dir eq q(+)) ? ($left, $right) : ($right, $left);
244    
245            if ($contig && $left && $right && $dir) {
246          return ($contig, $beg, $end, (1 + abs($right - $left)), $dir);          return ($contig, $beg, $end, (1 + abs($right - $left)), $dir);
247      }      }
248      else {      else {
249          die "Invalid locus $locus";          die "Invalid locus $locus";
250      }      }
251        }
252        else {
253            die "Missing locus";
254        }
255    
256      return ();      return ();
257  }  }
# Line 271  Line 315 
315    
316      return 1;      return 1;
317  }  }
318    
319    
320    sub write_summary_yaml {
321        my ($fh, $old_pegs, $new_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost) = @_;
322    
323        my $dat = {
324            old_num => $old_pegs,
325            new_num => $new_pegs,
326        };
327    
328        for my $what (qw(same_stop added lost identical differ short long))
329        {
330            my $val = eval "\$$what";
331            $dat->{$what} = $val;
332            $dat->{"${what}_pct_old"} = 100 * $val / $old_pegs;
333            $dat->{"${what}_pct_new"} = 100 * $val / $new_pegs;
334        }
335        for my $what (qw(identical differ short long))
336        {
337            my $val = eval "\$$what";
338            $dat->{"${what}_pct_common"} = 100 * $val / $same_stop;
339        }
340    
341        print $fh Dump($dat);
342        return 1;
343    }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.5

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3