[Bio] / FigKernelScripts / compare_gto_called_CDSs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/compare_gto_called_CDSs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Mon Jan 13 14:30:09 2014 UTC revision 1.2, Thu Nov 21 04:25:49 2019 UTC
# Line 51  Line 51 
51    
52  use strict;  use strict;
53  use warnings;  use warnings;
54    use Data::Dumper;
55    #use Carp;
56    
57    use GenomeTypeObject;
58  use SeedUtils;  use SeedUtils;
 use Data::Dumper;  
59  use YAML::Any;  use YAML::Any;
 # use Carp;  
60    
61  use Bio::KBase::GenomeAnnotation::Client;  #use Bio::KBase::GenomeAnnotation::Client;
62  use JSON::XS;  #use JSON::XS;
63    
64    
65  my $help;  my $help;
66  my $first_file;  my $first_file;
# Line 113  Line 115 
115  } else { $out_fh = \*STDOUT; }  } else { $out_fh = \*STDOUT; }
116    
117    
118  my ($old_tbl, $old_num_pegs) = &load_gto($first_file);  my ($old_tbl, $old_num_pegs, $old_metrics) = &load_gto($first_file);
119  my ($new_tbl, $new_num_pegs) = &load_gto($second_file);  my ($new_tbl, $new_num_pegs, $new_metrics) = &load_gto($second_file);
120    #die (Dump($old_metrics), Dump($new_metrics));
121    #die Dumper($old_num_pegs, $new_num_pegs, $old_tbl, $new_tbl);
122    
123  use constant FID     =>  0;  use constant FID     =>  0;
124  use constant LOCUS   =>  1;  use constant LOCUS   =>  1;
# Line 146  Line 149 
149      $keys{$key} = 1;      $keys{$key} = 1;
150  }  }
151  @keys = sort { &by_key($a,$b) } (keys %keys);  @keys = sort { &by_key($a,$b) } (keys %keys);
152    my $num_union = @keys;
153    
154  print STDERR (q(Num keys = ), (scalar @keys), qq(\n\n)) if $ENV{VERBOSE};  print STDERR (q(Num keys = ), (scalar @keys), qq(\n\n)) if $ENV{VERBOSE};
155    
# Line 227  Line 231 
231  {  {
232      if (open(my $fh, ">", $summary_yaml))      if (open(my $fh, ">", $summary_yaml))
233      {      {
234          &write_summary_yaml($fh, $old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);          &write_summary_yaml($fh, $old_metrics, $new_metrics,
235                                $old_num_pegs, $new_num_pegs, $num_union,
236                                $identical, $same_stop, $differ,
237                                $short, $long, $added, $lost);
238      }      }
239      else      else
240      {      {
241          die "Error opening $summary_yaml for writing: $!";          die "Error opening $summary_yaml for writing: $!";
242      }      }
243  }  }
244  else  
245  {  &write_summary($old_metrics, $new_metrics,
246      &write_summary($old_num_pegs, $new_num_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost);                 $old_num_pegs, $new_num_pegs, $num_union,
247  }                 $identical, $same_stop, $differ,
248                   $short, $long, $added, $lost);
249    
250  exit(0);  exit(0);
251    
# Line 248  Line 256 
256      my ($tbl, $num_pegs);      my ($tbl, $num_pegs);
257      my ($key, $id, $locus, $func, $contig, $beg, $end, $left, $right, $len, $strand, $type, $alt_ids);      my ($key, $id, $locus, $func, $contig, $beg, $end, $left, $right, $len, $strand, $type, $alt_ids);
258    
259      my $fh;      #my $fh;
260      open($fh, "<", $filename) or die "Cannot open $filename: $!";      #open($fh, "<", $filename) or die "Cannot open $filename: $!";
261        #my $json = JSON::XS->new;
262    
263        my $gto = GenomeTypeObject->new({ file => $filename });
264        my $metrics = $gto->metrics();
265        $metrics->{gc_content} = $gto->compute_contigs_gc();
266    
267      my $json = JSON::XS->new;      my $total_coding = 0;
     my $gto;  
     {  
         local $/;  
         undef $/;  
         my $gto_txt = <$fh>;  
         $gto = $json->decode($gto_txt);  
268          foreach my $feature (@ { $gto->{features} })          foreach my $feature (@ { $gto->{features} })
269          {          {
270              ($id,   $contig, $strand,              ($id,   $contig, $strand,
# Line 268  Line 275 
275              $type = $feature->{type};              $type = $feature->{type};
276              next unless ($type =~ m/^peg|CDS$/i);              next unless ($type =~ m/^peg|CDS$/i);
277    
278            $total_coding += $len;
279    
280              $func = $feature->{function} || q();              $func = $feature->{function} || q();
281    
282              $alt_ids = join(',', (defined($feature->{aliases}) ? @ { $feature->{aliases} } : q()));              $alt_ids = join(',', (defined($feature->{aliases}) ? @ { $feature->{aliases} } : q()));
# Line 284  Line 293 
293    
294                  if (not defined($tbl->{$key})) {                  if (not defined($tbl->{$key})) {
295                      ++$num_pegs;                      ++$num_pegs;
296                      $tbl->{$key} = [ $id, $locus, $contig, $strand, $beg, $end, $left, $right, $len, $type, $func, $alt_ids, $feature ];                  $tbl->{$key} = [ $id, $locus, $contig, $strand,
297                                     $beg, $end, $left, $right, $len,
298                                     $type, $func, $alt_ids, $feature ];
299                  }                  }
300                  else {                  else {
301                      warn ("Skipping same-STOP TBL entry for $filename, $key:\n"                      warn ("Skipping same-STOP TBL entry for $filename, $key:\n"
# Line 296  Line 307 
307                  warn ("INVALID ENTRY:\n", Dumper($feature), "\n\n");                  warn ("INVALID ENTRY:\n", Dumper($feature), "\n\n");
308              }              }
309          }          }
310      }      $metrics->{coding_fraction} = 100.0 * $total_coding / $metrics->{totlen};
311    
312      return ($tbl, $num_pegs);      return ($tbl, $num_pegs, $metrics);
313  }  }
314    
315  sub feature_bounds {  sub feature_bounds {
# Line 457  Line 468 
468    
469    
470  sub write_summary {  sub write_summary {
471      my ($old_pegs, $new_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost) = @_;      my ($old_metrics, $new_metrics,
472            $old_pegs, $new_pegs, $num_union,
473      print STDERR "old_num   = $old_pegs PEGs\n";          $identical, $same_stop, $differ,
474      print STDERR "new_num   = $new_pegs PEGs\n\n";          $short, $long, $added, $lost
475            ) = @_;
476      print  STDERR '             Num    %_Old    %_New', qq(\n);      my $num_common = $same_stop;
477      printf STDERR "same_stop = %4u   %5.2f%%   %5.2f%%\n"  
478          , $same_stop, 100*$same_stop/$old_pegs, 100*$same_stop/$new_pegs;      my $pct_same_old   = 100.0 * $same_stop / $old_pegs;
479        my $pct_same_new   = 100.0 * $same_stop / $new_pegs;
480        my $pct_same_union = 100.0 * $same_stop / $num_union;
481      printf STDERR "added     = %4u   %5.2f%%   %5.2f%%\n"  
482          , $added, 100*$added/$old_pegs, 100*$added/$new_pegs;      my $pct_added_old   = 100.0 * $added / $old_pegs;
483        my $pct_added_new   = 100.0 * $added / $new_pegs;
484      printf STDERR "lost      = %4u   %5.2f%%   %5.2f%%\n\n"      my $pct_added_union = 100.0 * $added / $num_union;
485          , $lost, 100*$lost/$old_pegs, 100*$lost/$new_pegs;  
486        my $pct_lost_old   = 100.0 * $lost / $old_pegs;
487        my $pct_lost_new   = 100.0 * $lost / $new_pegs;
488        my $pct_lost_union = 100.0 * $lost / $num_union;
489    
490        my $pct_identical_old    = 100.0 * $identical / $old_pegs;
491        my $pct_identical_new    = 100.0 * $identical / $new_pegs;
492        my $pct_identical_common = 100.0 * $identical / $num_common;
493        my $pct_identical_union  = 100.0 * $identical / $num_union;
494    
495        my $pct_differ_old       = 100.0 * $differ / $old_pegs;
496        my $pct_differ_new       = 100.0 * $differ / $new_pegs;
497        my $pct_differ_common    = 100.0 * $differ / $num_common;
498        my $pct_differ_union     = 100.0 * $differ / $num_union;
499    
500        my $pct_short_old        = 100.0 * $short / $old_pegs;
501        my $pct_short_new        = 100.0 * $short / $new_pegs;
502        my $pct_short_common     = 100.0 * $short / $num_common;
503        my $pct_short_union      = 100.0 * $short / $num_union;
504    
505        my $pct_long_old         = 100.0 * $long / $old_pegs;
506        my $pct_long_new         = 100.0 * $long / $new_pegs;
507        my $pct_long_common      = 100.0 * $long / $num_common;
508        my $pct_long_union       = 100.0 * $long / $num_union;
509    
510    {
511        format STDERR =
512    genome_size = @####### bp
513                  $old_metrics->{totlen}
514    GC_Content  =   @##.##%
515                  $old_metrics->{gc_content}
516    
517                  Num_PEGs     Coding_Fraction
518    old_num     =  @#####          @##.##%
519                $old_pegs,  $old_metrics->{coding_fraction}
520    new_num     =  @#####          @##.##%
521                $new_pegs,  $new_metrics->{coding_fraction}
522    num_union   =  @#####
523                $num_union
524    
525                  Num      %_Old     %_New   %_Union
526    same_stop = @####    @##.##%   @##.##%   @##.##%
527                $same_stop, $pct_same_old, $pct_same_new,  $pct_same_union
528    added     = @####    @##.##%   @##.##%   @##.##%
529                $added, $pct_added_old,    $pct_added_new, $pct_added_union
530    lost      = @####    @##.##%   @##.##%   @##.##%
531                $lost,  $pct_lost_old,     $pct_lost_new,  $pct_lost_union
532    
533                  Num      %_Old     %_New  %_Common   %_Union
534    identical = @####    @##.##%   @##.##%   @##.##%   @##.##%
535           $identical, $pct_identical_old, $pct_identical_new, $pct_identical_common, $pct_identical_union
536    differ    = @####    @##.##%   @##.##%   @##.##%   @##.##%   (Includes Lost and Added)
537              $differ,  $pct_differ_old,   $pct_differ_new,   $pct_differ_common,     $pct_differ_union
538    short     = @####    @##.##%   @##.##%   @##.##%   @##.##%
539              $short,   $pct_short_old,    $pct_short_new,    $pct_short_common,      $pct_short_union
540    long      = @####    @##.##%   @##.##%   @##.##%   @##.##%
541              $long,    $pct_long_old,     $pct_long_new,     $pct_long_common,       $pct_long_union
542    
543      print  STDERR '             Num    %_Old    %_New  %_Common', qq(\n);  .
     printf STDERR "identical = %4u   %5.2f%%   %5.2f%%   %5.2f%%\n"  
         , $identical, 100*$identical/$old_pegs, 100*$identical/$new_pegs, 100*$identical/$same_stop;;  
   
     printf STDERR "differ    = %4u   %5.2f%%   %5.2f%%   %5.2f%%  %s\n"  
         , $differ, 100*$differ/$old_pegs, 100*$differ/$new_pegs, 100*$differ/$same_stop, q((Includes Lost and Added));  
   
     printf STDERR "short     = %4u   %5.2f%%   %5.2f%%   %5.2f%%\n"  
         , $short, 100*$short/$old_pegs, 100*$short/$new_pegs, 100*$short/$same_stop;  
   
     printf STDERR "long      = %4u   %5.2f%%   %5.2f%%   %5.2f%%\n\n"  
         , $long, 100*$long/$old_pegs, 100*$long/$new_pegs, 100*$long/$same_stop;  
544    
545        write STDERR;
546    }
547    
548      return 1;      return 1;
549  }  }
550    
551    
552  sub write_summary_yaml {  sub write_summary_yaml {
553      my ($fh, $old_pegs, $new_pegs, $identical, $same_stop, $differ, $short, $long, $added, $lost) = @_;      my ($fh, $old_metrics, $new_metrics,
554            $old_pegs, $new_pegs, $num_union,
555            $identical, $same_stop, $differ,
556            $short, $long, $added, $lost
557            ) = @_;
558    
559      my $dat = {      my $dat = {
560            genome_size => $old_metrics->{totlen},
561            gc_content  => $old_metrics->{gc_content},
562          old_num => $old_pegs,          old_num => $old_pegs,
563          new_num => $new_pegs,          new_num => $new_pegs,
564            num_common => $same_stop,
565            num_union  => $num_union,
566            old_coding_fraction => $old_metrics->{coding_fraction},
567            new_coding_fraction => $new_metrics->{coding_fraction},
568      };      };
569    
570      for my $what (qw(same_stop added lost identical differ short long))      foreach my $what (qw(same_stop added lost identical differ short long))
571      {      {
572          my $val = eval "\$$what";          my $val = eval "\$$what";
573          $dat->{$what} = $val;          $dat->{$what} = $val;
574          $dat->{"${what}_pct_old"} = 100 * $val / $old_pegs;          $dat->{"${what}_pct_old"} = 100 * $val / $old_pegs;
575          $dat->{"${what}_pct_new"} = 100 * $val / $new_pegs;          $dat->{"${what}_pct_new"} = 100 * $val / $new_pegs;
576            $dat->{"${what}_pct_union"} = 100 * $val / $num_union;
577      }      }
578      for my $what (qw(identical differ short long))  
579        foreach my $what (qw(identical differ short long))
580      {      {
581          my $val = eval "\$$what";          my $val = eval "\$$what";
582          $dat->{"${what}_pct_common"} = 100 * $val / $same_stop;          $dat->{"${what}_pct_common"} = 100 * $val / $same_stop;
583      }      }
584    
585      print $fh Dump($dat);      print $fh Dump($dat);
586        #print STDERR Dumper($dat);
587    
588      return 1;      return 1;
589  }  }
590    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3