[Bio] / FigKernelPackages / Quality.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/Quality.pm

Parent Directory | Revision Log | View Patch

revision 1.2, Wed Nov 8 23:14:00 2006 UTC revision 1.5, Fri Dec 1 21:39:11 2006 UTC
# Line 21  Line 21 
21  use FIG;  use FIG;
22  use FIG_Config;  use FIG_Config;
23    
24    use Carp;
25    use File::Basename;
26    
27  =head1 Routines for Quality Assessment and Repair  =head1 Routines for Quality Assessment and Repair
28    
29  =head3 assess_assembly_quality  =head3 assess_assembly_quality
# Line 39  Line 42 
42  set the optional argument C<$estimated_read_length> to C<100> for genomes  set the optional argument C<$estimated_read_length> to C<100> for genomes
43  sequenced using the "454" technology.  sequenced using the "454" technology.
44    
45  Returns a list of two strings: C<($depth, $completeness)>.  Returns a list of two strings on success, C<($depth, $completeness)>,
46    and an empty list on failure.
47    
48  =cut  =cut
49    
# Line 48  Line 52 
52    
53      if (!-d $org_dir) {      if (!-d $org_dir) {
54          warn "OrgDir $org_dir does not exist";          warn "OrgDir $org_dir does not exist";
55          return (undef, undef);          return ();
56      }      }
57    
58      if (not defined($estimated_read_length)) {      if (not defined($estimated_read_length)) {
# Line 59  Line 63 
63      my $completeness;      my $completeness;
64      if (!-s "$org_dir/contigs") {      if (!-s "$org_dir/contigs") {
65          warn "Contigs file $org_dir/contigs does not exist";          warn "Contigs file $org_dir/contigs does not exist";
66          return (undef, undef);          return ();
67      }      }
68      else {      else {
69          my $summary = `sequence_length_histogram -null $org_dir/contigs 2>&1`;          my $summary = `sequence_length_histogram -null $org_dir/contigs 2>&1`;
70          if ($summary =~ m/There are (\d+) chars in (\d+) seq.*mean length = (\d+)/so) {          if ($summary =~ m/There are (\d+) chars in (\d+) seq.*mean length = (\d+)/so) {
71              my ($chars, $num_seqs, $expect) = ($1, $2, $3);              my ($chars, $num_seqs, $expect) = ($1, $2, $3);
72                if ($num_seqs == 1) {
73                    return (10.0, 0.999954);
74                }
75    
76              my $size  = $chars;              my $size  = $chars;
77              my $eff_read_len = $estimated_read_length - 50;              my $eff_read_len = $estimated_read_length - 50;
# Line 77  Line 84 
84                  $completeness = (1.0 - exp(-$depth));                  $completeness = (1.0 - exp(-$depth));
85                  $size         = $chars / $completeness;                  $size         = $chars / $completeness;
86    
87                    if ($completeness > 0.999955) {
88                        $completeness = 0.999955;
89                        $depth = 10.0;
90                        last;
91                    }
92    
93                  last if ($depth == $last);                  last if ($depth == $last);
94              }              }
95          }          }
# Line 104  Line 117 
117  The "Skeleton OrgDir" directory-path argument C<$org_dir> is mandatory,  The "Skeleton OrgDir" directory-path argument C<$org_dir> is mandatory,
118  and does not default to a directory in the SEED organism hierarchy.  and does not default to a directory in the SEED organism hierarchy.
119    
120  The routine returns a list of pointers to two hashes, C<($fatal, $warnings)>,  On success, the routine returns a list of pointers to two hashes, C<($fatal, $warnings)>,
121  whose keys are the type of error in each class, and whose values are the  whose keys are the type of error in each class, and whose values are the
122  number of features having that type of error.  number of features having that type of error; on failure, it returns the empty list.
123    
124  As a side-effect, this routine creates three files in the skeleton OrgDir:  As a side-effect, this routine creates three files in the skeleton OrgDir:
125    
# Line 141  Line 154 
154    
155      if (!-d $org_dir) {      if (!-d $org_dir) {
156          warn "OrgDir $org_dir does not exist\n";          warn "OrgDir $org_dir does not exist\n";
157          return (undef, undef);          return ();
158      }      }
159        my $parent   = basename($org_dir) || confess "Could not extract parent of $org_dir";
160    
161      my $fatal    = {};      my $fatal    = {};
162      my $warnings = {};      my $warnings = {};
163      if (  (!-f "$org_dir/quality.report")      if (system("assess_gene_call_quality --meta=$parent/meta.xml  $org_dir > $org_dir/quality.report 2>&1")) {
        || ((-M "$org_dir/quality.report") > (-M "$org_dir/contigs"))  
        ) {  
         if (system("assess_gene_call_quality $org_dir > $org_dir/quality.report 2>&1")) {  
164              warn "FAILED: assess_gene_call_quality $org_dir > $org_dir/quality.report 2>&1";              warn "FAILED: assess_gene_call_quality $org_dir > $org_dir/quality.report 2>&1";
165              return (undef, undef);          return ();
         }  
166      }      }
167    
168      my @report = `cat $org_dir/quality.report`;      my @report = `cat $org_dir/quality.report`;
# Line 210  Line 220 
220          return undef;          return undef;
221      }      }
222    
223        &assess_gene_call_quality($org_dir) || confess "Could not re-assess call quality of $org_dir";
224      return 1;      return 1;
225  }  }
226    
# Line 252  Line 263 
263          return undef;          return undef;
264      }      }
265    
266        &assess_gene_call_quality($org_dir) || confess "Could not re-assess call quality of $org_dir";
267      return 1;      return 1;
268  }  }
269    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.5

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3