[Bio] / Sprout / GenomeStats.pl Repository:
ViewVC logotype

Diff of /Sprout/GenomeStats.pl

Parent Directory | Revision Log | View Patch

revision 1.28, Tue Apr 10 06:01:56 2007 UTC | revision 1.30, Thu Dec 6 14:58:03 2007 UTC
# Line 93  Line 93 
93  use SFXlate;  use SFXlate;
94  use CGI qw(:standard);  use CGI qw(:standard);
95  use FIG;  use FIG;
 no warnings 'once'; # only when coding  
96    
97  # Get the command-line options and parameters.  # Get the command-line options and parameters.
98  my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],  my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],
# Line 108  Line 107 
107                                              counterStyle => ['countercell', 'style for cells with counter values'],                                              counterStyle => ['countercell', 'style for cells with counter values'],
108                                              linkCGI => ['../FIG/genome_statistics.cgi',                                              linkCGI => ['../FIG/genome_statistics.cgi',
109                                                          'path to CGI script for detailed statistics'],                                                          'path to CGI script for detailed statistics'],
                                             groupFile => ["$FIG_Config::sproutData/groups.tbl",  
                                                           'location of the NMPDR group description file'],  
110                                              noNewCheck => [0, 'if specified, skips the check for new genomes'],                                              noNewCheck => [0, 'if specified, skips the check for new genomes'],
111                                              targetDir => ["$FIG_Config::nmpdr_base/next/html/includes",                                              targetDir => ["$FIG_Config::nmpdr_base/next/html/includes",
112                                                            'target directory'],                                                            'target directory'],
113                                              },                                              },
114                                             "",                                             "",
115                                             @ARGV);                                             @ARGV);
116    # The return type (error/no error) goes in here.
117    my $rtype;
118    eval {
119        # This table controls the special attribute columns. For each we need to know the attribute name and the
120        # column title. If any genomes in a group have a value for one of the special columns, that column is
121        # displayed along with the attribute values.
122        my %specialCols = (Serotype => 'Serotype_code',
123                           Phenotype => 'Phenotype');
124  # Verify the directory name.  # Verify the directory name.
125  my $targetDir = $options->{targetDir};  my $targetDir = $options->{targetDir};
126  if (! $targetDir) {  if (! $targetDir) {
# Line 128  Line 133 
133      my %newGroupHash = $sprout->GetGroups();      my %newGroupHash = $sprout->GetGroups();
134      # Extract the genome group data from the new Sprout.      # Extract the genome group data from the new Sprout.
135      if (! $options->{strict}) {      if (! $options->{strict}) {
136          %newGroupHash = Sprout::Fix(%newGroupHash);              %newGroupHash = $sprout->Fix(%newGroupHash);
137      }      }
138      # This hash will be used to determine which genomes are new.      # This hash will be used to determine which genomes are new.
139      my %oldGroupHash = ();      my %oldGroupHash = ();
# Line 142  Line 147 
147          # Extract the genome group data from the old Sprout.          # Extract the genome group data from the old Sprout.
148          %oldGroupHash = $oldSprout->GetGroups();          %oldGroupHash = $oldSprout->GetGroups();
149          if (! $options->{strict}) {          if (! $options->{strict}) {
150              %oldGroupHash = Sprout::Fix(%oldGroupHash);                  %oldGroupHash = $oldSprout->Fix(%oldGroupHash);
151          }          }
152      }      }
153      # Get a FIG object for computing attributes.      # Get a FIG object for computing attributes.
154      my $fig = FIG->new();      my $fig = FIG->new();
155      # Read the group file.          # Get the super-group list.
156      my %groupData = Sprout::ReadGroupFile($options->{groupFile});          my @superGroups = sort keys %newGroupHash;
157      # Set up some useful stuff for the four count columns.      # Set up some useful stuff for the four count columns.
158      my %linkParms = ( s0 => "nothypo_sub", n0 => "nothypo_nosub",      my %linkParms = ( s0 => "nothypo_sub", n0 => "nothypo_nosub",
159                        s1 => "hypo_sub", n1 => "hypo_nosub" );                        s1 => "hypo_sub", n1 => "hypo_nosub" );
# Line 160  Line 165 
165      # Prepare a hash for the summary counters. These will be used on the organism summary page.      # Prepare a hash for the summary counters. These will be used on the organism summary page.
166      my %summaries = ();      my %summaries = ();
167      # Loop through the groups.      # Loop through the groups.
168      for my $groupID (keys %newGroupHash) {          for my $groupID (@superGroups) {
169          Trace("Processing group $groupID.") if T(2);          Trace("Processing group $groupID.") if T(2);
170          # Create a hash for summarizing the counters.          # Create a hash for summarizing the counters.
171          my %groupTotals = ( genomes => 0, pegs => 0, RNAs => 0,          my %groupTotals = ( genomes => 0, pegs => 0, RNAs => 0,
# Line 176  Line 181 
181          # Create the output file.          # Create the output file.
182          my $outFileName = "stats-" . lc($groupID) . ".inc";          my $outFileName = "stats-" . lc($groupID) . ".inc";
183          Open(\*GROUPFILE, ">$targetDir/$outFileName");          Open(\*GROUPFILE, ">$targetDir/$outFileName");
184          # Get the serotypes.              # Get the special columns. We'll stuff them in a hash keyed by column name. Each column name will contain
185          my %serotypes = map { $_->[0] => $_->[2] } $fig->get_attributes(\@newGenomes, "Serotype_code");              # a sub-hash that translates each genome ID to its applicable attribute value (if any).
186          my $hasSeroData = (scalar(keys %serotypes) > 0);              my %specialData = ();
187          # If we have serotypes, we add an extra column.              for my $specialColumn (keys %specialCols) {
188          my @columnNames = "Strain annotated in NMPDR";                  # Get the attribute mapping.
189          if ($hasSeroData) {                  my %specialDataList = map { $_->[0] => $_->[2] } $fig->get_attributes(\@newGenomes, $specialCols{$specialColumn});
190              push @columnNames, "Serotype";                  # We only proceed if some attributes were found. As a result, the keys in %specialData will only be keys
191                    # for columns that exist in the output.
192                    if (scalar(keys %specialDataList)) {
193                        $specialData{$specialColumn} = \%specialDataList;
194                    }
195          }          }
196                # Set up the column names.
197                my @columnNames = "Strain annotated in NMPDR";
198                push @columnNames, sort keys %specialData;
199          push @columnNames,  "Genome size, bp",          push @columnNames,  "Genome size, bp",
200                              "Protein Encoding Genes (PEGs)",                              "Protein Encoding Genes (PEGs)",
201                              "Named genes in subsystems",            # s0                              "Named genes in subsystems",            # s0
# Line 276  Line 288 
288              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";
289              # Start creating the table cells.              # Start creating the table cells.
290              my $rowHtml = td("$genomeText$new");              my $rowHtml = td("$genomeText$new");
291              # Check for a serotype.                  # Add any special columns.
292              if ($hasSeroData) {                  for my $specialCol (keys %specialData) {
293                  my $seroType = $serotypes{$genomeID} || "&nbsp;";                      # Here we get the attribute value. If there is none, we leave the column blank.
294                  $rowHtml .= td($seroType);                      my $attribute = $specialData{$specialCol}->{$genomeID} || "&nbsp;";
295                        $rowHtml .= td($attribute);
296              }              }
297              # Now add the data columns.              # Now add the data columns.
298              $rowHtml .= join("",              $rowHtml .= join("",
# Line 335  Line 348 
348      for my $groupName (sort keys %summaries) {      for my $groupName (sort keys %summaries) {
349          my $group = $summaries{$groupName};          my $group = $summaries{$groupName};
350          # Compute the link for the current group.          # Compute the link for the current group.
351          my $groupLink = a({ href => $groupData{$groupName}->[0] }, $groupName);              my $groupLink = a({ href => $sprout->GroupPageName($groupName) }, $groupName);
352          # Create the table row.          # Create the table row.
353          my $rowHtml = join("",          my $rowHtml = join("",
354                             td($groupLink),                             td($groupLink),
# Line 354  Line 367 
367      # We're all done.      # We're all done.
368      Trace("Processing complete.") if T(2);      Trace("Processing complete.") if T(2);
369  }  }
370    };
371    if ($@) {
372        Trace("Stats failed with error: $@") if T(0);
373        $rtype = "error";
374    } else {
375        Trace("Stats complete.") if T(2);
376        $rtype = "no error";
377    }
378    if ($options->{phone}) {
379        my $msgID = Tracer::SendSMS($options->{phone}, "GenomeStats terminated with $rtype.");
380        if ($msgID) {
381            Trace("Phone message sent with ID $msgID.") if T(2);
382        } else {
383            Trace("Phone message not sent.") if T(2);
384        }
385    }
386    
387  1;  1;

Legend:
Removed from v.1.28  
changed lines
  Added in v.1.30

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3