[Bio] / Sprout / GenomeStats.pl Repository:
ViewVC logotype

Diff of /Sprout/GenomeStats.pl

Parent Directory | Revision Log | View Patch

revision 1.24, Tue Oct 3 02:48:59 2006 UTC revision 1.29, Mon Aug 20 23:14:33 2007 UTC
# Line 6  Line 6 
6  the genomes in each of the genome groups. Genomes that are new to this version  the genomes in each of the genome groups. Genomes that are new to this version
7  of the Sprout will be specially marked. In order for this to work, both the  of the Sprout will be specially marked. In order for this to work, both the
8  current and previous Sprout databases must be available on this machine.  current and previous Sprout databases must be available on this machine.
 There is one positional parameter: the name of a directory in which to place  
 the include files.  
9    
10  The currently-supported command-line options are as follows.  The currently-supported command-line options are as follows.
11    
# Line 73  Line 71 
71    
72  Path to the CGI script for displaying detailed statistics.  Path to the CGI script for displaying detailed statistics.
73    
74    =item noNewCheck
75    
76    If specified, the check for new genomes in the group is suppressed. This
77    may need to be done if there's been a change in the database definition. Note
78    that all this really does is keep the B<NEW!> symbol from showing. It does
79    not affect which genomes show up in the table.
80    
81  =back  =back
82    
83  =cut  =cut
# Line 88  Line 93 
93  use SFXlate;  use SFXlate;
94  use CGI qw(:standard);  use CGI qw(:standard);
95  use FIG;  use FIG;
 no warnings 'once'; # only when coding  
96    
97  # Get the command-line options and parameters.  # Get the command-line options and parameters.
98  my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],  my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],
# Line 104  Line 108 
108                                              linkCGI => ['../FIG/genome_statistics.cgi',                                              linkCGI => ['../FIG/genome_statistics.cgi',
109                                                          'path to CGI script for detailed statistics'],                                                          'path to CGI script for detailed statistics'],
110                                              groupFile => ["$FIG_Config::sproutData/groups.tbl",                                              groupFile => ["$FIG_Config::sproutData/groups.tbl",
111                                                            "location of the NMPDR group description file"],                                                            'location of the NMPDR group description file'],
112                                                noNewCheck => [0, 'if specified, skips the check for new genomes'],
113                                                targetDir => ["$FIG_Config::nmpdr_base/next/html/includes",
114                                                              'target directory'],
115                                              },                                              },
116                                             "<targetDir>",                                             "",
117                                             @ARGV);                                             @ARGV);
118    # This table controls the special attribute columns. For each we need to know the attribute name and the
119    # column title. If any genomes in a group have a value for one of the special columns, that column is
120    # displayed along with the attribute values.
121    my %specialCols = (Serotype => 'Serotype_code',
122                       Phenotype => 'Phenotype');
123  # Verify the directory name.  # Verify the directory name.
124  my $targetDir = $parameters[0];  my $targetDir = $options->{targetDir};
125  if (! $targetDir) {  if (! $targetDir) {
126      Confess("No target directory specified.");      Confess("No target directory specified.");
127  } elsif (! -d $targetDir) {  } elsif (! -d $targetDir) {
128      Confess("Target directory $targetDir not found.");      Confess("Target directory $targetDir not found.");
129  } else {  } else {
     # Get the old Sprout.  
     my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);  
     # Extract the genome group data from the old Sprout.  
     my %oldGroupHash = $oldSprout->GetGroups();  
     if (! $options->{strict}) {  
         %oldGroupHash = Sprout::Fix(%oldGroupHash);  
     }  
130      # Get the new Sprout.      # Get the new Sprout.
131      my $sprout = SFXlate->new_sprout_only();      my $sprout = SFXlate->new_sprout_only();
132      my %newGroupHash = $sprout->GetGroups();      my %newGroupHash = $sprout->GetGroups();
133        # Extract the genome group data from the new Sprout.
134      if (! $options->{strict}) {      if (! $options->{strict}) {
135          %newGroupHash = Sprout::Fix(%newGroupHash);          %newGroupHash = Sprout::Fix(%newGroupHash);
136      }      }
137        # This hash will be used to determine which genomes are new.
138        my %oldGroupHash = ();
139        if ($options->{noNewCheck}) {
140            # Here we can't look at the old Sprout. Set up the hash
141            # so it looks like the old Sprout's data is the same as ours.
142            %oldGroupHash = map { $_ => $newGroupHash{$_} } keys %newGroupHash;
143        } else {
144            # Get the old Sprout.
145            my $oldSprout = SFXlate->old_sprout_only();
146            # Extract the genome group data from the old Sprout.
147            %oldGroupHash = $oldSprout->GetGroups();
148            if (! $options->{strict}) {
149                %oldGroupHash = Sprout::Fix(%oldGroupHash);
150            }
151        }
152        # Get a FIG object for computing attributes.
153        my $fig = FIG->new();
154      # Read the group file.      # Read the group file.
155      my %groupData = Sprout::ReadGroupFile($options->{groupFile});      my %groupData = Sprout::ReadGroupFile($options->{groupFile});
156      # Set up some useful stuff for the four count columns.      # Set up some useful stuff for the four count columns.
# Line 157  Line 180 
180          # Create the output file.          # Create the output file.
181          my $outFileName = "stats-" . lc($groupID) . ".inc";          my $outFileName = "stats-" . lc($groupID) . ".inc";
182          Open(\*GROUPFILE, ">$targetDir/$outFileName");          Open(\*GROUPFILE, ">$targetDir/$outFileName");
183          # Start the table.          # Get the special columns. We'll stuff them in a hash keyed by column name. Each column name will contain
184          print GROUPFILE "<table class=\"$tableStyle\">\n";          # a sub-hash that translates each genome ID to its applicable attribute value (if any).
185          # Create the header row.          my %specialData = ();
186          print GROUPFILE Tr( { class => 'odd' }, th(["Strain annotated in NMPDR",          for my $specialColumn (keys %specialCols) {
187                                                   "Genome size, bp",              # Get the attribute mapping.
188                my %specialDataList = map { $_->[0] => $_->[2] } $fig->get_attributes(\@newGenomes, $specialCols{$specialColumn});
189                # We only proceed if some attributes were found. As a result, the keys in %specialData will only be keys
190                # for columns that exist in the output.
191                if (scalar(keys %specialDataList)) {
192                    $specialData{$specialColumn} = \%specialDataList;
193                }
194            }
195            # Set up the column names.
196            my @columnNames = "Strain annotated in NMPDR";
197            push @columnNames, sort keys %specialData;
198            push @columnNames,  "Genome size, bp",
199                                                   "Protein Encoding Genes (PEGs)",                                                   "Protein Encoding Genes (PEGs)",
200                                                   "Named genes in subsystems",            # s0                                                   "Named genes in subsystems",            # s0
201                                                   "Named genes not in subsystems",        # n0                                                   "Named genes not in subsystems",        # n0
202                                                   "Hypothetical genes in subsystems",     # s1                                                   "Hypothetical genes in subsystems",     # s1
203                                                   "Hypothetical genes not in subsystems", # n1                                                   "Hypothetical genes not in subsystems", # n1
204                                                   "Subsystems",                                                   "Subsystems",
205                                                   "RNAs",                              "RNAs";
206                                                     ])) . "\n";          # Start the table.
207            print GROUPFILE "<table class=\"$tableStyle\">\n";
208            # Create the header row.
209            print GROUPFILE Tr( { class => 'odd' }, th(\@columnNames)) . "\n";
210          # The data rows will be built next. We'll be putting them into a hash keyed by          # The data rows will be built next. We'll be putting them into a hash keyed by
211          # organism name. The hash enables us to spit them out sorted by name.          # organism name. The hash enables us to spit them out sorted by name.
212          my %rows = ();          my %rows = ();
# Line 184  Line 221 
221              Trace("Processing ${new}genome $genomeID for $groupID.") if T(3);              Trace("Processing ${new}genome $genomeID for $groupID.") if T(3);
222              # Get the strain name.              # Get the strain name.
223              my $genomeName = $sprout->GenusSpecies($genomeID);              my $genomeName = $sprout->GenusSpecies($genomeID);
224                # Apply a link.
225                my $genomeText = CGI::a({ href => "../FIG/genome_statistics.cgi?genome=$genomeID;SPROUT=1" }, $genomeName);
226              # If this is a new strain, build the HTML for the NEW! mark.              # If this is a new strain, build the HTML for the NEW! mark.
227              if ($new) {              if ($new) {
228                  $new = " <span class=\"$markerStyle\">NEW!</span>";                  $new = " <span class=\"$markerStyle\">NEW!</span>";
# Line 246  Line 285 
285                                                 [$genomeID]);                                                 [$genomeID]);
286              my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";              my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";
287              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";
288              # Create the row text. Note that we use the distributive capability of the TD              # Start creating the table cells.
289              # function to apply the same style to each one.              my $rowHtml = td("$genomeText$new");
290              my $rowHtml = join("",              # Add any special columns.
291                                 td("$genomeName$new"),              for my $specialCol (keys %specialData) {
292                    # Here we get the attribute value. If there is none, we leave the column blank.
293                    my $attribute = $specialData{$specialCol}->{$genomeID} || "&nbsp;";
294                    $rowHtml .= td($attribute);
295                }
296                # Now add the data columns.
297                $rowHtml .= join("",
298                                 td({ class => $numStyle }, $genomeLen),                                 td({ class => $numStyle }, $genomeLen),
299                                 td({ class => $numStyle }, $pegCount),                                 td({ class => $numStyle }, $pegCount),
300                                 td({ class => $counterStyle }, \@counterValues),                                 td({ class => $counterStyle }, \@counterValues),

Legend:
Removed from v.1.24  
changed lines
  Added in v.1.29

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3