[Bio] / FigKernelPackages / SAPtutorial.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/SAPtutorial.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Mon Aug 3 21:31:42 2009 UTC revision 1.3, Tue Aug 4 21:31:29 2009 UTC
# Line 299  Line 299 
299          }          }
300      }      }
301    
302    =head3 Genes in Subsystems for Genomes
303    
304    This next example finds all genes in subsystems for a specific genome. We will
305    perform this operation in two phases. First, we will find the subsystems for
306    each genome, and then the genes for those subsystems. This requires two Sapling
307    Server functions.
308    
309    =over 4
310    
311    =item *
312    
313    L<SAP/genomes_to_subsystems> returns a list of the subsystems for each
314    specified genome.
315    
316    =item *
317    
318    L<SAP/ids_in_subsystems> returns a list of the genes in each listed
319    subsystem for a specified genome.
320    
321    =back
322    
323    Our example program will loop through the genome IDs in an input file. For
324    each genome, it will call I<genomes_to_subsystems> to get the subsystem list,
325    and then feed the list to I<ids_in_subsystems> to get the result.
326    
327    L<SAP/ids_in_subsystems> returns gene IDs rather than taking them as input.
328    Like L<SAP/ids_to_subsystems> and L<SAP/ids_to_functions>, it takes a C<source>
329    parameter that indicates the type of ID desired (e.g. C<NCBI>, C<CMR>, C<LocusTag>).
330    In this case, however, the type describes how the gene IDs will be formatted in
331    the output rather than the type expected upon input. If a gene does not have an
332    ID for a particular source type (e.g. C<fig|100226.1.peg.3361> does not have a
333    locus tag), then the FIG identifier is used. The default source type (C<SEED>)
334    means that FIG identifiers will be used for everything.
335    
336    The program is given below.
337    
338        use strict;
339        use SAPserver;
340    
341        my $sapServer = SAPserver->new();
342        # Loop through the input file. Note that this loop will stop on the first
343        # blank line.
344        while (my $genomeID = <STDIN>) {
345            chomp $genomeID;
346            # Get the subsystems for this genome.
347            my $subHash = $sapServer->genomes_to_subsystems(ids => $genomeID);
348            # The data returned for each genome (and in our case there's only one)
349            # includes the subsystem name and the variant code. The following
350            # statement strips away the variant codes, leaving only the subsystem
351            # names.
352            my $subList = [ map { $_->[0] } @{$subHash->{$genomeID}} ];
353            # Ask for the genes in each subsystem, using NCBI identifiers.
354            my $roleHashes = $sapServer->ids_in_subsystems(subsystems => $subList,
355                                                         genome => $genomeID,
356                                                         source => 'NCBI',
357                                                         roleForm => 'full');
358            # The hash maps each subsystem ID to a hash of roles to lists of feature
359            # IDs. We therefore use three levels of nested loops to produce our
360            # output lines. At the top level we have a hash mapping subsystem IDs
361            # to role hashes.
362            for my $subsystem (sort keys %$roleHashes) {
363                my $geneHash = $roleHashes->{$subsystem};
364                # The gene hash maps each role to a list of gene IDs.
365                for my $role (sort keys %$geneHash) {
366                    my $geneList = $geneHash->{$role};
367                    # Finally, we loop through the gene IDs.
368                    for my $gene (@$geneList) {
369                        print "$genomeID\t$subsystem\t$role\t$gene\n";
370                    }
371                }
372            }
373        }
374    
375    The I<ids_in_subsystems> service has several useful options for changing the nature
376    of the output. For example, in the above program each role is represented by a
377    full description (C<roleForm> set to C<full>). If you don't need the roles, you
378    can specify C<none> for the role form. You can also request that the gene IDs
379    be returned in a comma-separated list instead of a list data structure. These
380    two changes can drastically simplify the above program.
381    
382        use strict;
383        use SAPserver;
384    
385        my $sapServer = SAPserver->new();
386        # Loop through the input file. Note that this loop will stop on the first
387        # blank line.
388        while (my $genomeID = <STDIN>) {
389            chomp $genomeID;
390            # Get the subsystems for this genome.
391            my $subHash = $sapServer->genomes_to_subsystems(ids => $genomeID);
392            # The data returned for each genome (and in our case there's only one)
393            # includes the subsystem name and the variant code. The following
394            # statement strips away the variant codes, leaving only the subsystem
395            # names.
396            my $subList = [ map { $_->[0] } @{$subHash->{$genomeID}} ];
397            # Ask for the genes in each subsystem, using NCBI identifiers.
398            my $genesHash = $sapServer->ids_in_subsystems(subsystems => $subList,
399                                                         genome => $genomeID,
400                                                         source => 'NCBI',
401                                                         roleForm => 'none',
402                                                         grouped => 1);
403            # The hash maps each subsystem ID to a comma-delimited list of gene IDs.
404            for my $subsystem (sort keys %$genesHash) {
405                my $genes = $genesHash->{$subsystem};
406                print "$genomeID\t$subsystem\t$genes\n";
407            }
408        }
409    
410  =cut  =cut
411    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3