[Bio] / FigKernelScripts / svr_subsystem_genome_data.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_subsystem_genome_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     use Getopt::Long;
5 :     use SAPserver;
6 :     use ScriptThing;
7 :    
8 :     #
9 :     # This is a SAS Component.
10 :     #
11 :    
12 :     =head1 svr_subsystem_genome_data
13 :    
14 :     svr_subsystem_genome_data --genomeFile=genomes.tbl <sub_ids.tbl >sub_data.tbl
15 :    
16 :     Output the features, variants, and roles for one or more subsystems, optionally
17 :     filtered by genome ID.
18 :    
19 :     This script takes as input a tab-delimited file with subsystem IDs at the end of each
20 :     line. For each subsystem ID, numerous output lines are produced describing the contents
21 :     of the subsystem. Each line will consist of
22 :    
23 :     =over 4
24 :    
25 :     =item 1
26 :    
27 :     Subsystem ID
28 :    
29 :     =item 2
30 :    
31 :     Genome ID (possibly with a region code)
32 :    
33 :     =item 3
34 :    
35 :     Code for the variant of this subsystem used by the genome.
36 :    
37 :     =item 4
38 :    
39 :     ID of a subsystem role.
40 :    
41 :     =item 5
42 :    
43 :     ID of a feature performing the role.
44 :    
45 :     =back
46 :    
47 :     This is a pipe command: the input is taken from the standard input and the output
48 :     is to the standard output.
49 :    
50 :     =head2 Command-Line Options
51 :    
52 :     =over 4
53 :    
54 :     =item url
55 :    
56 :     The URL for the Sapling server, if it is to be different from the default.
57 :    
58 :     =item c
59 :    
60 :     Column index. If specified, indicates that the input subsystem IDs should be taken from the
61 :     indicated column instead of the last column. The first column is column 1.
62 :    
63 :     =item genomeFile
64 :    
65 :     If specified, the name of a tab-delimited file containing genome IDs in the last
66 :     column. Only data relating to the specified genomes will be included in the output.
67 :    
68 :    
69 :     =back
70 :    
71 :     =cut
72 :    
# Main script body.
#
# Reads subsystem IDs from the standard input in small batches, asks the
# Sapling server which features fill which roles in each subsystem (optionally
# restricted to the genomes listed in --genomeFile), and writes one
# tab-delimited line per (subsystem, genome, variant code, role, feature) to
# the standard output. Subsystems missing from the server's reply are reported
# on the standard error.

# Parse the command-line options.
my $url = '';
my $column = '';
my $genomeFile = '';
my $opted = GetOptions('url=s' => \$url, 'c=i' => \$column, 'genomeFile=s' => \$genomeFile);
if (! $opted) {
    print "usage: svr_subsystem_genome_data [--url=http://...] [--c=N] [--genomeFile=genomes.tbl] <input >output\n";
} else {
    # Get the server object.
    my $sapServer = SAPserver->new(url => $url);
    # Get the list of genomes. It stays empty when no genome file is given.
    my @genomes;
    if ($genomeFile) {
        # Use the low-precedence "or" so that a failed open really dies. With
        # "||" the test binds to the file name string, which is always true,
        # and the failure would go unnoticed.
        open(my $ih, '<', $genomeFile) or die "Cannot open genome file: $!";
        @genomes = ScriptThing::GetList($ih);
        close $ih;
    }
    # The main loop processes chunks of input. We only do 5 at a time because this is a
    # slow process.
    while (my @tuples = ScriptThing::GetBatch(\*STDIN, 5, $column)) {
        # Ask the server for results. The first element of each tuple is the
        # subsystem ID.
        my $document = $sapServer->pegs_in_variants(-subsystems => [map { $_->[0] } @tuples],
                                                    -genomes => \@genomes);
        # Loop through the IDs, producing output.
        for my $tuple (@tuples) {
            my $sub = $tuple->[0];
            # Get this subsystem's data: a hash keyed by genome ID.
            my $ssData = $document->{$sub};
            # Did we get something?
            if (! $ssData) {
                # No. Write an error notification.
                print STDERR "Not found: $sub\n";
            } else {
                # Yes. We must run through the results producing output.
                for my $genome (sort keys %$ssData) {
                    # Each genome row is the variant code followed by the cells.
                    # Unpack into copies rather than shifting the row in place:
                    # the same subsystem ID may occur more than once in a batch,
                    # and the server's result must still be intact the next time.
                    my ($vc, @cells) = @{$ssData->{$genome}};
                    # Loop through the cells.
                    for my $cell (@cells) {
                        # Each cell is a role followed by the features performing it.
                        my ($role, @fids) = @$cell;
                        # Loop through the features in the cell.
                        for my $fid (@fids) {
                            print join("\t", $sub, $genome, $vc, $role, $fid) . "\n";
                        }
                    }
                }
            }
        }
    }
}
125 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3