[Bio] / FigKernelScripts / svr_pegs_in_subsystems.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_pegs_in_subsystems.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 : parrello 1.7 use strict;
4 : parrello 1.6 use SAPserver;
5 : parrello 1.1 use Getopt::Long;
6 : parrello 1.7 use ScriptThing;
7 : parrello 1.1
8 : parrello 1.4 # This is a SAS Component
9 :    
10 :     =head1 svr_pegs_in_subsystems
11 :    
12 : parrello 1.5 svr_pegs_in_subsystems genome_ids.tbl <subsystem_ids.tbl >peg_role_data.tbl
13 :    
14 : parrello 1.4 Return all genes in one or more subsystems found in one or more genomes.
15 :    
16 :     This script takes a list of genomes and a list of subsystems and returns a list
17 :     of the genes represented in each genome/subsystem pair. It takes one positional
18 :     parameter-- the name of the file containing the genome IDs, and reads the list
19 :     of subsystem IDs from the standard input.
20 :    
21 :     The standard output will be a tab-delimited file, each record containing a
22 :     subsystem ID, a functional role in that subsystem, and the ID of a gene with
23 :     that role from one of the supplied genomes.
24 :    
25 :     This is a pipe command. The input is from the standard input and the output is
26 :     to the standard output.
27 :    
28 :     The following command-line options are supported.
29 :    
30 :     =over 4
31 : parrello 1.1
32 : parrello 1.4 =item group
33 :    
34 :     If specified, then each output line will be for a single role, and the gene IDs will
35 :     be listed as a single comma-delimited string.
36 :    
37 :     =item noroles
38 :    
39 :     If specified, then the second column in each output line (functional role) will be
40 :     omitted from the output.
41 :    
42 : parrello 1.6 =item url
43 :    
44 :     The URL for the Sapling server, if it is to be different from the default.
45 :    
46 : parrello 1.7 =item c
47 :    
48 :     Number (1-based) of the column in the input file containing the subsystem name. If omitted,
49 :     the last column is used.
50 :    
51 : parrello 1.4 =back
52 : parrello 1.1
53 : parrello 1.4 =cut
54 : parrello 1.1
55 :     my $noroles = 0;
56 :     my $group = 0;
57 :     my $show_owner = 0;
58 : parrello 1.7 my $column;
59 : parrello 1.1 my $oldid = "";
60 : parrello 1.6 my $url = "";
61 : parrello 1.7 my $inputFile = "-";
62 : parrello 1.1
63 :     $0 =~ m/([^\/]+)$/;
64 :     my $self = $1;
65 : parrello 1.6 my $usage = "$self [--noroles --group --url=http://...] GenomeF < SubsystemIDs";
66 : parrello 1.1
67 : parrello 1.7 my $rc = GetOptions("noroles" => \$noroles, "group" => \$group, "url=s" => \$url, "i=s" => \$inputFile, "c=i" => \$column);
68 : parrello 1.1
69 :     if (!$rc) {
70 :     die "\n usage: $usage\n\n";
71 :     }
72 :    
73 :     my $roles = $noroles ? 0 : 1;
74 : parrello 1.6 my $ss = SAPserver->new(url => $url);
75 : parrello 1.1
76 : parrello 1.7 open my $gh, "<" . $ARGV[$#ARGV] || die "Genome file error: $!";
77 : parrello 1.1
78 : parrello 1.7 my @genomes = ScriptThing::GetList($gh);
79 :     close $gh;
80 :     open(my $ih, "<$inputFile") || die "Error opening input: $!";
81 :     while (my @tuples = ScriptThing::GetBatch($ih, 10, $column)) {
82 :     my @subs = map { $_->[0] } @tuples;
83 :     my $pegs_inss = $ss->pegs_in_subsystems(-genomes => \@genomes,
84 :     -subsystems => \@subs);
85 :     # Loop through the incoming lines, and pair the results with the inputs.
86 :     for my $tuple (@tuples) {
87 :     # Get the current line and its subsystem ID.
88 :     my ($sub, $line) = @$tuple;
89 :     # Get the role hash for this subsystem.
90 :     my $roleHash = $pegs_inss->{$sub};
91 :     # Only proceed if we found something.
92 :     if ($roleHash) {
93 :     # Are we including roles in the output?
94 :     if ($roles) {
95 :     # Yes. Loop through the roles.
96 :     for my $role (sort keys %$roleHash) {
97 :     # Get the features for this role.
98 :     my $fids = $roleHash->{$role};
99 :     # Are we in group mode?
100 :     if ($group) {
101 :     # Yes. Put all the pegs on a single line.
102 :     print "$line\t$role\t" . join(", ", @$fids) . "\n";
103 :     } else {
104 :     # No. Put each peg on a line by itself.
105 :     for my $fid (@$fids) {
106 :     print "$line\t$role\t$fid\n";
107 :     }
108 :     }
109 :     }
110 :     } else {
111 :     # Roles are not included in the output. We need to create
112 :     # a list of the features for all the roles.
113 :     my %fids;
114 :     for my $role (keys %$roleHash) {
115 :     my $fidList = $roleHash->{$role};
116 :     for my $fid (@$fidList) {
117 :     $fids{$fid} = 1;
118 :     }
119 :     }
120 :     my @fids = sort keys %fids;
121 :     # Are we grouping the features?
122 :     if ($group) {
123 :     # Yes. Put all the pegs on one line.
124 :     print "$line\t" . join(", ", @fids) . "\n";
125 :     } else {
126 :     # No. Put each peg on its own line.
127 :     for my $fid (@fids) {
128 :     print "$line\t$fid\n";
129 :     }
130 :     }
131 : parrello 1.4 }
132 : disz 1.2 }
133 : parrello 1.4 }
134 : disz 1.2 }
135 : parrello 1.1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3