[Bio] / FigKernelScripts / svr_get_coupling_data.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_get_coupling_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : overbeek 1.1 ########################################################################
2 :    
3 :     use strict;
4 :     use Data::Dumper;
5 :     use Carp;
6 :    
7 :     #
8 :     # This is a SAS Component
9 :     #
10 :    
11 :    
12 :     =head1 svr_get_coupling_data [-g genome] -d CouplingDirectory
13 :    
14 :     Get functional coupling data for genes in a genome
15 :    
16 :     ------
17 :    
18 :     Example:
19 :    
20 :     svr_get_coupling_data -d 83333.1.Coupling -g 83333.1
21 :    
22 :     Would build a directory containing the following files:
23 :    
24 :     functionally.coupled - a 3-column table giving basic fc scores
25 :     functionally.coupled.hypos - a 4-column table [hypo-PEG,fc-score,nonHypo-PEG,nonHypo-function]
26 :     functionally.coupled.hypos.by.peg - functionally.coupled.hypos sorted by PEG
27 :    
28 :     expression.coupled - a 3-column table giving basic fc scores (Pearson Correlation coefficients)
29 :     expression.coupled.hypos - a 4-column table [hypo-PEG,fc-score,nonHypo-PEG,nonHypo-function]
30 :     expression.coupled.hypos.by.peg - expression.coupled.hypos sorted by PEG
31 :    
32 :     ------
33 :    
34 :     The genome is normally taken from the command line. If it is not,
35 :     it is read from STDIN and a separate output directory is constructed for
36 :     each genome ID.
37 :    
38 :     Normally, the last field in each
39 :     line would contain the genome ID.
40 :     If some other column contains the genome IDs, use
41 :    
42 :     -c N
43 :    
44 :     where N is the column (from 1).
45 :    
46 :     This is a pipe command. The input is taken from the standard input, and the
47 :     output is to the standard output.
48 :    
49 :     =head2 Command-Line Options
50 :    
51 :     =over 4
52 :    
53 :     =item -g GenomeID
54 :    
55 :     This is normally used to get the genome ID, and in this case output for a single genome is constructed.
56 :     If it is not used, genome IDs are read from STDIN.
57 :    
58 :     =item -c Column
59 :    
60 :     This is used only if there is no -g parameter and the column containing genome IDs is not the last.
61 :    
62 :     =item -d CouplingDirectory
63 :    
64 :     If -g Genome is used, this directory gets built and will contain the data files for the genome.
65 :     If not, then this directory gets built. Subdirectories (named the genome ID) will get the
66 :     output files for each genome named in the input.
67 :    
68 :     =back
69 :    
70 :     =head2 Output Format
71 :    
72 :     If -g Genome is used, the output directory will contain the files for the single genome.
73 :     If not, the output directory will have a subdirectory for each genome specified in STDIN (named
74 :     by the genome ID).
75 :    
76 :     =cut
77 :    
78 :     use SeedUtils;
79 :     use Getopt::Long;
80 :    
81 :     my $usage = "usage: svr_get_coupling_data [-c column] [-g Genome] -d OuputDirectory";
82 :    
83 :     my $column;
84 :     my $g;
85 :     my $outputD;
86 :    
87 :     my $rc = GetOptions('c=i' => \$column,
88 :     'g=s' => \$g,
89 :     'd=s' => \$outputD);
90 :    
91 : overbeek 1.2 if ((! $rc) || (! $outputD)) { print STDERR $usage; exit }
92 : overbeek 1.1 mkdir($outputD,0777) || die "could not make $outputD";
93 :    
94 :     my @lines;
95 :     if ($g)
96 :     {
97 :     @lines = ([$g]);
98 :     }
99 :     else
100 :     {
101 :     @lines = map { chomp; [split(/\t/,$_)] } <STDIN>;
102 :     }
103 :     if (! $column) { $column = @{$lines[0]} }
104 :    
105 :     open(EXP,"svr_exp_genomes |") || die "cannot get which genomes have expression data";
106 :     my %has_expression = map { $_ =~ /(\d+\.\d+)$/; ($1 => 1) } <EXP>;
107 :     close(EXP);
108 :    
109 :     foreach $_ (@lines)
110 :     {
111 :     my $genome = $_->[$column-1];
112 :     my $subD;
113 :     if ($g)
114 :     {
115 :     $subD = $outputD;
116 :     }
117 :     else
118 :     {
119 :     $subD = "$outputD/$genome";
120 :     mkdir($subD,0777) || die "could not make $subD";
121 :     }
122 :     &SeedUtils::run("echo $genome | svr_all_features peg | svr_functionally_coupled -n 20 > $subD/functionally.coupled");
123 :     &SeedUtils::run("sort +1nr -2 +0 -1 $subD/functionally.coupled | svr_is_hypo -c 1 | svr_is_hypo -v | svr_function_of > $subD/functionally.coupled.hypos");
124 :     &SeedUtils::run("sort +0 -1 +1nr -2 $subD/functionally.coupled.hypos > $subD/functionally.coupled.hypos.by.peg");
125 :     if ($has_expression{$genome})
126 :     {
127 :     &SeedUtils::run("echo $genome | svr_all_features peg | svr_corr_by_exp -max 10 > $subD/exp.coupled");
128 :     &SeedUtils::run("sort +1nr -2 +0 -1 $subD/exp.coupled | svr_is_hypo -c 1 | svr_is_hypo -v | svr_function_of > $subD/exp.coupled.hypos");
129 :     &SeedUtils::run("sort +0 -1 +1nr -2 $subD/exp.coupled.hypos > $subD/exp.coupled.hypos.by.peg");
130 :     }
131 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3