[Bio] / FigKernelScripts / svr_corr_by_exp.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_corr_by_exp.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : overbeek 1.4
2 : overbeek 1.1 use strict;
3 :     use Data::Dumper;
4 :     use Carp;
5 :    
6 :     #
7 :     # This is a SAS Component
8 :     #
9 :    
10 :    
11 : overbeek 1.2 =head1 svr_corr_by_exp [-m MinPCC]
12 : overbeek 1.1
13 :     Get genes that have similar expression profiles.
14 :    
15 :     ------
16 :    
17 :     Example:
18 :    
19 :     svr_all_features 83333.1 peg | svr_corr_by_exp
20 :    
21 :     would produce a 3-column table. The first column would contain
22 : overbeek 1.4 PEG IDs for genes occurring in genome 83333.1, the second would give the
23 : overbeek 1.1 Pearson correlation coefficient, and the third would give the PEG that
24 :     seems to have a similar expression profile..
25 :    
26 :     ------
27 :    
28 :     The standard input should be a tab-separated table (i.e., each line
29 :     is a tab-separated set of fields). Normally, the last field in each
30 :     line would contain the PEG for which functions are being requested.
31 :     If some other column contains the PEGs, use
32 :    
33 :     -c N
34 :    
35 :     where N is the column (from 1) that contains the PEG in each case.
36 :    
37 :     This is a pipe command. The input is taken from the standard input, and the
38 :     output is to the standard output.
39 :    
40 :     =head2 Command-Line Options
41 :    
42 :     =over 4
43 :    
44 :     =item -c Column
45 :    
46 :     This is used only if the column containing PEGs is not the last.
47 :    
48 : overbeek 1.2 =item -m Minimum value for the Pearson correlation coefficient
49 :    
50 : overbeek 1.4 =item -max MaxReturned [default 1000000]
51 :    
52 :     This is used to restrict the number of results displayed for a single incoming line (not a restriction
53 :     on the number of tatal lines)
54 :    
55 : overbeek 1.1 =back
56 :    
57 :     =head2 Output Format
58 :    
59 :     The standard output is a tab-delimited file. It consists of lines from
60 :     the input file that are for PEGs that have Pearson correlation
61 :     coefficients that indicate potential correlation. The lines will have
62 :     two appended columns: the Pearson correlation coefficient and the
63 :     functionally PEG that appears to have a correlated profile.
64 :    
65 :     =cut
66 :    
67 :     use SeedUtils;
68 :     use SAPserver;
69 :     my $sapObject = SAPserver->new();
70 :     use Getopt::Long;
71 : parrello 1.5 use ScriptThing;
72 : overbeek 1.1
73 : overbeek 1.4 my $usage = "usage: svr_corr_by_exp [-c column] [-m minPCC] [-max MaxReturned]";
74 : overbeek 1.1
75 :     my $column;
76 : overbeek 1.4 my $maxR = 100000;
77 : overbeek 1.2 my $min_pcc = 0;
78 :     my $rc = GetOptions('c=i' => \$column,
79 : overbeek 1.4 'max=i' => \$maxR,
80 : overbeek 1.3 'm=f' => \$min_pcc);
81 : overbeek 1.2
82 : overbeek 1.1 if (! $rc) { print STDERR $usage; exit }
83 :    
84 : parrello 1.5 while (my @tuples = ScriptThing::GetBatch(\*STDIN, 5, $column)) {
85 :     my $corrH = $sapObject->coregulated_fids(-ids => [map { $_->[0] } @tuples]);
86 :     foreach my $tuple (@tuples) {
87 :     my $printed = 0;
88 :     my ($peg, $line) = @$tuple;
89 :     if (my $x = $corrH->{$peg}) {
90 :     my @pegs2 = keys(%$x);
91 :     foreach my $peg2 (@pegs2) {
92 :     my $pcc = sprintf("%0.3f",$x->{$peg2});
93 :     if ($pcc >= $min_pcc)
94 : overbeek 1.4 {
95 : parrello 1.5 if ($printed < $maxR)
96 :     {
97 :     print join("\t",($line,$pcc,$peg2)),"\n";
98 :     $printed++;
99 :     }
100 : overbeek 1.4 }
101 : overbeek 1.2 }
102 : overbeek 1.1 }
103 :     }
104 : parrello 1.5 }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3