[Bio] / FigKernelScripts / svr_corr_by_exp.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_corr_by_exp.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : overbeek 1.4
2 : overbeek 1.1 use strict;
3 :     use Data::Dumper;
4 :     use Carp;
5 :    
6 :     #
7 :     # This is a SAS Component
8 :     #
9 :    
10 :    
11 : overbeek 1.2 =head1 svr_corr_by_exp [-m MinPCC]
12 : overbeek 1.1
13 :     Get genes that have similar expression profiles.
14 :    
15 :     ------
16 :    
17 :     Example:
18 :    
19 :     svr_all_features 83333.1 peg | svr_corr_by_exp
20 :    
21 :     would produce a 3-column table. The first column would contain
22 : overbeek 1.4 PEG IDs for genes occurring in genome 83333.1, the second would give the
23 : overbeek 1.1 Pearson correlation coefficient, and the third would give the PEG that
24 :     seems to have a similar expression profile..
25 :    
26 :     ------
27 :    
28 :     The standard input should be a tab-separated table (i.e., each line
29 :     is a tab-separated set of fields). Normally, the last field in each
30 :     line would contain the PEG for which functions are being requested.
31 :     If some other column contains the PEGs, use
32 :    
33 :     -c N
34 :    
35 :     where N is the column (from 1) that contains the PEG in each case.
36 :    
37 :     This is a pipe command. The input is taken from the standard input, and the
38 :     output is to the standard output.
39 :    
40 :     =head2 Command-Line Options
41 :    
42 :     =over 4
43 :    
44 :     =item -c Column
45 :    
46 :     This is used only if the column containing PEGs is not the last.
47 :    
48 : overbeek 1.2 =item -m Minimum value for the Pearson correlation coefficient
49 :    
50 : overbeek 1.4 =item -max MaxReturned [default 1000000]
51 :    
52 :     This is used to restrict the number of results displayed for a single incoming line (not a restriction
53 :     on the number of tatal lines)
54 :    
55 : overbeek 1.1 =back
56 :    
57 :     =head2 Output Format
58 :    
59 :     The standard output is a tab-delimited file. It consists of lines from
60 :     the input file that are for PEGs that have Pearson correlation
61 :     coefficients that indicate potential correlation. The lines will have
62 :     two appended columns: the Pearson correlation coefficient and the
63 :     functionally PEG that appears to have a correlated profile.
64 :    
65 :     =cut
66 :    
67 :     use SeedUtils;
68 :     use SAPserver;
69 :     my $sapObject = SAPserver->new();
70 :     use Getopt::Long;
71 :    
72 : overbeek 1.4 my $usage = "usage: svr_corr_by_exp [-c column] [-m minPCC] [-max MaxReturned]";
73 : overbeek 1.1
74 :     my $column;
75 : overbeek 1.4 my $maxR = 100000;
76 : overbeek 1.2 my $min_pcc = 0;
77 :     my $rc = GetOptions('c=i' => \$column,
78 : overbeek 1.4 'max=i' => \$maxR,
79 : overbeek 1.3 'm=f' => \$min_pcc);
80 : overbeek 1.2
81 : overbeek 1.1 if (! $rc) { print STDERR $usage; exit }
82 :    
83 :     my @lines = map { chomp; [split(/\t/,$_)] } <STDIN>;
84 :     if (! $column) { $column = @{$lines[0]} }
85 :     my @fids = map { $_->[$column-1] } @lines;
86 :    
87 :     my $corrH = $sapObject->coregulated_fids(-ids => \@fids);
88 :     foreach $_ (@lines)
89 :     {
90 : overbeek 1.4 my $printed = 0;
91 : overbeek 1.1 my $peg = $_->[$column-1];
92 :     if (my $x = $corrH->{$peg})
93 :     {
94 :     my @pegs2 = keys(%$x);
95 :     foreach my $peg2 (@pegs2)
96 :     {
97 :     my $pcc = sprintf("%0.3f",$x->{$peg2});
98 : overbeek 1.2 if ($pcc >= $min_pcc)
99 :     {
100 : overbeek 1.4 if ($printed < $maxR)
101 :     {
102 :     print join("\t",(@$_,$pcc,$peg2)),"\n";
103 :     $printed++;
104 :     }
105 : overbeek 1.2 }
106 : overbeek 1.1 }
107 :     }
108 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3