[Bio] / FigKernelScripts / svr_corr_by_exp.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_corr_by_exp.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : overbeek 1.1 use strict;
2 :     use Data::Dumper;
3 :     use Carp;
4 :    
5 :     #
6 :     # This is a SAS Component
7 :     #
8 :    
9 :    
10 : overbeek 1.2 =head1 svr_corr_by_exp [-m MinPCC]
11 : overbeek 1.1
12 :     Get genes that have similar expression profiles.
13 :    
14 :     ------
15 :    
16 :     Example:
17 :    
18 :     svr_all_features 83333.1 peg | svr_corr_by_exp
19 :    
20 :     would produce a 3-column table. The first column would contain
21 :     PEG IDs for genes occurring in genome 3702.1, the second would give the
22 :     Pearson correlation coefficient, and the third would give the PEG that
23 :     seems to have a similar expression profile..
24 :    
25 :     ------
26 :    
27 :     The standard input should be a tab-separated table (i.e., each line
28 :     is a tab-separated set of fields). Normally, the last field in each
29 :     line would contain the PEG for which functions are being requested.
30 :     If some other column contains the PEGs, use
31 :    
32 :     -c N
33 :    
34 :     where N is the column (from 1) that contains the PEG in each case.
35 :    
36 :     This is a pipe command. The input is taken from the standard input, and the
37 :     output is to the standard output.
38 :    
39 :     =head2 Command-Line Options
40 :    
41 :     =over 4
42 :    
43 :     =item -c Column
44 :    
45 :     This is used only if the column containing PEGs is not the last.
46 :    
47 : overbeek 1.2 =item -m Minimum value for the Pearson correlation coefficient
48 :    
49 : overbeek 1.1 =back
50 :    
51 :     =head2 Output Format
52 :    
53 :     The standard output is a tab-delimited file. It consists of lines from
54 :     the input file that are for PEGs that have Pearson correlation
55 :     coefficients that indicate potential correlation. The lines will have
56 :     two appended columns: the Pearson correlation coefficient and the
57 :     functionally PEG that appears to have a correlated profile.
58 :    
59 :     =cut
60 :    
61 :     use SeedUtils;
62 :     use SAPserver;
63 :     my $sapObject = SAPserver->new();
64 :     use Getopt::Long;
65 :    
66 :     my $usage = "usage: svr_corr_by_exp [-c column]";
67 :    
68 :     my $column;
69 : overbeek 1.2 my $min_pcc = 0;
70 :     my $rc = GetOptions('c=i' => \$column,
71 :     'm=i' => \$min_pcc);
72 :    
73 : overbeek 1.1 if (! $rc) { print STDERR $usage; exit }
74 :    
75 :     my @lines = map { chomp; [split(/\t/,$_)] } <STDIN>;
76 :     if (! $column) { $column = @{$lines[0]} }
77 :     my @fids = map { $_->[$column-1] } @lines;
78 :    
79 :     my $corrH = $sapObject->coregulated_fids(-ids => \@fids);
80 :     foreach $_ (@lines)
81 :     {
82 :     my $peg = $_->[$column-1];
83 :     if (my $x = $corrH->{$peg})
84 :     {
85 :     my @pegs2 = keys(%$x);
86 :     foreach my $peg2 (@pegs2)
87 :     {
88 :     my $pcc = sprintf("%0.3f",$x->{$peg2});
89 : overbeek 1.2 if ($pcc >= $min_pcc)
90 :     {
91 :     print join("\t",(@$_,$pcc,$peg2)),"\n";
92 :     }
93 : overbeek 1.1 }
94 :     }
95 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3