[Bio] / FigKernelScripts / ex_compute_pccs.pl Repository:
ViewVC logotype

View of /FigKernelScripts/ex_compute_pccs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Sun Jul 28 14:56:40 2013 UTC (6 years, 3 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.2: +20 -3 lines
add spearman stuff

use strict;
use Data::Dumper;
use gjostat;
use Getopt::Long;
use SeedEnv;

my $usage = "usage: ex_compute_pccs -d DataDir\n";
my($i,$j);
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD);

if ((! $rc) || (! -d $dataD))
{ 
    print STDERR $usage; exit ;
}

my($i,$j);
my @loci = `cut -f2 $dataD/gene.locus.exp.val.tuples | sort -u`;
chomp @loci;
my @exps = `cut -f3 $dataD/gene.locus.exp.val.tuples | sort -u`;
chomp @exps;
my %exp_vectors;
foreach my $locus (@loci)
{
    for ($i=0; ($i < @exps); $i++) 
    {
	$exp_vectors{$locus}->[$i] = 0;
    }
}
my %to_exp_index;
for ($i=0; ($i < @exps); $i++) { $to_exp_index{$exps[$i]} = $i }

open(TUPLES,"<$dataD/gene.locus.exp.val.tuples")
    || die "could not open $dataD/gene.locus.exp.val.tuples";
while (defined($_ = <TUPLES>))
{
    chomp;
    my(undef,$locus,$exp,$val) = split(/\t/,$_);
    $locus || die "bad locus";
    defined($val) || die "bad val: $_";
    my $I = $to_exp_index{$exp};
    if (defined($I))
    {
	defined($exp_vectors{$locus}->[$I])
	    || die "locus = $locus I=$I";
	$exp_vectors{$locus}->[$I] = $val;
    }
    else
    {
	print &Dumper(\%to_exp_index,$exp);
	die "badI $exp";
    }

}

close(TUPLES);
my @exp_vectors = map { [$_,$exp_vectors{$_}] } sort keys(%exp_vectors);

open(PCCS,">$dataD/locus.pccs") || die "could not open locus.pccs";
for ($i=0; ($i < $#exp_vectors); $i++)
{
    for ($j=$i+1; ($j < @exp_vectors); $j++)
    {
	my $pcc       = sprintf("%0.3f",&pcc($exp_vectors[$i]->[1],$exp_vectors[$j]->[1]));
	my ($spearman,$spearmanZ)  = &spearman($exp_vectors[$i]->[1],$exp_vectors[$j]->[1]);
	$spearman                  =  sprintf("%0.3f",$spearman);
	$spearmanZ                 =  sprintf("%0.3f",$spearmanZ);

	print PCCS join("\t",($exp_vectors[$i]->[0],
			      $exp_vectors[$j]->[0],
			      $pcc,$spearman,$spearmanZ)),"\n";
    }
}
close(PCCS);

sub pcc {
    my($x,$y) = @_;

    if (length(@$x) != length(@$y))
    {
	print &Dumper($x,$y,length(@$x),length(@$y));
        die "lengths of input vectors differ";
   }
    return &gjostat::correl_coef($x,$y);
}

sub spearman {
    my($x,$y) = @_;

    (length(@$x) == length(@$y)) || die "lengths of input vectors differ";
    my $x1 = &ranks($x);
    my $y1 = &ranks($y);
    my $spc = &gjostat::correl_coef($x1,$y1);
    if ((! defined($spc)) || ($spc eq '')) { $spc = 0 }
    my $N   = @$x;
    my $spcZ;
    if ($spc > 0.9999)
    {
	$spcZ = 100;
    }
    elsif ($spc <  -0.9999)
    {
	$spcZ = -100;
    }
    else
    {
	$spcZ = sqrt( ($N-3) / 4.24 ) * log( (1 + $spc) / (1 - $spc) );
    }
    return ($spc,$spcZ);
}

sub ranks {
    my($x) = @_;

    my %rank;
    my @sorted = sort { $a <=> $b } @$x;
    my $i = 0;
    while ($i < @sorted)
    {
	my $curr = $sorted[$i];
	my $same = 0;
	my $n = 0;
	while (($i < @sorted) && ($sorted[$i] == $curr))
	{
	    $n++;
	    $i++;
	    $same += $i;
	}
	my $rank = $same / $n;
	$rank{$curr} = $rank;
    }
    my @ranks = map { $rank{$_} } @$x;
    return \@ranks;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3