[Bio] / DrugTargets / aln_conservation.pl Repository:
ViewVC logotype

View of /DrugTargets/aln_conservation.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Thu Aug 25 23:22:54 2005 UTC (14 years, 9 months ago) by fangfang
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +2 -2 lines
Fixed the conservation script bug thanks to Bob.
Added category codes instructions page.

#use Data::Dumper;

$usage = "aln_conservation [-peg=FIG_ID] \n";

if (@ARGV > 0) {
    if ($ARGV[0] =~ /-peg=(\S+)/) {
	$peg = $1;
    } else {
	die "Usage: $usage";
    }
}

@pegs  = ();
@seqs  = ();
@chars = ();
$nseq  = 0;
$wtot  = 0;
$ntot  = 0;
$slen  = 0;

while (<stdin>) {
    chomp;
    if ($_ =~ />(\S+)/) {
	push(@pegs, $1);
	#print "$1\n";
    } else {
	push(@seqs, $_);
	@seq = split(//, $_);
	$slen = ($slen > 0 && $slen < @seq) ? $slen : @seq ;
	push(@chars, [@seq]);
	#print join(" ", @{$chars[@chars-1]}) . "\n";
	$nseq++;
	#print "$_\n";
    }
}

for ($j = 0; $j < $slen; $j++) {
    %dict = ();
    for ($i = 0; $i < $nseq; $i++) {
	$ch = uc($chars[$i][$j]);
	$dict{$ch} = (exists $dict{$ch}) ? $dict{$ch}+1 : 1;
    }
    $w[$j] = (exists $dict{'-'}) ? $nseq - $dict{'-'} : $nseq;
    $n[$j] = (exists $dict{'-'}) ? keys(%dict) - 1 : keys(%dict);
    $wtot += $w[$j];
    $ntot += ($n[$j]-1);
    #print "weight  = $w[$j]\t";
    #print "nkinds  = $n[$j]\n";
    for ($i = 0; $i < $nseq; $i++) {
	$ch = uc($chars[$i][$j]);
	if ($ch ne '-') {
	    $wacc[$i] += $w[$j];
	    $sum[$i]  += $dict{$ch};
	} 
    }
    #while( my ($k, $v) = each %dict ) {
        #print "key: $k, value: $v.\n";
    #}
    #print "\n";
    #last;
}

@cons = ();
for ($i = 0; $i < $nseq; $i++) {
    $cons[$i] = ($wacc[$i])? 1.0 * $sum[$i] / $wacc[$i] : 0;
    $scon += $cons[$i];
    #print "cons = $cons[$i]\t sum = $sum[$i]\t wacc = $wacc[$i]\n";
}

$con1 = 1.0*$scon/$nseq;
$con2 = 1 - 1.0*$ntot/$wtot;
$con  = ($con1 + $con2) / 2;

#print "$nseq $slen $con\n";
#printf "%.5f\n", $con;
#printf "%.5f\n", sqrt($con);

#printf "%.5f / %d\n", $con1, $nseq;
#printf "%.5f\t%d %d\n", $con2, $wtot, $ntot;
printf "%.5f\n", $con;

#print &Dumper(\@seqs);


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3