[Bio] / FigKernelScripts / clustered_hypotheticals.pl Repository:
ViewVC logotype

View of /FigKernelScripts/clustered_hypotheticals.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Sun Dec 27 16:06:53 2009 UTC (10 years, 5 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.1: +55 -18 lines
major rewrite

#!/usr/bin/perl -w

use strict;
use SeedEnv;
use Data::Dumper;

my $sapObject = SAPserver->new();

my $genomeHash = $sapObject->all_genomes(-complete => 1);
my @completeG = keys(%$genomeHash);
my $isP = $sapObject->is_prokaryotic(-ids => \@completeG);
my @complete_proks = grep { $isP->{$_} } @completeG;
my %seen;

foreach my $genome (sort { $a <=> $b } @complete_proks) 
{
    print STDERR "processing $genome\n";

    my $genomeName = $genomeHash->{$genome};
    my $geneHash = $sapObject->feature_assignments(-genome => $genome,
						   -type => 'peg');
    my @hypotheticalGenes = sort { &SeedUtils::by_fig_id($a,$b) } 
                            grep { &SeedUtils::hypo($geneHash->{$_}) } 
                            grep { ! $seen{$_} }
                            keys(%$geneHash);

    my $couplingHash    = $sapObject->conserved_in_neighborhood(-ids => \@hypotheticalGenes);
    my @all_coupled_ids = map { map { $_->[1] } @{$couplingHash->{$_}}} keys(%$couplingHash);
    my $subHash         = $sapObject->ids_to_subsystems(-ids => \@all_coupled_ids);

    foreach my $gene (@hypotheticalGenes) 
    {
	my $couplingList = $couplingHash->{$gene};
	if (defined $couplingList) 
	{
	    my ($bestCoupler, $bestScore, $bestFunction) = (undef, 0, '');
	    my $best_pairset;

	    foreach  my $coupling (@$couplingList) 
	    {
		my ($score, $coupler, $function, $pairset) = @$coupling;

		if ($subHash->{$coupler} && $score > $bestScore) 
		{
		    $bestCoupler  = $coupler;
		    $bestScore    = $score;
		    $bestFunction = $function ? $function : '';
		    $best_pairset = $pairset;
		}
	    }

	    if (defined $bestCoupler) 
	    {
		print join("\t", $gene, $geneHash->{$gene}, $genome, $genomeName,
				 $bestCoupler, $bestScore, $bestFunction) . "\n";
		&set_seen_for_pairset($gene,$best_pairset,\%seen,$sapObject);
	    }
	}
    }
}

sub set_seen_for_pairset {
    my($peg,$pairset,$seen,$sapObject) = @_;

    my $pairsetsH = $sapObject->pairsets( -ids => [$pairset] );
    if ($_ = $pairsetsH->{$pairset})
    {
	my($sc,$pairs) = @$_;
	my $got = 0;
	my @set =   map { if ($_->[0] eq $peg) { $got = 1 }; $_->[0] } @$pairs;
	if (! $got)
	{
	    @set = map { $_->[1] } @$pairs;
	}
	foreach $_ (@set)
	{
	    $seen->{$_} = 1;
	}
    }
}

    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3