[Bio] / FigKernelScripts / get_pairs.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_pairs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Thu May 28 18:01:51 2009 UTC (10 years, 8 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
New script to find certain functional roles close together.

use strict;
use ERDB;
use Data::Dumper;
use Carp;
use FIG;
use Tracer;
ETracing();
my $fig = new FIG;

my $db = ERDB::GetDatabase('Sapling');
Trace("Acquiring data.") if T(3);
my $act = &get($db,'ACT_domain-containing protein');
my $hdh = &get($db,'Homoserine dehydrogenase (EC 1.1.1.3)');
Trace("Sorting.") if T(3);
my @genomes = sort { $a <=> $b } keys(%$hdh);
Trace("Looping.") if T(3);
foreach my $genome (@genomes)
{
    Trace("Processing genome $genome.") if T(4);
    my $hits1 = $hdh->{$genome};
    my $hits2 = $act->{$genome};
    if ($hits2)
    {
	Trace("Hits found for ACT in $genome.") if T(3);
	my @sortedD = &locs($hits1);
	my @sortedA = &locs($hits2);
	Trace("Hits sorted: " . scalar(@sortedD) . " D-type, " .
	      scalar(@sortedA) . " A-type.") if T(3);
	foreach my $pegD (@sortedD)
	{
	    foreach my $pegA (@sortedA)
	    {
		if (&close($pegD,$pegA))
		{
		    my $gs = $fig->genus_species($genome);
		    print join("\t",($pegD->[0],$pegA->[0],$gs)),"\n";
		}
	    }
	}
    }
    Trace("Genome $genome processed.") if T(4);
}
Trace("All done.") if T(3);

sub locs {
    my($hits) = @_;

    my @locs = ();
    my @sorted = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) } @$hits;
    my $last = shift @sorted;
    while ($last)
    {
	my $chunks = [];
	my $curr = $last->[0];
	while ($last && ($last->[0] eq $curr))
	{
	    push(@$chunks,$last->[2]);
	    $last = shift @sorted;
	}
	push(@locs,[$curr,$chunks]);
    }
    return @locs;
}

sub close {
    my($x,$y) = @_;
    Trace("Comparing $x->[0] to $y->[0].") if T(3);
    my $contigX = $x->[1]->[0]->[0];
    my $contigY = $y->[1]->[0]->[0];
    if ($contigX ne $contigY) {
	Trace("Contig difference: $contigX vs. $contigY.") if T(3);
	return 0;
    }
    my $distance = abs($x->[1]->[0]->[1] - $y->[1]->[0]->[1]);
    Trace("Distance = $distance") if T(3);
    return ($distance < 6000);
}

sub get {
    my($db,$func) = @_;

    my $hits = {};
    my @fids = $db->GetFlat("Feature","Feature(function) LIKE ?",["$func%"], 'id');
    for my $id (@fids) {
        my $genome = &FIG::genome_of($id);
        my $query = $db->Get("IsLocatedIn", "IsLocatedIn(from-link) = ?", [$id]);
        while (my $row = $query->Fetch()) {
            my ($ordinal,$contig,$begin,$dir,$len) = $row->Values(['IsLocatedIn(ordinal)',
                                                                  'IsLocatedIn(to-link)',
                                                                  'IsLocatedIn(begin)',
                                                                  'IsLocatedIn(dir)',
                                                                  'IsLocatedIn(len)'
                                                                 ]);
            push(@{$hits->{$genome}},[$id,$ordinal,[$contig,$begin,$dir,$len]]);
        }
    }
    Trace("Hit data acquired for $func.") if T(3);
    return $hits;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3