[Bio] / FigKernelPackages / FC.pm Repository:
ViewVC logotype

View of /FigKernelPackages/FC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Tue Mar 10 03:35:23 2009 UTC (11 years ago) by parrello
Branch: MAIN
Changes since 1.4: +31 -9 lines
Completed update methods.

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
package FC;

use strict;
use ERDB;
use Data::Dumper;
use FIG;
sub PartitionSeqs;

sub co_occurs {
    my($db,$peg) = @_;

    my @tuples;
    my $query = $db->Get("IsInPair Pairing Determines PairSet",
			 "IsInPair(from-link) = ?", [$peg]);
    while (my $row = $query->Fetch()) 
    {
	my ($pair, $score) = $row->Values(['Determines(from-link)','PairSet(score)']);
	my ($fid) = grep { $_ ne $peg } split(/:/, $pair);
	push @tuples, [$fid, $score];
    }
    return @tuples;
}

sub co_occurrence_and_evidence {
    my($db,$peg) = @_;

    my @tuples;
    my $query = $db->Get("IsInPair Pairing Determines PairSet IsDeterminedBy",
			 "IsInPair(from-link) = ?", [$peg]);
    my @tuoples = ();
    while (my $row = $query->Fetch()) 
    {
	push(@tuples,[$row->Values(['Determines(from-link)',
				    'PairSet(score)',
                                    'IsDeterminedBy(inverted)',
				    'IsDeterminedBy(to-link)'])]);
    }
    @tuples = sort { $a->[0] <=> $b->[0] } @tuples;
    my @co_occurs = ();

    my $x = shift @tuples;
    while ($x)
    {
	my $curr = $x->[0];
	my $sc   = $x->[1];
	my @set = ();
	while ($x && ($x->[0] eq $curr))
	{
	    my($peg3,$peg4) = split(/:/,$x->[3]);
	    push(@set,($x->[2]) ? [$peg3,$peg4] : [$peg4,$peg3]);
	    $x = shift @tuples;
	}
	my $i;
	for ($i=0; ($i < @set) && ($peg ne $set[$i]->[0]); $i++) {}
	if ($i == @set)
	{
	    @set = map { [$_->[1],$_->[0]] } @set;
	}
	my($peg2) = grep { $_ ne $peg } split(/:/,$curr);
	push(@co_occurs,[$sc,$peg2,[grep { $_->[0] ne $peg } @set]]);
    }
    return sort { $b->[0] <=> $a->[0] } @co_occurs; 
}

sub co_occurrence_set {
    my($db,$set) = @_;

    my $query = $db->Get("PairSet IsDeterminedBy",
			 "PairSet(id) = ?", [$set]);
    my $pairs;
    my $sc;
    while (my $row = $query->Fetch())
    {
	if (! $sc) { ($sc) = $row->Values(['PairSet(score)']); }
	my($pairing,$inverted) = $row->Values(['IsDeterminedBy(to-link)','IsDeterminedBy(inverted)']);
	my($peg1,$peg2) = split(/:/,$pairing);
	push(@$pairs,$inverted ? [$peg2,$peg1] : [$peg1,$peg2]);
    }
    return ($sc,$pairs);
}

sub all_co_occurrence_pair_sets {
    my($db) = @_;

    my $query = $db->Get("PairSet","",[]);
    my @PairSets = ();
    while (my $row = $query->Fetch())
    {
	my($pair_set) = $row->Values(['PairSet(id)']);
	push(@PairSets,$pair_set);
    }
    return @PairSets;
}

sub all_co_occurrence_clusters {
    my($db) = @_;

    my $query = $db->Get("Cluster","",[]);
    my @Clusters = ();
    while (my $row = $query->Fetch())
    {
	my($id) = $row->Values(['Cluster(id)']);
	push(@Clusters,$id);
    }
    return @Clusters;
}

sub co_occurrence_cluster {
    my($db,$cluster) = @_;

    my $query = $db->Get("Cluster IsOccurrenceOf",
			 "Cluster(id) = ?", [$cluster]);
    my @pegs = ();    
    while (my $row = $query->Fetch())
    {
	my($peg) = $row->Values(['IsOccurrenceOf(to-link)']);
	push(@pegs,$peg);
    }
    return @pegs;
}

sub co_occurrence_evidence {
    my($db,$peg1,$peg2) = @_;
    
    my $key = ($peg1 lt $peg2) ? "$peg1:$peg2" : "$peg2:$peg1";
    my $query = $db->Get("Determines PairSet",
			 "Determines(from-link) = ?", [$key]);
    my($sc,$pairs);

    if ($query)
    {
	my $row = $query->Fetch();
	if ($row)
	{
	    my($set) = $row->Values(['PairSet(id)']);
	    ($sc,$pairs) = &co_occurrence_set($db,$set);
	    my $i;
	    for ($i=0; ($i < @$pairs) && ($pairs->[$i]->[0] ne $peg1); $i++) {}
	    if ($i == @$pairs)
	    {
		$pairs = [map { [$_->[1],$_->[0]] } @$pairs];
		for ($i=0; ($i < @$pairs) && ($pairs->[$i]->[0] ne $peg1); $i++) {}
		if ($i == @$pairs)
		{
		    print STDERR &Dumper($pairs,$i,$peg1,$peg2);
		    die "Something is screwed up";
		}
	    }
	    splice(@$pairs,$i,1);
	}
	return $pairs;
    }
    return [];
}

sub in_pair_set {
    my($db,$peg1,$peg2) = @_;

    my($key,$inv,$set);
    $key = ($peg1 lt $peg2) ? "$peg1:$peg2" : "$peg2:$peg1";

    my $query = $db->Get("Pairing Determines",
			 "Pairing(id) = ?",[$key]);
    if ($query)
    {
	my $row = $query->Fetch();
	if ($row)
	{
	    my($pair,$inverted) = $row->Values(['Determines(to-link)','Determines(inverted)']);
	    if ($peg1 gt $peg2)
	    {
		$inverted = ! $inverted;
	    }
	    return ($pair,$inverted);
	}
    }
    return undef;
}

# This routine inserts a Pairing for $peg1 and $peg2
sub insert_pair {
    my($db,$peg1,$peg2,$set) = @_;
    # Compute the key and the inversion flag.
    my ($invertFlag, $key);
    if ($peg1 lt $peg2) {
	$key = "$peg1:$peg2";
	$invertFlag = 0;
    } else {
	$key = "$peg2:$peg1";
	$invertFlag = 1;
    }
    # Create the pairing.
    $db->InsertObject('Pairing', id => $key);
    # Connect it to its constituent features.
    $db->InsertObject('IsInPair', from_link => $peg1, to_link => $key);
    $db->InsertObject('IsInPair', from_link => $peg2, to_link => $key);
    # Insert it into the pair set.
    $db->InsertObject('IsDeterminedBy', from_link => $set, to_link => $key,
		      inverted => $invertFlag);
    print STDERR "inserted $peg1:$peg2 into $set\n";
}

# This routine creates a new PairSet, returning the ID
sub new_set {
    my($db) = @_;
    # Create the new pair set.
    my $retVal = $db->InsertNew('PairSet', score => 0);
    # Return its ID.
    return $retVal;
}
    

# This routine deletes a PairSet and relationships to Pairings
# (but not the Pairings).

sub delete_PairSet {
    my($db,$pair_set) = @_;
    # Disconnect the pair set from all of its pairings.
    $db->Disconnect('IsDeterminedBy', PairSet => $pair_set);
    # Delete the pair set. Because we've already disconnected, the pairings
    # are safe.
    $db->Delete(PairSet => $pair_set);
    print STDERR "deleted PairSet $pair_set\n";
}

# This routine deletes the relationship between a Pairing and a PairSet
# (but does not delete either entity).

sub delete_pair_from_PairSet {
    my($db,$pair,$PairSet) = @_;
    # Delete the relationship connecting this set to this pair.
    $db->DeleteRow('IsDeterminedBy', $PairSet, $pair);
}

# This routine updates the "score" field in a PairSet
sub set_pair_set_sc {
    my($db,$pair_set,$sc) = @_;
    # Update the score in place.
    $db->UpdateEntity(PairSet => $pair_set, score => $sc);
    print STDERR "reset score of PairSet to $sc\n";
}

sub rescore {
    my($db,$pair_set) = @_;

    my $fig = new FIG;
    my(undef,$pairs) = &co_occurrence_set($db,$pair_set);
    my %genome_sets;
    foreach my $pair (@$pairs)
    {
	$genome_sets{$fig->representative_genome(&FIG::genome_of($pair->[0]))} = 1;
    }
    my $sc = keys(%genome_sets);
    &set_pair_set_sc($db,$pair_set,$sc);
}

sub extend_pairs_and_pair_sets {
    my($db,$pchF) = @_;

    die "NOT DEBUGGED: RAO March 8";

    open(PCHS,"sort $pchF |") || die "could not open $pchF";

    my $line = <PCHS>;
    while ($line && ($line =~ /^((\S+)\t(\S+))\t(\S+)\t(\S+)/))
    {
	my $curr = $1;
	my $set = [];
	while ($line && ($line =~ /^((\S+)\t(\S+))\t(\S+)\t(\S+)/) && ($1 eq $curr))
	{
	    push(@$set,[$2,$3,$4,$5]);
	    $line = <PCHS>;
	}
	&add_pch_set($db,$set);
    }
    close(PCHS);
}

sub add_pch_set {
    my($db,$set) = @_;

    my $fig = new FIG;
    my @pairs = ([$set->[0]->[0],$set->[0]->[1]],map { [$_->[2],$_->[3]] } @$set);

    my($pair,@unplaced,%in);
    foreach $pair (@pairs)
    {
	my($pair_set,$inverted) = &in_pair_set($db,@$pair);
	if (! defined($pair_set))
	{
	    push(@unplaced,$pair);
	}
	else
	{
	    $in{"$pair_set:$inverted"}++;
	}
    }
    my @in_sets = map { [split(/:/,$_)] } sort { $b->{$_} <=> $a->{$_} } keys(%in);
    if (@in_sets > 1)
    {
	my $i;
	for ($i=1; ($i < @in_sets); $i++)
	{
	    my(undef,$in_old) = &co_occurence_set($db,$in_sets[$i]->[0]);
	    if ($in_sets[$i]->[1] ne $in_sets[0]->[1])
	    {
		$in_old = [ map { [$_->[1],$_->[0]] } @$in_old];
	    }
	    foreach $pair (@$in_old)
	    {
		&insert_pair($db,@$pair,$in_sets[0]->[0]);
	    }
	    &delete_set($db,$in_sets[$i]->[0]);
	}
    }

    if (@in_sets == 1)
    {
	foreach $pair (@unplaced)
	{
	    my($peg3,$peg4) = $in_sets[0]->[0] ? ($pair->[1],$pair->[0]) : @$pair;
	    &insert_pair($db,$peg3,$peg4,$in_sets[0]->[0]);
	}
    }
    elsif (@unplaced >= 5)
    {
	my %genome_sets;
	foreach my $pair (@unplaced)
	{
	    $genome_sets{$fig->representative_genome(&FIG::genome_of($pair->[0]))} = 1;
	}
	my $sc = keys(%genome_sets);
	if ($sc >= 5)
	{
	    my $new_set = &new_set($db);
	    foreach my $pair (@unplaced)
	    {
		&insert_pair($db,@$pair,$new_set);
	    }
	}
    }
}

sub cleanup_pair_sets {
    my($db) = @_;

    die "NOT DEBUGGED: RAO March 8";

    foreach my $pair_set (&all_co_occurrence_pair_sets)
    {
	my(undef,$set) = &co_occurrence_set($db,$pair_set);
	my @partitioned = sort { @$b <=> @$a } &partition_pair_set($db,$set);
	my $keep = shift @partitioned;
	if (@$keep < 5)
	{
	    &delete_PairSet($db,$pair_set);
	}
	else
	{
	    my %to_keep = map { join(":",@$_) => 1 } @$keep;
	    foreach my $pair (@$set)
	    {
		if (! $to_keep{ join(":",@$pair) } )
		{
		    &delete_pair_from_PairSet($db,$pair,$pair_set);
		}
	    }
	    
	    foreach my $new_set (@partitioned)
	    {
		if (@$new_set >= 5)
		{
		    my $next_set = &new_set($db);
		    foreach my $pair1 (@$new_set)
		    {
			&insert_pair($db,@$pair1,$next_set);
		    }
		}
	    }
	}
    }
}

sub partition_pair_set {
    my($db,$set) = @_;

    my @split = ();
    my @sets = &part1($set,0);
    foreach my $part1 (@sets)
    {
	foreach my $part2 (&part1($part1,1))
	{
	    push(@split,$part2);
	}
    }
    return sort { @$b <=> @$a } @split;
}

sub part1 {
    my($set,$index) = @_;

    my %entries = map { $_->[$index] => $_ } @$set;
    my $ids     = [map { $_->[$index] } @$set];
    my @partition = &PartitionSeqs::partition({ pegs => $ids, use => 'blast' });
    my @tmp = map { [map { $entries{$_} } @$_] } @partition;
    return map { [map { $entries{$_} } @$_] } @partition;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3