[Bio] / FigKernelScripts / rep_genomes.pl Repository:
ViewVC logotype

View of /FigKernelScripts/rep_genomes.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Dec 4 19:18:26 2006 UTC (13 years, 4 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
fixes to representative tree generation and added rep_genomes

use FIG;
my $fig = new FIG;
use tree_utilities;

($size = shift @ARGV)
    || die "usage: rep_genomes Size";

my $tree = ['All','',[0],[]];

foreach my $genome (grep { $fig->is_prokaryotic($_) } $fig->genomes('complete'))
{
    my $gs = $fig->genus_species($genome);
    my $tax = $fig->taxonomy_of($genome);
    my @tax = split(/;\s*/,$tax);
    $tax[$#tax] = "$genome: " . $tax[$#tax];
    &insert($tree,\@tax);
}

my $leaves = &tree_utilities::tips_of_tree($tree);
my $sz_all = @$leaves;

my $treeN = &representative_by_size($tree,$size);
my $leavesN = &tree_utilities::tips_of_tree($treeN);
foreach $_ (@$leavesN)
{
    print "$_\n";
}

sub insert {
    my($tree,$taxP) = @_;
    my($label,$ptrs,$i);

    if (@$taxP == 0)
    {
	return;
    }
    else
    {
	$label = shift @$taxP;
	$ptrs = $tree->[2];
	for ($i=1; ($i < @$ptrs) && ($ptrs->[$i]->[0] ne $label); $i++) {}
	if ($i == @$ptrs)
	{
	    push(@$ptrs,[$label,1,[$tree],[]]);
	}
	&insert($ptrs->[$i],$taxP);
	return;
    }
}

sub representative_by_size {
# &representative_by_size($RootedTree,$N) -> \RepTree
    my($treeP,$size) = @_;

    my($btree,@nodes,@sorted_nodes,%del,%keep,$tips);
    my ($start,$to_remove,$i,$j,$nodeP,$cc);

    $tips = &tips_of_tree($treeP);
    $start = $#{$tips} + 1;
    $to_remove = $start - $size;
#   print STDERR "we start with $start, so we need to pull $to_remove\n";
    if (($to_remove <= 0) || ($size < 3))
    {
	return $treeP;
    }

    $btree = &to_binary($treeP);
#   &print_tree($btree);

    &tree_utilities::get_collapse_values($btree,\@nodes);
    @sorted_nodes = sort by_distance @nodes;

#     print STDERR "nodes:\n";
#     foreach $y (@nodes) { print STDERR "    $y->[0] $y->[1]->[0]\n"; }
#     print STDERR "sorted:\n";
#     foreach $y (@sorted_nodes) { print STDERR "    $y->[0] $y->[1]->[0]\n"; }

    for ($k=0; ($k <= $#sorted_nodes) && ($to_remove > 0); $k++,$to_remove--)
    {
	$nodeP = $sorted_nodes[$k]->[1];
	$cc  = &node_pointers($nodeP);
	if ($cc->[1]->[1] <= $cc->[2]->[1])
	{
	    $i = 1; $j = 2;
	}
	else
	{
	    $i = 2; $j = 1;
	}

	$del{$cc->[$j]->[0]} = 1;
#	print STDERR "pulling at $nodeP->[0] ($sorted_nodes[$k]->[0])  $cc->[$j]->[0] $cc->[$i]->[0]\n";

	$nodeP->[0] = $cc->[$i]->[0];
	$nodeP->[1] += $cc->[$i]->[1];
    }
    
    foreach $x (@$tips)
    {
	if (! $del{$x})
	{
	    $keep{$x} = 1;
#	    print STDERR "keeping $x\n";
	}
    }
    
    return &subtree($btree,\%keep);
}
	
sub by_distance {
    return ($a->[0] <=> $b->[0]);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3