[Bio] / FigKernelScripts / make_subsys_based_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/make_subsys_based_families.pl

Parent Directory | Revision Log


Revision 1.14 - (download) (as text) (annotate)
Sun Sep 21 12:06:16 2008 UTC (11 years, 2 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0923, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, HEAD
Changes since 1.13: +6 -1 lines
Do not use genomes whose variant codes begin with an asterisk when forming FIGfams based on subsystems.

########################################################################
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
########################################################################


# make_subsys_based_families
#
# usage: make_subsys_based_families [trusted] > subsys.based.families
#
# Writes tab-separated family membership lines to STDOUT.  The optional
# "trusted" argument names a file whose lines each begin with a subsystem
# name; when it is given, only usable subsystems named in that file
# contribute to the families.

use FIG;
use Subsystem;

# Single FIG handle shared by every query made below.
my $fig = FIG->new;

# _read_trusted_subsystems($path)
#
# Reads the trusted-subsystem list in file $path and returns the subsystem
# names found, in file order.
#
# The name on a line is the leading run of non-tab characters, starting and
# ending with a non-whitespace character (so trailing blanks are dropped and
# anything after the first tab is ignored).  The pattern needs at least three
# characters, so shorter names and blank lines are skipped.
#
# Dies if the file cannot be opened.  The file is opened directly rather
# than through `cat`, so the name is never interpreted by a shell.
sub _read_trusted_subsystems
{
    my ($path) = @_;

    open(my $fh, '<', $path)
        or die "could not open trusted subsystem list '$path': $!\n";

    my @names;
    while (defined(my $line = <$fh>))
    {
        if ($line =~ /^(\S[^\t]+\S)/)
        {
            push(@names, $1);
        }
    }
    close($fh);

    return @names;
}

# When a file is named on the command line, remember every subsystem it
# lists in %trusted; the subsystem selection below consults that hash.
if (@ARGV > 0)
{
    foreach my $name (_read_trusted_subsystems($ARGV[0]))
    {
        $trusted{$name} = 1;
    }
}

# Build %subs_for_role: for each non-auxiliary role, the list of subsystems
# that contain it.
#
# Only subsystems accepted by usable_subsystem are considered.  With no
# command-line argument all of them are used; otherwise a subsystem must
# also appear in %trusted.  The usable_subsystem test is made once per
# subsystem (in the grep) instead of being repeated inside the loop.
my $restrict_to_trusted = (@ARGV > 0);

foreach my $sub (grep { $fig->usable_subsystem($_) } $fig->all_subsystems)
{
    next if $restrict_to_trusted && ! $trusted{$sub};

    foreach my $role (grep { ! $fig->is_aux_role_in_subsystem($sub,$_) } $fig->subsystem_to_roles($sub))
    {
        push(@{$subs_for_role{$role}},$sub);
    }
}

# Pass 1: one candidate family per role, visiting roles in sorted order.
#
# For each role, collect the pegs found in that role's cell of every
# contributing subsystem, then print one line per member:
#     family <TAB> peg <TAB> function <TAB> role
# Members whose function splits into several pieces are not printed here;
# they are recorded in %multi for the pass that follows.  $roleN is the
# running family counter and is also continued by that later pass.
$roleN = 1;

foreach my $role (sort keys(%subs_for_role))
{
    # A family number is consumed for every role, printed or not.
    my $fam = "subsys$roleN";
    $roleN++;

    my %keep;
    foreach my $sub (sort @{$subs_for_role{$role}})
    {
        my $subO = Subsystem->new($sub,$fig);

        foreach my $genome (map { $_->[0] } @{$fig->subsystem_genomes($sub)})
        {
            my $vc = $subO->get_variant_code_for_genome($genome);

            # Skip variant codes '0' and '-1'.  Since Sept 21, 2008 genomes
            # whose variant code begins with '*' are skipped as well: until
            # curators bless those additions by removing the '*', they
            # should not be used to construct families (RAO).
            next if ($vc eq '0') || ($vc eq '-1') || (substr($vc,0,1) eq "*");

            # Only ".peg." feature ids that FIG reports as real features.
            my @candidates = grep { ($_ =~ /\.peg\./) && $fig->is_real_feature($_) }
                             $fig->pegs_in_subsystem_cell($sub,$genome,$role);

            foreach my $peg (@candidates)
            {
                # The assigned function must contain the role text, and the
                # peg must not be flagged as truncated or frameshifted.
                my $func = $fig->function_of($peg);
                next if index($func,$role) < 0;
                next if $fig->possibly_truncated($peg);
                next if $fig->possible_frameshift($peg);

                $keep{$peg} = 1;
            }
        }
    }

    # Roles with fewer than two qualifying pegs produce no output.
    my @members = sort { &FIG::by_fig_id($a,$b) } keys(%keep);
    next if @members <= 1;

    foreach my $peg (@members)
    {
        my $func = $fig->function_of_quick($peg);

        # The pattern's capturing groups make split return the separators
        # too, so the %multi key below includes them.
        my @pieces = split(/(\s+\/\s+)|(\s*[;@]\s+)/,$func);

        if (@pieces <= 1)
        {
            print join("\t",($fam,$peg,$func,$role)),"\n";
            next;
        }

        $multi{join("\t",@pieces)}->{$peg} = 1;
    }
}

# Pass 2: families for pegs whose function was split into several pieces.
#
# %multi maps a key built from the split function text to the set of pegs
# that produced it.  Every key with at least two pegs becomes a family,
# numbered by continuing $roleN, and is printed as
#     family <TAB> peg <TAB> function
# Keys are visited in sorted order and members are sorted with
# FIG::by_fig_id, the same ordering used for the per-role families, so the
# output is the same from run to run rather than following hash order.
foreach my $key (sort keys(%multi))
{
    my @pegs = sort { &FIG::by_fig_id($a,$b) } keys(%{$multi{$key}});
    next if @pegs < 2;

    my $fam = "subsys$roleN";
    $roleN++;

    foreach my $peg (@pegs)
    {
        print join("\t",($fam,$peg,scalar $fig->function_of_quick($peg))),"\n";
    }
}


	

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3