[Bio] / FigKernelScripts / FFB2_make_subsys_based_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB2_make_subsys_based_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon Nov 22 17:41:30 2010 UTC (9 years, 6 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.2: +26 -1 lines
Big round of figfam update changes

########################################################################
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
########################################################################


use FIG;
use strict;
use Getopt::Long;

# usage: FFB2_make_subsys_based_families [-functions function-overrides] > subsys.based.families

# FIG object; its db_handle and roles_of_function() are used further down.
my $fig = FIG->new;

# Path supplied with -functions; remains undef when the option is not given.
my $function_override_file;

# GetOptions returns false when it met an option it could not parse.
my $options_ok = GetOptions("functions=s" => \$function_override_file);

# The script takes no positional arguments, so anything left in @ARGV is a
# usage error as well.
unless ($options_ok && !@ARGV)
{
    die "Usage: FFB2_make_subsys_based_families [-functions function-overrides] > subsys.based.families\n";
}

# Feature id => replacement function text, read from the -functions file.
# Left empty when no override file was named on the command line.
my %fn_override;
if (defined($function_override_file))
{
    open(my $override_fh, "<", $function_override_file)
        or die "Cannot open $function_override_file: $!";

    # Each line is tab-separated; the first field is the id and the second
    # is the function. Any further fields are ignored.
    while (defined(my $line = <$override_fh>))
    {
        chomp($line);
        my($id, $fn) = split(/\t/, $line);
        $fn_override{$id} = $fn;
    }

    close($override_fh);
}


#
# Stage 1: write "function<TAB>peg" pairs through sort(1) into $pairsF.
#
# A pair is emitted for every subsystem_index row whose (possibly overridden)
# assigned function includes the row's role among roles_of_function(), and
# whose function comment does not mark it as truncated/frameshifted/fragment.
#
# An earlier version obtained the same role/peg/function triples from
#   pegs_in_subsystems | cut -f2,3 | sort -u | function_of
# instead of querying the database directly.
#
my $pairsF = "$FIG_Config::temp/peg-func.$$";

# LC_ALL=C is required, not cosmetic: the grouping pass that reads $pairsF
# compares function strings with `eq` and relies on identical functions being
# adjacent. Locale-aware collation can interleave different keys and, combined
# with -u, can discard lines that merely collate as equal. Byte order keeps
# every "function<TAB>" prefix contiguous.
open(my $sort_fh, "|-", "LC_ALL=C sort -S 3G -u > $pairsF")
    or die "could not open $pairsF: $!";

# Rows are (role, protein, assigned_function). The WHERE clause drops deleted
# features (df.fid IS NULL), auxiliary roles (ar.role IS NULL), subsystems
# whose class_1 is empty or matches 'experimental%' / '%delete%', and the
# variants '0' and '-1'.
my $dbh = $fig->db_handle->{_dbh};
my $sth = $dbh->prepare(qq(SELECT si.role, si.protein, f.assigned_function
                           FROM subsystem_index si
                              LEFT JOIN aux_roles ar ON si.subsystem = ar.subsystem AND si.role = ar.role
                              JOIN subsystem_metadata m ON si.subsystem = m.subsystem
                              JOIN assigned_functions f ON f.prot = si.protein
                              LEFT JOIN deleted_fids df ON si.protein = df.fid
                           WHERE df.fid IS NULL AND
                                 ar.role IS NULL AND
                                 m.class_1 <> '' AND
                                 m.class_1 NOT LIKE 'experimental%' COLLATE latin1_swedish_ci AND
                                 m.class_1 NOT LIKE '%delete%' COLLATE latin1_swedish_ci  AND
                                 si.variant != '0' AND
                                 si.variant != '-1'),
                        # DBD::mysql attribute: fetch rows from the server as
                        # they are read rather than buffering the whole result.
                        { mysql_use_result => 1 })
    or die "could not prepare subsystem query: " . $dbh->errstr;

$sth->execute()
    or die "could not execute subsystem query: " . $sth->errstr;

while (my $row = $sth->fetchrow_arrayref())
{
    my($role, $peg, $func) = @$row;

    # A function supplied via -functions replaces the database assignment.
    $func = $fn_override{$peg} if defined($fn_override{$peg});

    next if $peg !~ /\.peg\./;
    next if ((! $func) || (length($func) < 2));

    # Keep the peg only if the subsystem role is one of the roles that make
    # up its function.
    my @roles = $fig->roles_of_function($func);
    next unless grep { $_ eq $role } @roles;

    # Skip functions whose trailing comment flags a damaged gene call.
    next if $func =~ /\#.*((trunca)|(framesh)|(fragment))/;

    # Strip the comment. The length test is repeated because the stripped
    # text is what gets written, and the grouping pass cannot parse a
    # function shorter than two characters.
    $func =~ s/\s*\#.*$//;
    next if length($func) < 2;

    print {$sort_fh} "$func\t$peg\n"
        or die "could not write to sort pipe for $pairsF: $!";
}

# fetchrow_arrayref returns undef both at end of data and on error; with a
# streamed result an error can occur mid-way, so distinguish the two.
die "error while reading subsystem query results: " . $sth->errstr
    if $sth->err;

# Closing a pipe waits for the child; a false return means sort failed and
# $pairsF cannot be trusted.
close($sort_fh)
    or die "sort into $pairsF failed (exit status $?): $!";

#
# Stage 2: read the sorted "function<TAB>peg" pairs from $pairsF and print
# one "family-number<TAB>peg" line per member for every function that has at
# least two pegs. Family numbers start at 1 and follow file order.
#
# Lines that do not look like "function<TAB>fig|G.V.peg.N" are reported on
# STDERR and skipped. Previously the first such line ended the loop, which
# silently dropped every family that sorted after it.
#
my $n = 1;
open(my $pairs_fh, "<", $pairsF) or die "could not open $pairsF: $!";

my $current_func;    # function of the run being accumulated
my @set;             # pegs seen so far for $current_func

# Print the accumulated run as a family if it has more than one member.
my $emit_family = sub {
    return if @set < 2;
    print "$n\t$_\n" foreach @set;
    $n++;
};

while (defined(my $line = <$pairs_fh>))
{
    if ($line !~ /(\S.*\S)\t(fig\|\d+\.\d+\.peg\.\d+)/)
    {
        chomp(my $shown = $line);
        warn "skipping malformed line $. of $pairsF: $shown\n";
        next;
    }
    my($func, $peg) = ($1, $2);

    # A change of function closes the current run.
    if (defined($current_func) && ($func ne $current_func))
    {
        $emit_family->();
        @set = ();
    }

    $current_func = $func;
    push(@set, $peg);
}

# Flush the final run.
$emit_family->();
close($pairs_fh);

# Removal of the temporary pairs file is currently disabled.
#unlink($pairsF);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3