[Bio] / FigKernelScripts / FFB2_make_subsys_based_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB2_make_subsys_based_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Jul 12 19:30:27 2010 UTC (10 years ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2010_0928, rast_rel_2010_0827
Changes since 1.1: +42 -8 lines
Bunch of figfam generation optimizations.

########################################################################
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
########################################################################


use FIG;
use strict;

# Handle on the FIG installation; used further down for the database
# connection and for roles_of_function().
my $fig = FIG->new;

# usage: FFB2_make_subsys_based_families > subsys.based.families

# Scratch file that receives the sorted, de-duplicated "function<TAB>peg"
# pairs.  The process id ($$) is part of the name.
my $pairsF = "$FIG_Config::temp/peg-func.$$";

#
# Historical input pipelines, kept for reference.  They produced, on TMP1,
# triples of role, peg, assigned function; the same triples now come from
# the SQL query below.
#
#open(TMP1,"pegs_in_subsystems | cut -f2,3 | sort -u | function_of |")
#    || die "could not get pegs";
#open(TMP1, "cut -f2,3 < /scratch/olson/pegs.in.subs | sort -u -S 3G | function_of |");

# Everything printed to TMP2 is piped through an external "sort -u" (with a
# 3G sort buffer) whose output lands in $pairsF.  Include $! so that a
# failure to start the pipe reports its cause.
open(TMP2,"| sort -S 3G -u > $pairsF") || die "could not open $pairsF: $!";

#mysql> select si.subsystem, si.role, si.protein, f.assigned_function from subsystem_index si LEFT JOIN aux_roles ar ON si.subsystem = ar.subsystem AND si.role = ar.role JOIN subsystem_metadata m ON si.subsystem = m.subsystem JOIN assigned_functions f ON f.prot = si.protein LEFT JOIN deleted_fids df ON si.protein = df.fid WHERE df.fid IS NULL AND ar.role IS NULL and m.class_1 <> '' AND m.class_1 not like 'experimental%' COLLATE latin1_swedish_ci and m.class_1 not like '%delete%' AND si.variant != '0' AND si.variant != '-1' into outfile '/tmp/list6';


#
# Select (role, peg, assigned function) for every subsystem cell, excluding
# deleted features, auxiliary roles, subsystems whose class_1 is empty or
# looks experimental/deleted, and variants '0' and '-1'.
#
my $dbh = $fig->db_handle->{_dbh};

my $query = qq(SELECT si.role, si.protein, f.assigned_function
					      FROM subsystem_index si
					         LEFT JOIN aux_roles ar ON si.subsystem = ar.subsystem AND si.role = ar.role
					         JOIN subsystem_metadata m ON si.subsystem = m.subsystem
					         JOIN assigned_functions f ON f.prot = si.protein
					         LEFT JOIN deleted_fids df ON si.protein = df.fid
					      WHERE df.fid IS NULL AND
					            ar.role IS NULL AND
					            m.class_1 <> '' AND
					            m.class_1 NOT LIKE 'experimental%' COLLATE latin1_swedish_ci AND
					            m.class_1 NOT LIKE '%delete%' COLLATE latin1_swedish_ci  AND
					            si.variant != '0' AND
					            si.variant != '-1');

# mysql_use_result is a DBD::mysql statement attribute; per its documentation
# it makes the driver stream rows instead of buffering the whole result set.
#
# prepare() and execute() return undef on failure.  Check both explicitly so
# that the script stops with a clear message even when the handle was not
# created with RaiseError (not visible from this file).
my $sth = $dbh->prepare($query, { mysql_use_result => 1 })
    or die "could not prepare subsystem query: " . $dbh->errstr;

$sth->execute()
    or die "could not execute subsystem query: " . $sth->errstr;

#
# Walk the (role, peg, assigned function) rows and write "function<TAB>peg"
# to the sort pipe (TMP2) for every peg that
#   - is a protein-encoding gene (id contains ".peg."),
#   - has an assigned function of at least two characters,
#   - has the subsystem role among the roles of its assigned function, and
#   - is not annotated (after a '#') as truncated, frameshifted or a fragment.
# Any trailing "# comment" is stripped from the function before it is written.
#
while (my $row = $sth->fetchrow_arrayref())
{
    my($role, $peg, $func) = @$row;

    next if $peg !~ /\.peg\./;
    next if ((! $func) || (length($func) < 2));

    # Keep the peg only if the subsystem role is one of the function's roles.
    my @roles = $fig->roles_of_function($func);
    next unless grep { $_ eq $role } @roles;

    next if ($func =~ /\#.*((trunca)|(framesh)|(fragment))/);

    $func =~ s/\s*\#.*$//;
    print TMP2 "$func\t$peg\n";
}

# fetchrow_arrayref() returns undef on error as well as at end of data, so a
# failure part-way through the result set would otherwise look like a normal
# (but short) run.
die "error while fetching subsystem rows: " . $sth->errstr if $sth->err;

# Closing a pipe waits for the child and returns false if it exited non-zero
# ($! is 0 in that case and $? holds the status).  Without this check a failed
# sort would silently leave a truncated or empty $pairsF.
close(TMP2)
    || die "sort into $pairsF failed: " . ($! ? "$!" : "exit status $?");

#
# Read the sorted "function<TAB>peg" pairs back and print one family per
# function that has more than one peg, as lines of "family-number<TAB>peg".
# Family numbers start at 1 and increase in file order; functions with a
# single peg are dropped and do not consume a number.
#
my $n = 1;
open(my $pairs_fh, '<', $pairsF) || die "could not open $pairsF: $!";

my $pair_re = qr/(\S.*\S)\t(fig\|\d+\.\d+\.peg\.\d+)/;

my $cur_func;      # function of the run of lines currently being collected
my @set = ();      # pegs seen so far for $cur_func

# Print the collected pegs as family $n, unless the set is a singleton/empty.
my $emit_set = sub {
    return if (@set < 2);
    foreach my $peg (@set)
    {
        print "$n\t$peg\n";
    }
    $n++;
};

while (defined(my $line = <$pairs_fh>))
{
    my($func, $peg) = ($line =~ $pair_re);

    # A line that does not parse (e.g. a function reduced to a single
    # character once its "# comment" was stripped) used to end the whole
    # loop, silently discarding every family after it.  Skip just that line.
    if (! defined($func))
    {
        warn "skipping malformed line $. of $pairsF: $line";
        next;
    }

    # The file is sorted, so a change of function ends the current family.
    if (defined($cur_func) && ($func ne $cur_func))
    {
        $emit_set->();
        @set = ();
    }
    $cur_func = $func;
    push(@set, $peg);
}
$emit_set->();     # flush the last run
close($pairs_fh);

# NOTE(review): the scratch file is intentionally (?) left behind; re-enable
# the unlink once it is no longer needed for inspection.
#unlink($pairsF);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3