[Bio] / FigKernelScripts / find_genes_in_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/find_genes_in_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (download) (as text) (annotate)
Thu Nov 5 19:45:28 2009 UTC (10 years ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.21: +4 -3 lines
Switch to FFs and FF instead of FigFams and FigFam

# -*- perl -*-
########################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
########################################################################

use strict;
use warnings;

$0 =~ m/([^\/]+)$/o;
my $self  = $1;
my $usage = "find_genes_in_families FigfamsData ToCallDir FoundFamilies [old] 2> missed.fasta";

if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
    print STDERR "\n   $usage\n\n";
    exit (0);
}

use FIG;
my $fig = new FIG;

my ($fam_data, $N, $i);

# First, let's determine which FigfamsData directory to use

for ($i=0; ($i < @ARGV) && ($ARGV[$i] !~ /^-fams=/); $i++) {}
if ($i < @ARGV) {
    if ($ARGV[$i] =~ /^-fams=(\S+)/) {
	if (-d $1) {
	    $fam_data = $1;
	}
	else {
	    die "Nonexistent FIGfams directory $1\n";
	}
	splice(@ARGV,$i,1);
    }
}
	
use FF;
use FFs;
$fam_data = $fig->get_figfams_data($fam_data);
my $figfams = new FFs($fam_data, $fig);

use NewGenome;
use ToCall;

my ($to_call_dir, $foundF);
(
 ($to_call_dir = shift @ARGV) && ((-d $to_call_dir) || warn("Directory $to_call_dir does not exist")) &&
 ($foundF      = shift @ARGV)
)
    || die "\n   usage: $usage\n\n";

my($to_call,$use_old);
$use_old = ((@ARGV > 0) && ($ARGV[0] =~ /^old/i));

if ($use_old) {
    $to_call = new ToCall($to_call_dir);
}
else {
    $to_call = new NewGenome($to_call_dir);
}

my $called_by_file = "$to_call_dir/called_by";
open(CALLED_BY, ">>$called_by_file")
    || die "Could not append-open $called_by_file";

open(FOUNDFAMS,">>$foundF") || die "could not append-open $foundF";

my @orfs = sort { $b->[2] <=> $a->[2] }
           map  { my $id  = $_; 
		  my $seq = $to_call->get_feature_sequence($id);
		  my $ln  = length($seq);
		  [$id, $seq, $ln]
		  } $to_call->get_fids_for_type('orf');

foreach $_ (@orfs) {
    my ($orf_id, $seq) = @$_;
    print STDERR "looking for a family for $orf_id\n";
    
    my $orf;
    if ($use_old) {
	$orf = &ToCall::PEG::new(   'ToCall::PEG',    $to_call, $orf_id, $seq);
    }
    else {
	$orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
    }
    
    my ($fam, $sims) = $figfams->place_in_family($seq);
    
    if (defined($sims)) {
	print STDERR "ORF $orf_id has ", (scalar @$sims), " sims against family $fam\n" if $ENV{VERBOSE};
	$orf->call_start($sims);
    }
    else {
	print STDERR "ORF $orf_id had no sims against a family\n" if $ENV{VERBOSE};
    }
    
    if ($fam) {
	my $fam_id = $fam->family_id;
	my $func   = $fam->family_function;
	my $annot  = [ qq(RAST),
		      qq($func\nCalled by "$self" using FIGfam $fam_id.)
		      ];
	my $fid    = $orf->promote_to_peg($sims, $func, $annot);
	if ($fid) {
	    print FOUNDFAMS "$fid\t$fam_id\t$func\n";
	    print CALLED_BY "$fid\t$self\n";
	    print STDERR "Succeeded:\t$orf_id --> $fid\n\n";
	}
	else {
	    print STDERR "Could not promote:\t$orf_id\n\n";
	}
    }
    else {
	print STDERR "Could not place:\t$orf_id\n";
    }
}

close(FOUNDFAMS);
close(CALLED_BY);

$to_call->export_features();

exit(0);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3