[Bio] / FigKernelScripts / find_genes_in_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/find_genes_in_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (download) (as text) (annotate)
Wed Feb 13 14:54:27 2008 UTC (12 years, 1 month ago) by gdpusch
Branch: MAIN
CVS Tags: rast_rel_2009_05_18, rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_12_18, rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_04_23, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2009_03_26, rast_rel_2008_11_24, rast_rel_2008_08_07
Changes since 1.19: +43 -17 lines
Final clean-up. -- /gdp

# -*- perl -*-
########################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
########################################################################

use strict;
use warnings;

$0 =~ m/([^\/]+)$/o;
my $self  = $1;
my $usage = "find_genes_in_families FigfamsData ToCallDir FoundFamilies [old] 2> missed.fasta";

if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
    print STDERR "\n   $usage\n\n";
    exit (0);
}

use FIG;
my $fig = new FIG;

my ($fam_data, $N, $i);

# First, let's determine which FigfamsData directory to use

for ($i=0; ($i < @ARGV) && ($ARGV[$i] !~ /^-fams=/); $i++) {}
if ($i < @ARGV) {
    if ($ARGV[$i] =~ /^-fams=(\S+)/) {
	if (-d $1) {
	    $fam_data = $1;
	}
	else {
	    die "Nonexistent FIGfams directory $1\n";
	}
	splice(@ARGV,$i,1);
    }
}
	
use FigFam;
use FigFams;
my $figfams = new FigFams($fig,$fam_data);

use NewGenome;
use ToCall;

my ($to_call_dir, $foundF);
(
 ($to_call_dir = shift @ARGV) && ((-d $to_call_dir) || warn("Directory $to_call_dir does not exist")) &&
 ($foundF      = shift @ARGV)
)
    || die "\n   usage: $usage\n\n";

my($to_call,$use_old);
$use_old = ((@ARGV > 0) && ($ARGV[0] =~ /^old/i));

if ($use_old) {
    $to_call = new ToCall($to_call_dir);
}
else {
    $to_call = new NewGenome($to_call_dir);
}

my $called_by_file = "$to_call_dir/called_by";
open(CALLED_BY, ">>$called_by_file")
    || die "Could not append-open $called_by_file";

open(FOUNDFAMS,">>$foundF") || die "could not append-open $foundF";

my @orfs = sort { $b->[2] <=> $a->[2] }
           map  { my $id  = $_; 
		  my $seq = $to_call->get_feature_sequence($id);
		  my $ln  = length($seq);
		  [$id, $seq, $ln]
		  } $to_call->get_fids_for_type('orf');

foreach $_ (@orfs) {
    my ($orf_id, $seq) = @$_;
    print STDERR "looking for a family for $orf_id\n";
    
    my $orf;
    if ($use_old) {
	$orf = &ToCall::PEG::new('ToCall::PEG',$orf_id,$seq);
    }
    else {
	$orf = &NewGenome::ORF::new('NewGenome::ORF',$to_call,$orf_id);
    }
    
    my ($fam, $sims) = $figfams->place_in_family($seq);
    
    if (defined($sims)) {
	print STDERR "ORF $orf_id has ", (scalar @$sims), " sims against family $fam\n" if $ENV{VERBOSE};
	$orf->call_start($sims);
    }
    else {
	print STDERR "ORF $orf_id had no sims against a family\n" if $ENV{VERBOSE};
    }
    
    if ($fam) {
	my $fam_id = $fam->family_id;
	my $func   = $fam->family_function;
	my $annot  = [ qq(RAST),
		      qq($func\nCalled by "$self" using FIGfam $fam_id.)
		      ];
	my $fid    = $orf->promote_to_peg($sims, $func, $annot);
	if ($fid) {
	    print FOUNDFAMS "$fid\t$fam_id\t$func\n";
	    print CALLED_BY "$fid\t$self\n";
	    print STDERR "Succeeded:\t$orf_id --> $fid\n\n";
	}
	else {
	    print STDERR "Could not promote:\t$orf_id\n\n";
	}
    }
    else {
	print STDERR "Could not place:\t$orf_id\n";
    }
}

close(FOUNDFAMS);
close(CALLED_BY);

$to_call->export_features();

exit(0);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3