[Bio] / FigKernelPackages / ToCall.pm Repository:
ViewVC logotype

View of /FigKernelPackages/ToCall.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (download) (as text) (annotate)
Fri Jan 25 04:05:38 2008 UTC (12 years, 1 month ago) by gdpusch
Branch: MAIN
CVS Tags: rast_rel_2009_05_18, rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_12_18, rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_04_23, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2009_03_26, rast_rel_2008_11_24, rast_rel_2008_08_07
Changes since 1.9: +4 -3 lines
Clarified error msg for missing PEG FASTA file. -- /gdp

# -*- perl -*-
########################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
########################################################################

package ToCall;

# This package supports the recall of existing genomes.  The output of 
# find_neighbors and find_genes_based_on_neighbors is a "found" file
# listing the families and genes that have been found.  The original
# directory is not altered (in this package; in NewGenomes.pm side-effects
# alter the Feature subdirectories.
# 
use strict;
use FIG;
use FIGV;

use Carp;
use Data::Dumper;


sub new {
    my($class,$genomeDir) = @_;

    my $fig = new FIGV($genomeDir);
    my $to_call = {};
    $to_call->{fig} = $fig;
    $to_call->{genomeDir} = $genomeDir;
    if (! -s "$genomeDir/Features/peg/fasta")
    {
	confess "Zero-size or non-existent FASTA file: $genomeDir/Features/peg/fasta";
    }

    if ((! -s "$genomeDir/Features/peg/fasta.psq") || 
	((-M  "$genomeDir/Features/peg/fasta") < (-M "$genomeDir/Features/peg/fasta.psq")))
    {
	system "formatdb -i $genomeDir/Features/peg/fasta -p T";
    }

    bless $to_call,$class;
    return $to_call;
}

sub get_feature_object {
    my($self,$peg) = @_;

    my $fig = $self->{fig};
    my $seq = $fig->get_translation($peg);
    return ToCall::PEG->new($peg,$seq);
}

sub get_fids_for_type {
    my($self,$type) = @_;

    my $genomeDir = $self->{genomeDir};
    
    if ((($type eq "peg") || ($type eq "orf")) && ($genomeDir =~ /(\d+\.\d+)$/))
    {
	my $fig = $self->{fig};
	return $fig->all_features($1,"peg");
    }
    return ();
}

sub get_feature_length {
    my($self,$peg) = @_;

    $self->load_lens_and_seqs;
    my $lenH = $self->{peg_lengths};
    return $lenH->{$peg};
}

sub load_lens_and_seqs {
    my($self) = @_;

    my $lenH = $self->{peg_lengths};
    if (! $lenH)
    {
	$lenH = {};
	my $seqH = {};
	my $fig = $self->{fig};
	my $dir = $self->{genomeDir};
	open(SEQS,"<$dir/Features/peg/fasta") || die "could not open $dir/Features/peg/fasta";
	my($fid,$seqP);
	while (($fid,$seqP) = $fig->read_fasta_record(\*SEQS))
	{
	    $lenH->{$fid} = length($$seqP) * 3;
	    $seqH->{$fid} = $$seqP;
	}
	close(SEQS);
	$self->{peg_lengths} = $lenH;
	$self->{peg_seqs} = $seqH;
    }
}

sub get_feature_sequence {
    my($self,$peg) = @_;

    $self->load_lens_and_seqs;
    my $seqH = $self->{peg_seqs};
    return $seqH->{$peg};
}

sub candidate_orfs {
    my($self,%args) = @_;
    
    my $fig = $self->{fig};
    my $query_seq = $args{-seq};
    
    if ((not defined($query_seq)) || (length($query_seq) == 0)) {
	print STDERR "Undefined or zero-length query-sequence\n";
	return ();
    }
    
    my $query_file = "$FIG_Config::temp/tmp_query.$$.fasta";
    open(TMP, ">$query_file") || confess "Could not write-open $query_file";
    &FIG::display_id_and_seq('query_seq', \$query_seq, \*TMP); 
    close(TMP)
	|| confess "Could not close query-file $query_file --- args:\n", Dumper(\%args);
    (-s $query_file) 
	|| confess "Could not write query sequence to $query_file --- args:\n", Dumper(\%args);

    my $genomeDir = $self->{genomeDir};
    my $db = "$genomeDir/Features/peg/fasta";
    
    my @sims = `$FIG_Config::ext_bin/blastall -i $query_file -d $db -p blastp -m8 -FF -e 1.0e-10`;
    unlink($query_file);

    my @hits = ();
    my($sim,%seen);
    foreach $sim (@sims)
    {
	if (($sim =~ /^\S+\t(\S+)/) && (! $seen{$1}))
	{
	    my $peg = $1;
	    my $seq = $fig->get_translation($peg);
	    push(@hits,ToCall::PEG->new($peg,$seq));
	    $seen{$peg} = 1;
	}
    }
    print STDERR "No hits for query $query_seq\n" if ($ENV{VERBOSE} && (@hits == 0));
    
    return @hits;
}

sub possible_orfs {
}

sub export_features {
}

sub recall_orfs {
}

package ToCall::PEG;

sub new {
    my($class,$peg,$seq) = @_;

    return bless [$peg,$seq],$class;
}

sub seq {
    my($self) = @_;
    
    return $self->[1];
}

sub get_fid {
    my($self) = @_;
    
    return $self->[0];
}

sub call_start {
    my($self) = @_;
}

sub promote_to_peg {
    my($self) = @_;

    return $self->[0];
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3