[Bio] / FigKernelPackages / NRTools.pm Repository:
ViewVC logotype

View of /FigKernelPackages/NRTools.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed Sep 5 21:01:22 2007 UTC (12 years, 10 months ago) by olson
Branch: MAIN
Changes since 1.1: +38 -2 lines
more RAST->SEED import support

#
# Tools for dealing with nonredundant databases - finding sources, building NR, etc.
#

package NRTools;

use strict;

use base qw(Exporter);
use vars qw(@EXPORT);

use DirHandle;

eval {
    require Job48;
    import Job48;
};

@EXPORT = qw(scan_NR_dir scan_seed_dir scan_rast_jobs);


=head3 scan_NR_dir()

usage: scan_NR_dir(\%nr_hash, $dirname, \%options)

Scan a directory containing SEED-formatted NR directories, and fill in
%nr_hash with one entry for each subdirectory that contains a C<fasta> file:

    $nr_hash->{name} = { type => "NR", name => dirname, path => full path to NR dir,
                         fasta_path => full path to fasta file, size => size of fasta file }

Entries whose names begin with a dot are ignored. The options hash is
optional; if C<skip> is set in it, it is used as a regular expression and
entries whose names match it are ignored.

A warning is issued for an NR directory that lacks C<assigned_functions> or
C<org.table>; the directory is still recorded.

Dies if $dirname cannot be opened. The return value is whatever closing the
directory handle returns.

=cut

sub scan_NR_dir
{
    my($nr_hash, $dir, $options) = @_;

    my $skip = $options ? $options->{skip} : undef;

    my $dh = DirHandle->new($dir)
        or die "scan_NR_dir: cannot open directory $dir: $!\n";

    # The entry name lives in a lexical so that the caller's $_ is not clobbered.
    while (defined(my $name = $dh->read()))
    {
        next if $name =~ /^\./;
        next if $skip and $name =~ /$skip/;

        my $path = "$dir/$name";
        my $fasta = "$path/fasta";

        # Only directories holding a fasta file count as NR directories.
        next unless -f $fasta;

        # Missing companion files are reported but do not disqualify the NR.
        for my $required (qw(assigned_functions org.table))
        {
            if (! -f "$path/$required")
            {
                warn "NR directory $path missing $required\n";
            }
        }

        $nr_hash->{$name} = { type => "NR", name => $name, path => $path,
                              fasta_path => $fasta, size => -s $fasta };
    }
    return $dh->close();
}

=head3 scan_seed_dir()

usage: scan_seed_dir(\%nr_hash, $dirname, \%opts)

Scan a SEED organism directory, and fill in %nr_hash with one entry for each
organism subdirectory that contains a C<Features/peg/fasta> file:

    $nr_hash->{name} = { type => "seed_org", name => dirname, path => full path to organism dir,
                         fasta_path => full path to fasta file, size => size of fasta file }

The following entries are ignored: names beginning with a dot, names
beginning with seven 4s or seven 9s (environmental sequences), anything that
is not a directory, and directories containing a C<DELETED> file.

The opts hash is optional. If C<limit> is set in it, the scan stops early
once enough candidate directories have been examined (see the note in the code).

Dies if $dirname cannot be opened. The return value is whatever closing the
directory handle returns.

=cut

sub scan_seed_dir
{
    my($nr_hash, $dir, $opts) = @_;

    my $limit = $opts ? $opts->{limit} : undef;

    my $dh = DirHandle->new($dir)
        or die "scan_seed_dir: cannot open directory $dir: $!\n";

    my $n = 0;

    # The defined() test matters: an entry literally named "0" is false and
    # would otherwise end the scan early. The entry name lives in a lexical so
    # that the caller's $_ is not clobbered.
    while (defined(my $name = $dh->read()))
    {
        next if $name =~ /^\./;
        #next if $name =~ /^9999999.\d+$/;

        #
        # Strip environmental sequences.
        #
        # c.f. seed-tech mail thread of 2/8/2007 for discussion on the rationale of the following
        # logic.
        #
        # next if $fig->is_environmental($name);
        next if $name =~ /^4{7}/ or $name =~ /^9{7}/;

        my $path = "$dir/$name";

        next unless -d $path;
        next if (-e "$path/DELETED");

        my $fasta = "$path/Features/peg/fasta";
        if (-f $fasta)
        {
            # "-s _" reuses the stat data gathered by the -f test just above.
            $nr_hash->{$name} = { type => "seed_org", name => $name, path => $path,
                                  fasta_path => $fasta, size => -s _ };
        }

        # NOTE(review): with the post-increment and the strict ">" comparison
        # the loop examines limit+2 candidate directories before stopping.
        # Kept unchanged for compatibility; confirm whether that is intended.
        last if $limit && $n++ > $limit;
    }
    return $dh->close();
}

=head3 scan_rast_jobs()

usage: scan_rast_jobs(\@jobs, $dir)

Scan the given RAST job directory, finding all completed jobs that are marked
with submit.seed == 1. Only entries of $dir whose names consist entirely of
digits are considered. A Job48 object is created for each one, and those
whose C<status.final> metadata is "complete" and whose C<submit.seed>
metadata is 1 are appended to @jobs.

Dies if $dir cannot be opened. The return value is whatever closing the
directory handle returns.

=cut

sub scan_rast_jobs
{
    my($jobs, $dir) = @_;

    my $dh = DirHandle->new($dir)
        or die "scan_rast_jobs: cannot open directory $dir: $!\n";

    # The entry name lives in a lexical so that neither the caller's $_ is
    # clobbered nor can code inside Job48 disturb the name being processed.
    while (defined(my $name = $dh->read()))
    {
        next unless $name =~ /^\d+$/;

        my $job = Job48->new("$dir/$name");
        next unless $job;

        # Metadata values may be absent; treat an undefined value as "no".
        my $status = $job->meta->get_metadata("status.final");
        next unless defined($status) && $status eq "complete";

        my $submit = $job->meta->get_metadata("submit.seed");
        next unless defined($submit) && $submit == 1;

        push(@$jobs, $job);
    }
    return $dh->close();
}

1;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3