[Bio] / FigKernelPackages / NRTools.pm Repository:
ViewVC logotype

View of /FigKernelPackages/NRTools.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Wed Jul 8 20:37:09 2009 UTC (10 years, 4 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.3: +7 -2 lines
update to new metadata standard

#
# Tools for dealing with nonredundant databases - finding sources, building NR, etc.
#

package NRTools;

use strict;

use base qw(Exporter);
use vars qw(@EXPORT);

use DirHandle;

# Try to load Job48 and run its import. The eval traps any failure and $@
# is never inspected, so a missing or broken Job48 is silently ignored at
# load time. Within this file only scan_rast_jobs() calls Job48->new;
# presumably that is the only consumer -- TODO confirm.
eval {
    require Job48;
    import Job48;
};

# Names exported by default into any package that uses this module.
@EXPORT = qw(scan_NR_dir scan_seed_dir scan_rast_jobs);


=head3 scan_NR_dir()

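usage: scan_NR_dir(\%nr_hash, $dirname, \%options)

Scan a directory containing SEED-formatted NR directories, and fill in
%nr_hash with entries of the form $nr_hash->{name} = { type => "NR", name => dirname, path => full path to NR dir, fasta_path => full path to fasta file, size => size of fasta file }.
The optional %options may contain skip => a regular expression; directory names matching it are ignored.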

=cut

sub scan_NR_dir
{
    # Arguments:
    #   $nr_hash - hash ref that receives one entry per NR directory found,
    #              keyed by the directory's name.
    #   $dir     - directory whose immediate children are examined.
    #   $options - optional hash ref. If $options->{skip} is true it is used
    #              as a regular expression; matching names are ignored.
    #
    # A child is recorded only when it contains a plain file named "fasta".
    # A missing assigned_functions or org.table file produces a warning but
    # does not prevent the entry from being recorded.
    #
    # Returns the result of closing the directory handle, or nothing (after
    # a warning) when $dir cannot be opened.
    my($nr_hash, $dir, $options) = @_;

    my $dh = DirHandle->new($dir);
    if (!$dh)
    {
        # Same handling as scan_rast_jobs: report and give up rather than
        # die on a method call against an undefined handle.
        warn "Cannot open directory $dir: $!";
        return;
    }

    my $skip = $options ? $options->{skip} : undef;

    # The entry name is held in a lexical. Assigning to the global $_ would
    # overwrite the caller's $_ (and, inside a foreach, the array element it
    # is aliased to).
    while (defined(my $entry = $dh->read()))
    {
        next if $entry =~ /^\./;
        next if $entry eq 'SEED';   # Ignore the export of SEED data that lives here.
        next if $skip and $entry =~ /$skip/;

        my $path = "$dir/$entry";
        my $fasta = "$path/fasta";
        next unless -f $fasta;

        for my $required (qw(assigned_functions org.table))
        {
            warn "NR directory $path missing $required\n" unless -f "$path/$required";
        }

        $nr_hash->{$entry} = { type => "NR", name => $entry, path => $path, fasta_path => $fasta, size => -s $fasta };
    }
    $dh->close();
}

=head3 scan_seed_dir()

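usage: scan_seed_dir(\%nr_hash, $dirname, \%opts)

Scan a SEED organism directory, creating entries as in scan_NR_dir, but with
type => "seed_org" and the fasta file taken from Features/peg/fasta inside each
organism directory. The optional %opts may contain limit, which stops the scan early.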

=cut

sub scan_seed_dir
{
    # Arguments:
    #   $nr_hash - hash ref that receives one entry per organism directory
    #              that has a Features/peg/fasta file, keyed by its name.
    #   $dir     - directory whose immediate children are examined.
    #   $opts    - optional hash ref; a true $opts->{limit} ends the scan
    #              early (see the note at the bottom of the loop).
    #
    # Returns the result of closing the directory handle, or nothing (after
    # a warning) when $dir cannot be opened.
    my($nr_hash, $dir, $opts) = @_;

    my $dh = DirHandle->new($dir);
    if (!$dh)
    {
        # Same handling as scan_rast_jobs: report and give up rather than
        # die on a method call against an undefined handle.
        warn "Cannot open directory $dir: $!";
        return;
    }

    my $limit = $opts ? $opts->{limit} : undef;
    my $n = 0;

    # defined() is required: a bare truth test would end the scan at an
    # entry named "0". The name is held in a lexical so that the caller's
    # $_ is left alone.
    while (defined(my $org = $dh->read()))
    {
        next if $org =~ /^\./;
        #next if $org =~ /^9999999.\d+$/;

        #
        # Strip environmental sequences.
        #
        # c.f. seed-tech mail thread of 2/8/2007 for discussion on the rationale of the following
        # logic.
        #
        # next if $fig->is_environmental($org);
        next if $org =~ /^4{7}/ or $org =~ /^9{7}/;

        my $path = "$dir/$org";

        next unless -d $path;
        next if (-e "$path/DELETED");

        my $fasta = "$path/Features/peg/fasta";
        if (-f $fasta)
        {
            # "_" reuses the stat information gathered by the -f test above.
            $nr_hash->{$org} = { type => "seed_org", name => $org, path => $path,
                                 fasta_path => $fasta, size => -s _ };
        }

        # NOTE(review): $n counts every directory that reaches this point,
        # with or without a fasta file, and the post-increment comparison
        # lets limit+2 of them through before stopping. Behavior kept as-is;
        # confirm whether an exact cap on recorded entries was intended.
        last if $limit && $n++ > $limit;
    }
    $dh->close();
}

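=head3 scan_rast_jobs()

usage: scan_rast_jobs(\@jobs, $dir)

Scan the given RAST job directory and push onto @jobs a Job48 object for each
completed job that is marked with import.candidate greater than zero and
import.action set to "import", and whose import.status is not already
"computed" or "installed".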

=cut

sub scan_rast_jobs
{
    # Arguments:
    #   $jobs - array ref; a Job48 object is pushed onto it for every job
    #           that passes the filters below.
    #   $dir  - directory whose all-digit entries are treated as job
    #           directories.
    #
    # Returns nothing. If $dir cannot be opened a warning is issued and
    # @$jobs is left untouched.
    my($jobs, $dir) = @_;

    my $dh = DirHandle->new($dir);

    if (!$dh)
    {
        warn "Cannot open directory $dir: $!";
        return;
    }

    # The entry name is held in a lexical so that the caller's $_ (and any
    # array element it is aliased to) is not overwritten.
    while (defined(my $id = $dh->read()))
    {
        next unless $id =~ /^\d+$/;

        my $job = Job48->new("$dir/$id");
        next unless $job;

        # Keep only finished jobs flagged as import candidates with the
        # "import" action ...
        next unless $job->meta->get_metadata("status.final") eq  "complete";
        next unless $job->meta->get_metadata("import.candidate") > 0;
        next unless $job->meta->get_metadata("import.action") eq 'import';

        # ... whose import.status is neither 'computed' nor 'installed'.
        my $stat =  $job->meta->get_metadata('import.status');
        next if $stat eq 'computed' or $stat eq 'installed';

        push(@$jobs, $job);
    }

    # Close explicitly, as the other scanners in this module do.
    $dh->close();
    return;
}

1;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3