[Bio] / SubsystemExtension / multiple_subsystem_extension.pl Repository:
ViewVC logotype

View of /SubsystemExtension/multiple_subsystem_extension.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Thu Jan 5 19:41:53 2006 UTC (12 years, 8 months ago) by olson
Branch: MAIN
Rearrangement for easier installation.

#!/Users/fig/FIGDisk/env/mac/bin/perl 

use strict;
use warnings;

use Getopt::Std;
use Term::ReadKey;
use IO::Handle;
use Data::Dumper;
use FIG;
use Subsystem;
use SubsystemExtension::AEOS;
use SubsystemExtension::ValidationInterface;
use SubsystemExtension::SubsystemCandidateFactory::MultiSEED;
use SubsystemExtension::ExtensionConfig qw (TEMPDIR);

use constant DEBUG => 1;
no warnings qw(redefine);

#
#  this is necessary if the script is started via rsh(1)
#  otherwise you won't see any output until the first <RETURN>
#
STDOUT->autoflush(1);

sub usage {
    print "\t -= multiple_subsystem_extension =-\n\n";
    print "This script is used to extend subsystems (either one or a list in a file) to one ore more genomes.\n";
    print "If neither the parameter -t nor -m is defined all genomes not yet present in the subsystem (excluding environmental samples) will be used\n\n";
	
    print "usage:\n multiple_subsystem_extension -s <subsystem name> -f <subsystem file> [-t <taxon_id> -m <taxon_id file> -a] -d <validation_directory> -e <e-value cutoff: e.g. 1.0e-10> -r <number of organisms to search in>  [-x use AEOS]\n\n";
    
}

# global variables
our($opt_s, $opt_t, $opt_f, $opt_d, $opt_m, $opt_a, $opt_x, $opt_e, $opt_r);

getopts('s:t:f:d:m:e:r:ax');


if ((!$opt_s) && (!$opt_f)) {
    usage;
    print "ERROR: Can't start script : no subsystem/filename given!\n";
    exit 1;
}




my $fig = new FIG;
my @subsystems;

push @subsystems, $opt_s if ($opt_s);


## collect the subsystem names from a flat file
# each line holds one subsystem name

if ($opt_f && -e $opt_f) {
    open SSF, "<$opt_f";
    while (<SSF>) {
	chomp;
	push @subsystems, $_;
    }
    close SSF;
}

my $html = [];


my @missing_genomes;

# add single or multiple taxon ids (parsed from file) to 
# the list of missing genomes

if ($opt_t) {
    push @missing_genomes, $opt_t;
} elsif ($opt_m && -e $opt_m) {
    open TF, "<$opt_m";
    while (<TF>) {
	chomp;
	push @missing_genomes, $_ if $_ =~ /\d+\.\d+/;
    }
    close TF;
    
} elsif ($opt_a) {
    @missing_genomes = grep { $_ !~ /^99999/ } $fig->genomes("complete");
}

$opt_d = TEMPDIR unless ($opt_d);

foreach (@subsystems) {

    my $subsystem = $fig->get_subsystem($_);

    if (!$subsystem) {
	print STDERR "could not initialize the subsystem $opt_s\n";
	next;
    }

    my $extension = SubsystemExtension::SubsystemCandidateFactory::MultiSEED->new($subsystem, "master", $opt_e, $opt_r);
    

    if (ref $extension && $extension->isa("SubsystemExtension::SubsystemCandidateFactory")) {

	my %in = map { $_ => 1 } $subsystem->get_genomes;


	unless (scalar @missing_genomes > 0) {
	    print STDERR "getting missing genomes for subsystem ".$subsystem->get_name()."\n" if (DEBUG); 
	    @missing_genomes = grep { ! $in{$_} } grep { $_ !~ /^99999/ } $fig->genomes("complete");
	}
	
	

	my $subsystem_candidates = $extension->generate(\@missing_genomes);
	    
	foreach my $genome (keys %$subsystem_candidates) {
	    my $subsystem_candidate;
	    # generate the aeos and extend the subsystem!!
	    if ($opt_x) {
		
		my $aeos = SubsystemExtension::AEOS->new($fig, $subsystem, $genome, $subsystem_candidates->{$genome}, {mute=>1});
		$subsystem_candidate = $aeos->extend();
	    } else {
		$subsystem_candidate = $subsystem_candidates->{$genome};
	    }
	    
	    my $subsystem_name = $subsystem->get_name();
	    $subsystem_name =~ s/\(/\\\(/;
	    $subsystem_name =~ s/\)/\\\)/;
	    print STDERR $subsystem_name."\n\n\n";
	    
	    
	    mkdir $opt_d."/".$subsystem->get_name() unless (-d $opt_d."/".$subsystem->get_name());
	    
	    $subsystem_candidate->toFile($opt_d."/".$subsystem->get_name()."/$genome.scs") if (-d $opt_d);
	    
	    if ($subsystem_candidate->functional_variant() > 0) {
		my $validation_interface = SubsystemExtension::ValidationInterface->new($fig, $subsystem, $genome, $subsystem_candidate);
		if (-d $opt_d) {
		    
		    my $filename = $opt_d."/".$subsystem->get_name()."/".$genome;
		    $filename .= "_complete" if ($subsystem_candidate->functional_variant_score() >= 0.9);
		    $filename .= ".html";
		    open VAL, ">$filename";
		    print VAL $validation_interface->html_output(); 
		    close VAL;
		    # $subsystem_candidate->toFile($opt_d."/".$subsystem->get_name()."/".$genome.".scs");
		} else {
		    print $validation_interface->html_output(); 
		}
	    }
	}
    }
    
}




MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3