[Bio] / SubsystemExtension / multiple_subsystem_extension.pl Repository:
ViewVC logotype

Annotation of /SubsystemExtension/multiple_subsystem_extension.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : olson 1.1 #!/Users/fig/FIGDisk/env/mac/bin/perl
2 :    
3 :     use strict;
4 :     use warnings;
5 :    
6 :     use Getopt::Std;
7 :     use Term::ReadKey;
8 :     use IO::Handle;
9 :     use Data::Dumper;
10 :     use FIG;
11 :     use Subsystem;
12 :     use SubsystemExtension::AEOS;
13 :     use SubsystemExtension::ValidationInterface;
14 :     use SubsystemExtension::SubsystemCandidateFactory::MultiSEED;
15 :     use SubsystemExtension::ExtensionConfig qw (TEMPDIR);
16 :    
17 : heiko 1.2 use constant DEBUG => 0;
18 : olson 1.1 no warnings qw(redefine);
19 :    
20 :     #
21 :     # this is necessary if the script is started via rsh(1)
22 :     # otherwise you won't see any output until the first <RETURN>
23 :     #
24 :     STDOUT->autoflush(1);
25 :    
26 :     sub usage {
27 :     print "\t -= multiple_subsystem_extension =-\n\n";
28 :     print "This script is used to extend subsystems (either one or a list in a file) to one ore more genomes.\n";
29 :     print "If neither the parameter -t nor -m is defined all genomes not yet present in the subsystem (excluding environmental samples) will be used\n\n";
30 :    
31 : heiko 1.2 print "usage:\n multiple_subsystem_extension -s <subsystem name> -f <subsystem file> [-t <taxon_id> -m <taxon_id file> -a] -d <validation_directory> -e <e-value cutoff: e.g. 1.0e-10> -r <number of organisms to search in> -c <score cutoff default=150 >\n\n";
32 : olson 1.1
33 :     }
34 :    
35 :     # global variables
36 : heiko 1.2 our($opt_s, $opt_t, $opt_f, $opt_d, $opt_m, $opt_a, $opt_e, $opt_c, $opt_r);
37 : olson 1.1
38 : heiko 1.2 getopts('s:t:f:d:m:e:r:c:a');
39 : olson 1.1
40 :    
41 :     if ((!$opt_s) && (!$opt_f)) {
42 :     usage;
43 :     print "ERROR: Can't start script : no subsystem/filename given!\n";
44 :     exit 1;
45 :     }
46 :    
47 :    
48 :    
49 :    
50 :     my $fig = new FIG;
51 :     my @subsystems;
52 :    
53 :     push @subsystems, $opt_s if ($opt_s);
54 :    
55 :    
56 :     ## collect the subsystem names from a flat file
57 :     # each line holds one subsystem name
58 :    
59 :     if ($opt_f && -e $opt_f) {
60 :     open SSF, "<$opt_f";
61 :     while (<SSF>) {
62 :     chomp;
63 :     push @subsystems, $_;
64 :     }
65 :     close SSF;
66 :     }
67 :    
68 :     my $html = [];
69 :    
70 :    
71 :     my @missing_genomes;
72 :    
73 :     # add single or multiple taxon ids (parsed from file) to
74 :     # the list of missing genomes
75 :    
76 :     if ($opt_t) {
77 :     push @missing_genomes, $opt_t;
78 :     } elsif ($opt_m && -e $opt_m) {
79 :     open TF, "<$opt_m";
80 :     while (<TF>) {
81 :     chomp;
82 :     push @missing_genomes, $_ if $_ =~ /\d+\.\d+/;
83 :     }
84 :     close TF;
85 :    
86 : heiko 1.2 } elsif ($opt_a) { # extend to all complete genomes
87 : olson 1.1 @missing_genomes = grep { $_ !~ /^99999/ } $fig->genomes("complete");
88 :     }
89 :    
90 :     $opt_d = TEMPDIR unless ($opt_d);
91 :    
92 : overbeek 1.3 unless (-d $opt_d) {mkdir $opt_d}
93 :    
94 :    
95 :    
96 : olson 1.1 foreach (@subsystems) {
97 :    
98 :     my $subsystem = $fig->get_subsystem($_);
99 :    
100 :     if (!$subsystem) {
101 : overbeek 1.3 print STDERR "could not initialize the subsystem $opt_s\n";
102 :     next;
103 : olson 1.1 }
104 :    
105 : overbeek 1.3 # RAE: if the directory to write the results in exists, rename it
106 :     if (-d $opt_d."/".$subsystem->get_name())
107 :     {
108 :     my $count=1;
109 :     $count++ while (-e $opt_d."/".$subsystem->get_name().".".$count);
110 :     rename $opt_d."/".$subsystem->get_name(), $opt_d."/".$subsystem->get_name().".".$count;
111 :     }
112 :     mkdir $opt_d."/".$subsystem->get_name();
113 :    
114 : olson 1.1 my $extension = SubsystemExtension::SubsystemCandidateFactory::MultiSEED->new($subsystem, "master", $opt_e, $opt_r);
115 :    
116 :     if (ref $extension && $extension->isa("SubsystemExtension::SubsystemCandidateFactory")) {
117 :    
118 : heiko 1.2 my %in = map { $_ => 1 } $subsystem->get_genomes;
119 : olson 1.1
120 :    
121 : heiko 1.2 unless (scalar @missing_genomes > 0) {
122 :     print STDERR "getting missing genomes for subsystem ".$subsystem->get_name()."\n" if (DEBUG);
123 :     @missing_genomes = grep { ! $in{$_} } grep { $_ !~ /^99999/ } $fig->genomes("complete");
124 :     }
125 : olson 1.1
126 :    
127 :    
128 : heiko 1.2 my $subsystem_candidates = $extension->generate(\@missing_genomes);
129 : olson 1.1
130 : heiko 1.2 foreach my $genome (keys %$subsystem_candidates) {
131 :     my $subsystem_candidate;
132 :     # generate the aeos and extend the subsystem!!
133 : olson 1.1
134 : heiko 1.2 my $aeos = SubsystemExtension::AEOS->new($fig, $subsystem, $genome, $subsystem_candidates->{$genome}, {mute=>1, score_cutoff => $opt_c});
135 :     $subsystem_candidate = $aeos->extend();
136 : olson 1.1
137 : heiko 1.2 my $subsystem_name = $subsystem->get_name();
138 :     $subsystem_name =~ s/\(/\\\(/;
139 :     $subsystem_name =~ s/\)/\\\)/;
140 : overbeek 1.3
141 :     $subsystem_candidate->toFile($opt_d."/".$subsystem->get_name()."/$genome.scs") if (-d $opt_d."/".$subsystem->get_name());
142 : olson 1.1
143 :     }
144 :     }
145 :    
146 :     }
147 :    
148 :    
149 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3