[Bio] / FigWebServices / fig_scripts_service.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/fig_scripts_service.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (view) (download)

1 : redwards 1.9 #__perl__
2 :    
3 : disz 1.1 use strict;
4 : disz 1.2 use Carp;
5 : disz 1.1 use SOAP::Lite;
6 :     use IPC::Open3;
7 :     use FIGO;
8 :     use SOAP::Transport::HTTP;
9 : disz 1.3 use DBMaster;
10 :     use Job48;
11 :     use Data::Dumper;
12 : redwards 1.9 use GenomeMeta;
13 : dsouza 1.11 use PinnedRegions;
14 : disz 1.1
15 :     SOAP::Transport::HTTP::CGI
16 :     -> dispatch_to('Scripts')
17 :     -> handle;
18 :    
19 :     package Scripts;
20 : redwards 1.12
21 : disz 1.2 use FIG;
22 : redwards 1.12 use FIGV;
23 : disz 1.2 use FIG_Config;
24 :     use Data::Dumper;
25 :     use FigFams;
26 :     use FigFam;
27 : disz 1.3 use IPC::Open3;
28 : redwards 1.9 use raelib;
29 : disz 1.3
30 : redwards 1.12 my $dbm;
31 :     BEGIN {
32 :     eval { $dbm = DBMaster->new(-database => 'FortyEight_WebApplication') };
33 :     die $@ if $@;
34 :     };
35 :    
36 :    
37 :    
38 : disz 1.3 =begin WSDL
39 :     _IN auth $string
40 :     _IN args $string
41 :     _RETURN $string jobid
42 :     _DOC Input is a hash with lots of parms and a fasta file, output is a job id | error
43 :     =cut
44 :     sub load_fasta_to_rast {
45 :     my ($class, %args) = @_;
46 :     open FILE, ">/tmp/foo";
47 :     print FILE $args{file};
48 :     #print STDERR &Dumper(\%args);
49 :     #print STDERR "PWD ", $args{password}, "\n";
50 :     #print STDERR "Dumped\n";
51 :    
52 :    
53 :     # We have to authenticate this user first. Check with Daniel how to do that
54 :     my $organism = {name => $args{genome_name},
55 :     tax_id => $args{tax_id},
56 :     lineage => $args{lineage},
57 :     genetic_code => $args{genetic_code}
58 :     };
59 :    
60 :     #print STDERR &Dumper($organism);
61 :     my $msg;
62 :     my $user = $args{user};
63 :     my $seed_id;
64 :     my $project_id = "Test Project";
65 :     my $filename = "/tmp/foo";
66 :    
67 :     my $data = prepare_data_for_create_job( $msg , $seed_id, $organism, $project_id, $filename, $user);
68 :     print STDERR &Dumper($data), "Data\n";
69 :     my ($job_number,$err) = Job48->create_new_job($data);
70 :    
71 :     print STDERR "job, err = $job_number, $err\n";
72 :     return(join ":", $err, $job_number);
73 :    
74 :     }
75 :    
76 :     sub prepare_data_for_create_job{
77 :     my ( $msg , $seed_id, $organism, $project_desc, $filename, $user) = @_;
78 :    
79 :     my $meta = { "upload.timestamp" => time,
80 :     "status.uploaded" => "complete",
81 :     "keep_genecalls" => "0",
82 :     "correction.automatic" => "1" ,
83 :     # "submit.candidate" => $msg,
84 :     # "replace.seedID" => $seed_id,
85 :     };
86 :    
87 :     print STDERR &Dumper($meta), "meta\n";
88 :     open(SEQFILE , "$filename") or die "Can't open file $filename!\n";
89 :     my $data = { taxonomy_id => $organism->{tax_id},
90 :     metagenome => 0,
91 :     genome => $organism->{name},
92 :     user => $user,
93 :     project => $project_desc,
94 :     taxonomy => $organism->{lineage},
95 :     genetic_code => $organism->{genetic_code},
96 :     sequence_file => \*SEQFILE,
97 :     meta => $meta
98 :     };
99 :    
100 :     # print STDERR "made data \n";
101 :    
102 :     # print STDERR &Dumper($data), "data\n";
103 :    
104 :    
105 :     return $data;
106 :     }
107 :    
108 :    
109 :     =begin WSDL
110 :     _IN in $string
111 :     _IN in $string
112 :     _RETURN $string status
113 : redwards 1.5 _DOC Input is a RAST job id, and username output is a status
114 : disz 1.3 =cut
115 :     sub check_rast_job {
116 :     my ($class, $job_id, $login_name) = @_;
117 :    
118 :     #print STDERR "Check $job_id, $login_name";
119 :     $ENV{DBHOST} = 'bioseed.mcs.anl.gov';
120 : redwards 1.12 # my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
121 : disz 1.3 my $users = $dbm->User->get_objects({ login => $login_name });
122 :     my $user;
123 :     if ($users && @$users) {
124 :     $user = $users->[0];
125 :     my ($stage, $status) = Job48->get_status_of_job($job_id, $user);
126 :     print STDERR "stage $stage, status $status\n";
127 :     return(join ":", $stage, $status);
128 :     } else {
129 :     # print STDERR "no users\n";
130 :     return("No users");
131 :     }
132 :     }
133 : disz 1.1
134 :    
135 : disz 1.3 =begin WSDL
136 :     _IN in $string
137 :     _RETURN $string
138 :     _DOC Input is a RAST jobid file, output is a big file
139 :     =cut
140 :     sub get_rast_results {
141 :     my ($class, $jobid) = @_;
142 :     return ("this should be a big file");
143 :     }
144 :    
145 : redwards 1.10 #
146 :     # mg-RAST methods by Rob
147 :     #
148 :     # This is a series of methods I added for accessing the mg-RAST. User authentication is currently based only on username (not password), and is handled
149 :     # by a separate routine towards the end of the file. If you want to change the user access control, just change that method.
150 :     #
151 :    
152 :     =begin WSDL
153 :     _IN username $string
154 :     _RETURN $string
155 :     _DOC Input is a username and output is a comma separated list of the users jobs. Note that this list includes both the users jobs and jobs of their organization
156 :     =cut
157 :     sub mg_rast_user_jobs {
158 :     my ($class, $login_name) = @_;
159 :     my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
160 :     return join(",", @$userj, @$orgj);
161 :     }
162 :    
163 :     =begin WSDL
164 :     _IN job $string
165 :     _IN username $string
166 :     _RETURN $string
167 : redwards 1.12 _DOC Input is a job id and a username, output is a tuple of [id, user, genome id, genome name, size, number of contigs, number of pegs]
168 : redwards 1.10 =cut
169 :     sub mg_rast_job_info {
170 :     my ($class, $job_id, $login_name) = @_;
171 : redwards 1.12
172 : redwards 1.10 my $job=$class->_validate_mg_rast_user($job_id, $login_name, 'getjob');
173 :     if ($job =~ /Access Error/) {return $job}
174 :    
175 : redwards 1.12 my ($sz, $numcontigs, $npegs)=(0,0,0);
176 :     my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
177 :     my $figv=FIGV->new($orgdir);
178 :     $npegs=$figv->genome_pegs($job->genome_id);
179 :    
180 :     my $contig_lens = $figv->contig_lengths($job->genome_id);
181 :     while ( my($contig,$len) = each %$contig_lens)
182 :     {
183 :     $sz += $len;
184 :     $numcontigs++
185 :     }
186 :    
187 :     return join(", ", $job->id, $job->user, $job->genome_id, $job->genome_name, $sz, $numcontigs, $npegs);
188 :     }
189 :    
190 :     =begin WSDL
191 :     _IN username $string
192 :     _RETURN $string
193 :     _DOC Input is a username and output is a list of tuples of [job id, user, genome id, and genome name]
194 :     =cut
195 :     sub mg_rast_users_job_info {
196 :     my ($class, $login_name) = @_;
197 :     my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
198 :     my $output;
199 :     foreach my $jobid (@$userj, @$orgj)
200 :     {
201 :     my $job=$class->_validate_mg_rast_user($jobid, $login_name, 'getjob');
202 :     next if ($job =~ /Access Error/);
203 :    
204 :     my ($coords, $time, $meta);
205 :     eval {$meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$jobid/meta.xml");};
206 :     if ($@) {$coords = $time = "Error with xml"}
207 :     else {
208 :     eval {$coords = $class->mg_rast_lat_lon($jobid, $login_name)};
209 :     eval {$time = $meta->get_metadata("optional_info.time");};
210 :     $coords =~ s/^\s+//; $coords =~ s/\s+$//; $coords =~ s/^\s*\,\s*$//;
211 :     $time =~ s/^\s+//; $time =~ s/\s+$//;
212 :     }
213 :    
214 :     $output .= join("\t", $job->id, $job->user, $job->genome_id,
215 :     $job->genome_name, $coords, $time). "\n";
216 :     }
217 :     chomp($output);
218 :     return $output;
219 :     }
220 :    
221 :    
222 :     =begin WSDL
223 :     _IN job $string
224 :     _IN username $string
225 :     _IN searchterm $string
226 :     _RETURN $string search results
227 :     _DOC Input is a job, a username, and a term to search for, and the results are the search results from querying that metagenome
228 :     =cut
229 :     sub mg_search_metagenome {
230 :     my ($class, $job_id, $login_name, $search) = @_;
231 :     return undef unless ($search);
232 :    
233 :     my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
234 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
235 :    
236 :     my $figv=FIGV->new($orgdir);
237 :     my $res=$figv->search_features_by_annotation($search, 1);
238 :     my $return;
239 :    
240 :     foreach my $fn (keys %$res)
241 :     {
242 :     $return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}});
243 :     }
244 :     return $return;
245 :    
246 : redwards 1.10 }
247 : redwards 1.12
248 :     =begin WSDL
249 :     _IN username $string
250 :     _IN searchterm $string
251 :     _RETURN $string search results
252 :     _DOC Input is a username, and a term to search for, and the results are the search results from querying all metagenomes that user has access to
253 :     =cut
254 :     sub mg_search_users_metagenomes {
255 :     my ($class, $login_name, $search) = @_;
256 :     return undef unless ($search);
257 :    
258 :     my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
259 :     my $return;
260 :     foreach my $job_id (@$userj, @$orgj)
261 :     {
262 :    
263 :     my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
264 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
265 : redwards 1.10
266 : redwards 1.12 my $figv=FIGV->new($orgdir);
267 :     my $res=$figv->search_features_by_annotation($search, 1);
268 :    
269 :     foreach my $fn (keys %$res)
270 :     {
271 :     $return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}})."\n";
272 :     }
273 :     }
274 :     chomp($return);
275 :     return $return;
276 :    
277 :     }
278 :    
279 : redwards 1.5
280 :     =begin WSDL
281 :     _IN job $string
282 :     _IN username $string
283 :     _RETURN $string bindings
284 :     _DOC Input is a RAST job id and a username, output is the bindings that join that job to the subsystems. This is a tple of subsystem name, protein function, and sequence within the metagenome.
285 :     =cut
286 :     sub mg_rast_subsystems {
287 :     my ($class, $job_id, $login_name) = @_;
288 :    
289 : redwards 1.9 my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
290 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
291 :    
292 :     open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
293 :     return join("", <IN>);
294 :     }
295 :    
296 :     =begin WSDL
297 :     _IN job $string
298 :     _IN username $string
299 :     _RETURN $string counts
300 :     _DOC Input is a RAST job id and a username, output is a list of tuples of subsystem name and number of occurences
301 :     =cut
302 :     sub mg_rast_subsystem_counts {
303 :     my ($class, $job_id, $login_name) = @_;
304 :    
305 :     my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
306 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
307 :    
308 :     open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
309 :     my %count;
310 :     while (<IN>)
311 :     {
312 :     my @a=split /\t/;
313 :     $count{$a[0]}++;
314 :     }
315 :    
316 :     return map {"$_\t$count{$_}\n"} sort {$a cmp $b} keys %count;
317 :     }
318 :    
319 :     =begin WSDL
320 :     _IN job $string
321 :     _IN username $string
322 :     _IN number $string
323 :     _RETURN $string bindings
324 :     _DOC Input is a RAST job id and a username, output is the DNA sequences in the job. The number is a limit in how many sequences will be returned.
325 :     =cut
326 :     sub mg_rast_sequences {
327 :     my ($class, $job_id, $login_name, $number) = @_;
328 :    
329 :     my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
330 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
331 :    
332 :     my $fasta;
333 :     eval {$fasta = raelib->read_fasta("$orgdir/contigs")};
334 :     if ($@) {return "Error: $@"}
335 :     my $keys = raelib->rand([keys %$fasta]); # randomize the order of the sequences returned.
336 :     if ($number) {@$keys = splice(@$keys, 0, $number)}
337 :    
338 :     #return join("\n", "keys", @$keys);
339 :     return join("", map {$_ = ">$_\n".$fasta->{$_}."\n"} @$keys);
340 :     }
341 :    
342 :     =begin WSDL
343 :     _IN job $string
344 :     _IN username $string
345 :     _RETURN $string
346 :     _DOC Input is a RAST job id and a username, output is a tuple of latitude and longitude where the sample was taken, if it is known. This is a single point for every sample, and if there is more than one location for a sample it is the average of all locations.
347 :     =cut
348 :     sub mg_rast_lat_lon {
349 :     my ($class, $job_id, $login_name) = @_;
350 :    
351 :     my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
352 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
353 :    
354 :     my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
355 : redwards 1.12 my $ret = $meta->get_metadata("optional_info.latitude") .",".$meta->get_metadata("optional_info.longitude");
356 :     $ret =~ s/^\s*\,\s*$//;
357 :     return $ret;
358 : redwards 1.9 }
359 :    
360 :     =begin WSDL
361 :     _IN job $string
362 :     _IN username $string
363 :     _RETURN $string
364 :     _DOC Input is a RAST job id and a username, output is the coordinates where the sample was taken. These are semi-colon separated tuples of lat-lon
365 :     =cut
366 :     sub mg_rast_coordinates {
367 :     my ($class, $job_id, $login_name) = @_;
368 : redwards 1.5
369 : redwards 1.9 my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
370 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
371 :    
372 :     my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
373 :     return $meta->get_metadata("optional_info.coordinates");
374 :     }
375 : redwards 1.5
376 : redwards 1.9 =begin WSDL
377 :     _IN job $string
378 :     _IN username $string
379 :     _RETURN $string
380 :     _DOC Input is a RAST job id and a username, output is the date or time that the sample was taken.
381 :     =cut
382 :     sub mg_rast_time {
383 :     my ($class, $job_id, $login_name) = @_;
384 : redwards 1.6
385 : redwards 1.9 my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
386 :     if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
387 : redwards 1.6
388 : redwards 1.9 my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
389 :     return $meta->get_metadata("optional_info.time");
390 : redwards 1.5 }
391 :    
392 : redwards 1.9
393 : disz 1.3 =begin WSDL
394 :     _IN in $string
395 :     _RETURN $string
396 :     _DOC Input is an alias, output is a sequence
397 :     =cut
398 :     sub ali_to_seq {
399 :     my ($class, $arg) = @_;
400 :     return stdin_caller($class, "ali_to_seq", $arg);
401 : disz 1.1 }
402 :    
403 : disz 1.3 =begin WSDL
404 :     _IN in $string
405 :     _RETURN $string
406 : redwards 1.9 _DOC Input is a comma separated list of pegs, and output is the pegs in order along the genome.
407 : disz 1.3 =cut
408 :     sub adjacent {
409 :     my ($class, $arg) = @_;
410 : redwards 1.9 $arg =~ s/\,\s*/\n/g;
411 : disz 1.3 return stdin_caller($class, "adjacent", $arg);
412 :     }
413 : disz 1.1
414 : disz 1.3 =begin WSDL
415 :     _IN in $string
416 :     _RETURN $string
417 :     _DOC Input is a peg, output is two column table of peg\tcluster
418 :     =cut
419 :     sub cluster_by_bbhs {
420 :     my ($class, $arg) = @_;
421 :     return stdin_caller($class, "cluster_by_bbhs", $arg);
422 :     }
423 : disz 1.1
424 : disz 1.3 =begin WSDL
425 :     _IN in $string
426 :     _RETURN $string
427 :     _DOC Input is a peg, output is two column table of peg\cluster
428 :     =cut
429 :     sub cluster_by_sim {
430 : disz 1.1 my ($class, $arg) = @_;
431 : disz 1.3 return stdin_caller($class, "cluster_by_sim", $arg);
432 :     }
433 : disz 1.1
434 : disz 1.3 =begin WSDL
435 :     _IN in $string
436 :     _RETURN $string
437 :     _DOC Input is a peg, output is two column table of peg\text func
438 :     =cut
439 :     sub external_calls {
440 :     my ($class, $arg) = @_;
441 :     return stdin_caller($class, "external_calls", $arg);
442 :     }
443 : disz 1.1
444 : disz 1.3 =begin WSDL
445 :     _IN in $string
446 :     _RETURN $string
447 :     _DOC Input is a peg, output is a function
448 :     =cut
449 :     sub function_of {
450 :     my ($class, $arg) = @_;
451 : redwards 1.7 my $fig=new FIG;
452 :     return scalar($fig->function_of($arg));
453 : disz 1.3 }
454 : disz 1.1
455 : disz 1.3 =begin WSDL
456 :     _IN in $string
457 :     _RETURN $string
458 :     _DOC Input is a peg, output is a single column table of genomes
459 :     =cut
460 :     sub genomes_of {
461 :     my ($class, $arg) = @_;
462 :     return stdin_caller($class, "genomes_of", $arg);
463 :     }
464 : disz 1.1
465 : redwards 1.7
466 :     =begin WSDL
467 :     _IN genome $string
468 :     _RETURN $string
469 :     _DOC Input is a genome ID, output is the genus and species of the genome
470 :     =cut
471 :     sub genus_species {
472 :     my ($class, $arg) = @_;
473 :     my $fig=new FIG;
474 :     return $fig->genus_species($arg);
475 :     }
476 :    
477 :    
478 : disz 1.3 =begin WSDL
479 :     _IN in $string
480 :     _RETURN $string
481 :     _DOC Input is a single column table of pegs, output is a single column table of fasta.DNA
482 :     =cut
483 :     sub fid2dna {
484 :     my ($class, $arg) = @_;
485 :     return stdin_caller($class, "fid2dna", $arg);
486 :     }
487 : disz 1.1
488 : disz 1.3 =begin WSDL
489 :     _IN in $string
490 :     _RETURN $string
491 : redwards 1.7 _DOC Input is a genome id, output is a list of pegs in that genome
492 :     =cut
493 :     sub pegs_of {
494 :     my ($class, $arg) = @_;
495 :     my $fig = new FIG;
496 :     return (join ",", $fig->pegs_of($arg));
497 :     }
498 :    
499 :     =begin WSDL
500 :     _IN genome $string
501 :     _IN contig $string
502 :     _RETURN $string
503 :     _DOC Input is a genome id and a contig name, return is the length of the contig
504 :     =cut
505 :     sub contig_ln {
506 :     my $class = shift();
507 :     my $fig = new FIG;
508 :     return $fig->contig_ln(@_);
509 :     }
510 :    
511 :     =begin WSDL
512 :     _IN in $string
513 :     _RETURN $string
514 : disz 1.3 _DOC Input is a single column table of pegs, output is a single column table of archaeal pegs
515 :     =cut
516 :     sub is_archaeal {
517 :     my ($class, $arg) = @_;
518 :     return stdin_caller($class, "is_archaeal", $arg);
519 :     }
520 : disz 1.1
521 : disz 1.3 =begin WSDL
522 :     _IN in $string
523 :     _RETURN $string
524 :     _DOC Input is a single column table of pegs, output is a single column table of bacterial pegs
525 :     =cut
526 :     sub is_bacterial {
527 :     my ($class, $arg) = @_;
528 :     return stdin_caller($class, "is_bacterial", $arg);
529 :     }
530 : disz 1.1
531 : disz 1.3 =begin WSDL
532 :     _IN in $string
533 :     _RETURN $string
534 :     _DOC Input is a single column table of pegs, output is a single column table of eukaryotic pegs
535 :     =cut
536 :     sub is_eukaryotic {
537 :     my ($class, $arg) = @_;
538 :     return stdin_caller($class, "is_eukaryotic", $arg);
539 :     }
540 : disz 1.1
541 : disz 1.3 =begin WSDL
542 :     _IN in $string
543 :     _RETURN $string
544 :     _DOC Input is a single column table of pegs, output is a single column table of prokaryotic pegs
545 :     =cut
546 :     sub is_prokaryotic {
547 :     my ($class, $arg) = @_;
548 :     return stdin_caller($class, "is_prokaryotic", $arg);
549 : disz 1.1 }
550 :    
551 : disz 1.3 =begin WSDL
552 :     _IN in $string
553 :     _RETURN $string
554 :     _DOC Input is a peg, output is peg-Translation
555 :     =cut
556 :     sub translation_of {
557 :     my ($class, $arg) = @_;
558 :     return stdin_caller($class, "translation_of", $arg);
559 :     }
560 : disz 1.1
561 : disz 1.3 =begin WSDL
562 :     _IN in $string
563 :     _RETURN $string
564 :     _DOC Input is a peg, output is tab separated string of aliases
565 :     =cut
566 :     sub aliases_of {
567 :     my ($class, $arg) = @_;
568 :     return stdin_caller($class, "aliases_of", $arg);
569 :     }
570 : disz 1.1
571 : disz 1.3 =begin WSDL
572 :     _IN in $string
573 :     _RETURN $string
574 :     _DOC Input is an alias, output is a peg
575 :     =cut
576 :     sub alias2fig {
577 :     my ($class, $arg) = @_;
578 :     return stdin_caller($class, "alias2fig", $arg);
579 : disz 1.1 }
580 :    
581 :    
582 : disz 1.3
583 :     =begin WSDL
584 :     _IN in $string ec code
585 :     _RETURN $string ec name
586 :     _DOC Input is aec code , output is ec name
587 :     =cut
588 : disz 1.2 sub ec_name {
589 :     my $class = shift();
590 :     my $fig = new FIG;
591 :     my $result = $fig->ec_name(@_);
592 :     return $result;
593 :     }
594 :    
595 : disz 1.3 =begin WSDL
596 :     _IN in $string
597 : redwards 1.8 _RETURN $string
598 :     _DOC Input is a peg, output is list of [protein, score] for things that are coupled to this peg
599 :     =cut
600 :     sub coupled_to {
601 :     my $class = shift();
602 :     my $fig = new FIG;
603 :     my $return=undef;
604 :     my @result = $fig->coupled_to(@_);
605 :     if (@result)
606 :     {
607 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
608 :     }
609 :     return $return;
610 :     }
611 :    
612 :     =begin WSDL
613 :     _IN in $string
614 :     _RETURN $string
615 :     _DOC Input is a peg, output is list of [protein, score] for things that are coupled to this peg
616 : disz 1.3 =cut
617 : disz 1.2 sub abstract_coupled_to {
618 :     my $class = shift();
619 :     my $fig = new FIG;
620 : redwards 1.8 my $return=undef;
621 : disz 1.2 my @result = $fig->abstract_coupled_to(@_);
622 : redwards 1.8 if (@result)
623 :     {
624 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
625 :     }
626 :     return $return;
627 : disz 1.2 }
628 :    
629 : disz 1.3 =begin WSDL
630 :     _IN complete $string
631 :     _IN restrictions $string
632 :     _IN domain $string
633 :     _RETURN @string
634 : redwards 1.7 _DOC Input is constraints, output is a comma separated list of genomes
635 :     =cut
636 :     sub all_genomes {
637 :     my $class = shift();
638 :     my $fig=new FIG;
639 :     my @genomes=$fig->genomes(@_);
640 :     return join(",", @genomes);
641 :     }
642 :    
643 :     =begin WSDL
644 :     _IN complete $string
645 :     _IN restrictions $string
646 :     _IN domain $string
647 :     _RETURN @string
648 : disz 1.3 _DOC Input is constraints, output is list of genomes
649 :     =cut
650 : disz 1.1 sub genomes {
651 :     my $class = shift();
652 :     my $fig = new FIG;
653 :     my @result = $fig->genomes(@_);
654 : disz 1.3 my @genomes;
655 :     foreach my $genome (@result)
656 :     {
657 :     print STDERR "Genome is -$genome-\n";
658 :    
659 :     my $genus_species = $fig->genus_species($genome);
660 :     push @genomes, join("\t",$genome,$genus_species);
661 :     # print STDERR join("\t",$genome,$genus_species);
662 :     }
663 :    
664 :     #print STDERR @genomes;
665 :     return @genomes;
666 :     #return @result;
667 :     }
668 :    
669 :     =begin WSDL
670 :     _IN in $string
671 :     _RETURN @string
672 :     _DOC Input is a peg, output is list of loc on contig
673 :     =cut
674 : disz 1.1 sub feature_location {
675 :     my ($class, $arg1) = @_;
676 :     my $fig = new FIG;
677 : disz 1.2 my @result = ($fig->feature_location($arg1));
678 : disz 1.1 return @result;
679 :     }
680 :    
681 : disz 1.3 =begin WSDL
682 :     _IN in $string
683 :     _RETURN $string
684 :     _DOC Input is a peg, output is translation
685 :     =cut
686 : disz 1.1 sub get_translation {
687 :     my ($class, $arg1) = @_;
688 :     my $fig = new FIG;
689 :     my $result = $fig->get_translation($arg1);
690 :     return $result;
691 :     }
692 :    
693 : disz 1.3 =begin WSDL
694 :     _IN pat1 $string
695 :     _IN pat2 $string
696 :     _RETURN @string
697 :     _DOC Input is two patterns, first one is used in search_index, second used to grep the results
698 :     =cut
699 : disz 1.1 sub search_and_grep {
700 :     my ($class, $arg1, $arg2) = @_;
701 :    
702 :     my $fig = new FIG;
703 :    
704 :     my ($pegs, $roles) = $fig->search_index($arg1);
705 :    
706 :     my (@result_list, $entry);
707 :    
708 :     for $entry (@$pegs) {
709 :     push (@result_list, grep(/$arg2/, @$entry));
710 :     }
711 :     push (@result_list, grep(/$arg2/, @$roles));
712 :     chomp @result_list;
713 :     my $return_value = join ("\n", @result_list);
714 :     return $return_value;
715 :     }
716 :    
717 : redwards 1.4
718 :     =begin WSDL
719 :     _IN pat1 $string
720 :     _RETURN @string
721 :     _DOC Input is a pattern to search for, output is tab separated list of pegs and roles
722 :     =cut
723 :     sub simple_search {
724 :     my ($class, $arg1)=@_;
725 :    
726 :     my $fig = new FIG;
727 :    
728 :     my ($pegs, $roles) = $fig->search_index($arg1);
729 :    
730 :     my (@result_list, $entry);
731 :    
732 :     for $entry (@$pegs) {
733 :     push (@result_list, (join("\t", @$entry)));
734 :     }
735 :    
736 : redwards 1.12 # push (@result_list, (join("\t", @$roles)));
737 : redwards 1.4 chomp @result_list;
738 :     my $return_value = join ("\n", @result_list);
739 :     return $return_value;
740 :     }
741 :    
742 :    
743 : disz 1.3 =begin WSDL
744 :     _RETURN $string list of families
745 :     _DOC No Input, output is list of all families
746 :     =cut
747 :     sub all_families {
748 :     my ($class) = @_;
749 :     my $fig = new FIG;
750 :     my $figfams = new FigFams($fig);
751 : disz 1.1
752 : disz 1.3 my @out = $figfams->all_families;
753 :     print STDERR Dumper(@out);
754 :     return @out;
755 : disz 1.1 }
756 : disz 1.2
757 : disz 1.3 =begin WSDL
758 : redwards 1.12 _RETURN $string list of subsystems and their classifications
759 :     _DOC No Input, output is a list of all the subsystems and their classifications
760 :     =cut
761 :     sub all_subsystem_classifications {
762 :     my ($class) = @_;
763 :     my $fig = new FIG;
764 :     my $output;
765 :    
766 :     my %found;
767 :     map {
768 :     my @classification=@{$fig->subsystem_classification($_)};
769 :     $#classification=1;
770 :     push @classification, $_;
771 :     $output.= join("\t", @classification)."\n";
772 :     } sort {$a cmp $b} ($fig->all_subsystems());
773 :     return $output;
774 :     }
775 :    
776 :    
777 :     =begin WSDL
778 : disz 1.3 _RETURN $string list of families and funcs
779 :     _DOC No Input, output is list of all families
780 :     =cut
781 :     sub all_families_with_funcs {
782 : disz 1.2 my ($class) = @_;
783 :     my $fig = new FIG;
784 :     my $figfams = new FigFams($fig);
785 :    
786 : disz 1.3 my @out =$figfams->all_families_with_funcs;
787 : disz 1.2 return @out;
788 :     }
789 :    
790 : disz 1.3 =begin WSDL
791 :     _IN in $string list of famids
792 :     _RETURN $string 2 col table, famid, peg
793 :     _DOC Input is list of families, outoput is 2 col table of famid, peg
794 :     =cut
795 : disz 1.2 sub list_members {
796 :     my ($class, $famids) = @_;
797 :     my $fig = new FIG;
798 :     my $figfams = new FigFams($fig);
799 :     my @in = split(/\t/, $famids);
800 :     warn("Starting 2 list members $famids\n");
801 :     my @out = ();
802 :     foreach my $famid (@in)
803 :     {
804 :     my $famO = new FigFam($fig,$famid);
805 :     foreach my $peg ($famO->list_members)
806 :     {
807 :     push(@out,[$famid,$peg]);
808 :     }
809 :     }
810 :     return @out;
811 :     }
812 :    
813 : disz 1.3 =begin WSDL
814 :     _IN in $string list of pegs
815 :     _RETURN $string returns a 3-column table [PEG,Function,AliasesCommaSeparated]
816 :     _DOC Input is list of families,returns a 3-column table [PEG,Function,AliasesCommaSeparated]
817 :     =cut
818 : disz 1.2 sub CDS_data {
819 :     my ($class, $pegs) = @_;
820 :     my $fig = new FIG;
821 :     my $figfams = new FigFams($fig);
822 :     my @in = split(/\t/, $pegs);
823 :    
824 :     #warn("Starting CDS data $pegs\n");
825 :     #print STDERR &Dumper($pegs);
826 :    
827 :     my @out = ();
828 :     foreach my $peg (@in)
829 :     {
830 :     my @famids = $figfams->families_containing_peg($peg);
831 :     foreach my $famid (@famids)
832 :     {
833 :     push(@out,[$peg,scalar $fig->function_of($peg),[$fig->feature_aliases($peg)]]);
834 :     }
835 :     }
836 :     return @out;
837 :     }
838 :    
839 : disz 1.3 =begin WSDL
840 :     _IN in $string list of pegs
841 :     _RETURN $string a 2-column table [PEG,Sequence]
842 :     _DOC Input is list of families,returns a 2-column table [PEG,Sequence]
843 :     =cut
844 : disz 1.2 sub CDS_sequences {
845 :     my ($class, $pegs) = @_;
846 :     my $fig = new FIG;
847 :     my $figfams = new FigFams($fig);
848 :     my @in = split(/\t/, $pegs);
849 :    
850 :     #warn("Starting CDS seq $pegs\n");
851 :     #print STDERR &Dumper($pegs);
852 :     my @out = ();
853 :     foreach my $peg (@in)
854 :     {
855 :     push(@out,[$peg,$fig->get_translation($peg)]);
856 :     }
857 :     return @out;
858 :     }
859 :    
860 : disz 1.3 =begin WSDL
861 :     _IN in $string list of id seq pairs
862 :     _RETURN $string returns a 2-column table [Id,FamilyID]
863 :     _DOC Input is list of families,returns a 2-column table [Id,FamilyID]
864 :     =cut
865 : disz 1.2 sub is_member_of {
866 :     my ($class, $id_seqs) = @_;
867 :     my $fig = new FIG;
868 :     my $figfams = new FigFams($fig);
869 :     #warn("Doing is member $id_seqs\n");
870 :     #print STDERR &Dumper($id_seqs);
871 :    
872 :     my @in = split(/\n/, $id_seqs);
873 :     my @out = ();
874 :     foreach my $pair (@in)
875 :     {
876 :     my($id,$seq) = split(/\t/, $pair);
877 :     my($famO,undef) = $figfams->place_in_family($seq);
878 :     if ($famO)
879 :     {
880 :     push(@out,[$id,$famO->family_id]);
881 :     }
882 :     }
883 :     return @out;
884 :     }
885 :    
886 : redwards 1.9 =begin WSDL
887 :     _IN peg $string
888 :     _IN maxN $string
889 :     _IN maxP $string
890 :     _RETURN $string
891 :     _DOC Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5), return is a list of sims
892 :     =cut
893 :     sub sims {
894 :     my ($class, $peg, $maxN, $maxP)=@_;
895 :     unless (defined $maxN) {$maxN=50}
896 :     unless (defined $maxP) {$maxP=1e-5}
897 :     my $fig=new FIG;
898 :     my $return=undef;
899 :     foreach my $sim ($fig->sims($peg, $maxN, $maxP, 'figx'))
900 :     {
901 :     $return .= join("\t", @$sim). "\n";
902 :     }
903 :    
904 :     return $return;
905 :     }
906 :    
907 : disz 1.2
908 : disz 1.3 sub stdin_caller {
909 :     my ($class, $name, $arg) = @_;
910 :     my($rd, $wr, $err, $pid, $std_err, $return_value, @std_out);
911 :     if (!($pid = open3($wr, $rd, $err, "$FIG_Config::bin/$name")))
912 :     {
913 :     die "Cannot run open3 $name: $!";
914 :     }
915 :    
916 :     $wr->write($arg);
917 :     close($wr);
918 :    
919 :     @std_out= <$rd>;
920 :     close($rd);
921 :     waitpid $pid, 0;
922 :     $return_value = join ("", @std_out);
923 :     return $return_value;
924 :     }
925 : redwards 1.4
926 :    
927 :    
928 :     =begin WSDL
929 :     _IN genome $string
930 :     _IN location1 $string
931 :     _IN location2 $string
932 :     _RETURN @string
933 :     _DOC Input is a genome ID and one or more locations in the form contig_start_stop, output is the DNA sequence
934 :     =cut
935 :     sub dna_sequence {
936 :     my ($class, $genome, @locations)=@_;
937 :     my $fig = new FIG;
938 :     my $seq=$fig->dna_seq($genome,@locations);
939 :     return $seq;
940 :     }
941 :    
942 : redwards 1.9
943 :    
944 :     sub _validate_mg_rast_user {
945 :     # a common method for validating a user. Please note that at the moment this does not use the password (but it should!)
946 :     # this is shared by several things above.
947 :    
948 :     # If the user is valid will return a path to the job directory, otherwise will return "Access Error"
949 :    
950 : redwards 1.10 # by default returns the org dir, but if the boolean job is set will return the reference to the job
951 :    
952 :     my ($self, $job_id, $login_name, $getjob)=@_;
953 : redwards 1.9 $ENV{DBHOST} = 'bioseed.mcs.anl.gov';
954 : redwards 1.12 # my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
955 : redwards 1.9 my $users = $dbm->User->get_objects({ login => $login_name });
956 :     my $org=$users->[0]->{organisation};
957 :     my $user;
958 :    
959 :     # get the metagenome job directory. This is hard wired at the moment, but
960 :     # shouldn't be. Not sure if the job directory is in FIG_Config
961 :     # Also, I return "Access Error" regardless of whether the job is there or not to stop people
962 :     # hacking.
963 :     $job_id="/vol/metagenome-48-hour/Jobs.prod/$job_id";
964 : redwards 1.12 return "Access Error 1 and $job_id" unless (-e $job_id); # simple check that we have that id
965 : redwards 1.9
966 :     # now make sure we are a valid user. If we are not a valid user this should throw an error
967 :    
968 :     my $job;
969 :     eval {$job = Job48->new($job_id, $user->[0])};
970 :     if ($@ || !$job) {return "Access Error 2"}
971 :    
972 :     # another user check -- this is just a simple check and doesn't allow for public data or common
973 :     # organizations, so it is not as good. But it works
974 :     return "Access Error 3" unless ($job->user eq $login_name || $job->getUserObject->{organisation} == $org);
975 :    
976 :     return "Access Error: job deleted" if ($job->to_be_deleted);
977 :    
978 : redwards 1.10 return $job if ($getjob);
979 :    
980 : redwards 1.9 my $orgdir=$job->orgdir;
981 :    
982 :     return $orgdir;
983 :     }
984 :    
985 :     sub _user_mg_rast_jobs {
986 :     # a method to extract all the jobs for a user. Returns references to two arrays.
987 :     # The first is user jobs, and the second is jobs of the users organization
988 :     my ($self, $login_name)=@_;
989 :     $ENV{DBHOST} = 'bioseed.mcs.anl.gov';
990 : redwards 1.12 my $org;
991 :     # my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
992 : redwards 1.9 my $users = $dbm->User->get_objects({ login => $login_name });
993 :     my $org=$users->[0]->{organisation};
994 :    
995 :     my @user; my @org;
996 :    
997 :     opendir(DIR, "/vol/metagenome-48-hour/Jobs.prod/") || die "Can't open /vol/metagenome-48-hour/Jobs.prod/";
998 :     foreach my $dir (grep {m/^\d+$/} readdir(DIR))
999 :     {
1000 :     my $job_id="/vol/metagenome-48-hour/Jobs.prod/$dir";
1001 : redwards 1.12 next unless (-d $job_id && -e "$job_id/DONE" && !-e "$job_id/DELETE");
1002 :     # print STDERR "$job_id\n";
1003 : redwards 1.9 my $job;
1004 :     eval {$job = Job48->new($job_id)};
1005 : redwards 1.12 die $@ if ($@);
1006 :     die "No job for $job\n" if (!$job);
1007 : redwards 1.9 next if ($job->to_be_deleted);
1008 :     ($job->user eq $login_name) ? push @user, $job->id :
1009 :     ($job->getUserObject->{organisation} == $org) ? push @org, $job->id : 1;
1010 :     }
1011 :     @user = sort {$a <=> $b} @user;
1012 :     @org = sort {$a <=> $b} @org;
1013 :    
1014 :     return \@user, \@org;
1015 :     }
1016 :    
1017 : dsouza 1.11 =begin WSDL
1018 :     _IN peg_id $string
1019 :     _IN n_pch_pins $string
1020 :     _IN n_sims $string
1021 :     _IN sim_cutoff $string
1022 :     _IN color_sim_cutoff $string
1023 :     _IN sort_by $string
1024 :     _RETURN $string
1025 :     _DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data
1026 :     =cut
1027 :     sub pinned_region_data {
1028 :     my ($class, $peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_;
1029 :    
1030 :     my $fig = new FIG;
1031 :    
1032 :     defined($n_pch_pins) or $n_pch_pins = 5;
1033 :     defined($n_sims) or $n_sims = 0;
1034 :     defined($sim_cutoff) or $sim_cutoff = 1e-20;
1035 :     defined($color_sim_cutoff) or $color_sim_cutoff = 1e-20;
1036 :     defined($sort_by) or $sort_by = '';
1037 :    
1038 :     defined($fast_color) or $fast_color = 0;
1039 :     defined($sims_from) or $sims_from = 'blast';
1040 :     defined($region_size) or $region_size = 16000;
1041 :    
1042 :     my $pin_desc = {
1043 :     'pegs' => [$peg],
1044 :     'collapse_close_genomes' => 0,
1045 :     'n_pch_pins' => $n_pch_pins,
1046 :     'n_sims' => $n_sims,
1047 :     'show_genomes' => '',
1048 :     'sim_cutoff' => $sim_cutoff,
1049 :     'color_sim_cutoff' => $color_sim_cutoff,
1050 :     'sort_by' => $sort_by,
1051 :     'show_genomes' => [],
1052 :     };
1053 :    
1054 :     my $maps = &PinnedRegions::pinned_regions($fig, $pin_desc, $fast_color, $sims_from, $region_size);
1055 :     my $txt = Dumper($maps);
1056 :     return $txt;
1057 :     }
1058 :    
1059 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3