[Bio] / FigKernelPackages / ALITRE.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/ALITRE.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Apr 20 20:17:54 2011 UTC revision 1.3, Thu May 26 21:31:26 2011 UTC
# Line 40  Line 40 
40    
41  Use  Use
42    
43      my $altObject = SAPserver->new();      my $altObject = ALITREserver->new();
44    
45  to create a new alignment/tree server function object. The server function object  to create a new alignment/tree server function object. The server function object
46  is used to invoke the L</Primary Methods> listed below. See L<ALITREserver> for  is used to invoke the L</Primary Methods> listed below. See L<ALITREserver> for
# Line 55  Line 55 
55  #  #
56    
57  sub new {  sub new {
58      my ($class) = @_;      my ($class, $sap) = @_;
59        # Create the sapling object.
60        if (! defined $sap) {
61            $sap = ERDB::GetDatabase('Sapling');
62        }
63      # Create the server object.      # Create the server object.
64      my $retVal = { };      my $retVal = { db => $sap };
65      # Bless and return it.      # Bless and return it.
66      bless $retVal, $class;      bless $retVal, $class;
67      return $retVal;      return $retVal;
# Line 80  Line 84 
84  =cut  =cut
85    
# Names of the client methods offered by this package (revision 1.3 list),
# kept in ASCII-alphabetical order and grouped by common prefix.
use constant METHODS => [
    qw(alignment_tree_metadata aligns_with_md5ID),
    qw(all_alignIDs all_treeIDs),
    qw(expand_duplicate_tips),
    qw(fid_align_and_tree_to_md5_version fid_align_to_md5_align fid_tree_to_md5_tree),
    qw(map_fid_to_md5 map_md5_to_fid),
    qw(md5IDs_in_align md5IDs_in_tree),
    qw(md5_align_and_tree_by_ID md5_align_and_tree_to_fid_version md5_align_to_fid_align),
    qw(md5_alignment_by_ID md5_tree_by_ID md5_tree_to_fid_tree),
    qw(trees_with_md5ID),
];
107    
108  sub methods {  sub methods {
# Line 96  Line 114 
114    
115  =head2 Client Methods  =head2 Client Methods
116    
117  =head3 ali_tree_pairs_to_metadata  =head3 alignment_tree_metadata
118    
119      my $altHash =       $altObject->ali_tree_pairs_to_metadata({      my $alignHash =         $altObject->alignment_tree_metadata({
120                              -ids => [$alt1, $alt2, ...]                              -ids => [$alt1, $alt2, ...]
121                          });                          });
122    
123  Return the metadata associated with each specified alignment/tree pair.  Return the construction metadata for the alignment and tree in each specified
124    alignment/tree pair. The construction metadata describes how the alignment
125    and tree were built from the raw data.
126    
127  =over 4  =over 4
128    
129  =item parameter  =item parameters
130    
131  The parameter should be a reference to a hash with the following keys:  The parameter should be a reference to a hash with the following keys:
132    
# Line 114  Line 134 
134    
135  =item -ids  =item -ids
136    
137  Reference to a list of alignment/tree pair IDs.  Reference to a list of alignment/tree IDs.
138    
139    =back
140    
141    =item RETURN
142    
143    Returns a reference to a hash mapping each incoming alignment ID to a 6-tuple
144    of metadata information, including (0) the name of the method used to build
145    the alignment, (1) the parameters passed to the alignment method, (2) the
146    properties of the alignment process, (3) the name of the method used to build
147    the tree, (4) the parameters passed to the tree method, and (5) the properties
148    of the tree-building process.
149    
150        $alignHash = { $alt1 => [$almethod1, $alparms1, $alprops1,
151                                 $trmethod1, $trparms1, $trprops1],
152                       $alt2 => [$almethod2, $alparms2, $alprops2,
153                                 $trmethod2, $trparms2, $trprops2],
154                       ... };
155    
156    =back
157    
158    =cut
159    
# Build a hash that maps every requested alignment ID to the value returned
# by AlignsAndTrees::alignment_tree_metadata for that ID.
#   $args  hash reference; the IDs are extracted from it with
#          ServerThing::GetIdList using the -ids key.
# Returns a reference to the resulting hash.
sub alignment_tree_metadata {
    my ($self, $args) = @_;
    # Resolve the list of alignment IDs the caller asked about.
    my $idList = ServerThing::GetIdList(-ids => $args);
    # Database handle stored on this object by the constructor.
    my $sapling = $self->{db};
    # Collect one metadata entry per ID.
    my %metadataFor;
    foreach my $alignId (@{$idList}) {
        $metadataFor{$alignId} =
            AlignsAndTrees::alignment_tree_metadata($sapling, $alignId);
    }
    return \%metadataFor;
}
176    
177    =head3 aligns_with_md5ID
178    
179        my $protHash =          $altObject->aligns_with_md5ID({
180                                    -ids => [$prot1, $prot2, ...]
181                                });
182    
183    Return a list of the alignment/tree pairs containing each of the specified proteins.
184    
185    =over 4
186    
187    =item parameters
188    
189    The parameter should be a reference to a hash with the following keys:
190    
191    =over 8
192    
193    =item -ids
194    
195    Reference to a list of MD5 protein IDs.
196    
197  =back  =back
198    
199  =item RETURN  =item RETURN
200    
201  Returns a reference to a hash mapping each incoming alignment/tree pair ID to a sub-hash  Returns a reference to a hash mapping each incoming protein ID to a list of the
202  that maps metadata field names to field values.  IDs for the alignments containing that protein.
203    
204      $altHash = { $alt1 => { $fld1a => $value1a, $fld1b => $value1b, ... },      $protHash = { $prot1 => [$alt1a, $alt1b, ...],
205                   $alt2 => { $fld2a => $value2a, $fld2b => $value2b, ... },                    $prot2 => [$alt2a, $alt2b, ...],
206                   ... };                   ... };
207    
208  =back  =back
209    
210  =cut  =cut
211    
# Build a hash that maps every requested protein ID to the value returned by
# AlignsAndTrees::aligns_with_md5ID for that ID.
#   $args  hash reference; the IDs are extracted from it with
#          ServerThing::GetIdList using the -ids key.
# Returns a reference to the resulting hash.
sub aligns_with_md5ID {
    my ($self, $args) = @_;
    # Resolve the incoming protein IDs.
    my $proteinIds = ServerThing::GetIdList(-ids => $args);
    # Database handle stored on this object.
    my $sapling = $self->{db};
    # Look up the alignments for each protein in turn.
    my %alignsFor;
    foreach my $proteinId (@{$proteinIds}) {
        $alignsFor{$proteinId} =
            AlignsAndTrees::aligns_with_md5ID($sapling, $proteinId);
    }
    return \%alignsFor;
}
228    
229    =head3 all_alignIDs
230    
231        my $idList =            $altObject->all_alignIDs();
232    
233    Return a list of all the alignment IDs in the database.
234    
235    =over 4
236    
237    =item RETURN
238    
239    Returns a reference to a list of alignment IDs for all the alignments in the database.
240    
241        $idList = [$alt1, $alt2, ...];
242    
243    =back
244    
245    =cut
246    
# Return the result of AlignsAndTrees::all_alignIDs, called in scalar context
# with this object's database handle.
sub all_alignIDs {
    my ($self) = @_;
    # Force scalar context so the callee sees the same context as a plain
    # scalar assignment would give it.
    return scalar AlignsAndTrees::all_alignIDs($self->{db});
}
257    
258    =head3 all_treeIDs
259    
260        my $idList =            $altObject->all_treeIDs();
261    
262    Return a list of all the tree IDs in the database.
263    
264    =over 4
265    
266    =item RETURN
267    
268    Returns a reference to a list of IDs for all the trees in the database. (Note:
269    as currently construed, this is the same as a list of all the alignment IDs, since
270    each alignment has exactly one associated tree and it has the same ID.)
271    
272        $idList = [$alt1, $alt2, ...];
273    
274    =back
275    
276    =cut
277    
# Delegate to all_alignIDs with the same arguments and hand back its result.
sub all_treeIDs {
    my @params = @_;
    my $idList = all_alignIDs(@params);
    return $idList;
}
281    
282    =head3 expand_duplicate_tips
283    
284        my $newTree =       $altObject->expand_duplicate_tips({
285                                -tree => $actualTree,
286                                -map => { $oldName1 => [$newName1a, $newName1b, ...],
287                                          $oldName2 => [$newName2a, $newName2b, ...],
288                                          ... }
289                            });
290    
291    Rename and possibly expand the tips of the specified tree data structure using the
292    specified mapping.
293    
294    =over 4
295    
296    =item parameter
297    
298    The parameter should be a reference to a hash with the following keys.
299    
300    =over 8
301    
302    =item -tree
303    
304    Reference to a list that encodes a newick phylogenetic tree.
305    
306    =item -map
307    
308    Reference to a hash that maps node names to new node names. Each new node name is
309    a reference to a list of names. If the list is a singleton, the mapping is a simple
310    renaming. If the list contains multiple entries, the node will be expanded into duplicates.
311    
312    =back
313    
314    =item RETURN
315    
316    Returns a new version of the tree with the specified renamings performed.
317    
318    =back
319    
320    =cut
321    
# Validate the -tree and -map arguments, then pass them to
# AlignsAndTrees::expand_duplicate_tips and return its result.
#   -tree  must be true and an ARRAY reference
#   -map   must be true and a HASH reference
# Any validation failure is reported through Confess.
sub expand_duplicate_tips {
    my ($self, $args) = @_;
    # Check the tree: at most one complaint is raised for it.
    my $tree = $args->{-tree};
    my $treeProblem =
          ! $tree               ? "Missing -tree parameter to expand_duplicate_tips."
        : ref $tree ne 'ARRAY'  ? "Invalid -tree parameter for expand_duplicate_tips."
        :                         undef;
    Confess($treeProblem) if defined $treeProblem;
    # Check the name map in the same way.
    my $map = $args->{-map};
    my $mapProblem =
          ! $map              ? "Missing -map parameter to expand_duplicate_tips."
        : ref $map ne 'HASH'  ? "Invalid -map parameter to expand_duplicate_tips."
        :                       undef;
    Confess($mapProblem) if defined $mapProblem;
    # Hand both structures to the worker routine; its scalar result is what
    # the caller receives.
    my $expanded = AlignsAndTrees::expand_duplicate_tips($tree, $map);
    return $expanded;
}
345    
346    =head3 fid_align_and_tree_to_md5_version
347    
348        my $md5Tuple =          $altObject->fid_align_and_tree_to_md5_version({
349                                    -align => $fid_align,
350                                    -tree => $fid_tree,
351                                    -meta => $fid_meta,
352                                    -relaxed => 1
353                                });
354    
355    Convert a PEG-based alignment/tree pair to an MD5-based alignment/tree pair. Each
356    PEG identifier in the alignment and tree will be converted to the corresponding MD5
357    protein identifier. This may cause some nodes in the tree and items in the alignment
358    to be collapsed into a single instance, since multiple PEGs can produce the same
359    protein.
360    
361    =over 4
362    
363    =item parameter
364    
365    The parameter should be a reference to a hash with the following keys:
366    
367    =over 8
368    
369    =item -align
370    
371    The PEG-based alignment to convert.
372    
373    =item -tree
374    
375    The corresponding phylogenetic tree.
376    
377    =item -meta
378    
379    Reference to a hash mapping each feature ID in the alignment and tree to a
380    description of which part of the resulting protein was used.
381    
382    =item -relaxed (optional)
383    
384    If TRUE, then incoming feature IDs that are not found in the database will be
385    left untranslated in the output. Otherwise, such IDs will cause an error. The
386    default is FALSE.
387    
388    =back
389    
390    =item RETURN
391    
392    Returns a reference to a 3-tuple containing (0) the MD5 version of the incoming
393    alignment, (1) the MD5 version of the incoming tree, and (2) a reference to a
394    hash describing which portion of each protein was used in the alignment.
395    
396        $md5Tuple = [$md5_align, $md5_tree, $md5_metadata];
397    
398    =back
399    
400    =cut
401    
# Convert an alignment, tree, and metadata hash via
# AlignsAndTrees::fid_align_and_tree_to_md5_version.
#
# Keys read from the $args hash reference:
#   -align    alignment to convert (required)
#   -tree     tree to convert (required)
#   -meta     metadata hash (required)
#   -relaxed  optional flag; any false value is passed on as 0
#
# Returns a reference to a 3-element list holding the first three values
# returned by the worker routine. A missing required argument is reported
# through Confess; the messages name this method so the failure can be traced.
sub fid_align_and_tree_to_md5_version {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the Sapling database.
    my $sap = $self->{db};
    # Extract and check the required arguments.
    my $align = $args->{-align} || Confess("No alignment specified in fid_align_and_tree_to_md5_version.");
    my $tree = $args->{-tree} || Confess("No tree specified in fid_align_and_tree_to_md5_version.");
    my $meta = $args->{-meta} || Confess("No metadata specified in fid_align_and_tree_to_md5_version.");
    my $relaxed = $args->{-relaxed} || 0;
    # Convert the alignment and tree.
    my ($newAlign, $newTree, $newMeta) =
        AlignsAndTrees::fid_align_and_tree_to_md5_version($sap, $align, $tree, $meta, $relaxed);
    # Return the results.
    return [$newAlign, $newTree, $newMeta];
}
419    
420    =head3 fid_align_to_md5_align
421    
422        my $md5align =          $altObject->fid_align_to_md5_align({
423                                    -align => $fid_align,
424                                    -map => $fid_to_md5_map
425                                });
426    
427    Use a map produced by L</map_fid_to_md5> to convert a PEG-based alignment to an MD5-based alignment.
428    
429    =over 4
430    
431    =item parameter
432    
433    The parameter should be a reference to a hash with the following keys:
434    
435    =over 8
436    
437    =item -align
438    
439    The PEG-based alignment to be converted.
440    
441    =item -map
442    
443    A hash that maps each feature ID in the alignment to the corresponding MD5 protein
444    ID.
445    
446    =back
447    
448    =item RETURN
449    
450    Returns a new version of the alignment with the feature IDs replaced by MD5 protein
451    IDs using the data in the map.
452    
453    =back
454    
455    =cut
456    
# Convert an alignment using a caller-supplied ID map by delegating to
# AlignsAndTrees::fid_align_to_md5_align.
#
# Keys read from the $args hash reference:
#   -align  alignment to convert (required)
#   -map    ID map to apply (required)
#
# Returns the worker routine's scalar result. A missing required argument is
# reported through Confess.
sub fid_align_to_md5_align {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the incoming alignment and map.
    my $align = $args->{-align} || Confess("No alignment specified in fid_align_to_md5_align.");
    my $map = $args->{-map} || Confess("No map specified in fid_align_to_md5_align.");
    # Perform the conversion.
    my $retVal = AlignsAndTrees::fid_align_to_md5_align($align, $map);
    # Return the result.
    return $retVal;
}
468    
469    =head3 fid_tree_to_md5_tree
470    
471        my $md5tree =          $altObject->fid_tree_to_md5_tree({
472                                    -tree => $fid_tree,
473                                    -map => $fid_to_md5_map
474                                });
475    
476    Use a map produced by L</map_fid_to_md5> to convert a PEG-based tree to an MD5-based tree.
477    
478    =over 4
479    
480    =item parameter
481    
482    The parameter should be a reference to a hash with the following keys:
483    
484    =over 8
485    
486    =item -tree
487    
488    The PEG-based tree to be converted.
489    
490    =item -map
491    
492    A hash that maps each feature ID in the tree to the corresponding MD5 protein
493    ID.
494    
495    =back
496    
497    =item RETURN
498    
499    Returns a new version of the tree with the feature IDs replaced by MD5 protein
500    IDs using the data in the map.
501    
502    =back
503    
504    =cut
505    
# Convert a tree using a caller-supplied ID map by delegating to
# AlignsAndTrees::fid_tree_to_md5_tree.
#
# Keys read from the $args hash reference:
#   -tree  tree to convert (required)
#   -map   ID map to apply (required)
#
# Returns the worker routine's scalar result. A missing required argument is
# reported through Confess.
sub fid_tree_to_md5_tree {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the incoming tree and map.
    my $tree = $args->{-tree} || Confess("No tree specified in fid_tree_to_md5_tree.");
    my $map = $args->{-map} || Confess("No map specified in fid_tree_to_md5_tree.");
    # Perform the conversion.
    my $retVal = AlignsAndTrees::fid_tree_to_md5_tree($tree, $map);
    # Return the result.
    return $retVal;
}
517    
518    =head3 map_fid_to_md5
519    
520        my $md5Tuple =          $altObject->map_fid_to_md5({
521                                    -meta => $fid_metadata,
522                                    -relaxed => 0
523                                });
524    
525    Analyze the metadata for a PEG-based alignment/tree pair and compute the metadata for
526    the corresponding MD5-based data structures along with a mapping from the PEG IDs
527    to MD5 IDs.
528    
529    =over 4
530    
531    =item parameter
532    
533    Reference to a hash with the following keys:
534    
535    =over 8
536    
537    =item -meta
538    
539    Reference to a hash mapping each FIG feature ID in an alignment/tree pair to
540    information describing which part of each feature's protein was used.
541    
542    =item -relaxed (optional)
543    
544    If TRUE, then incoming feature IDs that are not found in the database will be
545    left untranslated in the output. Otherwise, such IDs will cause an error. The
546    default is FALSE.
547    
548    =back
549    
550    =item RETURN
551    
552    Returns a reference to a 2-tuple containing (0) the MD5-based metadata hash
553    created from the incoming hash and (1) a hash mapping each incoming feature ID
554    to the corresponding MD5 protein ID.
555    
556        $md5Tuple = [$md5_metadata, { $fida => $md5a, $fidb => $md5b, ... }];
557    
558    =back
559    
560    =cut
561    
# Pass the -meta structure and the -relaxed flag to
# AlignsAndTrees::map_fid_to_md5 and return the first two values it yields
# as a 2-element list reference.
#   -meta     required; a false value is reported through Confess
#   -relaxed  optional; any false value is passed on as 0
sub map_fid_to_md5 {
    my ($self, $args) = @_;
    # Required metadata structure.
    my $meta = $args->{-meta};
    $meta ||= Confess("No metadata structure passed to map_fid_to_md5.");
    # Normalise the relax flag.
    my $relaxed = $args->{-relaxed} || 0;
    # Run the conversion in list context and keep exactly two results.
    my @converted = AlignsAndTrees::map_fid_to_md5($self->{db}, $meta, $relaxed);
    return [ @converted[0, 1] ];
}
576    
577    
578    =head3 map_md5_to_fid
579    
580        my $fidTuple =          $altObject->map_md5_to_fid({
581                                    -meta => $md5_metadata,
582                                    -relaxed => 0
583                                });
584    
585    Analyze the metadata for an MD5 alignment/tree pair and compute the metadata for
586    the corresponding PEG-based data structures along with a mapping from the MD5 IDs
587    to the PEG IDs.
588    
589    =over 4
590    
591    =item parameter
592    
593    Reference to a hash with the following keys:
594    
595    =over 8
596    
597    =item -meta
598    
599    Reference to a hash mapping each MD5 protein ID in an alignment/tree pair to
600    information describing which part of each protein was used.
601    
602    =item -relaxed (optional)
603    
604    If TRUE, then incoming MD5 IDs that are not found in the database will be
605    left untranslated in the output. Otherwise, such IDs will cause an error. The
606    default is FALSE.
607    
608    =back
609    
610    =item RETURN
611    
612    Returns a reference to a 2-tuple containing (0) the PEG-based metadata hash
613    created from the incoming hash and (1) a hash mapping each incoming MD5 protein
614    ID to a list of corresponding FIG feature IDs.
615    
616        $fidTuple = [$fid_metadata, { $md5a => [$fida1, $fida2, ...],
617                                      $md5b => [$fidb1, $fidb2, ...],
618                                      ... }];
619    
620    =back
621    
622    =cut
623    
# Pass the -meta structure and the -relaxed flag to
# AlignsAndTrees::map_md5_to_fid and return the first two values it yields
# as a 2-element list reference.
#   -meta     required; a false value is reported through Confess
#   -relaxed  optional; any false value is passed on as 0
sub map_md5_to_fid {
    my ($self, $args) = @_;
    # Required metadata structure.
    my $meta = $args->{-meta};
    $meta ||= Confess("No metadata structure passed to map_md5_to_fid.");
    # Normalise the relax flag.
    my $relaxed = $args->{-relaxed} || 0;
    # Run the conversion in list context and keep exactly two results.
    my @converted = AlignsAndTrees::map_md5_to_fid($self->{db}, $meta, $relaxed);
    return [ @converted[0, 1] ];
}
638    
639    =head3 md5IDs_in_align
640    
641        my $altHash =           $altObject->md5IDs_in_align({
642                                    -ids => [$alt1, $alt2, ...]
643                                });
644    
645    For each incoming alignment ID, return a list of the MD5 protein IDs for the proteins
646    found in the alignment.
647    
648    =over 4
649    
650    =item parameter
651    
652    The parameter should be a reference to a hash with the following keys:
653    
654    =over 8
655    
656    =item -ids
657    
658    Reference to a list of alignment IDs.
659    
660    =back
661    
662    =item RETURN
663    
664    Returns a reference to a hash mapping each incoming alignment ID to a list of
665    the proteins found in the alignment. Each protein is represented by an MD5 protein
666    ID.
667    
668        $altHash = { $alta => [$md5a1, $md5a2, ... ],
669                     $altb => [$md5b1, $md5b2, ... ],
670                     ... };
671    
672    =back
673    
674    =cut
675    
# Build a hash that maps every requested alignment ID to the value returned
# by AlignsAndTrees::md5IDs_in_align for that ID.
#   $args  hash reference; the IDs are extracted from it with
#          ServerThing::GetIdList using the -ids key.
# Returns a reference to the resulting hash.
sub md5IDs_in_align {
    my ($self, $args) = @_;
    # Resolve the requested alignment IDs.
    my $alignIds = ServerThing::GetIdList(-ids => $args);
    # Database handle stored on this object.
    my $sapling = $self->{db};
    # One lookup per alignment, stored straight into the result hash.
    my %md5sFor;
    foreach my $alignId (@{$alignIds}) {
        $md5sFor{$alignId} = AlignsAndTrees::md5IDs_in_align($sapling, $alignId);
    }
    return \%md5sFor;
}
695    
 =head3 ali_tree_pairs_to_prots  
696    
697      my $altHash =       $altObject->ali_tree_pairs_to_prots({  =head3 md5IDs_in_tree
698    
699        my $altHash =           $altObject->md5IDs_in_tree({
700                              -ids => [$alt1, $alt2, ...]                              -ids => [$alt1, $alt2, ...]
701                          });                          });
702    
703  Return a list of the protein IDs in each specified alignment/tree pair.  For each incoming tree ID, return a list of the MD5 protein IDs for the proteins
704    found in the tree.
705    
706  =over 4  =over 4
707    
708  =item parameter  =item parameter
709    
710  The parameter should be a reference to a hash with the following keys.  The parameter should be a reference to a hash with the following keys:
711    
712  =over 8  =over 8
713    
714  =item -ids  =item -ids
715    
716  Reference to a list of alignment/tree pair IDs.  Reference to a list of tree IDs.
717    
718  =back  =back
719    
720  =item RETURN  =item RETURN
721    
722  Returns a reference to a hash mapping each incoming alignment/tree ID to a list of MD5 protein IDs  Returns a reference to a hash mapping each incoming tree ID to a list of
723  representing the proteins found in the alignment/tree pair.  the proteins found in the tree. Each protein is represented by an MD5 protein
724    ID.
725    
726      $altHash = { $alt1 => [$prot1a, $prot1b, ...],      $altHash = { $alta => [$md5a1, $md5a2, ... ],
727                   $alt2 => [$prot2a, $prot2b, ...],                   $altb => [$md5b1, $md5b2, ... ],
728                   ... };                   ... };
729    
730  =back  =back
731    
732  =cut  =cut
733    
# Build a hash that maps every requested tree ID to the value returned by
# AlignsAndTrees::md5IDs_in_tree for that ID.
#   $args  hash reference; the IDs are extracted from it with
#          ServerThing::GetIdList using the -ids key.
# Returns a reference to the resulting hash.
sub md5IDs_in_tree {
    my ($self, $args) = @_;
    # Resolve the requested tree IDs.
    my $treeIds = ServerThing::GetIdList(-ids => $args);
    # Database handle stored on this object.
    my $sapling = $self->{db};
    # One lookup per tree, stored straight into the result hash.
    my %md5sFor;
    foreach my $treeId (@{$treeIds}) {
        $md5sFor{$treeId} = AlignsAndTrees::md5IDs_in_tree($sapling, $treeId);
    }
    return \%md5sFor;
}
753    
 =head3 all_ali_tree_pairs  
754    
755      my $altList =       $altObject->all_ali_tree_pairs();  =head3 md5_align_and_tree_by_ID
756    
757  Return a list of all the alignment/tree IDs. Each ID represents an alignment and an associated      my $tupleHash =         $altObject->md5_align_and_tree_by_ID({
758  tree.                                  -ids => [$alt1, $alt2, ...]
759                                });
760    
761    Return the alignment and tree for each specified ID. The return hash will contain
762    a 3-tuple for each tree ID consisting of the alignment, the tree, and the metadata
763    describing the proteins involved.
764    
765  =over 4  =over 4
766    
767    =item parameter
768    
769    The parameter should be a reference to a hash with the following keys:
770    
771    =over 8
772    
773    =item -ids
774    
775    Reference to a list of alignment/tree pair IDs.
776    
777    =back
778    
779  =item RETURN  =item RETURN
780    
781  Returns a reference to a list of alignment/tree IDs.  Returns a reference to a hash mapping each incoming ID to a 3-tuple containing (0) the
782    identified MD5 protein alignment, (1) the associated phylogenetic tree, and (2) a
783    hash describing what portion of each protein was used in the alignment.
784    
785      $idList = [$alt1, $alt2, ...];      $tupleHash = { $alt1 => [$md5_align1, $md5_tree1, $md5_metadata1],
786                       $alt2 => [$md5_align2, $md5_tree2, $md5_metadata2],
787                       ... };
788    
789  =back  =back
790    
791  =cut  =cut
792    
# For every requested ID, fetch the alignment and its metadata with
# AlignsAndTrees::md5_alignment_by_ID and the tree with
# AlignsAndTrees::md5_tree_by_ID, and store them as [alignment, tree, metadata].
#   $args  hash reference; the IDs are extracted from it with
#          ServerThing::GetIdList using the -ids key.
# Returns a reference to a hash keyed by ID.
sub md5_align_and_tree_by_ID {
    my ($self, $args) = @_;
    # Resolve the requested IDs.
    my $pairIds = ServerThing::GetIdList(-ids => $args);
    # Database handle stored on this object.
    my $sapling = $self->{db};
    my %tupleFor;
    foreach my $pairId (@{$pairIds}) {
        # The alignment call is made in list context and yields two values.
        my ($alignment, $metadata) =
            AlignsAndTrees::md5_alignment_by_ID($sapling, $pairId);
        # The tree call is made in scalar context.
        my $tree = AlignsAndTrees::md5_tree_by_ID($sapling, $pairId);
        # Note the order: the tree sits between alignment and metadata.
        $tupleFor{$pairId} = [$alignment, $tree, $metadata];
    }
    return \%tupleFor;
}
814    
815  =head3 prot_set_to_ali_tree_pairs  =head3 md5_align_and_tree_to_fid_version
816    
817      my $altList =       $altObject({      my $fidTuple =          $altObject->md5_align_and_tree_to_fid_version({
818                              -prots => [$prot1, $prot2, ...]                                  -align => $md5_align,
819                                    -tree => $md5_tree,
820                                    -meta => $md5_metadata,
821                                    -relaxed => 1
822                          });                          });
823    
824  Return a list of the alignments containing all of the incoming proteins.  Convert an MD5 alignment/tree pair to a PEG-based alignment-tree pair. Each protein in
825    the alignment or tree will be translated to a corresponding FIG feature ID. In some
826    cases, this may cause a single protein to be replicated to include all the features
827    that produce that protein.
828    
829  =over 4  =over 4
830    
831    =item parameter
832    
833    The parameter should be a reference to a hash with the following keys.
834    
835    =over 8
836    
837    =item -align
838    
839    Reference to the MD5 alignment to be converted.
840    
841    =item -tree
842    
843    Reference to the corresponding phylogenetic tree.
844    
845    =item -meta
846    
847    Reference to a hash mapping each MD5 protein ID in the alignment and tree to a
848    description of what section of the protein was used.
849    
850    =item -relaxed (optional)
851    
852    If TRUE, then incoming MD5 IDs that are not found in the database will be
853    left untranslated in the output. Otherwise, such IDs will cause an error. The
854    default is FALSE.
855    
856    =back
857    
858    =item RETURN
859    
860    Returns a reference to a 3-tuple containing (0) a PEG-based version of the
861    incoming alignment, (1) a PEG-based version of the incoming tree, and (2) a
862    reference to a hash mapping each feature ID in the new alignment and tree to
863    a description of what section of the feature's protein was used.
864    
865        $fidTuple = [$fid_align, $fid_tree, $fid_metadata];
866    
867    =back
868    
869    =cut
870    
# Convert an alignment, tree, and metadata hash via
# AlignsAndTrees::md5_align_and_tree_to_fid_version.
#   -align    required; a false value is reported through Confess
#   -tree     required; a false value is reported through Confess
#   -meta     required; a false value is reported through Confess
#   -relaxed  optional; any false value is passed on as 0
# Returns a reference to a 3-element list holding the first three values
# returned by the worker routine.
sub md5_align_and_tree_to_fid_version {
    my ($self, $args) = @_;
    # Pull out and check the three required structures, in this order.
    my $align = $args->{-align};
    $align ||= Confess("No alignment specified in md5_align_and_tree_to_fid_version.");
    my $tree = $args->{-tree};
    $tree ||= Confess("No tree specified in md5_align_and_tree_to_fid_version.");
    my $meta = $args->{-meta};
    $meta ||= Confess("No metadata specified in md5_align_and_tree_to_fid_version.");
    # Normalise the relax flag.
    my $relaxed = $args->{-relaxed} || 0;
    # Run the conversion in list context and keep exactly three results.
    my @converted = AlignsAndTrees::md5_align_and_tree_to_fid_version(
        $self->{db}, $align, $tree, $meta, $relaxed);
    return [ @converted[0, 1, 2] ];
}
888    
889    =head3 md5_align_to_fid_align
890    
891        my $fidAlign =          $altObject->md5_align_to_fid_align({
892                                    -align => $md5_align,
893                                    -map => $md5_to_fid_map
894                                });
895    
896    Use a map produced by L</map_fid_to_md5> to convert an MD5-based alignment to a PEG-based
897    alignment. Since a single protein may be generated by multiple features, this could
898    result in alignment entries being replicated in the result.
899    
900  =over 4  =over 4
901    
902  =item parameter  =item parameter
903    
904  The parameter should be a reference to a hash with the following keys.  The parameter should be a reference to a hash containing the following keys.
905    
906  =over 8  =over 8
907    
908  =item -prots  =item -align
909    
910  Reference to a list of MD5 protein IDs.  The MD5-based alignment to be converted.
911    
912    =item -map
913    
914    Reference to a hash mapping each MD5 protein ID to a list of the corresponding FIG
915    feature IDs.
916    
917    =back
918    
919    =item RETURN
920    
921    Returns a new version of the alignment with the MD5 protein IDs replaced by FIG
922    feature IDs.
923    
924    =back
925    
926    =cut
927    
928    sub md5_align_to_fid_align {
929        # Unpack the invocant and the argument hash reference.
930        my ($self, $args) = @_;
931        # Get the alignment and the map; a false value for either one triggers Confess.
932        my $align = $args->{-align} || Confess("No alignment specified in md5_align_to_fid_align.");
933        my $map = $args->{-map} || Confess("No map specified in md5_align_to_fid_align.");
934        # Delegate the conversion to AlignsAndTrees (no database handle is passed here).
935        my $retVal = AlignsAndTrees::md5_align_to_fid_align($align, $map);
936        # Return the converted alignment exactly as the helper produced it.
937        return $retVal;
938    }
939    
940    =head3 md5_alignment_by_ID
941    
942        my $altHash =           $altObject->md5_alignment_by_ID({
943                                    -ids => [$alt1, $alt2, ...]
944                                });
945    
946    Return the alignments with the specified IDs. The return hash will contain a
947    2-tuple for each alignment ID consisting of the alignment itself followed by
948    the metadata describing the proteins in the alignment.
949    
950    =over 4
951    
952    =item parameter
953    
954    The parameter should be a reference to a hash with the following keys:
955    
956    =over 8
957    
958    =item -ids
959    
960    Reference to a list of alignment IDs.
961    
962  =back  =back
963    
964  =item RETURN  =item RETURN
965    
966  Returns a reference to a list of IDs for the alignment/tree pairs containing all the  Returns a reference to a hash mapping each incoming ID to a 2-tuple consisting of (0) the MD5 alignment and (1) the metadata describing the proteins in the alignment.
 incoming proteins.  
967    
968      $altList = [$alt1, $alt2, ... ];      $altHash = { $alt1 => [$md5_align1, $md5_metadata1], $alt2 => [$md5_align2, $md5_metadata2], ... };
969    
970  =back  =back
971    
972  =cut  =cut
973    
974  sub prot_set_to_ali_tree_pairs {  sub md5_alignment_by_ID {
975      # Get the parameters.      # Get the parameters.
976      my ($self, $args) = @_;      my ($self, $args) = @_;
977      # Get the protein ID list.      # Get the Sapling database.
978      my $prots = ServerThing::GetIdList(-prots => $args);      my $sap = $self->{db};
979      # Declare the return variable.      # Get the list of incoming IDs.
980      my $retVal = [];      my $ids = ServerThing::GetIdList(-ids => $args);
981      # Only proceed if we have at least one protein.      # Declare the return hash.
982      my ($prot1, @otherProts) = @$prots;      my $retVal = {};
983      if ($prot1) {      # Loop through the incoming IDs.
984          # Create a hash of the alignments found for the first protein.      for my $id (@$ids) {
985          my %found = map { $_ => 1 } AlignsAndTrees::aligns_with_md5ID($prot1);          # Get the alignment and metadata for this ID.
986          # Loop through the remaining proteins. In each case we only keep the          $retVal->{$id} = [AlignsAndTrees::md5_alignment_by_ID($sap, $id)];
         # alignments already found.  
         for my $prot (@otherProts) {  
             # Get the alignment/tree pairs for this protein.  
             my %newAlts = map { $_ => 1 } AlignsAndTrees::aligns_with_md5ID($prot);  
             # Delete the alignments in the result hash that are not found for the new  
             # protein.  
             for my $alt (keys %found) {  
                 if (! $newAlts{$alt}) {  
                     delete $found{$alt};  
987                  }                  }
988        # Return the result hash.
989        return $retVal;
990    
991              }              }
992    
993    =head3 md5_tree_by_ID
994    
995        my $tupleHash =         $altObject->md5_tree_by_ID({
996                                    -ids => [$alt1, $alt2, ...]
997                                });
998    
999    Return the trees with the specified IDs. The return hash will contain a 2-tuple
1000    for each tree ID consisting of the tree itself followed by the metadata describing
1001    the proteins in the tree.
1002    
1003    =over 4
1004    
1005    =item parameter
1006    
1007    The parameter should be a reference to a hash with the following keys:
1008    
1009    =over 8
1010    
1011    =item -ids
1012    
1013    Reference to a list of tree IDs.
1014    
1015    =back
1016    
1017    =item RETURN
1018    
1019    Returns a reference to a hash that maps each incoming tree ID to a 2-tuple consisting of
1020    (0) a data structure containing the identified phylogenetic tree represented as a
1021    newick-format list, and (1) a hash containing the metadata for the leaves of the tree.
1022    
1023        $tupleHash = { $tree1 => [$md5_tree1, $md5_metadata1],
1024                       $tree2 => [$md5_tree2, $md5_metadata2],
1025                       ... };
1026    
1027    =back
1028    
1029    =cut
1030    
1031    sub md5_tree_by_ID {
1032        # Get the parameters.
1033        my ($self, $args) = @_;
1034        # Get the sapling database.
1035        my $sap = $self->{db};
1036        # Get the incoming IDs.
1037        my $ids = ServerThing::GetIdList(-ids => $args);
1038        # Declare the return hash.
1039        my $retVal = {};
1040        # Loop through the incoming IDs.
1041        for my $id (@$ids) {
1042            # Get the tree and metadata for this ID.
1043            $retVal->{$id} = [AlignsAndTrees::md5_tree_by_ID($sap, $id)];
1044          }          }
1045          # Store the alignments found as the result list.      # Return the result hash.
1046          $retVal = [ sort keys %found ];      return $retVal;
1047      }      }
1048    
1049    =head3 md5_tree_to_fid_tree
1050    
1051        my $fidtree =          $altObject->md5_tree_to_fid_tree({
1052                                    -tree => $md5_tree,
1053                                    -map => $md5_to_fid_map
1054                                });
1055    
1056    Use a map produced by L</map_fid_to_md5> to convert an MD5-based tree to a PEG-based
1057    tree. Since a single protein may be generated by multiple features, this could
1058    result in tree nodes being replicated in the result.
1059    
1060    =over 4
1061    
1062    =item parameter
1063    
1064    The parameter should be a reference to a hash containing the following keys.
1065    
1066    =over 8
1067    
1068    =item -tree
1069    
1070    The MD5-based tree to be converted.
1071    
1072    =item -map
1073    
1074    Reference to a hash mapping each MD5 protein ID to a list of the corresponding FIG
1075    feature IDs.
1076    
1077    =back
1078    
1079    =item RETURN
1080    
1081    Returns a new version of the tree with the MD5 protein IDs replaced by FIG
1082    feature IDs.
1083    
1084    =back
1085    
1086    =cut
1087    
1088    sub md5_tree_to_fid_tree {
1089        # Get the parameters.
1090        my ($self, $args) = @_;
1091        # Get the tree and the map.
1092        my $tree = $args->{-tree} || Confess("No tree specified in md5_tree_to_fid_tree.");
1093        my $map = $args->{-map} || Confess("No map specified in md5_tree_to_fid_tree.");
1094        # Perform the conversion.
1095        my $retVal = AlignsAndTrees::md5_tree_to_fid_tree($tree, $map);
1096      # Return the result.      # Return the result.
1097      return $retVal;      return $retVal;
1098  }  }
1099    
1100    =head3 trees_with_md5ID
1101    
1102  =head3 prots_to_ali_tree_pairs      my $protHash =          $altObject->trees_with_md5ID({
1103                                    -ids => [$prot1, $prot2, ...]
     my $protHash =      $altObject({  
                             -prots => [$prot1, $prot2, ...]  
1104                          });                          });
1105    
1106  For each incoming protein ID, return a list of the alignment/tree pairs containing that protein.  For each of the specified proteins, return a list of the IDs of the trees containing that protein.
1107    
1108  =over 4  =over 4
1109    
1110  =item parameter  =item parameter
1111    
1112  The parameter should be a reference to a hash with the following keys.  The parameter should be a reference to a hash with the following keys:
1113    
1114  =over 8  =over 8
1115    
1116  =item -prots  =item -ids
1117    
1118  Reference to a list of MD5 protein IDs.  Reference to a list of MD5 protein IDs.
1119    
# Line 322  Line 1121 
1121    
1122  =item RETURN  =item RETURN
1123    
1124  Returns a reference to a hash that maps each incoming protein ID to a list of IDs for the  Returns a reference to a hash mapping each incoming protein ID to a list of the
1125  alignment/tree pairs containing that protein.  IDs for the trees containing that protein.
1126    
1127      $protHash = { $prot1 => [$alt1a, $alt1b, ...],      $protHash = { $prot1 => [$alt1a, $alt1b, ...],
1128                    $prot2 => [$alt2a, $alt2b, ...],                    $prot2 => [$alt2a, $alt2b, ...],
# Line 333  Line 1132 
1132    
1133  =cut  =cut
1134    
1135  sub prots_to_ali_tree_pairs {  sub trees_with_md5ID {
1136      # Get the parameters.      # Get the parameters.
1137      my ($self, $args) = @_;      my ($self, $args) = @_;
1138      # Get the protein ID list.      # Get the Sapling database.
1139      my $prots = ServerThing::GetIdList(-prots => $args);      my $sap = $self->{db};
1140      # Declare the return hash.      # Create the return hash.
1141      my $retVal = {};      my $retVal = {};
1142      # Loop through the proteins.      # Get the list of incoming IDs.
1143      for my $prot (@$prots) {      my $ids = ServerThing::GetIdList(-ids => $args);
1144          # Get the alignment/tree pairs for this protein.      # Loop through the protein IDs, finding the trees.
1145          my $alts = AlignsAndTrees::aligns_with_md5ID($prot);      for my $id (@$ids) {
1146          # Store the resulting list in the return hash.          $retVal->{$id} = AlignsAndTrees::trees_with_md5ID($sap, $id);
         $retVal->{$prot} = $alts;  
1147      }      }
1148      # Return the result.      # Return the result hash.
1149      return $retVal;      return $retVal;
1150  }  }
1151    
1152    
   
1153  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3