[Bio] / FigKernelPackages / AlignsAndTreesServer.pm Repository:
ViewVC logotype

View of /FigKernelPackages/AlignsAndTreesServer.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Tue Mar 22 23:17:33 2011 UTC (8 years, 11 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_dev_06072011, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_3, mgrast_dev_03312011, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_10262011
Changes since 1.2: +4 -5 lines
debugged

#
# Copyright (c) 2003-2010 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package AlignsAndTreesServer;
#===============================================================================
#  perl functions for loading and accessing Alignments and Trees
#
#  Usage:  use AlignsAndTreesServer;
#
#-------------------------------------------------------------------------------
#  Alignments
#-------------------------------------------------------------------------------
#
#    @alignIDs = all_alignIDs();
#   \@alignIDs = all_alignIDs();
#
#    @alignIDs = aligns_with_md5ID( $md5 );
#   \@alignIDs = aligns_with_md5ID( $md5 );
#
#    @md5IDs   = md5IDs_in_align( $alignID );
#   \@md5IDs   = md5IDs_in_align( $alignID );
#
#   \@seqs               = md5_alignment_by_ID( $alignID );
# ( \@seqs, \%metadata ) = md5_alignment_by_ID( $alignID );
#           \%metadata   = md5_alignment_metadata( $alignID );
#
#       $metadata{ $md5 } = [ $peg_length, $trim_beg, $trim_end, $location_string ]
#
#    @alignIDs = aligns_with_pegID( $fid );
#   \@alignIDs = aligns_with_pegID( $fid );
#
#    @fids     = pegIDs_in_align( $alignID );
#   \@fids     = pegIDs_in_align( $alignID );
#
#   \@seqs               = peg_alignment_by_ID( $alignID );
# ( \@seqs, \%metadata ) = peg_alignment_by_ID( $alignID );
#           \%metadata   = peg_alignment_metadata( $alignID );
#
#       $metadata{ $fid } = [ $peg_length, $trim_beg, $trim_end, $location_string ]
#
#-------------------------------------------------------------------------------
#  Trees
#-------------------------------------------------------------------------------
#
#    @treeIDs = all_treeIDs();
#   \@treeIDs = all_treeIDs();
#
#    @treeIDs = trees_with_md5ID( $md5 );
#   \@treeIDs = trees_with_md5ID( $md5 );
#
#    @md5IDs = md5IDs_in_tree( $treeID );
#   \@md5IDs = md5IDs_in_tree( $treeID );
#
#    $tree = md5_tree_by_ID( $treeID );
#
#    @treeIDs = trees_with_pegID( $fid );
#   \@treeIDs = trees_with_pegID( $fid );
#
#    @fids = pegIDs_in_tree( $treeID );
#   \@fids = pegIDs_in_tree( $treeID );
#
#    $tree = peg_tree_by_ID( $treeID );
#
#===============================================================================

use strict;
use AlignsAndTrees;
use Data::Dumper;

#-------------------------------------------------------------------------------
#  Support for md5 <-> fid interconversion:
#
#    @fids = md5_to_pegs( $md5 );
#   \@fids = md5_to_pegs( $md5 );
#    $md5  = peg_to_md5( $pegID );
#
#-------------------------------------------------------------------------------

my $data_dir = $ENV{ ATNG } && -d $ENV{ ATNG } ? $ENV{ ATNG } :
               -d '/home/fangfang/ATNG'        ? '/home/fangfang/ATNG' :
                                                 '.';
my $md5_data = "$data_dir/pegs_with_md5.tab";
my $md5_read;
my %peg_to_md5;
my %md5_to_pegs;

sub load_md5_data
{
    -f $md5_data && open( MD5, "<$md5_data" )
        or die "load_md5_data() could not find or open data file '$md5_data'.\n";
    local $_;
    while ( defined( $_ = <MD5> ) )
    {
        chomp;
        my ( $fid, $md5 ) = split /\t/;
        $peg_to_md5{ $fid } = $md5;
        push @{ $md5_to_pegs{ $md5 } }, $fid;
    }
    close MD5;

    $md5_read = 1;
}

sub md5_to_pegs
{
    my ( $md5 ) = @_;
    load_md5_data() if ! $md5_read;
    my $fids = $md5 ? ( $md5_to_pegs{ $md5 } || [] ) : [];
    wantarray ? @$fids : [ @$fids ];
}

sub peg_to_md5
{
    my ( $fid ) = @_;
    load_md5_data() if ! $md5_read;
    $fid ? $peg_to_md5{ $fid } : undef;
}

#===============================================================================
#  Alignments
#===============================================================================
#
#    @alignIDs = all_alignIDs();
#   \@alignIDs = all_alignIDs();
#
#-------------------------------------------------------------------------------
sub all_alignIDs
{
    AlignsAndTrees::all_alignIDs();
}

#-------------------------------------------------------------------------------
#  md5 based alignments:
#-------------------------------------------------------------------------------
#
#    @alignIDs = aligns_with_md5ID( $md5 );
#   \@alignIDs = aligns_with_md5ID( $md5 );
#
#-------------------------------------------------------------------------------
sub aligns_with_md5ID
{
    AlignsAndTrees::aligns_with_md5ID( @_ );
}

#-------------------------------------------------------------------------------
#
#    @md5IDs = md5IDs_in_align( $alignID );
#   \@md5IDs = md5IDs_in_align( $alignID );
#
#-------------------------------------------------------------------------------
sub md5IDs_in_align
{
    AlignsAndTrees::md5IDs_in_align( @_ );
}

#-------------------------------------------------------------------------------
#
#   \@seqs               = md5_alignment_by_ID( $alignID );
# ( \@seqs, \%metadata ) = md5_alignment_by_ID( $alignID );
#           \%metadata   = md5_alignment_metadata( $alignID );
#
#       $metadata{ $md5 } = [ $peg_length, $trim_beg, $trim_end, $location_string ]
#
#-------------------------------------------------------------------------------
sub md5_alignment_by_ID
{
    AlignsAndTrees::md5_alignment_by_ID( @_ );
}

sub md5_alignment_metadata
{
    AlignsAndTrees::md5_alignment_metadata( @_ );
}


#-------------------------------------------------------------------------------
#  peg based alignments:
#-------------------------------------------------------------------------------
#
#    @alignIDs = aligns_with_pegID( $fid );
#   \@alignIDs = aligns_with_pegID( $fid );
#
#-------------------------------------------------------------------------------
sub aligns_with_pegID
{
    aligns_with_md5ID( peg_to_md5( @_ ) );
}

#-------------------------------------------------------------------------------
#
#    @fids = pegIDs_in_align( $alignID );
#   \@fids = pegIDs_in_align( $alignID );
#
#-------------------------------------------------------------------------------
sub pegIDs_in_align
{
    my @fids = map { md5_to_pegs( $_ ) } md5IDs_in_align( @_ );
    wantarray ? @fids : \@fids;
}

#-------------------------------------------------------------------------------
#
#   \@seqs               = peg_alignment_by_ID( $alignID );
# ( \@seqs, \%metadata ) = peg_alignment_by_ID( $alignID );
#           \%metadata   = peg_alignment_metadata( $alignID );
#
#       $metadata{ $fid } = [ $peg_length, $trim_beg, $trim_end, $location_string ]
#
#-------------------------------------------------------------------------------
sub peg_alignment_by_ID
{
    my $md5_alignment = md5_alignment_by_ID( @_ );
    my $peg_alignment = md5_align_to_fid_align( $md5_alignment );
    wantarray ? ( $peg_alignment, peg_alignment_metadata( @_ ) ) : $peg_alignment;
}

sub peg_alignment_metadata
{
    my $md5_metadata = md5_alignment_metadata( @_ );
    my %peg_metadata = map { my $md5  = $_;
                             my $data = $md5_metadata->{ $md5 };
                             map { $_ => $data } md5_to_pegs( $md5 )
                           }
                       keys %$md5_metadata;
    \%peg_metadata;
}


#===============================================================================
#  Trees
#===============================================================================
#
#    @treeIDs = all_treeIDs( );
#   \@treeIDs = all_treeIDs( );
#
#-------------------------------------------------------------------------------
sub all_treeIDs
{
    AlignsAndTrees::all_treeIDs( );
}

#-------------------------------------------------------------------------------
#  md5 based trees:
#-------------------------------------------------------------------------------
#
#    @treeIDs = trees_with_md5ID( $md5 );
#   \@treeIDs = trees_with_md5ID( $md5 );
#
#-------------------------------------------------------------------------------
sub trees_with_md5ID
{
    AlignsAndTrees::trees_with_md5ID( @_ );
}

#-------------------------------------------------------------------------------
#
#    @md5IDs = md5IDs_in_tree( $treeID );
#   \@md5IDs = md5IDs_in_tree( $treeID );
#
#-------------------------------------------------------------------------------
sub md5IDs_in_tree
{
    AlignsAndTrees::md5IDs_in_tree( @_ );
}

#-------------------------------------------------------------------------------
#
#    $tree = md5_tree_by_ID( $treeID );
#
#-------------------------------------------------------------------------------
sub md5_tree_by_ID
{
    AlignsAndTrees::md5_tree_by_ID( @_ );
}


#-------------------------------------------------------------------------------
#  peg based trees:
#-------------------------------------------------------------------------------
#
#    @treeIDs = trees_with_pegID( $fid );
#   \@treeIDs = trees_with_pegID( $fid );
#
#-------------------------------------------------------------------------------
sub trees_with_pegID
{
    trees_with_md5ID( peg_to_md5( @_ ) )
}

#-------------------------------------------------------------------------------
#
#    @fids = pegIDs_in_tree( $treeID );
#   \@fids = pegIDs_in_tree( $treeID );
#
#-------------------------------------------------------------------------------
sub pegIDs_in_tree
{
    my @fids = map { md5_to_pegs( $_ ) } md5IDs_in_tree( @_ );
    wantarray ? @fids : \@fids;
}

#-------------------------------------------------------------------------------
#
#    $tree = peg_tree_by_ID( $treeID );
#
#-------------------------------------------------------------------------------
sub peg_tree_by_ID
{
    my $md5_tree = md5_tree_by_ID( @_ );
    return undef if ! $md5_tree;
    md5_tree_to_fid_tree( $md5_tree );
}


#===============================================================================
#  Functions for interconverting alignments and trees that md5-based ids and
#  fid-based ids.  Because the md5 id is based on the sequences, multiple
#  fids can have the same md5 id.  These are reduced to a single instance on
#  conversion to md5, and expanded to all known corresponding fids on conversion
#  back to fids.
#
#      \@md5_align = fid_align_to_md5_align( \@fid_align, $relaxed );
#      \@fid_align = md5_align_to_fid_align( \@md5_align, $relaxed );
#       $md5_tree  = fid_tree_to_md5_tree( $fid_tree, $relaxed );
#       $fid_tree  = md5_tree_to_fid_tree( $md5_tree, $relaxed );
#
#  @fid_align  An alignment, as fid_definition_sequence triples.
#  @md5_align  An alignment, as md5_definition_sequence triples.
#  $fid_tree   A gjonewick tree structure with fid ids.
#  $md5_tree   A gjonewick tree structure with md5 ids.
#  $relaxed    If set to a true value, untranslatable ids are passed through,
#                   rather than deleted.
#===============================================================================
sub fid_align_to_md5_align
{
    my ( $fid_align, $relaxed ) = @_;
    $fid_align && ref( $fid_align ) eq 'ARRAY'
        or return ();

    my @md5_align;

    my %seen;
    foreach ( @$fid_align )
    {
        my $fid = $_->[0];
        my $md5 = peg_to_md5( $fid );
        $md5 = $fid if ! $md5 && $relaxed;
        next if ! $md5 || $seen{ $md5 }++;

        push @md5_align, [ $md5, $_->[1], $_->[2] ];
    }

    \@md5_align;
}


sub md5_align_to_fid_align
{
    my ( $md5_align, $relaxed ) = @_;
    $md5_align && ref( $md5_align ) eq 'ARRAY'
        or return ();

    my @fid_align;

    foreach ( @$md5_align )
    {
        my $md5  = $_->[0];
        my @fids = md5_to_pegs( $md5 );
        @fids = ( $md5 ) if ! @fids && $relaxed;
        foreach my $fid ( @fids )
        {
            push @fid_align, [ $fid, $_->[1], $_->[2] ];
        }
    }

    \@fid_align;
}


sub fid_tree_to_md5_tree
{
    my ( $fid_tree, $relaxed ) = @_;
    $fid_tree && ref( $fid_tree ) eq 'ARRAY'
        or return undef;

    my ( %seen, %tip_to_md5 );
    foreach my $fid ( gjonewicklib::newick_tip_list( $fid_tree ) )
    {
        my $md5 = peg_to_md5( $fid );
        $md5 = $fid if ! $md5 && $relaxed;
        $tip_to_md5{ $fid } = $md5 if $md5 && ! $seen{ $md5 }++;
    }

    gjonewicklib::newick_relabel_tips( gjonewicklib::newick_subtree( $fid_tree, keys %tip_to_md5 ), \%tip_to_md5 );
}


sub md5_tree_to_fid_tree
{
    my ( $md5_tree, $relaxed ) = @_;
    $md5_tree && ref( $md5_tree ) eq 'ARRAY'
        or return ();

    my @tips = gjonewicklib::newick_tip_list( $md5_tree );
    @tips or return undef;

    my %md5_2_fids;
    my $prune = 0;
    foreach my $md5 ( @tips )
    {
        my @fids = md5_to_pegs( $md5 );
        @fids = ( $md5 ) if ! @fids && $relaxed;
        if ( ! @fids ) { $prune = 1; next }
        $md5_2_fids{ $md5 } = \@fids;
    }

    $md5_tree = gjonewicklib::newick_subtree( $md5_tree, [ keys %md5_2_fids ] ) if $prune;
    expand_duplicate_tips( gjonewicklib::copy_newick_tree( $md5_tree ), \%md5_2_fids );
}


#-------------------------------------------------------------------------------
#  Use a hash to relabel, and potentially expand the tips in a newick tree.
#
#  $node = expand_duplicate_tips( $node, \%new_names )
#
#-------------------------------------------------------------------------------
sub expand_duplicate_tips
{
    my ( $node, $new_names ) = @_;

    my @desc = gjonewicklib::newick_desc_list( $node );

    if ( @desc )
    {
        foreach ( @desc ) { expand_duplicate_tips( $_, $new_names ) }
    }
    else
    {
        my $new;
        if ( gjonewicklib::node_has_lbl( $node )
          && defined( $new = $new_names->{ gjonewicklib::newick_lbl( $node ) } )
           )
        {
            my @new = @$new;
            if ( @new == 1 )
            {
                gjonewicklib::set_newick_lbl( $node, $new[0] );
            }
            elsif ( @new > 1 )
            {
                gjonewicklib::set_newick_desc_ref( $node, [ map { [ [], $_, 0 ] } @new ] );
                gjonewicklib::set_newick_lbl( $node, undef );
            }
        }
    }

    $node;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3