[Bio] / FigKernelScripts / FFB3_update_FIGfams.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB3_update_FIGfams.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Tue Jul 9 15:50:33 2013 UTC (6 years, 4 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.1: +1 -0 lines
Favor non-blank functions in FFB3_update_FIGfams::function_of_family
Add new script FFB3_cleanup_families (modification of get_rid_of_small_or_too_many_per_FF.pl to
use the cached data in the figfam build directory)
Run the family cleanup after computing the updated figfams in FFB3_build_updated_FF.

########################################################################
# 
# FIGfams are designed to automatically update in response to changes in
# the collection of subsystems.  There should be no need to do anything
# but work on decision procedures.  The basic operations implemented in
# this code are as follows:
# 
# 1. When a FIGfam is found to contain entries from subsystems that have
#    different functions (not counting comments), the FIGfam must be
#    split.  
# 
# 2. When distinct subsystems exist, each containing PEGs with identical
#    functions and they are globally similar, the FIGfams
#    must be merged.  As of Nov 1, 2009, we assume that pegs in subsystems
#    with identical functions are globally similar.  That is, curators
#    are expected to distinguish roles that are not homologous.
# 
# 3. When new PEGs are found to be in subsystems, but not in the
#    appropriate FIGfam, they need to be added.
# 
# 4. When new PEGs are found to be in subsystems, but there is no
#    appropriate FIGfam, a new FIGfam must be created
# 
# 
###############################

use strict;
use FIG;
use Carp;
use FFB3;
use Getopt::Long;
use Data::Dumper;

my $usage = "usage: FFB3_update_FIGfams [-t tmpdir] FFdir SubsysFamilies NewFamilies2c NewFunctions [OldFF]";

my $ss_file;
my $override_file;
my $tmpdir = "$FIG_Config::temp/update.FIGfams.$$";

my $rc = GetOptions("tmpdir=s" => \$tmpdir);


((@ARGV == 4 || @ARGV == 5) && $rc) or die "$usage\n";

my $ff_dir = shift @ARGV;
my $ss_file = shift @ARGV;
my $new2c    = shift @ARGV;
my $new_func = shift @ARGV;
my $oldFF = shift @ARGV;

my $fig = FIG->new();
my $ffb3 = FFB3->new($ff_dir, $fig);

&FIG::verify_dir($tmpdir);
# mkdir($tmpdir,0777) || die "could not make $tmpdir";

system "date; echo before";
my($peg_to_fam,$members_in_fam,$next_famP) = &existing_families($oldFF,$fig);
my $peg_to_ss_fam                          = &get_current_subsys_based_families($fig,$tmpdir,$ss_file);

system "date; echo after subsys";

my $n = keys(%$peg_to_fam);      print STDERR "$n pegs in existing families\n";
$n    = keys(%$members_in_fam);  print STDERR "$n existing families\n";
$n    = keys(%$peg_to_ss_fam);   print STDERR "$n pegs in ss_fams\n";

my $state = [$fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP];

&delete_if_necessary($state);
system "date; echo after delete";
&split_if_necessary($state);
system "date; echo after split";
&add_new_if_necessary($state);
system "date; echo after add";
&merge_if_possible($state);
system "date; echo after merge";

&dump_families_2c($fig,$peg_to_fam,$new2c);
#
# Do the md5 dump later on after we've loaded the memcache with
# sequence data.
#&dump_md5_figfams($fig,$new2c);
&dump_family_functions2($fig,$new2c,$new_func);

sub dump_families_2c {
    my($fig,$peg_to_fam,$new2c) = @_;

    open(FAM2C,"| sort -S 1G > $new2c") || die "could not open $new2c";
    my($peg,$fam);
    my $prev_fam ="";
    my $fam_pegs=[];
    while (($peg,$fam) = each(%$peg_to_fam))
    {
	#print FAM2C "$fam\t$peg\n";
	if ($prev_fam && $fam ne $prev_fam)
	{
	    if (scalar @$fam_pegs > 1)
	    {
		print FAM2C join ("", @$fam_pegs);
		$fam_pegs=[];
	    }
	}
	push @$fam_pegs, "$fam\t$peg\n";
	$prev_fam = $fam;

	# change the role of the peg and subsystems if the figfam id in the function does not match the new figfam id
#	my $role = $fig->function_of($peg);
#	if ($role =~ /^(FIG\d+)(:.*)/)
#	{
#	    my $ff = $1;
#	    my $real_func = $2;
#	    if ($ff ne $fam)
#	    {
#		my $newname = $fam . "$real_func";
#		my $seeduser = "FIGfam_update";    ###### what user should I use here? should I just put FIGfam_update?
#		#$fig->change_funcrole($role,$newname,$seeduser);
#		$fig->assign_function($peg,$seeduser,$newname);
#		print STDERR "FUNCLOG\t$peg\t$seeduser\t$newname\n";
#		#print STDERR "changing $ff: funcrole $role to $newname\n";
#	    }
#	}
#	elsif ($role =~ /^(FIG\d+)$/)
#	{
#	    my $newname = "hypothetical protein";
#	    my $seeduser = "FIGfam_update";    ###### what user should I use here? should I just put FIGfam_update?
#	    $fig->change_funcrole($role,$newname,$seeduser);
#	    print STDERR "FUNCLOG\t$role\t$seeduser\t$newname\n";
#	    #print STDERR "changing funcrole $role to $newname\n";
#	}
    }

    if  (scalar @$fam_pegs > 1)
    {   
	print FAM2C join ("", @$fam_pegs);
	$fam_pegs=[];
    }

    close(FAM2C);
}

sub dump_md5_figfams {
    my ($fig,$new2c) = @_;

    my @paths = split(/\//, $new2c);
    pop @paths;
    my $dir = join("/", @paths);
    if (! $dir) { $dir = "." }

    if (-e "$dir/md5.figfams") { system "rm $dir/md5.figfams*" }
    open(OUT,">$dir/md5.figfams") || die "could not open $dir/md5.figfams";

    my $md5Hash = {};
    open(TMP,"<$new2c")
        || die "could not open $new2c";
    while (defined($_ = <TMP>))
    {
        if ($_ =~ /^(FIG\d+)\t(\S+)/)
        {
            my $fam  = $1;
            my $peg  = $2;
            my $md5 = $fig->md5_of_peg($peg);
	    next if !defined($md5);
            push (@{$md5Hash->{$md5}}, $fam);
        }
    }
    close(TMP);

    foreach my $md5 (sort keys %{$md5Hash}){
        my %saw;
        @saw{@{$md5Hash->{$md5}}} = ();
        my @array = sort keys %saw;  # remove sort if undesired

        print OUT join(",", @array) . "\t$md5\n";
    }
    close OUT;

}

sub dump_family_functions2 {
    my ($fig,$new2c,$new_func) = @_;

    my $family_names ={};
    open(FAMFUNC,">$new_func") || die "could not open $new_func";
    open(TMP,"<$new2c") || die "could not open $new2c";
    my $prev_fam;
    my $fam_pegs=[];
    my $fam_members={};
    my ($fam, $peg);

    while (defined($_ = <TMP>))
    {
	if ($_ =~ /^(FIG\d+)\t(\S+)/)
        {
            $fam  = $1;
            $peg  = $2;

	    if ($prev_fam && $fam ne $prev_fam)
	    {
		$fam_members->{$fam} = $fam_pegs;
		my $func = &function_of_family($fig,$fam,$fam_members);
		push (@{$family_names->{$func}}, $prev_fam);

		# clean out the fam_pegs array
		$fam_pegs=[];
            }
	    push @$fam_pegs, $peg;
	    $prev_fam = $fam;
	}
    }
    close TMP;

    if (scalar @$fam_pegs > 1)
    {
	$fam_members->{$prev_fam} = $fam_pegs;
	my $func = &function_of_family($fig,$prev_fam,$fam_members);
	push (@{$family_names->{$func}}, $prev_fam);
    }
    
    # count the number of times the function occurs, so we can disambiguate FIGfams of same function name
    my $new_fam_funcs={};
    foreach my $func (sort {$family_names->{$b} <=> $family_names->{$a}} keys %$family_names)
    {
#
# Do not introduce these names. It messes up the metabolic reconstructions.
# Ross / Bob Jun 8 2010.
#
#
#         if (scalar @{$family_names->{$func}} > 1 && $func !~ /not subsystem-based/ && $func !~ /^FIG\d\d\d\d\d\d/)
#         {
#             foreach my $fam (@{$family_names->{$func}})
#             {
#                 $new_fam_funcs->{$fam} = "$fam: $func";
#             }
#         }
#         else
        {
            foreach my $fam (@{$family_names->{$func}})
            {
                $new_fam_funcs->{$fam} = $func;
            }
        }
    }

    foreach my $fam (sort {$a cmp $b} keys %$new_fam_funcs)
    {
        print FAMFUNC "$fam\t".$new_fam_funcs->{$fam}."\n";
        #print FAMFUNC "$fam\t$func\n";
        #print FAMFUNC "$fam\t$func\t$qty\n";
    }
    close(FAMFUNC);
    
}

sub dump_family_functions {
    my($fig,$members_in_fam,$new_func) = @_;

    my $family_names={};
    open(FAMFUNC,">$new_func") || die "could not open $new_func";
    foreach my $fam (sort keys(%$members_in_fam))
    {
	my $func = &function_of_family($fig,$fam,$members_in_fam);
	push (@{$family_names->{$func}}, $fam);
    }

    # count the number of times the function occurs, so we can disambiguate FIGfams of same function name
    my $new_fam_funcs={};
    foreach my $func (sort {$family_names->{$b} <=> $family_names->{$a}} keys %$family_names)
    {
#
# Do not introduce these names. It messes up the metabolic reconstructions.
# Ross / Bob Jun 8 2010.
#
# 	if (scalar @{$family_names->{$func}} > 1 && $func !~ /not subsystem-based/ && $func !~ /^FIG\d\d\d\d\d\d/)
# 	{
# 	    foreach my $fam (@{$family_names->{$func}})
# 	    {
# 		$new_fam_funcs->{$fam} = "$fam: $func";
# 	    }
# 	}
# 	else
	{
	    foreach my $fam (@{$family_names->{$func}})
	    {
		$new_fam_funcs->{$fam} = $func;
	    }
	}
    }

    foreach my $fam (sort {$a cmp $b} keys %$new_fam_funcs)
    {
	print FAMFUNC "$fam\t".$new_fam_funcs->{$fam}."\n";
	#print FAMFUNC "$fam\t$func\n";
	#print FAMFUNC "$fam\t$func\t$qty\n";
    }
    close(FAMFUNC);
}

# Nov 10: 2010 - RAO 
#
# I changed this to return 'FIGxxxxxx: hypothetical protein'
# for hypotheticals. 
# 

sub function_of_family {
    my($fig,$fam,$members_in_fam) = @_;
    my($i,$func,%poss);
    my %poss_ss;

    my $set = $members_in_fam->{$fam};
    $set || confess "bad fam: $fam";

    my $peg_ss = $fig->pegs_to_usable_subsystems($set);

    my $ffunc = "";
    for ($i=0; ($i < @$set); $i++)
    {
	my $peg = $set->[$i];
	$func = $ffb3->function_of_filtered($peg);
	next if (&ignore_function($func));
	#next if ($fig->hypo($func));

	my $subs = $peg_ss->{$peg};
#	@subs = grep { $fig->usable_subsystem($_) } $fig->peg_to_subsystems($peg);
	if (ref($subs) && @$subs > 0)
	{
	    $poss_ss{$func}++;
	}
	$poss{$func}++;
    }

    my $funcH = (keys(%poss_ss) > 0) ? \%poss_ss : \%poss;
    my @tmp = sort { $funcH->{$b} <=> $funcH->{$a} } keys(%$funcH);
    @tmp = grep { $_ ne '' } @tmp;

    if ((@tmp > 0) && ($tmp[0] ne "hypothetical protein"))
    {
	return $tmp[0];
    }
    else
    {
	return "$fam: hypothetical protein";
    }
}

sub ignore_function {
    my $x = (@_ == 1) ? $_[0] : $_[1];

    if (! $x)                             { return 1 }
    if ( ($x =~ /predicted by Psort/) )
    {
        return 1;
    }
    return 0;
}

sub get_current_subsys_based_families {
    my($fig,$tmpdir,$ss_file) = @_;

    my $peg_to_ss_fam = {};
    
    open(SSFAMS, "-|", "grep", "-v", "^#", $ss_file) || die "could not open $ss_file: $!";

    my $last = <SSFAMS>;
    my $ss_count=0;
    while ($last)
    {
	if ($last =~ /^(\S+)\t(fig\|\d+\.\d+\.peg\.\d+)/)
	{
	    my $fam = $1;
	    my $set = [];
	    print STDERR "SS processing $fam\n" if ($ss_count%100 == 0);
	    $ss_count++;
	    while ($last && ($last =~ /^(\S+)\t(fig\|\d+\.\d+\.peg\.\d+)/) && ($1 eq $fam))
	    {
		push(@$set,$2);
		$last = <SSFAMS>;
	    }
	    &process_ss_fam($fig,$set,$peg_to_ss_fam);
	}
	else
	{
	    print STDERR "BAD SUBSYS-BASED: $last";
	    $last = <SSFAMS>;
	}
    }
    close(SSFAMS);
    return $peg_to_ss_fam;
}

sub existing_families {
    my ($oldFF,$fig) = @_;

    my $dbh = $fig->db_handle->{_dbh};

    my $peg_to_fam = {};
    my $members_in_fam = {};
    my $max_fam = "FIG00000000";

    my $ffdata = &FIG::get_figfams_data($oldFF);
    open(FAM2C, "<", "$ffdata/families.2c") || die "could not access $ffdata/families.2c";

    my $last = <FAM2C>;
    while ($last && ($last =~ /^(FIG\d+)\t(fig\|\d+\.\d+\.peg\.\d+)/))
    {
	my $fam = $1;
	my $set = [];
	while ($last && ($last =~ /^(FIG\d+)\t(fig\|\d+\.\d+\.peg\.\d+)/) && ($1 eq $fam))
	{
	    my $peg = $2;
	    if (! $fig->is_deleted_fid($peg))
	    {
		push(@$set,$peg);
		$peg_to_fam->{$peg} = $fam;
	    }
	    $last = <FAM2C>;
	}
	if (scalar @$set > 1)
	{
	    $members_in_fam->{$fam} = $set;
	}
	else
	{
	    foreach my $peg (@$set)
	    {
		delete $peg_to_fam->{$peg};
	    }
	}
    
	if ($fam gt $max_fam) { $max_fam = $fam }
    }
    $max_fam++;
    return ($peg_to_fam,$members_in_fam,\$max_fam);
}
	
sub process_ss_fam {
    my($fig,$set,$peg_to_ss_fam) = @_;

    foreach my $peg (@$set)
    {
	$peg_to_ss_fam->{$peg} = $set;
    }
}

# If multiple PEGs from a single genome are in a family, and if
# the family is not labeled as "not subsystem-based",
# and if more than one function appear to be represented for
# a genome, delete all PEGs from the family (for the genome) that
# do not have the family function.  This is a hack that Alex put in.
#

sub delete_if_necessary {
  my($state) = @_;
  my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;

  my $i = 0;
  my $n = keys %$members_in_fam;
  foreach my $fam (sort keys(%$members_in_fam)) 
  {
      if (++$i % 1000 == 0)
      {
	  print STDERR "Processing deletions for family $fam ($i of $n)\n";
      }
      my $fam_func =  &function_of_family($fig,$fam,$members_in_fam);
      next if ($fam_func =~ /not subsystem-based/);
      my $orgs_in_fam = {};
      my $set = $members_in_fam->{$fam};
      $set || confess "bad fam: $fam";

      # group pegs of same genome
      foreach my $peg (@$set)
      {
	  push @{$orgs_in_fam->{$fig->genome_of($peg)}}, $peg;
      }

      # delete figfam if the function of pegs in the same group have different functions and in a subsystem
      foreach my $group (keys %$orgs_in_fam)
      {
	  my $functionHash = {};
	  foreach my $peg (@{$orgs_in_fam->{$group}})
	  {
	      my $func = $ffb3->function_of_filtered($peg);
	      $functionHash->{$func} = 1;
	  }

	  if (scalar (keys %$functionHash) > 1) 
	  {
	      foreach my $peg (@{$orgs_in_fam->{$group}}) 
	      {
		  my $func = $ffb3->function_of_filtered($peg);
		  if (index($fam_func,$func) < 0)
		  {
		      # print STDERR "deleting $peg in $fam func=$func\n";
		      &delete_peg_from_family($peg,$fam,$peg_to_fam,$members_in_fam);
		  }
	      }
	  }
      }
  }
}

sub delete_peg_from_family {
    my($peg,$fam,$peg_to_fam,$members_in_fam) = @_;

    delete $peg_to_fam->{$peg};
    my $i;
    my $set = $members_in_fam->{$fam};
    if ($set)
    {
	for ($i=0; ($i < @$set) && ($peg ne $set->[$i]); $i++) {}
	if ($i < @$set)
	{
	    splice(@$set,$i,1);
	    if (@$set < 2)
	    {
		foreach $_ (@$set)
		{
		    delete $peg_to_fam->{$_};
		}
		delete $members_in_fam->{$fam};
	    }
	}
	else
	{
	    confess "$peg cannot be deleted from $fam";
	}
    }
}



sub split_if_necessary {
  my($state) = @_;
  my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;
  
  my $i = 0;
  my $n = keys %$members_in_fam;
  foreach my $fam (sort keys(%$members_in_fam))
  {
      if (++$i % 1000 == 0)
      {
	  print STDERR "Processing splits for family $fam ($i of $n)\n";
      }
      my %ffunc;
      my $set = $members_in_fam->{$fam};
      $set || confess "bad fam: $fam";

      my $peg_ss = $fig->pegs_to_usable_subsystems($set);

      foreach my $peg (@$set)
      {
	  #my @subs = grep { $fig->usable_subsystem($_) } $fig->peg_to_subsystems($peg);
	  my $subs = $peg_ss->{$peg};
	  if (ref($subs) && @$subs > 0) 
	  {
	      my $func = $ffb3->function_of_filtered($peg);
	      $ffunc{$func}++;
	  }
      }
      
      if (keys(%ffunc) > 1)
      {
	  &split_fam($state,$fam,[sort { $ffunc{$b} <=> $ffunc{$a} } keys(%ffunc)]);
      }
    }
}

sub split_fam {
    my($state,$fam,$funcs) = @_;
    my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;

    my $pegs = $members_in_fam->{$fam};
    my %peg_funcs = map { $_ => $ffb3->function_of_filtered($_) } @$pegs;
    my %ss_func = map { $_ => 1 } @$funcs;

    print STDERR "splitting $fam\n";
    my $i;
    for ($i=0; ($i < @$funcs); $i++)
    {
	my $func = $funcs->[$i];
	my @pegs_with_func = grep { $peg_funcs{$_} eq $func } @$pegs;
	if (@pegs_with_func > 1)
	{
	    my $new_fam;
	    if ($i > 0)
	    {
		$new_fam = $$next_famP++;
		print STDERR "\tmaking $new_fam\n";
		$members_in_fam->{$new_fam} = [@pegs_with_func];
		foreach my $peg (@pegs_with_func)
		{
		    &delete_peg_from_family($peg,$fam,$peg_to_fam,$members_in_fam);
		    $peg_to_fam->{$peg} = $new_fam;
		}
	    }
	}
	else
	{
	    foreach my $peg1 (@pegs_with_func)
	    {
		&delete_peg_from_family($peg1,$fam,$peg_to_fam,$members_in_fam);
	    }
	}
    }

    # Now delete any remaining PEGs that were not in subsystems and had different functions; 
    # they cannot be placed
    foreach my $peg (@$pegs)
    {
	if (! $ss_func{$peg_funcs{$peg}})
	{
	    &delete_peg_from_family($peg,$fam,$peg_to_fam,$members_in_fam);
	}
    }
}

sub add_new_if_necessary {
    my($state) = @_;
    my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;

    my($peg,$peg1,$proposed,$i,$fam);
    foreach $peg (keys(%$peg_to_ss_fam))
    {
	if (! $peg_to_fam->{$peg})
	{
	    $proposed = $peg_to_ss_fam->{$peg};
	    for ($i=0; ($i < @$proposed) && (! $peg_to_fam->{$proposed->[$i]}); $i++) {}
	    if ($i == @$proposed)
	    {
		$fam = $$next_famP++;
		print STDERR "adding $fam\n";
		$members_in_fam->{$fam} = $proposed;
		foreach $peg1 (@$proposed)
		{
		    $peg_to_fam->{$peg1} = $fam;
		}
	    }
	    else
	    {
		$fam = $peg_to_fam->{$proposed->[$i]};
		foreach $peg1 (@$proposed)
		{
		    if (! $peg_to_fam->{$peg1})
		    {
			$peg_to_fam->{$peg1} = $fam;
			push(@{$members_in_fam->{$fam}},$peg1);
		    }
		}
	    }
	}
    }
}

sub merge_if_possible {
    my($state) = @_;
    my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;

    my(%seen,$peg,$peg1,$fam1,$proposed,%fams,$i);

    foreach $peg (keys(%$peg_to_ss_fam))
    {
	if ((! $seen{$peg}) && ($proposed = $peg_to_ss_fam->{$peg}))
	{
	    undef %fams;
	    my @joining_pegs;
	    foreach $peg1 (@$proposed)
	    {
		$seen{$peg1} = 1;
		if ($fam1 = $peg_to_fam->{$peg1})
		{
		    $fams{$fam1} = 1;
		    push(@joining_pegs, [$peg1, $fam1]);
		}
	    }

	    my @fams = sort keys(%fams);
	    if (@fams > 1)
	    {
		&merge_fams_if_possible($state,\@fams);
	    }
	}
    }
}

sub merge_fams_if_possible {
    my($state, $fams_to_merge) = @_;
    my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;

    my @work = @$fams_to_merge;
    while (my $seed = shift @work)
    {
	my $seed_func = &func_of_fam($state, $seed);
	if (!defined($seed_func))
	{
	    print STDERR "not merging $seed: no function\n";
	    next;
	}
	    
	my(@to_merge, @not_to_merge);

	push(@to_merge, $seed);
	foreach my $fam_to_try (@work)
	{
	    my $fam_func = &func_of_fam($state, $fam_to_try);
	    if ($fam_func eq $seed_func)
	    {
		push(@to_merge,$fam_to_try);
	    }
	    else
	    {
		print STDERR "not merging $seed $fam_to_try: '$seed_func' != '$fam_func'\n";
		push(@not_to_merge,$fam_to_try);
	    }
	}

	if (@to_merge > 1)
	{
	    print STDERR "merging based on '$seed_func':\n";
	    print STDERR "\t$_\n" for @to_merge;
	    &merge_fams($state, \@to_merge);
	}
	else
	{
	    print STDERR "not merging singleton $seed func='$seed_func'\n";
	}
	@work = @not_to_merge;
    }
}

sub func_of_fam
{
    my($state, $fam) = @_;
    my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;

    my %ffunc;
    my $set = $members_in_fam->{$fam};
    ref($set)  || confess "bad fam: $fam " . Dumper($set);

    my $peg_ss = $fig->pegs_to_usable_subsystems($set);

    foreach my $peg (keys %$peg_ss)
    {
	my $subs = $peg_ss->{$peg};
	if (ref($subs) && @$subs > 0) 
	{
	    my $func = $ffb3->function_of_filtered($peg);
	    $ffunc{$func}++;
	}
    }
    my @funcs = keys %ffunc;

    if (@funcs == 0)
    {
	foreach my $peg (@$set)
	{
	    my $func = $ffb3->function_of_filtered($peg);
	    if ($func)
	    {
		$ffunc{$func}++;
	    }
	}
	my @sorted = sort { $ffunc{$b} <=> $ffunc{$a} } keys %ffunc;
	if (@sorted == 0)
	{
	    return undef;
	}
	elsif (@sorted == 1)
	{
	    return $sorted[0];
	}
	elsif ($sorted[0] >= (2 * $sorted[1]))
	{
	    return $sorted[0];
	}
	else
	{
	    return undef;
	}
    }
    elsif (@funcs == 1)
    {
	return $funcs[0];
    }
    else
    {
	return undef;
    }
}

sub merge_fams {
    my($state, $fams_to_merge) = @_;
    my($fig,$tmpdir,$peg_to_fam,$members_in_fam,$peg_to_ss_fam,$next_famP) = @$state;

    my $fam = $fams_to_merge->[0];
    my $members = $members_in_fam->{$fam};
    for (my $i=1; ($i < @$fams_to_merge); $i++)
    {
	my $old = $members_in_fam->{$fams_to_merge->[$i]};
	foreach my $peg1 (@$old)
	{
	    push(@$members,$peg1);
	    $peg_to_fam->{$peg1} = $fam;
	}
	delete $members_in_fam->{$fams_to_merge->[$i]};
    }
}    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3