[Bio] / FigKernelScripts / p3x-split-families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/p3x-split-families.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Nov 2 16:25:31 2016 UTC (3 years, 1 month ago) by olson
Branch: MAIN
CVS Tags: HEAD
P3 utility scripts.

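#
# Given a families file, split it into per-genus family files (one file per
# genus, written into the output directory). Also write a max-id file that
# records the largest GF number seen overall and the largest trailing
# family number seen for each genus.
#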

use strict;
use Getopt::Long::Descriptive;
use IO::Handle;

# Parse the command line.  The only option is --help; exactly three
# positional arguments (families file, output directory, max-id file)
# must remain once options have been processed.
my ($opt, $usage) = describe_options(
    "%c %o fam-file output-dir max-id-file",
    ["help|h" => "Show this help message"],
);

if ($opt->help)
{
    print($usage->text);
    exit 0;
}

unless (@ARGV == 3)
{
    die($usage->text);
}

# Take the three positional arguments off @ARGV, in order.
my ($fam_file, $output_dir, $max_id_file) = splice(@ARGV, 0, 3);

#
# Split the families file by genus.
#
# Every input line must match
#     GF<digits> ... <TAB> <genus> <TAB> <digits>
# i.e. the genus is the next-to-last tab-separated field and a number is
# the last one.  Each line is copied verbatim to $output_dir/<genus>.
# While reading we remember the largest GF number seen overall (stored
# under the key GLOBAL) and the largest trailing number seen per genus;
# these are written to $max_id_file as "name<TAB>max" lines, sorted by name.
#
# One output handle per genus stays open until the input is exhausted, so
# the number of distinct genera is limited by the process's open-file limit
# (open() dies with a clear message if that limit is hit).
#
# NOTE(review): a genus literally named "GLOBAL" would share its slot in
# %genus_max with the overall maximum -- confirm this cannot occur.
#
use warnings;

my %genus_to_fh;
my %genus_max;

-d $output_dir or die "Output directory $output_dir does not exist\n";
open(my $fam_fh, "<", $fam_file) or die "Cannot open families file $fam_file: $!\n";
open(my $max_fh, ">", $max_id_file) or die "Cannot write $max_id_file: $!\n";

while (my $line = <$fam_fh>)
{
    my($gid, $genus, $num) = $line =~ /^GF(\d+).*?\t([^\t]+)\t(\d+)$/;

    # A line that does not parse used to fall through with an undefined
    # genus and die later with a confusing "Cannot write <dir>/" error.
    # Fail right here, naming the offending line instead.
    defined($genus)
	or die "Unparseable line $. in families file $fam_file\n";

    # "|| 0" keeps the comparisons warning-free on first sight of a key
    # while preserving the rule that a key is only recorded once a value
    # greater than zero has been seen for it.
    $genus_max{GLOBAL} = $gid if $gid > ($genus_max{GLOBAL} || 0);
    $genus_max{$genus} = $num if $num > ($genus_max{$genus} || 0);

    # Open the per-genus output file lazily, the first time the genus
    # appears, and reuse the handle for all later lines.
    my $out = $genus_to_fh{$genus};
    if (!$out)
    {
	open($out, ">", "$output_dir/$genus") or die "Cannot write $output_dir/$genus: $!";
	$genus_to_fh{$genus} = $out;
    }
    print {$out} $line or die "Error writing genus file for $genus: $!\n";
}
close($fam_fh);

# Buffered write errors surface at close time; report them per genus but
# keep going so that every handle gets closed.
for my $genus (sort keys %genus_to_fh)
{
    my $out = $genus_to_fh{$genus};
    close($out) or warn "Error closing genus file for $genus: $!\n";
}

for my $name (sort keys %genus_max)
{
    print {$max_fh} "$name\t$genus_max{$name}\n"
	or die "Error writing $max_id_file: $!\n";
}
close($max_fh) or die "Error closing $max_id_file: $!\n";


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3