[Bio] / FigKernelScripts / p3x-write-subsystem-qa.pl Repository:
ViewVC logotype

View of /FigKernelScripts/p3x-write-subsystem-qa.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri Sep 8 21:50:24 2017 UTC (2 years, 2 months ago) by olson
Branch: MAIN
CVS Tags: HEAD
New module for patric subsystem QA

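#
# Given a coreseed / patric mapping file of the form
# core genome id \t core genome name \t patric genome id \t patric genome name
#
# a subsystem directory, and a data directory holding one JSON file per
# patric genome id, emit a spreadsheet containing the projected and coreseed
# entries for that subsystem.
#
# Arguments: subsystem-dir data-dir mapfile output.xlsx
#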

use Excel::Writer::XLSX;
use MinimalSubsystem;
use strict;
use Data::Dumper;
use Getopt::Long::Descriptive;
use File::Slurp;
use JSON::XS;

# Parse the command line.  The only option is --help / -h; exactly four
# positional arguments must follow:
#
#   subsystem-dir  data-dir  mapfile  output.xlsx
#
my ($opt, $usage) = describe_options(
    "%c %o subsystem-dir data-dir mapfile output.xlsx",
    [ "help|h" => "Show this help message" ],
);

if ($opt->help)
{
    print($usage->text);
    exit 0;
}

die($usage->text) unless @ARGV == 4;

# Consume the four positional arguments, leaving @ARGV empty.
my ($ss_dir, $data_dir, $map_file, $xls_file) = splice(@ARGV, 0, 4);

# Lookup tables built from the coreseed / patric mapping file.
my @core_genomes;	# core genome ids, in file order
my @patric_genomes;	# patric genome ids, in file order
my %core_to_patric;	# core genome id   => patric genome id
my %core_gname;		# core genome id   => core genome name
my %patric_gname;	# patric genome id => patric genome name

#
# Read the mapping file. Each line has the form
#   core genome id \t core genome name \t patric genome id \t patric genome name
#
# A lexical filehandle and a named line variable are used so that no
# package-global handle or $_ is left behind for the rest of the script.
#
open(my $map_fh, "<", $map_file) or die "Cannot read $map_file: $!";
while (my $line = <$map_fh>)
{
    # Strip the line terminator, tolerating CRLF-terminated files so the
    # last field does not keep a stray carriage return.
    $line =~ s/\r?\n\z//;

    # Blank lines (e.g. a trailing empty line) carry no mapping.
    next if $line =~ /^\s*$/;

    my($core, $coren, $pat, $patn) = split(/\t/, $line);

    # Both ids are required: the core id selects the subsystem row and the
    # patric id names the data file to load. Skip rows lacking either.
    unless (defined($core) && $core ne '' && defined($pat) && $pat ne '')
    {
	warn "Skipping malformed line $. of $map_file\n";
	next;
    }

    push(@core_genomes, $core);
    push(@patric_genomes, $pat);
    $core_to_patric{$core} = $pat;
    $core_gname{$core} = $coren;
    $patric_gname{$pat} = $patn;
}
close($map_fh);

#
# Create the output workbook. Excel::Writer::XLSX->new returns undef when
# the file cannot be created, so fail here with a clear message instead of
# dying later on a method call against an undefined value.
#
my $workbook = Excel::Writer::XLSX->new($xls_file);
defined($workbook) or die "Cannot create $xls_file: $!";
my $worksheet = $workbook->add_worksheet();

# Format for coreseed rows: yellow background, bordered, wrapped text.
my $core_format = $workbook->add_format();
$core_format->set_bg_color( 'yellow' );
$core_format->set_border(1);
$core_format->set_text_wrap(1);

# Format for patric rows: bordered, no text wrapping.
my $pat_format = $workbook->add_format();
$pat_format->set_border(1);
$pat_format->set_text_wrap(0);

# Format for patric cells that disagree with the coreseed row: as the
# patric format, but with a red background.
my $mismatch_format = $workbook->add_format();
$mismatch_format->set_border(1);
$mismatch_format->set_text_wrap(0);
$mismatch_format->set_bg_color('red');

# Current write position in the worksheet (zero-based).
my $col = my $row = 0;

my $ss = MinimalSubsystem->new_from_dir($ss_dir);

# Title row: the subsystem name with underscores shown as spaces.
my $ss_name = $ss->get_name();
$ss_name =~ s/_/ /g;
$worksheet->write($row, $col, $ss_name);
$row++;

my @roles = $ss->get_roles();

# Header row: three fixed columns followed by one column per role,
# labelled with the role's abbreviation.
$worksheet->write($row, $col++, "Genome");
$worksheet->write($row, $col++, "Name");
$worksheet->write($row, $col++, "VC");
$worksheet->write($row, $col++, $ss->get_abbr_for_role($_)) foreach @roles;

$row++; $col = 0;

#
# For every core genome that the subsystem has an index for, emit two rows:
#
#   1. the coreseed row (core format): genome id, name, variant code, one
#      cell per role listing the pegs in that cell, then a 0/1 string
#      recording which roles have at least one peg;
#   2. the patric row for the mapped patric genome, read from the JSON file
#      $data_dir/<patric genome id>. Role cells whose presence/absence
#      differs from the coreseed row are written in the mismatch format.
#
for my $core (@core_genomes)
{
    my $vc = $ss->get_variant_code_for_genome($core);
    my $gidx = $ss->get_genome_index($core);

    # Skip genomes for which the subsystem reports no index.
    next unless defined($gidx);

    $worksheet->write_string($row, $col++, $core, $core_format);
    $worksheet->write($row, $col++, $core_gname{$core}, $core_format);
    $worksheet->write_string($row, $col++, $vc, $core_format);

    # role => 1 if the core genome has any peg for the role, else 0.
    my %core_rval;

    my @pattern;
    for my $role (@roles)
    {
	my @pegs = $ss->get_pegs_from_cell($gidx, $role);

	# Shorten peg ids by dropping the "fig|" and "<genome>.peg." prefixes.
	# The genome id is quoted so that any regex metacharacters in it
	# (such as '.') match literally.
	s/^fig\|// foreach @pegs;
	s/^\Q$core\E\.peg\.// foreach @pegs;

	$worksheet->write($row, $col++, join(" ", @pegs), $core_format);

	my $pstr = @pegs ? 1 : 0;
	$core_rval{$role} = $pstr;

	push(@pattern, $pstr);
    }

    # write_string keeps the 0/1 pattern as text (leading zeros preserved).
    $worksheet->write_string($row, $col++, join("", @pattern), $core_format);

    $row++; $col = 0;

    my $pat = $core_to_patric{$core};

    # Load the per-genome JSON data through a lexical handle and close it
    # as soon as the contents have been decoded.
    my $json_path = "$data_dir/$pat";
    open(my $json_fh, "<", $json_path) or die "Cannot read $json_path: $!";
    my $dat = decode_json(scalar read_file($json_fh));
    close($json_fh);

    # The data is keyed by the subsystem's unmodified name.
    my $ssdat = $dat->{$ss->get_name()};

    $worksheet->write_string($row, $col++, $pat, $pat_format);
    $worksheet->write($row, $col++, $patric_gname{$pat}, $pat_format);

    # When the patric genome has no entry for this subsystem, only the id
    # and name cells are written.
    if ($ssdat)
    {
	# Entry layout: [ variant code, [ [ role, "peg,peg,..." ], ... ] ]
	my($xvc, $xroles) = @$ssdat;

	$worksheet->write_string($row, $col++, $xvc, $pat_format);

	my %xroles = map { @$_ } @$xroles;

	@pattern = ();
	for my $role (@roles)
	{
	    # A role missing from the patric data yields an empty peg list.
	    my $role_pegs = $xroles{$role};
	    my @pegs = defined($role_pegs) ? split(/,/, $role_pegs) : ();
	    s/^fig\|// foreach @pegs;
	    s/^\Q$pat\E\.peg\.// foreach @pegs;
	    my $pstr = @pegs ? 1 : 0;

	    # Highlight cells whose presence/absence differs from coreseed.
	    my $fmt = $pstr == $core_rval{$role} ? $pat_format : $mismatch_format;

	    $worksheet->write($row, $col++, join(" " , @pegs), $fmt);
	    push(@pattern, $pstr);
	}
	$worksheet->write_string($row, $col++, join("", @pattern), $pat_format);
    }

    $row++; $col = 0;
}

# Close the workbook explicitly so that the file is fully written and any
# error while finishing it is reported rather than lost at program exit.
$workbook->close() or die "Error closing $xls_file: $!";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3