[Bio] / FigKernelScripts / build_excel_gene_table.pl Repository:
ViewVC logotype

View of /FigKernelScripts/build_excel_gene_table.pl

Parent Directory | Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon Dec 5 18:56:37 2005 UTC (13 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.2: +17 -0 lines
Add license words.

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

# Command line: a single argument naming an existing organism directory
# whose path ends in a genome id of the form <digits>.<digits>.
$usage = "usage: build_excel_gene_table OrgDir > tab.delimited";

$dir = shift @ARGV;
unless ($dir && (-d $dir) && ($dir =~ /\d+\.\d+$/))
{
    # Missing argument, not a directory, or no trailing genome id.
    die $usage;
}

# Build @tbls: one [id, contig, beg, end, extra] record for every feature
# line found in any $dir/Features/<type>/tbl file, ordered by contig-name
# prefix, then trailing contig number, then start coordinate.
#
# The tbl files are opened directly instead of running
# `cat $dir/Features/*/tbl`, so a directory name containing spaces or shell
# metacharacters is never handed to a shell.

# Split a contig name into (prefix, trailing number) so that names sharing
# a prefix compare numerically ("contig2" before "contig10").  A name with
# no trailing digits yields (name, 0); 0 is what an empty key compares as
# under <=>.
sub _contig_sort_key
{
    my ($contig) = @_;
    return ($contig =~ /^(\S+?)(\d+)$/) ? ($1, $2) : ($contig, 0);
}

# Parse one tbl line of the form
#     fig|ID <TAB> CONTIG_BEG_END <whitespace> [EXTRA]
# Returns an array ref [id, contig, beg, end, extra] (extra is undef when
# nothing follows the location) or an empty list when the line does not
# have that shape.
sub _parse_tbl_line
{
    my ($line) = @_;

    # Decide on the result of the match itself: a failed match does not
    # reset $1, so testing $1 could see the capture of an earlier line.
    return ($line =~ /^(fig\|\S+)\t(\S+)_(\d+)_(\d+)\s+(\S.*\S)?/)
        ? [$1, $2, $3, $4, $5]
        : ();
}

# Return the parsed records of every $org_dir/Features/*/tbl file.  The
# feature-type directories are visited in sorted order and hidden entries
# are skipped, as a shell glob would do.  Unreadable files are reported on
# STDERR and skipped rather than aborting the run.
sub _read_tbl_records
{
    my ($org_dir) = @_;
    my $features_dir = "$org_dir/Features";
    my @records;

    my $dh;
    unless (opendir($dh, $features_dir))
    {
        warn "could not read $features_dir: $!\n";
        return @records;
    }
    my @types = sort grep { !/^\./ } readdir($dh);
    closedir($dh);

    foreach my $type (@types)
    {
        my $tbl = "$features_dir/$type/tbl";
        next unless -e $tbl;

        my $fh;
        unless (open($fh, '<', $tbl))
        {
            warn "could not open $tbl: $!\n";
            next;
        }
        while (defined(my $line = <$fh>))
        {
            push(@records, _parse_tbl_line($line));
        }
        close($fh);
    }
    return @records;
}

# Return all feature records of $org_dir in output order.  The sort key of
# each record is computed once, not once per comparison.
sub _load_sorted_tbls
{
    my ($org_dir) = @_;

    return map  { $_->[2] }
           sort {    ($a->[0] cmp $b->[0])
                  or ($a->[1] <=> $b->[1])
                  or ($a->[2][2] <=> $b->[2][2]) }
           map  { [ _contig_sort_key($_->[1]), $_ ] }
           _read_tbl_records($org_dir);
}

@tbls = _load_sorted_tbls($dir);

# Fill %func_of from $dir/assigned_functions.  Each line holds
#     id <TAB> function [<TAB> confidence]
# and is stored as $func_of{id} = "function<TAB>confidence", with an empty
# confidence when the third column is absent.
#
# The file is opened directly (no `cat` through a shell), and a missing or
# unreadable file is reported on STDERR while the run continues with no
# assignments.

# Parse one assigned_functions line.  Returns (id, "function\tconfidence"),
# or an empty list for a line without an id.
sub _parse_assignment_line
{
    my ($line) = @_;

    # chomp, not chop: a final line with no trailing newline must not lose
    # its last character.
    chomp $line;

    my ($fid, $function, $confidence) = split(/\t/, $line);
    return () unless defined($fid) && length($fid);

    # Test definedness rather than truth so a literal "0" is preserved.
    $function   = "" unless defined $function;
    $confidence = "" unless defined $confidence;
    return ($fid, "$function\t$confidence");
}

if (open(my $assigned_fh, '<', "$dir/assigned_functions"))
{
    while (defined(my $line = <$assigned_fh>))
    {
        my ($fid, $value) = _parse_assignment_line($line);
        $func_of{$fid} = $value if defined $fid;
    }
    close($assigned_fh);
}
else
{
    warn "could not open $dir/assigned_functions: $!\n";
}

# Fill %tag_of with a short "tag" for every sequence in the peg and rna
# FASTA files: the last 20 characters of the sequence, or the whole
# sequence when it is 20 characters or shorter.
#
# $dir/Features/peg/fasta is required (the script dies without it);
# $dir/Features/rna/fasta is optional and silently skipped when absent.

# Read FASTA records from $fasta_fh and store each record's tail tag in
# the hash referenced by $tags, keyed by the first word of the header line.
sub _record_tail_tags
{
    my ($fasta_fh, $tags) = @_;

    # Read one whole FASTA record per <> call.  local confines the changed
    # record separator to this sub, so no other read in the script sees it.
    local $/ = "\n>";

    while (defined(my $record = <$fasta_fh>))
    {
        chomp $record;    # drops the "\n>" that starts the next record

        # The first record still has its leading ">"; later ones do not.
        next unless $record =~ /^>?(\S+)[^\n]*\n(.*)/s;
        my ($fid, $seq) = ($1, $2);

        $seq =~ s/\s//g;  # join the sequence lines
        $tags->{$fid} = (length($seq) > 20) ? substr($seq, -20) : $seq;
    }
    return;
}

open(my $peg_fasta_fh, '<', "$dir/Features/peg/fasta")
    or die "could not open $dir/Features/peg/fasta: $!";
_record_tail_tags($peg_fasta_fh, \%tag_of);
close($peg_fasta_fh);

if (open(my $rna_fasta_fh, '<', "$dir/Features/rna/fasta"))
{
    _record_tail_tags($rna_fasta_fh, \%tag_of);
    close($rna_fasta_fh);
}

# Print one tab-separated line per feature in @tbls:
#     type, number, contig, beg, end, function, confidence, tag
# For peg features the function and confidence columns come from %func_of;
# for every other type the function column is the extra text of the tbl
# line and the confidence column is empty.

# Format a single feature record as one output line.
#   $feature   - array ref [id, contig, beg, end, extra]
#   $functions - hash ref: id => "function\tconfidence"
#   $tags      - hash ref: id => sequence tail tag
# Returns the line, including its trailing newline.
sub _format_gene_row
{
    my ($feature, $functions, $tags) = @_;
    my ($fid, $contig, $beg, $end, $extra) = @$feature;

    # Take type and number from this match only.  An id that does not have
    # the fig|genome.type.number shape gets blank columns instead of
    # whatever an earlier match left in $1 and $2.
    my ($type, $n) = ($fid =~ /^fig\|\d+\.\d+\.([^.]+)\.(\d+)$/);
    $type = "" unless defined $type;
    $n    = "" unless defined $n;

    my $func;
    if ($type eq "peg")
    {
        $func = $functions->{$fid};
        $func = "\t" unless defined $func;   # empty function and confidence
    }
    else
    {
        $func = (defined($extra) ? $extra : "") . "\t";
    }

    my $tag = $tags->{$fid};
    $tag = "" unless defined $tag;

    return join("\t", ($type, $n, $contig, $beg, $end, $func, $tag)) . "\n";
}

foreach my $feature (@tbls)
{
    print _format_gene_row($feature, \%func_of, \%tag_of);
}
    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3