[Bio] / FigKernelScripts / add_structured_english.pl Repository:
ViewVC logotype

View of /FigKernelScripts/add_structured_english.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Tue Jun 24 15:15:48 2008 UTC (11 years, 4 months ago) by disz
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.3: +1 -1 lines
Added escape arg to structured English

use strict;
use CGI;    # no longer called directly here; import retained in case other code relies on it being loaded
use Data::Dumper;
use FIG;

# add_structured_english.pl
#
# Reads a tab-delimited feature table on STDIN and, for every row whose PEG id
# appears in the list file named by the first command-line argument, prints a
# "key=value" record (terminated by a line containing "//") on STDOUT.  Rows
# whose PEG id is not in the list are dumped to STDERR instead.
#
# Usage: add_structured_english.pl peg_list_file < table.tsv > records.txt

my $fig = FIG->new;

my $peg_file = $ARGV[0];
defined $peg_file
    or die "usage: $0 peg_list_file < tab_delimited_input\n";

# Set of PEG ids selected for output.
# NOTE(review): assumes each line of the list file is exactly one PEG id
# (e.g. "fig|169963.1.peg.1") - confirm against the real list file format.
my %pegs;
open(my $peg_fh, '<', $peg_file)
    or die "cannot open PEG list '$peg_file': $!\n";
while (my $id = <$peg_fh>) {
    chomp $id;                  # the key must not carry the line terminator
    next unless length $id;     # ignore blank lines
    $pegs{$id} = 1;
}
close($peg_fh);

# Sample input row (tab-separated; the kegg and subsystems columns are empty here):
#fig|169963.1.peg.1      NC_003210       318_1673        NP_463534.1     isu,icw(1),ff   dnaA    Chromosomal replication initiator protein dnaA  CDD:pfam00004,CDD:pfam00308,CDD:pfam01695,CDD:smart00382,GeneID:984365,InterPro:IPR001957,InterPro:IPR003593,NCBI_gi:16802049,Pfam:PF00308,SMART:Q8YAW2,SMART:SM00382,UniProtKB:Q8YAW2                  http://www.nmpdr.org/linkin.cgi?id=fig|169963.1.peg.1   MQSIED

while (my $line = <STDIN>) {
    # chomp, not chop: a final line without a newline must keep its last character.
    chomp $line;

    # Column order of the input table.  $kegg and $subsystems are parsed only
    # to keep the positions straight; neither is written to the output.
    my ($peg, $contig_refseq, $contig_refseq_coords, $refseq, $ev, $gene,
        $fixed_func, $dbxref, $kegg, $subsystems, $link, $seq)
        = split(/\t/, $line);

    # The description comes from FIG's function_of (presumably the current
    # SEED assignment - verify), cleaned up by fix_func, not from the
    # $fixed_func column of the input.
    my $func_seed = $fig->function_of($peg, undef, 1);
    my ($func1, $ecs1) = fix_func($func_seed);

    if (!$pegs{$peg}) {
        # Not selected: report the row on STDERR and move on.
        # $ecs has no source column in this input format; it stays undef and
        # is passed only so the dump keeps its original four-value layout.
        my $ecs;
        print STDERR Dumper($fixed_func, $func1, $ecs, $ecs1);
        print STDERR "$line\n";
        next;
    }

    # Second argument 1 is the "escape" flag added in revision 1.4; the
    # returned text is emitted exactly as FIG hands it back.
    my (undef, $subs, $structured_english) = $fig->to_structured_english($peg, 1);

    # Rows lacking either a structured description or a sequence are skipped silently.
    next unless $structured_english && $seq;

    print join("\n",
        "nmpdr_id=$peg",
        "reference_contig_refseq=$contig_refseq",
        "reference_contig_refseq_coordinates=$contig_refseq_coords",
        "reference_protein_refseq=$refseq",
        "evidence_code=$ev",
        "gene_symbol=$gene",
        qq{description="$func1"},
        "dbxref=$dbxref",
        "subsystem=$subs",
        qq{nmpdr_web_page="$link"},
        "structured_description=`$structured_english`",
        qq{protein_sequence="$seq"},
    ), "\n//\n";
}



# fix_func($func)
#
# Normalises a function string for display.
#   - Every "(EC ...)" group is cut out of the text (together with the
#     whitespace in front of it) and its contents are collected.
#   - " @ " separators become " AND ".
#   - ";" followed by whitespace becomes " AND/OR ".
#
# Returns a two-element list: the cleaned function text, and the collected
# EC strings de-duplicated, sorted as strings and joined with ";"
# (an empty string when none were found).
sub fix_func {
    my ($func) = @_;
    my %seen_ec;

    # The leading (.*\S) is greedy, so each pass removes the right-most
    # remaining "(EC ...)" group; loop until none is left.
    while ($func =~ /^(.*\S)\s*\(EC ([^\)]+)\)(.*)$/) {
        my ($head, $ec_number, $tail) = ($1, $2, $3);
        $seen_ec{$ec_number} = 1;
        $func = $head;
        $func .= $tail if $tail;
    }

    $func =~ s{\s+\@\s+}{ AND }g;
    $func =~ s{;\s+}{ AND/OR }g;

    my $ec_list = join(";", sort keys %seen_ec);
    return ($func, $ec_list);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3