[Bio] / FigKernelScripts / add_structured_english.pl Repository:
ViewVC logotype

View of /FigKernelScripts/add_structured_english.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon Jun 23 20:20:09 2008 UTC (11 years, 5 months ago) by disz
Branch: MAIN
Changes since 1.2: +13 -145 lines
Moved english part to FIG.pm

use strict;
use warnings;

use CGI;
use Data::Dumper;
use FIG;

# Usage: add_structured_english.pl PEG_LIST_FILE < feature_table.tsv
#
# Reads a list of PEG ids (one per line) from PEG_LIST_FILE, then reads
# tab-separated feature rows from STDIN.  For every row whose PEG id is in
# the list, a "key=value" record terminated by a "//" line is written to
# STDOUT.  Rows whose PEG id is not in the list are dumped to STDERR and
# skipped.
#
# NOTE(review): the original created a CGI object and used it to compute an
# unescaped copy of the structured description, but that copy was never
# emitted (the raw string was printed).  The dead computation has been
# dropped here; confirm whether the output was meant to be unescaped.

my $fig = FIG->new;

my $peg_list_file = $ARGV[0];
defined $peg_list_file
    or die "usage: $0 peg_list_file < tab_separated_features\n";

# Build the set of accepted PEG ids.  This must be a hash keyed by the id
# string: the ids are not numeric, so using them as array subscripts would
# collapse every id onto index 0.
my %pegs;
open(my $peg_fh, '<', $peg_list_file)
    or die "cannot open $peg_list_file: $!\n";
while (my $line = <$peg_fh>) {
    $line =~ s/\s+$//;          # drop the newline (and any CR / trailing blanks)
    next if $line eq '';
    $pegs{$line} = 1;
}
close($peg_fh);

# Example input row (tab separated):
#fig|169963.1.peg.1      NC_003210       318_1673        NP_463534.1     isu,icw(1),ff   dnaA    Chromosomal replication initiator protein dnaA  CDD:pfam00004,CDD:pfam00308,CDD:pfam01695,CDD:smart00382,GeneID:984365,InterPro:IPR001957,InterPro:IPR003593,NCBI_gi:16802049,Pfam:PF00308,SMART:Q8YAW2,SMART:SM00382,UniProtKB:Q8YAW2                  http://www.nmpdr.org/linkin.cgi?id=fig|169963.1.peg.1   MQSIED

while (defined(my $row = <STDIN>)) {
    # chomp, not chop: a final line without a newline must keep its last
    # character (it is part of the protein sequence).
    chomp $row;

    # Column layout of the input.  $kegg and $subsystems are parsed only to
    # keep the positions straight; they are not emitted (the subsystem value
    # written below comes from FIG, and the original's active print omitted
    # the kegg line).
    my ($peg, $contig_refseq, $contig_refseq_coords, $refseq, $ev, $gene,
        $fixed_func, $dbxref, $kegg, $subsystems, $link, $seq)
        = split(/\t/, $row);
    $peg = '' unless defined $peg;      # blank row: falls into the "not listed" branch

    # NOTE(review): arguments to function_of are kept exactly as in the
    # original; their meaning is defined in FIG.pm, which is not visible here.
    my $func_seed = $fig->function_of($peg, undef, 1);
    my ($func1, $ecs1) = fix_func(defined $func_seed ? $func_seed : '');

    if (!$pegs{$peg}) {
        # Not in the requested list: report the row on STDERR and move on.
        # The third dumped value is an undef placeholder: the original dumped
        # a variable there that was never assigned, so the four-value layout
        # of the diagnostic is preserved.
        print STDERR Dumper($fixed_func, $func1, undef, $ecs1);
        print STDERR "$row\n";
        next;
    }

    # NOTE(review): $fig is passed explicitly in addition to being the
    # invocant, exactly as the original did -- confirm against FIG.pm.
    my (undef, $subs, $structured_english)
        = $fig->to_structured_english($fig, $peg);

    # Only emit a record when both a description and a sequence are present.
    next unless $structured_english && $seq;
    $subs = '' unless defined $subs;

    print join("\n",
        "nmpdr_id=$peg",
        "reference_contig_refseq=$contig_refseq",
        "reference_contig_refseq_coordinates=$contig_refseq_coords",
        "reference_protein_refseq=$refseq",
        "evidence_code=$ev",
        "gene_symbol=$gene",
        "description=\"$func1\"",
        "dbxref=$dbxref",
        "subsystem=$subs",
        "nmpdr_web_page=\"$link\"",
        "structured_description=\`$structured_english\`",
        "protein_sequence=\"$seq\"",
    ), "\n//\n";
}



# fix_func($func)
#
# Normalises a function string:
#   * every "(EC <number>)" tag is removed from the text and its number is
#     remembered (duplicates are collapsed);
#   * " @ " separators become " AND ";
#   * ";" followed by whitespace becomes " AND/OR ".
#
# Returns a two-element list: the cleaned function text, and the collected
# EC numbers sorted as strings and joined with ";" (empty string if none).
sub fix_func {
    my ($func) = @_;
    my %ec_seen;

    # Each pass removes one EC tag.  Because the leading group is greedy,
    # the last tag still present in the string is the one that matches.
    while ($func =~ /^(.*\S)\s*\(EC ([^\)]+)\)(.*)$/) {
        my ($head, $ec, $tail) = ($1, $2, $3);
        $ec_seen{$ec} = 1;
        $func = $head;
        $func .= $tail if $tail;
    }

    $func =~ s/\s+\@\s+/ AND /g;
    $func =~ s/;\s+/ AND\/OR /g;

    my $ec_list = join(";", sort keys %ec_seen);
    return ($func, $ec_list);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3