[Bio] / FigKernelScripts / add_structured_english.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/add_structured_english.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

# add_structured_english.pl
#
# Usage: add_structured_english.pl PEG_LIST_FILE < tab_delimited_rows > records
#
# Reads a list of peg ids from PEG_LIST_FILE (one id per line), then reads
# tab-delimited rows from STDIN.  For every row whose peg id is in the list,
# and for which both a structured-English description and a protein sequence
# are available, a "key=value" record terminated by a "//" line is written to
# STDOUT.  Rows whose peg id is NOT in the list are dumped to STDERR instead.

use strict;
use warnings;

use CGI;
use Data::Dumper;    # provides Dumper(), used for the STDERR diagnostics below
use FIG;

my $cgi = CGI->new;
my $fig = FIG->new;

my $peg_list_file = $ARGV[0];
defined $peg_list_file
    or die "usage: $0 peg_list_file < tab_delimited_rows\n";

# Set of accepted peg ids.  This must be a hash keyed by the id string: the
# ids are not numeric, so using them as array indices collapses every id onto
# element 0 and the filter accepts everything.
my %pegs;

#open (PEGS, "rosspegs");
open(my $pegs_fh, '<', $peg_list_file)
    or die "cannot open peg list '$peg_list_file': $!\n";
while (my $id = <$pegs_fh>) {
    chomp $id;                  # the key must not carry the line terminator
    next if $id eq '';
    $pegs{$id} = 1;
}
close $pegs_fh;

# Sample input row (tab-delimited):
#fig|169963.1.peg.1 NC_003210 318_1673 NP_463534.1 isu,icw(1),ff dnaA Chromosomal replication initiator protein dnaA CDD:pfam00004,CDD:pfam00308,CDD:pfam01695,CDD:smart00382,GeneID:984365,InterPro:IPR001957,InterPro:IPR003593,NCBI_gi:16802049,Pfam:PF00308,SMART:Q8YAW2,SMART:SM00382,UniProtKB:Q8YAW2 http://www.nmpdr.org/linkin.cgi?id=fig|169963.1.peg.1 MQSIED

while (defined(my $row = <STDIN>)) {
    # chomp, not chop: a final line without a newline must keep its last
    # character (the last residue of the protein sequence).
    chomp $row;

    my ($peg, $contig_refseq, $contig_refseq_coords, $refseq, $ev, $gene,
        $fixed_func, $dbxref, $kegg, $subsystems, $link, $seq)
        = split(/\t/, $row);

    my $func_seed = $fig->function_of($peg, undef, 1);
    my ($func1, $ecs1) = fix_func($func_seed);

    if (!defined($peg) || !$pegs{$peg}) {
        # Peg is not in the accepted list: report the row on STDERR and skip it.
        # $ecs is never assigned anywhere in this script; it is passed only so
        # that the layout of the dump stays the same as before.
        my $ecs;
        print STDERR Dumper($fixed_func, $func1, $ecs, $ecs1);
        print STDERR "$row\n";
        next;
    }

    my ($evcodes, $subs, $structured_english)
        = $fig->to_structured_english($peg, 1);

    # NOTE(review): the unescaped text is computed but never emitted; the
    # record below uses the raw $structured_english.  Confirm which is intended.
    my $text = $cgi->unescape($structured_english);

    # A record is only written when both a description and a sequence exist.
    next unless $structured_english && $seq;

    # The description uses the function currently returned by function_of()
    # (after fix_func), not the $fixed_func column of the input row.  The
    # subsystem line uses $subs from to_structured_english(), not the
    # $subsystems input column.  The kegg column is read but not emitted.
    my @record = (
        "nmpdr_id=$peg",
        "reference_contig_refseq=$contig_refseq",
        "reference_contig_refseq_coordinates=$contig_refseq_coords",
        "reference_protein_refseq=$refseq",
        "evidence_code=$ev",
        "gene_symbol=$gene",
        "description=\"$func1\"",
        "dbxref=$dbxref",
        "subsystem=$subs",
        "nmpdr_web_page=\"$link\"",
        "structured_description=`$structured_english`",
        "protein_sequence=\"$seq\"",
    );

    print join("\n", @record), "\n//\n";
}
59 :    
60 :    
61 :    
# fix_func($func)
#
# Normalizes a function string:
#   * every "(EC <number>)" tag that is preceded by some non-blank text is
#     removed from the string and its number collected;
#   * " @ " (surrounded by whitespace) is rewritten as " AND ";
#   * ";" followed by whitespace is rewritten as " AND/OR ".
#
# Returns a two-element list: the rewritten string, and the collected EC
# numbers sorted as strings and joined with ";" (empty string when none).
# An undefined $func is returned unchanged, i.e. (undef, "").
sub fix_func {
    my ($func) = @_;
    my %ec_seen;

    # Nothing to rewrite; skip the regex operations on an undefined value.
    return (undef, "") unless defined $func;

    # The leading .* is greedy, so each pass strips the right-most EC tag.
    while ($func =~ /^(.*\S)\s*\(EC ([^\)]+)\)(.*)$/) {
        my ($before, $ec, $after) = ($1, $2, $3);
        $ec_seen{$ec} = 1;
        # Concatenate unconditionally: testing $after for truth would drop a
        # remainder that is exactly "0".
        $func = $before . $after;
    }

    $func =~ s/\s+\@\s+/ AND /g;
    $func =~ s/;\s+/ AND\/OR /g;

    return ($func, join(";", sort keys %ec_seen));
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3