[Bio] / FigKernelScripts / match_dna_to_aligned_prots.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/match_dna_to_aligned_prots.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : olson 1.4 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.1
19 :     $usage = "match_dna_to_aligned_prots dna_filename aligned_prot_filename";
20 :    
21 :     (($dna_filename = shift @ARGV) && ($aligned_prot_filename = shift @ARGV)) ||
22 :     die $usage;
23 :    
24 :     open(DNA,"<$dna_filename") or die "failed to open dna";
25 :     open(PROT,"<$aligned_prot_filename") or die "failed to open prot";
26 :     while (defined($line = <DNA>))
27 :     {
28 :     $line =~ />(\S+)/;
29 :     $id = $1;
30 :     $seq = <DNA>;
31 :     chomp $seq;
32 :     $dnaseqs{$id} = $seq;
33 :     }
34 :    
35 :     while (defined($line = <PROT>))
36 :     {
37 :     $line =~ />(\S+)/;
38 :     $id = $1;
39 :     $seq = <PROT>;
40 :     chomp $seq;
41 :     $protseqs{$id} = $seq;
42 :     }
43 :    
44 :     # for $id (keys(%protseqs)) { print "$id $protseqs{$id}\n"; }
45 :     # for $id (keys(%protseqs)) { print "$id $protseqs{$id}\n" if ($id =~ /1048/); }
46 :     ## for $id (keys(%protseqs))
47 :     ## {
48 :     ## $protseqs{$id} =~ /(\-+)/;
49 :     ## $subseq=$1;
50 :     ## # print "$id NUM ", length($subseq), $subseq, "\n" if ($id =~ /155920.*1503/);
51 :     ## print "$id NUM=", length($subseq), "\n";
52 :     ## }
53 :    
54 :     for $id (keys(%protseqs))
55 :     {
56 :     $protseq = $protseqs{$id};
57 :     @protseq = split('',$protseq);
58 :     $newprotseq = '';
59 :     for ($i=0; $i < @protseq; $i++)
60 :     {
61 :     $newprotseq .= "$protseq[$i] ";
62 :     }
63 :     @newprotseq = split('',$newprotseq);
64 :     $protseqs{$id} = $newprotseq;
65 :     # print "$id $newprotseq\n" if ($id =~ /1048/);
66 :     $dnaseq = $dnaseqs{$id};
67 :     $dnaidx = 0;
68 :     $newdnaseq = '';
69 :     for ($i=0; $i < @protseq; $i++)
70 :     {
71 :     $c = $protseq[$i];
72 :     if ($c =~ /[A-Z]/)
73 :     {
74 :     $newdnaseq .= substr($dnaseq,$dnaidx,3);
75 :     $dnaidx += 3;
76 :     }
77 :     elsif ($c eq '-')
78 :     {
79 :     $newdnaseq .= "---";
80 :     }
81 :     else
82 :     {
83 :     print STDERR "unrecognized char $c\n";
84 :     }
85 :     }
86 :     $dnaseqs{$id} = $newdnaseq;
87 :     # print "$id $newdnaseq\n" if ($id =~ /1048/);
88 :     # sanity check
89 :     if (length($newdnaseq) != length($newprotseq))
90 :     {
91 :     print STDERR "BAD LEN $id ", length($newdnaseq), " ", length($newprotseq), "\n";
92 :     }
93 : overbeek 1.3 $dnaid = "DNA_" . $id;
94 :     $proid = "PRO_" . $id;
95 : overbeek 1.1 print ">$dnaid\n$newdnaseq\n";
96 :     print ">$proid\n$newprotseq\n";
97 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3