Parent Directory
|
Revision Log
Revision 1.6 - (view) (download) (as text)
1 : | overbeek | 1.1 | |
2 : | use FIG; | ||
3 : | my $fig = new FIG; | ||
4 : | |||
5 : | $usage = "usage: build_initial_objects_for_start_predictions < pegs_in_cluster > initial_objects"; | ||
6 : | |||
7 : | @pegs = <STDIN>; | ||
8 : | chomp @pegs; | ||
9 : | # print "NPEGS=", scalar(@pegs), "\n"; | ||
10 : | overbeek | 1.5 | |
11 : | foreach $peg (@pegs) | ||
12 : | overbeek | 1.1 | { |
13 : | overbeek | 1.5 | if ($peg =~ /^fig\|\d+\.\d+$/) |
14 : | overbeek | 1.1 | { |
15 : | overbeek | 1.5 | $genome = $fig->genome_of($peg); |
16 : | $loc = $fig->feature_location($peg); | ||
17 : | # print "$len{$peg}\t$peg\t$loc\n"; | ||
18 : | if ($loc =~ /,/) | ||
19 : | { | ||
20 : | print STDERR "skipping $peg - do not handle complex locs\n"; | ||
21 : | next; | ||
22 : | } | ||
23 : | overbeek | 1.1 | } |
24 : | overbeek | 1.5 | elsif ($peg =~ /^(new\|(\d+\.\d+)\.peg\.\d+)\t(\S+)/) |
25 : | { | ||
26 : | $peg = $1; | ||
27 : | $genome = $2; | ||
28 : | $loc = $3; | ||
29 : | overbeek | 1.1 | } |
30 : | |||
31 : | overbeek | 1.5 | ($genome && $loc) || confess $peg; |
32 : | |||
33 : | overbeek | 1.4 | my $lnC = $fig->contig_ln($genome,$contig); |
34 : | overbeek | 1.1 | ($contig,$begin,$end) = $fig->boundaries_of($loc); |
35 : | if ($begin < $end) | ||
36 : | { | ||
37 : | $l = sprintf("%s_%d_%d",$contig,1,$begin-1); | ||
38 : | overbeek | 1.4 | if ($end < ($lnC-3)) |
39 : | { | ||
40 : | $end += 3; | ||
41 : | } | ||
42 : | overbeek | 1.1 | } |
43 : | else | ||
44 : | { | ||
45 : | overbeek | 1.6 | $l = sprintf("%s_%d_%d",$contig,$fig->contig_ln($genome,$contig),$begin+1); |
46 : | overbeek | 1.4 | if ($end > 3) |
47 : | { | ||
48 : | $end -= 3; | ||
49 : | } | ||
50 : | overbeek | 1.1 | } |
51 : | |||
52 : | $loc = join("_",($contig,$begin,$end)); | ||
53 : | $seq = uc $fig->dna_seq($genome,$loc); | ||
54 : | overbeek | 1.5 | |
55 : | overbeek | 1.3 | if (! $seq) |
56 : | { | ||
57 : | print STDERR &Dumper($peg,$loc,$seq); die "could not get sequence for $peg"; | ||
58 : | } | ||
59 : | |||
60 : | overbeek | 1.1 | # The following hack handles uncertainty about whether SEED gives back the STOP or not |
61 : | if (substr($seq,-6,3) =~ /TAA|TAG|TGA/i) | ||
62 : | { | ||
63 : | overbeek | 1.2 | $end = ($begin < $end) ? $end-3 : $end+3; |
64 : | overbeek | 1.1 | $seq = substr($seq,0,length($seq) - 3); |
65 : | } | ||
66 : | elsif (substr($seq,-3,3) !~ /TAA|TAG|TGA/i) | ||
67 : | { | ||
68 : | overbeek | 1.4 | warn "BAD STOP: $peg $genome $loc $seq\n"; |
69 : | overbeek | 1.1 | } |
70 : | |||
71 : | overbeek | 1.2 | $loc = join("_",($contig,$begin,$end)); |
72 : | overbeek | 1.1 | |
73 : | |||
74 : | # BACK INTO THE STOP FROM START OF PEG | ||
75 : | $pre_peg = uc $fig->dna_seq($genome,$l); | ||
76 : | $found = 0; | ||
77 : | $len_pre_peg = length($pre_peg); | ||
78 : | |||
79 : | for ($i=$len_pre_peg-3; $i > 0 && ! $found; $i-=3) | ||
80 : | { | ||
81 : | $stop = substr($pre_peg,$i,3); | ||
82 : | if ($stop eq "TAG" || $stop eq "TAA" || $stop eq "TGA") | ||
83 : | { | ||
84 : | # print "FOUND $stop for $l\n"; | ||
85 : | $orf = substr($pre_peg,$i+3) . $seq; | ||
86 : | if (($i-27) > 0) | ||
87 : | { | ||
88 : | $pre_orf = substr($pre_peg,$i-27,30); | ||
89 : | overbeek | 1.5 | my $gs = $fig->genus_species($genome); |
90 : | my @aliases = (); | ||
91 : | if ($peg =~ /^fig/) | ||
92 : | { | ||
93 : | @aliases = grep { $_ =~ /^([NXYZA]P_[0-9\.]+)|(uni\|)|(sp\|)/ } $fig->feature_aliases($peg); | ||
94 : | } | ||
95 : | |||
96 : | overbeek | 1.1 | print "ID=$peg\n"; |
97 : | print "GENOME=$gs\n"; | ||
98 : | if (@aliases > 0) | ||
99 : | { | ||
100 : | print "ALIASES=",join(",",@aliases),"\n"; | ||
101 : | } | ||
102 : | print "CONTIG=$contig\n"; | ||
103 : | overbeek | 1.5 | |
104 : | overbeek | 1.1 | if ($begin < $end) |
105 : | { | ||
106 : | print "BEGIN=", $begin-($len_pre_peg-$i-3), "\n"; | ||
107 : | } | ||
108 : | else | ||
109 : | { | ||
110 : | print "BEGIN=", $begin+($len_pre_peg-$i-3), "\n"; | ||
111 : | } | ||
112 : | print "END=$end\n"; | ||
113 : | print "SEQ=$orf\n"; | ||
114 : | print "PREFIX=$pre_orf\n"; | ||
115 : | print "OLD_START_POS=", $len_pre_peg-$i-2, "\n"; | ||
116 : | print "///\n"; | ||
117 : | $found = 1; | ||
118 : | } | ||
119 : | else | ||
120 : | { | ||
121 : | print STDERR "skipping $peg - not enough prefix before orf\n"; | ||
122 : | } | ||
123 : | last; | ||
124 : | } | ||
125 : | } | ||
126 : | if ( ! $found) | ||
127 : | { | ||
128 : | print STDERR "DID NOT FIND STOP BEFORE $peg $contig $begin $end $l\n"; | ||
129 : | } | ||
130 : | } |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |