[Bio] / FigKernelScripts / run_glimmer3.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/run_glimmer3.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Tue May 30 21:06:24 2006 UTC revision 1.2, Wed Jul 16 21:59:37 2008 UTC
# Line 1  Line 1 
1  # -*- perl -*-  # -*- perl -*-
2    
3  $ENV{PATH} = "$ENV{HOME}/Glimmer-3.01/bin:$ENV{PATH}";  $ENV{PATH} = "$ENV{HOME}/Tools/ELPH/bin/elph:$ENV{HOME}/Tools/Glimmer-3.02/bin:$ENV{PATH}";
4    
5  $0 =~ m/([\/]+)$/;  use strict;
6  $usage = "$1 Org-ID contigs [-train=training_tbl[,training_contigs]] [-skip]";  use warnings;
7    
8  $train = $training_tbl = $training_contigs = $skip = "";  use FIG;
9    my $fig = FIG->new();
10    
 $trouble = 0;  
 if (@ARGV >= 2)  
 {  
     if ($ARGV[0] && (!-f $ARGV[0])) {  
         $org = $ARGV[0];  
         print STDERR "Org-ID is $org\n";  
     } else {  
         $trouble = 1;  
         warn "Bad Org-ID $ARGV[0]";  
     }  
11    
12      if ($ARGV[1] && (-s $ARGV[1])) {  $0 =~ m/([^\/]+)$/;
13          $contigs = $ARGV[1];  my $self  = $1;
14          print STDERR "Contigs file is $contigs\n";  my $usage = "$self  [-code=genetic_code_num] [-train=training_tbl[,training_contigs]] [-skip_called]  Taxon_ID contigs";
     } else {  
         $trouble = 1;  
         warn "Missing contigs file $ARGV[1]";  
     }  
 }  
 else  
 {  
     die "usage: $usage";  
 }  
15    
16  $i = 2;  my $genetic_code     =   11;   #...Default is "standard" code
17  while ($i < @ARGV)  
18  {  my $train            = qq();   #...Flag associated with '-train' switch
19      if ($ARGV[$i] =~ m/-train=(\S+)/)  my $training_tbl     = qq();
20    my $training_contigs = qq();
21    
22    my $glimmeropts = qq(-o50 -g110 -t30);
23    
24    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
25    # NOTE: '-skip' is an obscure option introduced to handle one job
26    #   for which new contigs had been added to a set of existing contigs,
27    #   and it was required that we not re-call the existing contigs,
28    #   but instead use their calls as the 'training set' for the new contigs.
29    #   It seems quite unlikely that this option will ever be used again.
30    #-----------------------------------------------------------------------
31    my %skip;                #...Hash storing already-called contig IDs to be skipped.
32    my $skip_called = qq();  #...Flag indicating whether '-skip' switch is present.
33    
34    
35    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
36    #... Parse argument list
37    #-----------------------------------------------------------------------
38    my $trouble = 0;
39    while (@ARGV && ($ARGV[0] =~ m/^-/))
40      {      {
41          ++$i;      if ($ARGV[0] =~ m/^-{1,2}code=(\d+)/) {
42            $genetic_code = $1;
43        }
44        elsif ($ARGV[0] =~ m/^-{1,2}train=(\S+)/) {
45          $train =  $1;          $train =  $1;
46          if ($train =~ m/^([^,]+)/)          ($training_tbl, $training_contigs) = split /,/, $train;
47          {  
48              $training_tbl = $1;          if (-s $training_tbl) {
49              if (-s $training_tbl)              print STDERR "Training features will be taken from $training_tbl\n"
50              {                  if $ENV{VERBOSE};
                 print STDERR "Training set will be taken from $training_tbl\n";  
51              }              }
52              else          else {
             {  
53                  $trouble = 1;                  $trouble = 1;
54                  warn "Training tbl $training_tbl does not exist";                  warn "Training tbl $training_tbl does not exist";
55              }              }
56    
57              if ($train =~ m/,([^,]+)$/)          if ($training_contigs) {
58              {              if (-s $training_contigs) {
59                  $training_contigs = $1;                  print STDERR "Training set will be extracted from $training_contigs\n"
60                  if (-s $training_contigs)                      if $ENV{VERBOSE};
                 {  
                     print STDERR "Training set will be extracted from $training_contigs\n";  
61                  }                  }
62                  else              else {
                 {  
63                      $trouble = 1;                      $trouble = 1;
64                      warn "Training contigs $training_contigs do not exist";                      warn "Training contigs $training_contigs do not exist";
65                  }                  }
66              }              }
67          }          else {
         else  
         {  
68              $trouble = 1;              $trouble = 1;
69              warn "Training argument $train is invalid";              warn "Training argument $train is invalid";
70          }          }
71      }      }
72      elsif ($ARGV[$i] =~ m/-skip/)      elsif ($ARGV[0] =~ m/^-{1,2}skip_called/) {
73      {          $skip_called = $ARGV[0];
74          $skip = $ARGV[$i];          print STDERR "Skip option is set\n" if $ENV{VERBOSE};
         print STDERR "Skip option is set\n";  
         ++$i;  
     }  
     else  
     {  
         die "Unrecognized argument $ARGV[$i]";  
75      }      }
76        else {
77            $trouble = 1;
78            warn "Unrecognized argument $ARGV[0]";
79  }  }
 die "\n\tusage: $usage\n\n" if $trouble;  
80    
81  if (not $training_contigs)      shift @ARGV;
 {  
     $training_contigs = $contigs;  
82  }  }
83    
84  $tmp_contig  = "$FIG_Config::temp/tmp$$.contig";  my $genetic_code_switch = qq(-z $genetic_code);
 $tmp_coord   = "$FIG_Config::temp/tmp$$.coord";  
 $tmp_train   = "$FIG_Config::temp/tmp$$.train";  
 $tmp_model   = "$FIG_Config::temp/tmp$$.model";  
 $tmp_predict = "$FIG_Config::temp/tmp$$.predict";  
85    
 $orf_num = 0;  
 if ($train)  
 {  
     @tbl = `cat $training_tbl`;  
     foreach $entry (@tbl)  
     {  
         ($fid, $locus) = split /\t/, $entry;  
         $fid =~ m/\.(\d+)$/;  
         $orf_num = ($1 > $orf_num) ? $1 : $orf_num;  
86    
87          $locus =~ m/^(\S+)_(\d+)_(\d+)$/;  my $taxon_ID     = qq();
88          ($contig_id, $beg, $end) = ($1, $2, $3);  my $contigs_file = qq();
89    
90          if ($skip) { $skip{$contig_id} = 1; }  if (@ARGV == 2) {
91        ($taxon_ID, $contigs_file) = @ARGV;
92    
93          if (not defined($tbl{$contig_id})) { $tbl{$contig_id} = []; }      if ($taxon_ID && ($taxon_ID =~ m/^\d+\.\d+$/)) {
94          $x = $tbl{$contig_id};          print STDERR "Taxon-ID is $taxon_ID\n" if $ENV{VERBOSE};
95          push @$x, [$beg, $end];      } else {
96            $trouble = 1;
97            warn "Bad Taxon-ID $taxon_ID\n";
98      }      }
99    
100      $n = 0;      if (-s $contigs_file) {
101      open(CONTIGS, "<$training_contigs")          print STDERR "Contigs file is $contigs_file\n" if $ENV{VERBOSE};
102          || die "Could not read-open $training_contigs";      } else {
103      open(TRAIN,   ">$tmp_train")          $trouble = 1;
104          || die "Could not write-open $tmp_train";          warn "Missing or zero-size contigs file $contigs_file";
   
     while (($contig_id, $seqP) = &read_fasta_record(\*CONTIGS))  
     {  
         $len_of{$contig_id} = length($$seqP);  
   
         $x = $tbl{$contig_id};  
         foreach $y (@$x)  
         {  
             ($beg, $end) = @$y;  
   
             if ($beg < $end)  
             {  
                 $dna = substr($$seqP,$beg-1,($end+1-$beg));  
105              }              }
             else  
             {  
                 $dna = substr($$seqP,$end-1,($beg+1-$end));  
                 $dna = $ { &rev_comp(\$dna) };  
106              }              }
107              $dna = lc($dna);  die "\n\tusage: $usage\n\n" if $trouble;
108    
109              ++$n;  
110              $_ = "T$n";  
111              print TRAIN  $_ . (" " x (11-length($_))) . $dna . "\n";  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
112          }  # ... Initial Training Pass
113      }  #-----------------------------------------------------------------------
114      close(CONTIGS) || die "Could not close $contigs";  if (not $training_contigs) {
115  }      $training_contigs = $contigs_file;
116  else  }
117  {  
118      print STDERR "\nFinding training ORFs using default procedure\n";  my $tmp_contig  = "$FIG_Config::temp/tmp$$.contig";
119    my $tmp_coords  = "$FIG_Config::temp/tmp$$.coords";
120    my $tmp_train   = "$FIG_Config::temp/tmp$$.train";
121    my $tmp_model   = "$FIG_Config::temp/tmp$$.icm";
122    
123    my ($contig_id, $seqP);
124    
125    if (not $train) {
126    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
127        print STDERR "\nFinding training ORFs using GLIMMER default procedure\n"
128            if $ENV{VERBOSE};
129    #-----------------------------------------------------------------------
130      if (-s "$tmp_train") {      if (-s "$tmp_train") {
131          system("rm -f $tmp_train")          system("rm -f $tmp_train")
132              && die "Could not remove $tmp_train";              && die "Could not remove $tmp_train";
# Line 159  Line 135 
135      open(CONTIGS, "<$training_contigs")      open(CONTIGS, "<$training_contigs")
136          || die "could not read-open $training_contigs";          || die "could not read-open $training_contigs";
137    
138        $training_tbl = "$FIG_Config::temp/tmp$$.train.tbl";
139        open(TRAINING,  ">$training_tbl")  || die "Could not write-open $training_tbl";
140    
141        my $orf_num = 0;
142        my %len_of;     #...Hash storing training contig lens
143      while (($contig_id, $seqP) = &read_fasta_record(\*CONTIGS))      while (($contig_id, $seqP) = &read_fasta_record(\*CONTIGS))
144      {      {
145          $len_of{$contig_id} = length($$seqP);          $len_of{$contig_id} = length($$seqP);
# Line 167  Line 148 
148          &display_id_and_seq($contig_id, $seqP, \*TMP);          &display_id_and_seq($contig_id, $seqP, \*TMP);
149          close(TMP) || die "Could not close $tmp_contig";          close(TMP) || die "Could not close $tmp_contig";
150    
151          print STDERR "\nScanning contig $contig_id\n";          print STDERR "\nScanning contig $contig_id for long orfs\n" if $ENV{VERBOSE};
152          system("long-orfs -l $tmp_contig - | perl -e \'while(<>) { if (/^Putative/) { \$x=1; next; } print if \$x; }\' > $tmp_coord")  
153            system("long-orfs -l -n -t 1.15 $genetic_code_switch  $tmp_contig $tmp_coords")
154              && die "Could not extract training ORFs from contig $contig_id";              && die "Could not extract training ORFs from contig $contig_id";
155          system("extract $tmp_contig $tmp_coord >> $tmp_train")          system("extract -t $tmp_contig $tmp_coords >> $tmp_train")
156              && die "Could not extract training sequences from contig $contig_id";              && die "Could not extract training sequences from contig $contig_id";
157    
158            #... Translate GLIMMER  $tmp_coords into SEED $training_tbl
159            open(TMP_COORDS, "<$tmp_coords") || die "Could not read-open $tmp_coords";
160            print TRAINING map { ++$orf_num;
161                                 chomp $_;
162                                 my (undef, $beg, $end) = split /\s+/o, $_;
163                                 die "Bad coords in entry: $_" unless ($beg && $end);
164                                 my $fid = qq(orf) . (qq(0)x(5-length($orf_num))) . $orf_num;
165                                 my $loc = join(qq(_), ($contig_id, $beg, $end));
166                                 $_ = qq($fid\t$loc\n)
167                                 } <TMP_COORDS>;
168        }
169        close(TRAINING) || die "Could not close $training_tbl";
170        close(CONTIGS)  || die "Could not close $contigs_file";
171      }      }
     close(CONTIGS) || die "Could not close $contigs";  
172    
173     if (-s "$tmp_coord")  { unlink("$tmp_coord")  || warn "Could not remove $tmp_coord";  }  
174     if (-s "$tmp_contig") { unlink("$tmp_contig") || warn "Could not remove $tmp_contig"; }  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
175    print STDERR ("\n",
176                  "Training using:\n",
177                  "   contigs --- $training_contigs\n",
178                  "   ORFs    --- $training_tbl\n"
179                  )
180        if $ENV{VERBOSE};
181    #-----------------------------------------------------------------------
182    open(CONTIGS, "<$training_contigs")
183        || die "Could not read-open $training_contigs";
184    
185    my ($len_of, $seq_of);
186    while (($contig_id, $seqP) = &read_fasta_record(\*CONTIGS))  {
187        $len_of->{$contig_id} = length($$seqP);
188        $seq_of->{$contig_id} = $$seqP;
189  }  }
190    close(CONTIGS) || die "Could not close $contigs_file";
191    
192  if (($_ =  `grep -c "^>" $tmp_train`) && ($_ =~ m/^\s*(\d+)/))  my $entry;
193  {  my $orf_num;
194      print STDERR "\nExtracted $1 training sequences\n\n";  my $max_orf_num = 0;
195    my %training_tbl;
196    open(TBL,   "<$training_tbl") || die "Could not read-open $training_tbl";
197    open(TRAIN, ">$tmp_train")    || die "Could not write-open $tmp_train";
198    while (defined($entry = <TBL>)) {
199        chomp $entry;
200        my ($fid, $locus) = split /\t/, $entry;
201    
202        if ($fid =~ m/(\d+)$/) {
203            $orf_num     = $1;
204            $max_orf_num = ($orf_num > $max_orf_num) ? $orf_num : $max_orf_num;
205  }  }
206  else      else {
207  {          die "Could not parse FID $fid for training entry $.: $entry";
208        }
209    
210        #...If -skip_called option was selected, record which contigs are in the training set
211        if ($skip_called) { $skip{$contig_id} = 1; }
212    
213        my $training_seq = qq();
214        my @exons        = split /,/, $locus;
215        foreach my $exon (@exons) {
216            if ($exon =~ m/^(\S+)_(\d+)_(\d+)/) {
217                my ($contig, $beg, $end) = ($1, $2, $3);
218    
219                my $contig_seq = $seq_of->{$contig};
220    
221                my $dna = &get_dna_seq($contig, $beg, $end, $len_of, $seq_of);
222    
223                $training_seq .= lc($dna);
224            }
225            else {
226                die "Could not parse exon $exon for training entry $.: $entry";
227            }
228    
229            my $training_ID = qq(orf) . (qq(0) x (5-length($orf_num))) . $orf_num;
230            &display_id_and_seq($training_ID, \$training_seq, \*TRAIN);
231        }
232    }
233    close(TRAIN) || die "Could not close $tmp_train";
234    close(TBL)   || die "Could not close $training_tbl";
235    
236    if (($_ = `grep -c "^>" $tmp_train`) && ($_ =~ m/^\s*(\d+)/)) {
237        print STDERR "\nExtracted $1 training sequences\n\n" if $ENV{VERBOSE};
238    }
239    else {
240      die "\nCould not extract any training sequences";      die "\nCould not extract any training sequences";
241  }  }
242    
 print STDERR "Building interpolated context model in $tmp_model\n";  
243    
244  if (-s "$tmp_model")  
245  {  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
246    #... Build ICM (interpolated context model)
247    #-----------------------------------------------------------------------
248    print STDERR ("Building interpolated context model ---\n",
249                  "   output in $tmp_model\n\n"
250                  ) if $ENV{VERBOSE};
251    
252    if (-s "$tmp_model") {
253      system("rm -f $tmp_model") && die "Could not remove $tmp_model";      system("rm -f $tmp_model") && die "Could not remove $tmp_model";
254  }  }
255    
256  system("build-icm $tmp_model <$tmp_train")  $fig->run("build-icm -r $tmp_model < $tmp_train");
     && die "Failure at build-icm $tmp_model <$tmp_train";  
257    
258  #open(TBL,     ">tbl")      || die "Could not write-open tbl";  
259  open(CONTIGS, "<$contigs") || die "Could not read-open $contigs";  
260  while (($contig_id, $seqP) = &read_fasta_record(\*CONTIGS))  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
261  {  #... First GLIMMER pass
262      if ($skip{$contig_id})  #-----------------------------------------------------------------------
263      {  print STDERR "Running first GLIMMER pass\n" if $ENV{VERBOSE};
264          print STDERR "Skipping $contig_id\n" if $ENV{VERBOSE};  
265    my $tmp_prefix = "$FIG_Config::temp/tmp$$.pass_1";
266    $fig->run("glimmer3 $glimmeropts $contigs_file $tmp_model $tmp_prefix");
267    
268    
269    
270    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
271    #... Extract upstream regions and START codon counts
272    #-----------------------------------------------------------------------
273    my $tmp_predictions_pass_1 = "$tmp_prefix.predict";
274    print STDERR "\nExtracting upstream regions and START counts from $tmp_predictions_pass_1\n"
275        if $ENV{VERBOSE};
276    
277    open(PREDICT,  "<$tmp_predictions_pass_1")
278        || die "Could not read-open $tmp_predictions_pass_1";
279    
280    my $tmp_upstream = "$FIG_Config::temp/tmp$$.upstream";
281    open(UPSTREAM, ">$tmp_upstream")
282        || die "Could not write-open $tmp_upstream";
283    
284    my %start_counts;    #...Hash to hold START codon counts
285    
286    $contig_id = qq();
287    while(defined($entry = <PREDICT>)) {
288        chomp $entry;
289    
290        if ($entry =~ m/^>(\S+)/) {
291            $contig_id = $1;
292          next;          next;
293      }      }
294        elsif ($entry =~ m/^(\S+)\s+(\d+)\s+(\d+)/) {
295            my ($id, $beg, $end) = ($1, $2, $3);
296    
297      $len_of{$contig_id} = length($$seqP);          my $sign = ($beg < $end) ? +1 : -1;
298    
299      open( TMP, ">$tmp_contig") || die "Could not write-open $tmp_contig";          my $end_start   = $beg + $sign * 2;
300      &display_id_and_seq($contig_id, $seqP, \*TMP);          my $start_codon = &get_dna_seq($contig_id, $beg, $end_start, $len_of, $seq_of);
     close(TMP) || die "Could not close $tmp_contig";  
301    
302      print STDERR "\nFinding ORFs on contig $contig_id, len=", length($$seqP), "\n";          ++$start_counts{0};
303      system("glimmer3 -l $tmp_contig $tmp_model $FIG_Config::temp/tmp$$")          ++$start_counts{$start_codon};
304          && die "FAILED: glimmer3 -l $tmp_contig $tmp_model $FIG_Config::temp/tmp$$";  
305            my $up_id = $id;
306            $up_id =~ s/^orf/ups/o;
307            my $up_beg = $beg - $sign * 25;
308            my $up_end = $beg - $sign;
309    
310      $found = 0;          my $up_seq;
311      open(CALLS, "<$FIG_Config::temp/tmp$$.predict")          if ($up_seq = &get_dna_seq($contig_id, $up_beg, $up_end, $len_of, $seq_of)) {
312          || die "Could not read-open $FIG_Config::temp/tmp$$.predict";              &display_id_and_seq($id, \$up_seq, \*UPSTREAM);
     while (defined($call = <CALLS>))  
     {  
         next if ($call =~ m/^>/);  
   
         if ($call =~ m/^\S+\s+(\d+)\s+(\d+)/)  
         {  
             ++$found;  
             ++$orf_num;  
             print STDOUT "fig|$org.peg.$orf_num\t$contig_id\_$1\_$2\t\n";  
313          }          }
         else  
         {  
             warn "Invalid call format: $call\n";  
314          }          }
315        else {
316            die "Could not parse prediction $.: $entry";
317        }
318    }
319    
320    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
321    #... Compute pseudocount-stabilized START codon frequencies
322    #-----------------------------------------------------------------------
323    my $atg_freq    = ($start_counts{atg} + 80) / ($start_counts{0} + 100);
324    my $gtg_freq    = ($start_counts{gtg} + 15) / ($start_counts{0} + 100);
325    my $ttg_freq    = ($start_counts{ttg} +  5) / ($start_counts{0} + 100);
326    my $start_freqs = "$atg_freq,$gtg_freq,$ttg_freq";
327    
328    print STDERR "start_freqs = $start_freqs\n\n" if $ENV{VERBOSE};
329    
330    
331    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
332    #... Build Position Weight Matrix for upstream regions using ELPH
333    #-----------------------------------------------------------------------
334    print STDERR "\nBuilding PWM from upstream regions from $tmp_predictions_pass_1\n";
335    
336    my $tmp_matrix = "$FIG_Config::temp/tmp$$.pwm";
337    system("elph $tmp_upstream LEN=6 | get-motif-counts.awk > $tmp_matrix")
338        && ((-s $tmp_matrix)
339            || die("Could not construct PWM")
340            );
341    
342    
343    
344    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
345    #... Re-call PEGs using ICM and PWM
346    #-----------------------------------------------------------------------
347    print STDERR "Re-calling PEGs using trained ICM\n" if $ENV{VERBOSE};
348    
349    $tmp_prefix = "$FIG_Config::temp/tmp$$";
350    system("glimmer3 $glimmeropts -b $tmp_matrix -P $start_freqs $tmp_contig $tmp_model $tmp_prefix")
351        && die "FAILED: glimmer3 -l $tmp_contig $tmp_model $FIG_Config::temp/tmp$$";
352    
353    
354    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
355    #... Parse and write final predictions
356    #    NOTE: Does not yet handle '-skip' option
357    #-----------------------------------------------------------------------
358    my $tmp_predictions = "$tmp_prefix.predict";
359    print STDERR "\nParsing $tmp_predictions\n"
360        if $ENV{VERBOSE};
361    
362    open(PREDICT,  "<$tmp_predictions")
363        || die "Could not read-open $tmp_predictions";
364    
365    my $fid_num = 0;
366    
367    $contig_id  = qq();
368    while(defined($entry = <PREDICT>)) {
369        chomp $entry;
370    
371        if ($entry =~ m/^>(\S+)/) {
372            $contig_id = $1;
373            next;
374      }      }
375        elsif ($entry =~ m/^(\S+)\s+(\d+)\s+(\d+)/) {
376            my ($id, $beg, $end) = ($1, $2, $3);
377    
378      print STDERR "glimmer3 found $found ORFs on contig $contig_id\n";          if ($beg && $end) {
379                ++$fid_num;
380                my $fid = qq(fig|$taxon_ID.peg.) . $fid_num;
381                print STDOUT (qq($fid\t), join(qq(_), ($contig_id, $beg, $end)), qq(\n));
382            }
383            else {
384                die "Error in $tmp_predictions line $.: $entry";
385            }
386  }  }
387  close(CONTIGS) || die "Could not close $contigs";      else {
388            die "Could not parse prediction $.: $entry";
389        }
390    }
391    
392    
393    die "aborted";
394    
395    
396    if (-s "$tmp_coords") { unlink("$tmp_coords") || warn "Could not remove $tmp_coords";  }
397  if (-s "$tmp_contig") { unlink("$tmp_contig") || warn "Could not remove $tmp_contig"; }  if (-s "$tmp_contig") { unlink("$tmp_contig") || warn "Could not remove $tmp_contig"; }
398  if (-s "$tmp_train")  { unlink("$tmp_train")  || warn "Could not remove $tmp_train";  }  if (-s "$tmp_train")  { unlink("$tmp_train")  || warn "Could not remove $tmp_train";  }
399  if (-s "$tmp_model")  { unlink("$tmp_model")  || warn "Could not remove $tmp_model";  }  if (-s "$tmp_model")  { unlink("$tmp_model")  || warn "Could not remove $tmp_model";  }
# Line 248  Line 401 
401  exit 0;  exit 0;
402    
403    
404    ########################################################################
405    sub get_dna_seq {
406        my ($contig, $beg, $end, $len_of, $seq_of) = @_;
407        my $dna = qq();
408    
409        my $contig_len = $len_of->{$contig};
410        my $contig_seq = $seq_of->{$contig};
411    
412        return undef if (&max($beg, $end) > $contig_len);
413        return undef if (&min($beg, $end) < 1);
414    
415        if ($beg < $end) {
416            $dna = substr($contig_seq, ($beg-1), ($end+1-$beg));
417        }
418        else {
419            $dna = substr($contig_seq, ($end-1), ($beg+1-$end));
420            $dna = $ { &rev_comp(\$dna) };
421        }
422    
423        return lc($dna);
424    }
425    
426    
427  sub read_fasta_record  sub read_fasta_record
428  {  {

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3