[Bio] / FigKernelScripts / build_initial_objects_for_start_predictions.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/build_initial_objects_for_start_predictions.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.10, Mon Dec 5 18:56:37 2005 UTC revision 1.12, Sun Jan 20 21:44:52 2008 UTC
# Line 1  Line 1 
1  # -*- perl -*-  # -*- perl -*-
2  #  ########################################################################
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
5  #  #
# Line 14  Line 14 
14  # at info@ci.uchicago.edu or the Fellowship for Interpretation of  # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  ########################################################################
   
18    
19  use FIG;  use FIG;
20  my $fig = new FIG;  my $fig = new FIG;
21    use FIGV;
22    
23  $usage = "usage: build_initial_objects_for_start_predictions [tbl] < pegs_to_call > initial_objects";  $0 =~ m/([^\/]+)$/;  $self = $1;
24    warn "$0 " . join(" ", @ARGV) . "\n";
25    $usage = "build_initial_objects_for_start_predictions [-orgdir=OrgDir] [-genetic_code=CodeNum] [tbl] < pegs_to_call > initial_objects";
26    
27  use constant FID    =>  0;  use constant FID    =>  0;
28  use constant LOCUS  =>  1;  use constant LOCUS  =>  1;
# Line 32  Line 34 
34  use constant LEN    =>  7;  use constant LEN    =>  7;
35  use constant STRAND =>  8;  use constant STRAND =>  8;
36    
   
37  $tbl_file = "";  $tbl_file = "";
38  $trouble  = 0;  $trouble  = 0;
39    $genetic_code_number = 11;
40  while (@ARGV)  while (@ARGV)
41  {  {
42      if ($ARGV[0] =~ m/-h(elp)?/)      if ($ARGV[0] =~ m/-h(elp)?/) {
     {  
43          die "\n\tusage: $usage\n\n";          die "\n\tusage: $usage\n\n";
44      }      }
45      elsif (-s $ARGV[0])      elsif ($ARGV[0] =~ /-orgdir=(\S+)/) {
46      {          warn "Using $ARGV[0]\n";
47            $fig = new FIGV($1);
48        }
49        elsif ($ARGV[0] =~ /-genetic_code=(\d+)/) {
50            warn "$self is using $ARGV[0]\n";
51            $genetic_code_number = $1;
52        }
53        elsif (-s $ARGV[0]) {
54          $tbl_file = $ARGV[0];          $tbl_file = $ARGV[0];
55      }      }
56      else      else {
     {  
57          warn "Invalid argument $ARGV[0]\n";          warn "Invalid argument $ARGV[0]\n";
58      }      }
59      shift @ARGV;      shift @ARGV;
60  }  }
61  die "\nusage: $usage\n\n" if $trouble;  die "\nusage: $usage\n\n" if $trouble;
62    
63    $code_map = &FIG::genetic_code($genetic_code_number);
64    
65  @pegs = <STDIN>;  @pegs = <STDIN>;
66  chomp @pegs;  chomp @pegs;
67  # print "NPEGS=", scalar(@pegs), "\n";  # print "NPEGS=", scalar(@pegs), "\n";
# Line 126  Line 135 
135    
136  foreach $peg (@pegs)  foreach $peg (@pegs)
137  {  {
138        my $tmp = $peg;
139      if ($peg =~ /^fig\|\d+\.\d+\.peg\.\d+$/)      if ($peg =~ /^fig\|\d+\.\d+\.peg\.\d+$/)
140      {      {
141          $genome = $fig->genome_of($peg);          $genome = $fig->genome_of($peg);
# Line 142  Line 152 
152          $peg = $1;          $peg = $1;
153          $genome = $2;          $genome = $2;
154          $loc = $3;          $loc = $3;
155            ($peg && $genome && $loc) || confess "could not parse \'$tmp\'";
156        }
157        else {
158            confess "Could not parse \'$peg\'";
159      }      }
   
     ($genome && $loc) || confess $peg;  
160    
161      my $lnC = $fig->contig_ln($genome,$contig);      my $lnC = $fig->contig_ln($genome,$contig);
162      ($contig,$begin,$end) = $fig->boundaries_of($loc);      ($contig,$begin,$end) = $fig->boundaries_of($loc);
# Line 170  Line 182 
182    
183      if (! $seq)      if (! $seq)
184      {      {
185          print STDERR &Dumper($peg,$loc,$seq); die "could not get sequence for $peg";          confess "could not get sequence for fid=$peg, loc=$loc;\n   seq=\'$seq\'";
186      }      }
187    
188  #   The following hack handles uncertainty about whether SEED gives back the STOP or not  #   The following hack handles uncertainty about whether SEED gives back the STOP or not
189      if (substr($seq,-6,3) =~ /TAA|TAG|TGA/i)  
190        if ($fig->translate(substr($seq,-6,3), $code_map) eq qq(*))
191      {      {
192          $end = ($begin < $end) ? $end-3 : $end+3;          $end = ($begin < $end) ? $end-3 : $end+3;
193          $seq = substr($seq,0,length($seq) - 3);          $seq = substr($seq,0,length($seq) - 3);
194      }      }
195      elsif (substr($seq,-3,3) !~ /TAA|TAG|TGA/i)      elsif (($fig->translate(substr($seq,-3,3), $code_map) ne qq(*)) && (not $fig->possibly_truncated($genome, $loc)))
196      {      {
197          warn "BAD STOP: $peg $genome $loc $seq\n";          warn "BAD STOP: $peg $genome $loc $seq\n";
198      }      }
199    
     $loc = join("_",($contig,$begin,$end));  
   
200    
201      # BACK INTO THE STOP FROM START OF PEG      # BACK INTO THE STOP FROM START OF PEG
202      $pre_peg = uc $fig->dna_seq($genome,$l);      $pre_peg = uc $fig->dna_seq($genome,$l);
# Line 195  Line 206 
206      for ($i=$len_pre_peg-3; $i > 0 && ! $found; $i-=3)      for ($i=$len_pre_peg-3; $i > 0 && ! $found; $i-=3)
207      {      {
208          $stop =  substr($pre_peg,$i,3);          $stop =  substr($pre_peg,$i,3);
209          if ($stop eq "TAG"  ||  $stop eq "TAA"  ||  $stop eq "TGA")          if ($fig->translate($stop,  $code_map) eq qq(*))
210          {          {
211              # print "FOUND $stop for $l\n";              # print "FOUND $stop for $l\n";
212              $orf = substr($pre_peg,$i+3) . $seq;              $orf = substr($pre_peg,$i+3) . $seq;

Legend:
Removed from v.1.10  
changed lines
  Added in v.1.12

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3