[Bio] / FigKernelScripts / parse_genbank.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/parse_genbank.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Mon Dec 1 20:46:40 2003 UTC revision 1.2, Wed Dec 3 19:39:29 2003 UTC
# Line 1  Line 1 
1  use FIG;  use FIG;
2    
3  $usage = "usage: parse_genbank Tbl Fasta Assignments Prefix idN ContigFasta pegDNA < genbank.entry";  $usage = "usage: parse_genbank Genome Dir < genbank.entry";
4  (($tbl = shift @ARGV) && open(TBL,">$tbl") &&  (
5   ($fasta = shift @ARGV) && open(FASTA,">$fasta") &&   $genome = shift @ARGV &&
6   ($assignments = shift @ARGV) && open(ASSIGNMENTS,">$assignments") &&   $dir    = shift @ARGV
  ($prefix = shift @ARGV) &&  
  ($idN = shift @ARGV) &&  
  ($contigF = shift @ARGV) && open(CONTIGS,">$contigF") &&  
  ($pegDNA = shift @ARGV) && open(PEGDNA,">$pegDNA")  
7  )  )
8      || die $usage;      || die $usage;
9    
10    $prefixP = "fig|$genome.peg.";
11    $prefixR = "fig|$genome.rna.";
12    
13    &verify_dir("$dir/Features/peg");
14    &verify_dir("$dir/Features/rna");
15    open(CONTIGS,">$dir/contigs") "" die "could not open $dir/contigs";
16    open(TBLPEG,">$dir/Features/peg/tbl") || die "could not open $dir/Features/peg/tbl";
17    open(FASTAPEG,">$dir/Features/peg/fasta") || die "could not open $dir/Features/peg/fasta";
18    open(ASSIGNMENTS,">$dir/assigned_functions") "" die "could not open $dir/assigned_functions";
19    open(TBLRNA,">$dir/Features/rna/tbl") || die "could not open $dir/Features/rna/tbl";
20    open(FASTARNA,">$dir/Features/rna/fasta") || die "could not open $dir/Features/rna/fasta";
21    
22  $/ = "\nLOCUS";  $/ = "\nLOCUS";
23  while ($contig = <STDIN>)  while ($contig = <STDIN>)
24  {  {
# Line 35  Line 43 
43          while ($contig =~ /\n\s{4,6}CDS\s+([^\n]+(\n\s{20,22}\S[^\n]*)+)/gs)          while ($contig =~ /\n\s{4,6}CDS\s+([^\n]+(\n\s{20,22}\S[^\n]*)+)/gs)
44          {          {
45              $cds = $1;              $cds = $1;
46              &process_cds($id,\$cds,$prefix,\$idN,$contigs,\*TBL,\*FASTA,\*ASSIGNMENTS,\*PEGDNA);              &process_cds($id,\$cds,$prefix,\$idN,$contigs,\*TBLPEG,\*FASTAPEG,\*ASSIGNMENTS);
47            }
48    
49            while ($contig =~ /\n\s{3,6}[tr]RNA\s+([^\n]+(\n\s{20,22}\S[^\n]*)+)/gs)
50            {
51                $rna = $1;
52                &process_rna($id,\$rna,$prefixR,\$idNr,$contigs,\*TBLRNA,\*FASTARNA);
53          }          }
54      }      }
55  }  }
56    close(CONTIGS);
57    close(TBLPEG);
58    close(FASTPEG);
59    close(ASSIGNMENTS);
60    close(TBLRNA);
61    close(FASTARNA);
62    
63    if (! -s "$dir/contigs")            { unlink("$dir/contigs"); print STDERR "no contigs in $dir\n"; }
64    if (! -s "$dir/assigned_functions") { unlink("$dir/assigned_functions"); print STDERR "no assigned_functions in $dir }
65    if (! -s "$dir/Features/peg/tbl")   { system "rm -rf $dir/Features/peg"; print STDERR "no PEGs in $dir }
66    if (! -s "$dir/Features/rna/tbl")   { system "rm -rf $dir/Features/rna"; print STDERR "no RNAs in $dir }
67    
68    
69  sub process_cds {  sub process_cds {
70      my($contigID,$cdsP,$prefix,$idNP,$contigs,$fh_tbl,$fh_fasta,$fh_ass,$fh_dna) = @_;      my($contigID,$cdsP,$prefix,$idNP,$contigs,$fh_tbl,$fh_fasta,$fh_ass,$fh_dna) = @_;
# Line 255  Line 281 
281      return $func;      return $func;
282  }  }
283    
284    sub process_rna {
285        my($contigID,$cdsP,$prefix,$idNP,$contigs,$fh_tbl,$fh_dna) = @_;
286        my($loc,@aliases,$func,$trans,$id,$precise);
287    
288        ($loc,$precise)  = &get_loc($contigID,$cdsP);
289        $func    = &get_rna_func($cdsP);
290    
291        if ($loc)
292        {
293            my $dna  = &FIG::extract_seq($contigs,$loc);
294            $id = $prefix . "$$idNP";
295            $$idNP++;
296            if (! $func )
297            {
298                warn "could not get func $$cdsP\n";
299            }
300            else
301            {
302                print $fh_tbl "$id\t$loc\t$func\n";
303                &FIG::display_id_and_seq($id,\$dna,$fh_dna);
304            }
305        }
306        else
307        {
308            print &Dumper($cdsP); die "aborted";
309        }
310    }
311    
312    sub get_rna_func {
313        my($cdsP) = @_;
314        my $func = "";
315    
316        if ($$cdsP =~ /\/product=\"([^"]*)\"/s)
317        {
318            $func = $1;
319            $func =~ s/\s+/ /gs;
320        }
321        elsif ($$cdsP =~ /\/gene=\"([^"]*)\"/s)
322        {
323            $func = $1;
324            $func =~ s/\s+/ /gs;
325        }
326        elsif ($$cdsP =~ /\/note=\"([^"]*)\"/s)
327        {
328            $func = $1;
329            $func =~ s/\s+/ /gs;
330        }
331        return $func;
332    }
333    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3