[Bio] / FigKernelScripts / p3-gto-fasta.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/p3-gto-fasta.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 =head1 Convert Genome Typed Objects to FASTA
2 :    
3 :     p3-gto-fasta.pl [options] gtoFile
4 :    
5 :     This script produces FASTA files from a L<GenomeTypeObject> instance. The GTO must be
6 :     provided as a file in JSON format.
7 :    
8 :     =head2 Parameters
9 :    
10 :     The positional parameter is the name of the GTO file. If none is specified, the GTO file is read from the standard input.
11 :    
12 :     The command-line options are the following. All three are mutually exclusive.
13 :    
14 :     =over 4
15 :    
16 :     =item protein
17 :    
18 :     If specified, the output will be a protein FASTA file.
19 :    
20 :     =item feature
21 :    
22 :     If specified, the output will be a feature DNA FASTA file.
23 :    
24 :     =item contig
25 :    
26 :     If specified, the output will be a contig DNA FASTA file. This is the default.
27 :    
28 :     =back
29 :    
30 :     =cut
31 :    
32 :     use strict;
33 :     use P3Utils;
34 :     use GenomeTypeObject;
35 :     use SeedUtils;
36 :     use Contigs;
37 :    
# Parse the command line. The three output modes are mutually exclusive and are
# collected under the hidden "mode" option; contig output is the default.
my $opt = P3Utils::script_opts('gtoFile',
        ['mode' => hidden => { one_of => [['protein', 'feature protein FASTA'],
                                          ['feature', 'feature DNA FASTA'],
                                          ['contig', 'contig DNA FASTA']],
                               default => 'contig' }],
        );
# The GTO source is the first positional parameter. Without one, the GTO is
# read from the standard input.
my $gtoFile = $ARGV[0];
if ($gtoFile) {
    # A named file must exist and have a nonzero size.
    die "GTO file $gtoFile not found or empty." if ! -s $gtoFile;
} else {
    $gtoFile = \*STDIN;
}
# Load the genome object.
my $gto = GenomeTypeObject->create_from_file($gtoFile);
# Produce the output that matches the selected mode.
my $mode = $opt->mode;
if ($mode eq 'protein') {
    # Protein mode: one record per feature that has a protein translation,
    # with the feature's function as the header comment.
    my $featureList = $gto->{features};
    for my $fid (@$featureList) {
        my $translation = $fid->{protein_translation};
        next if ! $translation;
        fasta_print($fid->{id}, $fid->{function}, $translation);
    }
} elsif ($mode eq 'contig') {
    # Contig mode: one record per contig, with no header comment.
    my $contigList = $gto->{contigs};
    for my $entry (@$contigList) {
        fasta_print($entry->{id}, '', $entry->{dna});
    }
} else {
    # Feature mode: one DNA record per feature. The contigs are loaded into a
    # Contigs object as [id, comment, sequence] triples, and each feature's
    # location list is passed to it to fetch the DNA.
    my @triples = map { [$_->{id}, '', $_->{dna} ] } @{$gto->{contigs}};
    my $contigObject = Contigs->new(\@triples);
    my $featureList = $gto->{features};
    for my $fid (@$featureList) {
        my $locations = $fid->{location};
        my $sequence = $contigObject->dna(@$locations);
        fasta_print($fid->{id}, $fid->{function}, $sequence);
    }
}
82 :    
# Write one FASTA record to the standard output.
#
# Parameters:
#   $id       sequence identifier, printed immediately after the ">" marker
#   $comment  free-text description for the header line; may be empty or undefined
#   $seq      sequence letters; may be empty or undefined
#
# The header is ">id comment" when a comment is present and ">id" alone when it
# is not, so records without a comment carry no trailing blank. The sequence is
# written in lines of at most 60 characters, each terminated by a new-line. An
# empty sequence produces the header line only.
sub fasta_print {
    my ($id, $comment, $seq) = @_;
    # Missing values are treated as empty strings rather than interpolated as undef.
    $comment = '' if ! defined $comment;
    $seq = '' if ! defined $seq;
    # Only add the separator and comment when there is a comment to show.
    my $header = ">$id";
    $header .= " $comment" if length $comment;
    print "$header\n";
    # Cut the sequence into pieces of up to 60 characters. The empty string at
    # the end of the join list puts a new-line after the final piece.
    my @chunks = ($seq =~ /(.{1,60})/g);
    print join("\n", @chunks, "");
}
89 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3