[Bio] / FigKernelScripts / find_SEED_SSU_rRNA_genes.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/find_SEED_SSU_rRNA_genes.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Sat Feb 21 15:37:22 2009 UTC revision 1.4, Mon May 4 20:54:03 2009 UTC
# Line 1  Line 1 
1  #! /usr/bin/perl  # -*- perl -*-
2    ########################################################################
3    # Copyright (c) 2003-2009 University of Chicago and Fellowship
4    # for Interpretations of Genomes. All Rights Reserved.
5  #  #
6  #   find_SEED_SSU_rRNA_genes [ genome_id ... ]  # This file is part of the SEED Toolkit.
7  #  #
8    # The SEED Toolkit is free software. You can redistribute
9    # it and/or modify it under the terms of the SEED Toolkit
10    # Public License.
11    #
12    # You should have received a copy of the SEED Toolkit Public License
13    # along with this program; if not write to the University of Chicago
14    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15    # Genomes at veronika@thefig.info or download a copy from
16    # http://www.theseed.org/LICENSE.TXT.
17    ########################################################################
18    
19    my $usage = <<'End_of_Usage';
20    Identify the SSU rRNA genes in one or more SEED genomes.
21    
22    Usage: find_SEED_SSU_rRNA_genes [options] [ genome_id ... ] > found
23    
24    Output:
25    
26        genome_id \t type \t location \t sequence \t tag \t proposed_assignment
27    
28        where type is always rna.
29    
30    Options:
31    
32        -a 'role'  #  Proposed assignment; empty string supresses the field
33                   #    (D = 'SSU rRNA ## 16S rRNA, small subunit ribosomal RNA')
34        -b         #  Add a blank line between genomes (D = no blank)
35        -c nt      #  Condense the sequence to first and last nt nucleotides (D = full seq)
36        -e dist    #  Maximum nucleotides at end to extrapolate a match (D = 10)
37        -s         #  Do not show sequence
38        -t 'tag'   #  A short tage to identify the nature of the feature; allows
39                   #    mixing of different types; empty string supresses the field
40                   #    (D = 'SSU_rRNA')
41    
42    End_of_Usage
43    
44  use strict;  use strict;
45  use find_homologs;  use find_homologs;
# Line 10  Line 48 
48  use FIG;  use FIG;
49  my $fig = new FIG;  my $fig = new FIG;
50    
51    my $assignment = 'SSU rRNA ## 16S rRNA, small subunit ribosomal RNA';
52    my $tag = 'SSU_rRNA';
53    
54    my $blank       =  0;
55    my $extrapolate = 10;
56    my $condense    =  0;
57    
58    while ( @ARGV && $ARGV[0] =~ s/^-// )
59    {
60        $_ = shift;
61        if ( m/^a$/ ) { $assignment  = shift; next }
62        if ( s/^c// ) { $condense    = $_ || shift; next }
63        if ( s/^e// ) { $extrapolate = $_ || shift; next }
64        if ( m/^t$/ ) { $tag         = shift; next }
65    
66        if ( s/b//  ) { $blank    = ! $blank }
67        if ( m/\S/ )
68        {
69            print STDERR "Bad flag '$_'\n", $usage;
70            exit;
71        }
72    }
73    
74  use SSU_rRNA_reps;  use SSU_rRNA_reps;
75  my @rrna = @SSU_rRNA_reps::SSU_rRNA_reps;  my @rrna = @SSU_rRNA_reps::SSU_rRNA_reps;
76    
# Line 19  Line 80 
80      @genomes = grep { $fig->is_prokaryotic($_) } $fig->genomes('complete');      @genomes = grep { $fig->is_prokaryotic($_) } $fig->genomes('complete');
81  }  }
82    
83    my $pat;
84    $pat = qr/^(.{$condense})....+(.{$condense})$/o if $condense;
85    
86  foreach my $genome ( @genomes )  foreach my $genome ( @genomes )
87  {  {
88      my $contig_file = "$FIG_Config::organisms/$genome/contigs";      my $contig_file = "$FIG_Config::organisms/$genome/contigs";
89      -f $contig_file or print STDERR "Could not find contig file $contig_file\n"      -f $contig_file or print STDERR "Could not find contig file $contig_file\n"
90                      and exit;                      and exit;
91    
92      my $options = { description => 'Small subunit ribosomal RNA',      my $options = { description => 'Small subunit ribosomal RNA',
93                      extrapolate =>  10,                      extrapolate =>  10,
94                      refseq      => \@rrna                      refseq      => \@rrna
95                    };                    };
96    
97      my $instances = find_homologs::find_nucleotide_homologs( $contig_file, $options );      my $instances = find_homologs::find_nucleotide_homologs( $contig_file, $options );
98    
99      my @instances = map  { $_->[2] }      my @instances = map  { $_->[2] }
100                      sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] }                      sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] }
101                      map  { my $loc = $_->{ location };                      map  { my $loc = $_->{ location };
102                             my $seq = $_->{ sequence };                             my $seq = $_->{ sequence };
103                             my ( $c, $b, $e ) = $loc =~ /^(.*)_(\d+)_(\d+)$/;                             my ( $c, $b, $e ) = $loc =~ /^(.*)_(\d+)_(\d+)$/;
104                             $seq =~ s/^(.{24})...+(.{24})$/$1...$2/;                             $seq =~ s/$pat/$1...$2/ if $condense;
105                             [ $c, $b+$e, [ $genome, $loc, lc $seq ] ]                             [ $c, $b+$e, [ $genome, 'rna', $loc, lc $seq ] ]
106                           }                           }
107                      @$instances;                      @$instances;
108      foreach ( @instances ) { print join( "\t", @$_ ), "\n" }  
109      print "\n" if @instances;      foreach ( @instances )
110        {
111            print join( "\t", @$_,
112                              ( $tag ? $tag : () ),
113                              ( $assignment ? $assignment : () )
114                      ), "\n"
115        }
116    
117        print "\n" if @instances && $blank;  #  Blank line between genomes
118  }  }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.4

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3