[Bio] / FigKernelScripts / p3-rep-prots.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/p3-rep-prots.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Tue Aug 8 19:20:16 2017 UTC revision 1.2, Fri Mar 16 01:24:56 2018 UTC
# Line 3  Line 3 
3      p3-rep-prots.pl [options] outDir      p3-rep-prots.pl [options] outDir
4    
5  This script processes a list of genome IDs to create a directory suitable for use by the L<RepresentativeGenomes> server.  This script processes a list of genome IDs to create a directory suitable for use by the L<RepresentativeGenomes> server.
6  It will extract all the instances of the specified seed protein (usually Phenylalanyl-tRNA synthetase alpha chain) and only  It will extract all the instances of the specified seed protein (Phenylalanyl-tRNA synthetase alpha chain) and only
7  keep genomes with a single instance of reasonable length. The list of genome IDs and names will go in the output file  keep genomes with a single instance of reasonable length. The list of genome IDs and names will go in the output file
8  C<complete.genomes> and a FASTA of the seed proteins in C<6.1.1.20.fasta>.  C<complete.genomes> and a FASTA of the seed proteins in C<6.1.1.20.fasta>.
9    
# Line 18  Line 18 
18    
19  =over 4  =over 4
20    
 =item protein  
   
 The description string of the desired protein role. The default is C<Phenylalanyl-tRNA synthetase alpha chain>.  
   
21  =item minlen  =item minlen
22    
23  The minimum acceptable length for the protein. The default is 209.  The minimum acceptable length for the protein. The default is 209.
# Line 43  Line 39 
39  use P3Utils;  use P3Utils;
40  use Stats;  use Stats;
41  use File::Copy::Recursive;  use File::Copy::Recursive;
42    use RoleParse;
43    
44  $| = 1;  $| = 1;
45  # Get the command-line options.  # Get the command-line options.
46  my $opt = P3Utils::script_opts('outDir', P3Utils::col_options(), P3Utils::ih_options(),  my $opt = P3Utils::script_opts('outDir', P3Utils::col_options(), P3Utils::ih_options(),
         ['protein=s', 'protein role description', { default => 'Phenylalanyl-tRNA synthetase alpha chain'}],  
47          ['minlen=i', 'minimum protein length', { default => 209 }],          ['minlen=i', 'minimum protein length', { default => 209 }],
48          ['maxlen=i', 'maximum protein length', { default => 485 }],          ['maxlen=i', 'maximum protein length', { default => 485 }],
49          ['clear', 'clear the output directory if it exists']          ['clear', 'clear the output directory if it exists']
# Line 66  Line 62 
62  # Create the statistics object.  # Create the statistics object.
63  my $stats = Stats->new();  my $stats = Stats->new();
64  # Create a filter from the protein name.  # Create a filter from the protein name.
65  my @filter = (['eq', 'product', $opt->protein]);  my @filter = (['eq', 'product', 'Phenylalanyl tRNA-synthetase alpha chain']);
66    # Save the checksum for the seed role.
67    my $roleCheck = "WCzieTC/aZ6262l19bwqgw";
68  # Create a list of the columns we want.  # Create a list of the columns we want.
69  my @cols = qw(genome_name patric_id aa_sequence);  my @cols = qw(genome_name patric_id aa_sequence product);
70  # Get the length options.  # Get the length options.
71  my $minlen = $opt->minlen;  my $minlen = $opt->minlen;
72  my $maxlen = $opt->maxlen;  my $maxlen = $opt->maxlen;
# Line 97  Line 95 
95      # Collate them by genome ID, discarding the nulls.      # Collate them by genome ID, discarding the nulls.
96      my %proteins;      my %proteins;
97      for my $prot (@$protList) {      for my $prot (@$protList) {
98          my ($genome, $name, $fid, $sequence) = @$prot;          my ($genome, $name, $fid, $sequence, $product) = @$prot;
99          if ($fid) {          if ($fid) {
100                # We have a real feature, check the function.
101                my $check = RoleParse::Checksum($product // '');
102                if ($check ne $roleCheck) {
103                    $stats->Add(funnyProt => 1);
104                } else {
105              push @{$proteins{$genome}}, [$name, $sequence];              push @{$proteins{$genome}}, [$name, $sequence];
106              $stats->Add(protFound => 1);              $stats->Add(protFound => 1);
107          }          }
108      }      }
109        }
110      # Process the genomes one at a time.      # Process the genomes one at a time.
111      for my $genome (keys %proteins) {      for my $genome (keys %proteins) {
112          my @prots = @{$proteins{$genome}};          my @prots = @{$proteins{$genome}};

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3