[Bio] / FigKernelScripts / collect_related_sequences.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/collect_related_sequences.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Sat Apr 28 22:35:02 2007 UTC revision 1.3, Fri May 11 18:51:28 2007 UTC
# Line 1  Line 1 
1  ########################################################################  ########################################################################
2  # -*- perl -*-  # -*- perl -*-
3  #  #
4  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 24  Line 24 
24  use gjoseqlib;  use gjoseqlib;
25  use collect_related_sequences;  use collect_related_sequences;
26    
27  my $is_SEED = 0;  my $min_coverage = 0.80;
28  eval { require FIG_Config; $is_SEED = 1 };  my $tmp_dir;
29    my $max_e_value  = 0.001;
30    my $fids;
31    my $min_identity = 0.25;
32    my $merge        = 1;
33    my $nr           = 0;
34    my $tmp;
35    my $extra_ends   = 10;
36    
37  my $usage =<<"End_of_Usage";  my $usage =<<"End_of_Usage";
38    
# Line 33  Line 40 
40    
41  Options:  Options:
42    
43      -c  min_coverage  # D=0.80      -c  min_coverage  # fraction of exemplar coverged (D=$min_coverage)
44      -d  tmp_dir       # name of temporary directory      -d  tmp_dir       # name of temporary directory
45      -e  max_e_value   # D=0.001      -e  max_e_value   # required match significance (D=$max_e_value)
46      -f  'fid ...'     # use sequence(s) from SEED (instead of seqfile)      -f  'fid ...'     # use exemplar(s) from SEED (instead of seqfile)
47      -i  min_identity  # D=0.25      -i  min_identity  # required similarity to exemplar (D=$min_identity)
48      -m                # do NOT merge queries with found sequences      -m                # do NOT merge queries with found sequences
49      -n                # use SEED nr database (instead of dbfile)      -n                # use SEED nr database (instead of dbfile)
50      -t  tmp           # place for temporary directory      -t  tmp           # place for temporary directory
51      -x  extra_ends    # extra length at ends      -x  extra_ends    # extra length at ends (D=$extra_ends residues)
52    
53  End_of_Usage  End_of_Usage
54    
 my $min_coverage = 0.80;  
 my $tmp_dir;  
 my $max_e_value  = 0.001;  
 my $fids;  
 my $min_identity = 0.25;  
 my $merge        = 1;  
 my $nr           = 0;  
 my $tmp;  
 my $extra_ends   = 10;  
   
55  while ( $ARGV[0] =~ /^-/ )  while ( $ARGV[0] =~ /^-/ )
56  {  {
57      $_ = shift;      $_ = shift;
58      if    ( s/^-c// ) { $min_coverage = $_ || shift }      if    ( s/^-c// ) { $min_coverage = $_ || shift }
59      elsif ( s/^-d// ) { $tmp_dir      = $_ || shift }      elsif ( s/^-d// ) { $tmp_dir      = $_ || shift }
60      elsif ( s/^-e// ) { $max_e_value  = $_ || shift }      elsif ( s/^-e// ) { $max_e_value  = $_ || shift }
61        elsif ( s/^-f// ) { $fids         = $_ || shift }
62      elsif ( s/^-i// ) { $min_identity = $_ || shift }      elsif ( s/^-i// ) { $min_identity = $_ || shift }
63      elsif ( s/^-m// ) { $merge        = 0 }      elsif ( s/^-m// ) { $merge        = 0 }
64        elsif ( s/^-n// ) { $nr           = 1           }
65      elsif ( s/^-t// ) { $tmp          = $_ || shift }      elsif ( s/^-t// ) { $tmp          = $_ || shift }
66      elsif ( s/^-x// ) { $extra_ends   = $_ || shift }      elsif ( s/^-x// ) { $extra_ends   = $_ || shift }
     elsif ( $is_SEED && s/^-f// ) { $fids = $_ || shift }  
     elsif ( $is_SEED && s/^-n// ) { $nr   = 1           }  
     elsif ( m/^-[fn]/ )  
     {  
         usage( "-f and -n are only valid in the SEED environment.\n" );  
     }  
67      else      else
68      {      {
69          usage( "Bad command flag '$_'\n" );          usage( "Bad command flag '$_'\n" );
70      }      }
71  }  }
72    
73  my ( $dbfile, $seqfile ) = @ARGV;  my $dbfile = shift @ARGV if ! $nr;
74    $nr || -f $dbfile || usage( "Cannot locate database file '$dbfile'." );
 my $dbfile = $nr ? "$FIG_Config::global/nr"  
                  : shift @ARGV;  
 -f $dbfile || usage( "Cannot locate database file '$dbfile'." );  
   
 my $fig;  
 if ( $nr || $fids )  
 {  
     require FIG;  
     $fig = new FIG;  
 }  
75    
76  my @seq;  my @seq;
77    my @fids = ();
78    
79  if ( $fids )  if ( $fids )
80  {  {
81      foreach my $fid ( split /[,\s]+/, $fids )      @fids = ref( $fids ) eq 'ARRAY' ? @$fids
82      {                                      : split /[,\s]+/, $fids;
         my $seq = $fig->get_translation( $fid );  
         push @seq, [ $fid, '', $seq ] if $seq;  
     }  
     @seq or usage( "Failed to get translations for '$fids'." );  
83  }  }
84  else  else
85  {  {
# Line 108  Line 89 
89  }  }
90    
91  my $options =  my $options =
92      { min_coverage => $min_coverage,      { max_e_value  => $max_e_value,
93        max_e_value  => $max_e_value,        min_coverage => $min_coverage,
94        min_identity => $min_identity,        min_identity => $min_identity,
95        extra_ends   => $extra_ends        extra_ends   => $extra_ends
96      };      };
97    
98    $options->{ exemplars } = \@seq      if ! $fids;
99    $options->{ fids      } = \@fids     if   $fids;
100    $options->{ no_merge  } =  1         if ! $merge;
101    $options->{ nr        } =  1         if   $nr;
102    $options->{ seq_db    } = $dbfile    if ! $nr;
103  $options->{ tmp     } = $tmp     if $tmp;  $options->{ tmp     } = $tmp     if $tmp;
104  $options->{ tmp_dir } = $tmp_dir if $tmp_dir;  $options->{ tmp_dir } = $tmp_dir if $tmp_dir;
105    
106  my $found = collect_related_sequences::collect_related_sequences( $dbfile, \@seq, $options );  my $found = collect_related_sequences::collect_related_sequences( $options );
   
 #  SEED nr has xxx_... ids.  We need fig ids:  
107    
108  if ( $nr )  print_alignment_as_fasta( $found ) if $found;
 {  
     my ( $id, $def, $seq, @ids );  
     @$found = map { ( $id, $def, $seq ) = @$_;  
                     @ids = $fig->recast_ids( 'fig\|', [ $id ] );  
                     map { [ $_, $def, $seq ] } @ids  
                   }  
               @$found;  
 }  
   
 # Merge exemplars and others:  
   
 if ( $merge )  
 {  
     my %seen;  
     @$found = grep { ! $seen{ $_->[0] }++ } @seq, @$found;  
 }  
109    
 print_alignment_as_fasta( $found );  
110  exit;  exit;
111    
112    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3