[Bio] / FigKernelScripts / collect_related_sequences.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/collect_related_sequences.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Sat Apr 28 15:19:29 2007 UTC revision 1.3, Fri May 11 18:51:28 2007 UTC
# Line 1  Line 1 
1  ########################################################################  ########################################################################
2  # -*- perl -*-  # -*- perl -*-
3  #  #
4  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 24  Line 24 
24  use gjoseqlib;  use gjoseqlib;
25  use collect_related_sequences;  use collect_related_sequences;
26    
27  my $usage =<<'End_of_Usage';  my $min_coverage = 0.80;
28    my $tmp_dir;
29    my $max_e_value  = 0.001;
30    my $fids;
31    my $min_identity = 0.25;
32    my $merge        = 1;
33    my $nr           = 0;
34    my $tmp;
35    my $extra_ends   = 10;
36    
37    my $usage =<<"End_of_Usage";
38    
39  Usage:  collect_related_sequences  [options]  dbfile  seqfile  Usage:  collect_related_sequences  [options]  dbfile  seqfile
40    
41  Options:  Options:
42    
43      -c  min_coverage  # D=0.80      -c  min_coverage  # fraction of exemplar coverged (D=$min_coverage)
44      -d  tmp_dir       # name of temporary directory      -d  tmp_dir       # name of temporary directory
45      -e  max_e_value   # D=0.01      -e  max_e_value   # required match significance (D=$max_e_value)
46      -i  min_identity  # D=0.25      -f  'fid ...'     # use exemplar(s) from SEED (instead of seqfile)
47        -i  min_identity  # required similarity to exemplar (D=$min_identity)
48        -m                # do NOT merge queries with found sequences
49        -n                # use SEED nr database (instead of dbfile)
50      -t  tmp           # place for temporary directory      -t  tmp           # place for temporary directory
51      -x  extra_ends    # extra length at ends      -x  extra_ends    # extra length at ends (D=$extra_ends residues)
52    
53  End_of_Usage  End_of_Usage
54    
 my $min_coverage = 0.80;  
 my $tmp_dir;  
 my $max_e_value  = 0.01;  
 my $min_identity = 0.25;  
 my $tmp;  
 my $extra_ends   = 10;  
   
55  while ( $ARGV[0] =~ /^-/ )  while ( $ARGV[0] =~ /^-/ )
56  {  {
57      $_ = shift;      $_ = shift;
58      if    ( s/^-c// ) { $min_coverage = $_ || shift }      if    ( s/^-c// ) { $min_coverage = $_ || shift }
59      elsif ( s/^-d// ) { $tmp_dir      = $_ || shift }      elsif ( s/^-d// ) { $tmp_dir      = $_ || shift }
60      elsif ( s/^-e// ) { $max_e_value  = $_ || shift }      elsif ( s/^-e// ) { $max_e_value  = $_ || shift }
61        elsif ( s/^-f// ) { $fids         = $_ || shift }
62      elsif ( s/^-i// ) { $min_identity = $_ || shift }      elsif ( s/^-i// ) { $min_identity = $_ || shift }
63        elsif ( s/^-m// ) { $merge        = 0 }
64        elsif ( s/^-n// ) { $nr           = 1           }
65      elsif ( s/^-t// ) { $tmp          = $_ || shift }      elsif ( s/^-t// ) { $tmp          = $_ || shift }
66      elsif ( s/^-x// ) { $extra_ends   = $_ || shift }      elsif ( s/^-x// ) { $extra_ends   = $_ || shift }
67      else      else
# Line 61  Line 70 
70      }      }
71  }  }
72    
73  @ARGV == 2 or usage( "collect_related_sequences requires 2 parameters" );  my $dbfile = shift @ARGV if ! $nr;
74    $nr || -f $dbfile || usage( "Cannot locate database file '$dbfile'." );
75    
76  my ( $dbfile, $seqfile ) = @ARGV;  my @seq;
77    my @fids = ();
78    
79  my @seq = read_fasta( $seqfile );  if ( $fids )
80  @seq or usage( "Failed to read sequences from '$seqfile'" );  {
81        @fids = ref( $fids ) eq 'ARRAY' ? @$fids
82                                        : split /[,\s]+/, $fids;
83    }
84    else
85    {
86        my $seqfile = shift @ARGV;
87        @seq = read_fasta( $seqfile );
88        @seq or usage( "Failed to read sequences from '$seqfile'." );
89    }
90    
91  my $options =  my $options =
92      { min_coverage => $min_coverage,      { max_e_value  => $max_e_value,
93        max_e_value  => $max_e_value,        min_coverage => $min_coverage,
94        min_identity => $min_identity,        min_identity => $min_identity,
95        extra_ends   => $extra_ends        extra_ends   => $extra_ends
96      };      };
97    
98    $options->{ exemplars } = \@seq      if ! $fids;
99    $options->{ fids      } = \@fids     if   $fids;
100    $options->{ no_merge  } =  1         if ! $merge;
101    $options->{ nr        } =  1         if   $nr;
102    $options->{ seq_db    } = $dbfile    if ! $nr;
103  $options->{ tmp     } = $tmp     if $tmp;  $options->{ tmp     } = $tmp     if $tmp;
104  $options->{ tmp_dir } = $tmp_dir if $tmp_dir;  $options->{ tmp_dir } = $tmp_dir if $tmp_dir;
105    
106  my $other = collect_related_sequences::collect_related_sequences( $dbfile, \@seq, $options );  my $found = collect_related_sequences::collect_related_sequences( $options );
107    
108    print_alignment_as_fasta( $found ) if $found;
109    
 print_alignment_as_fasta( $other );  
110  exit;  exit;
111    
112    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3