[Bio] / FigKernelScripts / collect_related_sequences.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/collect_related_sequences.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Sat Apr 28 15:19:29 2007 UTC revision 1.2, Sat Apr 28 22:35:02 2007 UTC
# Line 24  Line 24 
24  use gjoseqlib;  use gjoseqlib;
25  use collect_related_sequences;  use collect_related_sequences;
26    
27  my $usage =<<'End_of_Usage';  my $is_SEED = 0;
28    eval { require FIG_Config; $is_SEED = 1 };
29    
30    my $usage =<<"End_of_Usage";
31    
32  Usage:  collect_related_sequences  [options]  dbfile  seqfile  Usage:  collect_related_sequences  [options]  dbfile  seqfile
33    
# Line 32  Line 35 
35    
36      -c  min_coverage  # D=0.80      -c  min_coverage  # D=0.80
37      -d  tmp_dir       # name of temporary directory      -d  tmp_dir       # name of temporary directory
38      -e  max_e_value   # D=0.01      -e  max_e_value   # D=0.001
39        -f  'fid ...'     # use sequence(s) from SEED (instead of seqfile)
40      -i  min_identity  # D=0.25      -i  min_identity  # D=0.25
41        -m                # do NOT merge queries with found sequences
42        -n                # use SEED nr database (instead of dbfile)
43      -t  tmp           # place for temporary directory      -t  tmp           # place for temporary directory
44      -x  extra_ends    # extra length at ends      -x  extra_ends    # extra length at ends
45    
# Line 41  Line 47 
47    
48  my $min_coverage = 0.80;  my $min_coverage = 0.80;
49  my $tmp_dir;  my $tmp_dir;
50  my $max_e_value  = 0.01;  my $max_e_value  = 0.001;
51    my $fids;
52  my $min_identity = 0.25;  my $min_identity = 0.25;
53    my $merge        = 1;
54    my $nr           = 0;
55  my $tmp;  my $tmp;
56  my $extra_ends   = 10;  my $extra_ends   = 10;
57    
# Line 53  Line 62 
62      elsif ( s/^-d// ) { $tmp_dir      = $_ || shift }      elsif ( s/^-d// ) { $tmp_dir      = $_ || shift }
63      elsif ( s/^-e// ) { $max_e_value  = $_ || shift }      elsif ( s/^-e// ) { $max_e_value  = $_ || shift }
64      elsif ( s/^-i// ) { $min_identity = $_ || shift }      elsif ( s/^-i// ) { $min_identity = $_ || shift }
65        elsif ( s/^-m// ) { $merge        = 0 }
66      elsif ( s/^-t// ) { $tmp          = $_ || shift }      elsif ( s/^-t// ) { $tmp          = $_ || shift }
67      elsif ( s/^-x// ) { $extra_ends   = $_ || shift }      elsif ( s/^-x// ) { $extra_ends   = $_ || shift }
68        elsif ( $is_SEED && s/^-f// ) { $fids = $_ || shift }
69        elsif ( $is_SEED && s/^-n// ) { $nr   = 1           }
70        elsif ( m/^-[fn]/ )
71        {
72            usage( "-f and -n are only valid in the SEED environment.\n" );
73        }
74      else      else
75      {      {
76          usage( "Bad command flag '$_'\n" );          usage( "Bad command flag '$_'\n" );
77      }      }
78  }  }
79    
 @ARGV == 2 or usage( "collect_related_sequences requires 2 parameters" );  
   
80  my ( $dbfile, $seqfile ) = @ARGV;  my ( $dbfile, $seqfile ) = @ARGV;
81    
82  my @seq = read_fasta( $seqfile );  my $dbfile = $nr ? "$FIG_Config::global/nr"
83  @seq or usage( "Failed to read sequences from '$seqfile'" );                   : shift @ARGV;
84    -f $dbfile || usage( "Cannot locate database file '$dbfile'." );
85    
86    my $fig;
87    if ( $nr || $fids )
88    {
89        require FIG;
90        $fig = new FIG;
91    }
92    
93    my @seq;
94    if ( $fids )
95    {
96        foreach my $fid ( split /[,\s]+/, $fids )
97        {
98            my $seq = $fig->get_translation( $fid );
99            push @seq, [ $fid, '', $seq ] if $seq;
100        }
101        @seq or usage( "Failed to get translations for '$fids'." );
102    }
103    else
104    {
105        my $seqfile = shift @ARGV;
106        @seq = read_fasta( $seqfile );
107        @seq or usage( "Failed to read sequences from '$seqfile'." );
108    }
109    
110  my $options =  my $options =
111      { min_coverage => $min_coverage,      { min_coverage => $min_coverage,
# Line 77  Line 116 
116  $options->{ tmp     } = $tmp     if $tmp;  $options->{ tmp     } = $tmp     if $tmp;
117  $options->{ tmp_dir } = $tmp_dir if $tmp_dir;  $options->{ tmp_dir } = $tmp_dir if $tmp_dir;
118    
119  my $other = collect_related_sequences::collect_related_sequences( $dbfile, \@seq, $options );  my $found = collect_related_sequences::collect_related_sequences( $dbfile, \@seq, $options );
120    
121    #  SEED nr has xxx_... ids.  We need fig ids:
122    
123    if ( $nr )
124    {
125        my ( $id, $def, $seq, @ids );
126        @$found = map { ( $id, $def, $seq ) = @$_;
127                        @ids = $fig->recast_ids( 'fig\|', [ $id ] );
128                        map { [ $_, $def, $seq ] } @ids
129                      }
130                  @$found;
131    }
132    
133    # Merge exemplars and others:
134    
135    if ( $merge )
136    {
137        my %seen;
138        @$found = grep { ! $seen{ $_->[0] }++ } @seq, @$found;
139    }
140    
141  print_alignment_as_fasta( $other );  print_alignment_as_fasta( $found );
142  exit;  exit;
143    
144    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3