[Bio] / FigKernelScripts / make_protein_sets.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/make_protein_sets.pl

Parent Directory | Revision Log


Revision 1.4 - (view) (download) (as text)

1 : efrank 1.1 # -*- perl -*-
2 : olson 1.4 #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 : efrank 1.1
20 :     use Carp;
21 :     use Data::Dumper;
22 :     use strict;
23 :     use FIG;
24 :    
# Handle on the SEED data layer; used further down to fetch similarities.
my $fig = FIG->new;

# Autoflush the currently selected output handle (STDOUT by default).
$| = 1;

my $usage = "usage: make_protein_sets Exemplars Nohits CutOff < sorted.counts > sets.file";

my($pair,$exemplars,$nohits,$cutoff);

# Consume and validate all three positional arguments BEFORE opening any
# output file, so a usage error can no longer truncate an existing
# Exemplars file as a side effect.
# NOTE(review): $cutoff is mandatory (and must be a true value) but is not
# referenced anywhere else in the visible script -- confirm intended use.
$exemplars = shift @ARGV;
$nohits    = shift @ARGV;
$cutoff    = shift @ARGV;
($exemplars && $nohits && $cutoff) || die $usage;

# Three-argument open: the file name is taken literally (no mode characters
# or surrounding whitespace are interpreted), and a failure reports the
# real OS error instead of the usage string.
open(EXEMPLARS, '>', $exemplars)
    || die "Could not open $exemplars for writing: $!";
open(NOHITS, '>', $nohits)
    || die "Could not open $nohits for writing: $!";
36 :    
my(%nr,$n,$id,@against,$id2,$x,$count,$id1,$sc);

# Build %nr: one entry with value 1 for every sequence ID that appears on a
# ">ID ..." header line of $FIG_Config::global/nr.  grep pre-filters the
# header lines so Perl only has to look at those.
my $nr_file = "$FIG_Config::global/nr";

# List-form pipe open: grep is exec'ed directly, so the path is never parsed
# by a shell, and a failure to start grep is reported here.
open(TMP, '-|', 'grep', '^>', $nr_file)
    || die "Could not pipe-open $nr_file: $!";
while (my $header = <TMP>)
{
    if ($header =~ /^>(\S+)/)
    {
        $nr{$1} = 1;
    }
}

# A successful open only proves grep started.  An unreadable or missing nr
# file shows up as grep's exit status when the pipe is closed.  Exit status
# 1 just means "no header lines matched" and is not treated as an error.
if (!close(TMP) && ($! || ($? & 127) || (($? >> 8) > 1)))
{
    die "grep failed while reading $nr_file (wait status $?): $!";
}

print STDERR "processed nr\n";
49 :    
# Walk the "<count><TAB><id>" lines on STDIN and greedily form numbered sets.
# For each set, one "<set>\t<exemplar>" line goes to EXEMPLARS and one
# "<set>\t<member>" line per member (exemplar first) goes to STDOUT.
# IDs placed in a set are switched to 0 in %nr so they cannot be reused.
#
# NOTE(review): both thresholds below are the literal 2; the CutOff
# command-line argument is not consulted here -- confirm whether it should be.
$n = 1;
while (my $line = <STDIN>)
{
    # Lines that do not start with "<digits><TAB><non-space>" are skipped.
    next unless $line =~ /^(\d+)\t(\S+)/;
    my ($tally, $candidate) = ($1, $2);

    # Only IDs still flagged in %nr, with a count of at least 2, can seed a set.
    next unless $nr{$candidate};
    next unless $tally >= 2;

    # Similarity partners of the candidate that are still flagged in %nr.
    # (Arguments presumably mean: up to 100000 hits, e-value <= 1.0e-5,
    # raw similarities -- TODO confirm against FIG::sims.)
    my @partners = grep { $nr{$_} }
                   map  { $_->id2 }
                   $fig->sims($candidate,100000,1.0e-5,"raw");

    # A set is only emitted when more than two partners remain.
    next unless @partners > 2;

    print EXEMPLARS "$n\t$candidate\n";
    foreach my $member ($candidate, @partners)
    {
        $nr{$member} = 0;
        print "$n\t$member\n";
    }
    $n++;
}
79 :    
# Every ID whose %nr flag is still true was never placed in a set; list
# those IDs, in sorted order, one per line on NOHITS.
foreach my $leftover (sort keys(%nr))
{
    print NOHITS "$leftover\n" if $nr{$leftover};
}

# Close the write handles explicitly and check the result: buffered write
# errors (e.g. a full disk) only surface at close time and would otherwise
# leave silently truncated output files.
close(EXEMPLARS) || die "Error closing $exemplars: $!";
close(NOHITS)    || die "Error closing $nohits: $!";

# Release the FIG handle explicitly before the script ends.
undef $fig;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3