[Bio] / FigKernelScripts / compute_new_ids.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/compute_new_ids.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : olson 1.2 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : olson 1.1
19 :     #
20 :     # Given:
21 :     #
22 :     # an ID mapping file that looks like this:
23 :     #
24 :     # fig|9999999.1.peg.929074 fig|9999999.1.peg.929074
25 :     # fig|9999999.1.peg.568952 fig|9999999.1.peg.568952
26 :     # fig|9999999.1.peg.902009 fig|9999999.1.peg.902009
27 :     # fig|9999999.1.peg.954630 fig|9999999.1.peg.954630
28 :     # fig|9999999.1.peg.550866 fig|9999999.1.peg.550866
29 :     # fig|9999999.1.peg.886341 fig|9999999.1.peg.886341
30 :     # gi|48786064 gi|48786064
31 :     # uni|Q7XX34 uni|Q7XX34
32 :     # uni|Q7SIE1 uni|Q7SIE1
33 :     # uni|Q9TL34 uni|Q9TL34
34 :     #
35 :     # and a new NR, we wish to determine which sequences are in the new NR but
36 :     # not in the old mapping file. These are sequences for which we need
37 :     # to compute new sims. We do this by building a mapping from
38 :     # new sequence -> old sequence from the mapping file, walking the new NR,
39 :     # and writing any sequences that do not have a mapping.
40 :     #
41 :    
42 :     use strict;
43 :    
#
# Main program.
#
# Usage: compute_new_ids.pl NewNR id-mapping
#
# Loads the id-mapping file (new ID -> old ID), then scans the FASTA-style
# NR file and prints, one per line on STDOUT, every sequence ID from a
# ">" header line that has no (true) entry in the mapping.
#

my $usage = "usage: $0 NewNR id-mapping";

# Take the two positional arguments. Test for defined/non-empty rather than
# truth so that a file literally named "0" is still accepted.
my $new_nr  = shift @ARGV;
my $mapfile = shift @ARGV;

(defined($new_nr) && length($new_nr) && defined($mapfile) && length($mapfile))
    or die "$usage\n";

my $idmap = load_mapfile($mapfile);

# Three-argument open: the file name can never be interpreted as a mode
# or a pipe, and failures report which file and why.
open(my $nrfh, '<', $new_nr) or die "Cannot open NR file '$new_nr': $!";

while (my $line = <$nrfh>)
{
    chomp $line;

    # Only header lines carry an ID; it is the first whitespace-delimited
    # token following the ">".
    next unless $line =~ /^>(\S+)/;
    my $id = $1;

    # No mapping to an old sequence means this ID needs new sims.
    if (!$idmap->{$id})
    {
        print "$id\n";
    }
}

close($nrfh);
72 :    
73 :    
#
# load_mapfile($mapfile)
#
# Read a tab-separated ID mapping file whose lines have the form
#
#     old-id <TAB> new-id
#
# and return a hash reference keyed by new-id with the old-id as value.
# Only the first two tab-separated fields of a line are used. Lines that
# do not supply a non-empty new-id (blank lines, lines without a tab) are
# skipped. If a new-id appears more than once, the last line wins.
#
# Dies with the file name and system error if the file cannot be opened.
# Emits "Done with loadmap" on STDERR when finished.
#
sub load_mapfile
{
    my($mapfile) = @_;
    my $map = {};

    # Three-argument open with a lexical handle: the name is never parsed
    # for a mode, and the handle is scoped to this sub.
    open(my $fh, '<', $mapfile) or die "Cannot open mapping file '$mapfile': $!";

    # Read into a lexical so the caller's $_ is left untouched.
    while (my $line = <$fh>)
    {
        chomp $line;
        my($old, $new) = split(/\t/, $line);

        # Without a new-id there is nothing to key on; storing it would
        # create a bogus "" entry in the map.
        next unless defined($new) && length($new);

        $map->{$new} = $old;
    }

    close($fh);
    warn "Done with loadmap\n";

    return $map;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3