[Bio] / FigKernelScripts / reformat_sims.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/reformat_sims.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (view) (download) (as text)

1 : overbeek 1.2 # -*- perl -*-
2 : olson 1.6 #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 : olson 1.8 use strict;
20 :     use DB_File;
21 : efrank 1.1
my $usage = "usage: reformat_sims NR [optional additional fasta files] < sims > reformatted.sims";

# At least one length source is required.  The argument count is tested
# (rather than the truth of $ARGV[0]) so that a file literally named "0"
# is still accepted.
die $usage unless (@ARGV);

# @hashes is the ordered list of id => sequence-length lookup tables that
# the rest of the script consults.  %ln collects the lengths computed from
# FASTA files.  Each tied ".btree" file is unshifted in front of it, so tied
# tables are searched first (the last one named on the command line comes
# first) and %ln is searched last.
my @hashes;
my %ln;

push(@hashes, \%ln);

foreach my $fa (@ARGV)
{
    if ($fa =~ /\.btree$/)
    {
        # A DB_File btree is tied read-only and consulted on demand.
        my $h = {};
        my $t = tie %$h, 'DB_File', $fa, O_RDONLY, 0, $DB_BTREE;
        $t or die "Cannot tie $fa as a btree\n";
        unshift(@hashes, $h);
    }
    else
    {
        # Three-argument open with a lexical handle: the file name comes
        # straight from the command line, so it must never be interpreted
        # as part of the open mode.  $! says why the open failed.
        open(my $fa_fh, '<', $fa) or die "Can't read $fa: $!";

        # Read one FASTA record per readline by using "\n>" as the record
        # separator.  local restores the previous separator automatically
        # when this block is left, including on an early exit.
        local $/ = "\n>";
        while (defined(my $record = <$fa_fh>))
        {
            # chomp strips the trailing "\n>" separator; only the very first
            # record still carries its leading ">", hence the optional ">".
            chomp $record;

            # $1 = id (first whitespace-delimited token of the header line),
            # $2 = everything after the header line.  A record with no
            # newline after its header does not match and is skipped.
            if ($record =~ /^>?(\S+)[^\n]*\n(.*)/s)
            {
                my $id  = $1;
                my $seq = $2;
                $seq =~ s/\s//gs;          # drop line breaks and any other whitespace
                $ln{$id} = length($seq);
            }
        }
        close($fa_fh);
    }
}
print STDERR "reformat_sims finished reading NR and other files\n";
61 :    
# Filter the similarity lines arriving on STDIN and append the lengths of
# both sequences to every line that is kept.
#
# Each input line is tab-separated with twelve fields (presumably BLAST
# tabular output -- confirm against the producer of the sims file):
#   id1 id2 iden ali_ln mis gaps b1 e1 b2 e2 psc bsc
#
# A line is kept when all of the following hold:
#   * id1 and id2 differ;
#   * psc is below $max_psc;
#   * it is the first or second line of a run of consecutive lines
#     with the same (id1,id2) pair;
#   * fewer than $max_per_id1 lines have been written for id1 so far,
#     or psc is below $strong_psc.
# Kept lines whose lengths cannot both be found are reported on STDERR
# as "failed: ..." and are not written.
my $max_psc     = 1.0e-2;
my $max_run     = 3;         # a line is kept while its run position is < 3
my $max_per_id1 = 5;
my $strong_psc  = 1.0e-5;

my $last = "";
my $last_count = 0;
my %seen;
while (defined(my $line = <STDIN>))
{
    # chomp, not chop: a final line without a trailing newline must not
    # lose its last data character.
    chomp $line;

    # Strip any whitespace that is left inside the individual fields.
    my ($id1,$id2,$iden,$ali_ln,$mis,$gaps,$b1,$e1,$b2,$e2,$psc,$bsc) =
        map { s/\s//g; $_ } split(/\t/, $line);

    # Count how many consecutive lines have carried the same id pair.
    if ($last eq "$id1,$id2")
    {
        ++$last_count;
    }
    else
    {
        $last = "$id1,$id2";
        $last_count = 1;
    }

    next unless (($id1 ne $id2) && ($psc < $max_psc) && ($last_count < $max_run));
    next unless ((($seen{$id1} || 0) < $max_per_id1) || ($psc < $strong_psc));

    my $ln1 = length_of($id1, \@hashes);
    my $ln2 = length_of($id2, \@hashes);

    if (defined($ln1) and defined($ln2))
    {
        print join("\t",($id1,$id2,$iden,$ali_ln,$mis,$gaps,$b1,$e1,$b2,$e2,$psc,$bsc,$ln1,$ln2)),"\n"
            or die "Error writing to stdout: $!";
        ++$seen{$id1};
    }
    else
    {
        print STDERR "failed: $line\n";
    }
}

# Output is buffered, so a write error (for example a full disk) may only
# surface when the handle is flushed; check the close as well as each print.
close(STDOUT) or die "Error writing to stdout: $!";

# Return the first defined value stored under $id in the list of lookup
# tables referenced by $tables, searching them in order; returns undef when
# no table has a defined entry for $id.
sub length_of
{
    my ($id, $tables) = @_;

    foreach my $table (@$tables)
    {
        my $len = $table->{$id};
        return $len if defined($len);
    }
    return;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3