[Bio] / FigKernelScripts / make_scored_instances.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/make_scored_instances.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : overbeek 1.1 use strict;
2 :     use FIG;
3 :     use FIGV;
4 :     use Data::Dumper;
5 :     use compare_coding;
6 : overbeek 1.3 use TBLstuff;
7 : overbeek 1.1
8 : overbeek 1.5 my $usage = "usage: make_scored_instances GenomeFile EvidenceD AlignmentsD ContigsF [HTMLdir]";
9 :     my($genomesF,$evidenceD,$ali_dir,$contigsF);
10 : overbeek 1.1
11 :     (
12 :     ($genomesF = shift @ARGV) &&
13 : overbeek 1.3 ($evidenceD = shift @ARGV) &&
14 : overbeek 1.5 ($ali_dir = shift @ARGV) &&
15 :     ($contigsF = shift @ARGV)
16 : overbeek 1.1 )
17 :     || die $usage;
18 :    
19 : overbeek 1.2 my $html = (@ARGV > 0) ? $ARGV[0] : "$evidenceD.HTML";
20 :    
21 : overbeek 1.1 my @genomes = map { $_ =~ /^(\d+\.\d+)\s+(\S+)\s*$/; [$1,$2] } `cat $genomesF`;
22 :     print STDERR "processing ",scalar @genomes," genomes\n";
23 :    
24 :     open(CONTIGS,">$contigsF") || die "could not open $contigsF";
25 :     foreach my $tuple (@genomes)
26 :     {
27 :     my($genome,$genomeD) = @$tuple;
28 :     open(IN,"<$genomeD/contigs")
29 :     || die "could not open $genomeD/contigs";
30 :     while (defined($_ = <IN>))
31 :     {
32 :     if ($_ =~ /^>(.*)$/)
33 :     {
34 :     print CONTIGS ">$genome:$1\n";
35 :     }
36 :     else
37 :     {
38 :     print CONTIGS $_;
39 :     }
40 :     }
41 :     close(IN);
42 :     }
43 :     close(CONTIGS);
44 :     &FIG::run("$FIG_Config::ext_bin/formatdb -i $contigsF -p F");
45 :    
46 :     (-d $evidenceD) || mkdir($evidenceD,0777) || die "could not make $evidenceD";
47 :    
48 :     my %seen;
49 : overbeek 1.4 my $options = { html_dir => $html, scored_ali_dir => $ali_dir };
50 : overbeek 1.3 my %peg_locations;
51 :     my %location_to_peg;
52 :    
53 : overbeek 1.4 my %number_contigs;
54 : overbeek 1.3 foreach my $tuple (@genomes)
55 :     {
56 :     my($genome,$genomeD) = @$tuple;
57 :     my $fig = new FIGV($genomeD);
58 : overbeek 1.4 $number_contigs{$genome} = scalar $fig->contigs_of($genome);
59 : overbeek 1.3 foreach my $peg ($fig->all_features($genome,"peg"))
60 :     {
61 :     my $loc = $fig->feature_location($peg);
62 :     if ($loc =~ /^([^,]+)_(\d+)_(\d+)$/)
63 :     {
64 :     my($contig,$beg,$end) = ($1,$2,$3);
65 :     $peg_locations{$genome}->{$peg} = [$contig,$beg,$end];
66 :     $location_to_peg{"$genome:$contig\_$end"} = $peg;
67 :     }
68 :     }
69 :     }
70 :    
71 : overbeek 1.4 @genomes = map { $_->[0] }
72 :     sort { $a->[1] <=> $b->[1] }
73 :     map { [$_,$number_contigs{$_} ] }
74 :     @genomes;
75 :    
76 : overbeek 1.1 foreach my $tuple (@genomes)
77 :     {
78 :     my($genome,$genomeD) = @$tuple;
79 :     my $fig = new FIGV($genomeD);
80 :     foreach my $peg ($fig->all_features($genome,"peg"))
81 :     {
82 :     my $loc = $fig->feature_location($peg);
83 :     if ($loc =~ /^([^,]+)_(\d+)_(\d+)$/)
84 :     {
85 :     my($contig,$beg,$end) = ($1,$2,$3);
86 :     if (! &been_seen($fig,\%seen,"$genome:$contig\_$beg\_$end"))
87 :     {
88 : overbeek 1.2 my $translation = $fig->get_translation($peg);
89 :     if ($translation)
90 : overbeek 1.1 {
91 : overbeek 1.2 my $prot = [$peg,'',$translation];
92 :     my @scored_instances =
93 :     &compare_coding::scored_protein_starts($contigsF,$prot,$options);
94 :     foreach my $instance (@scored_instances)
95 : overbeek 1.1 {
96 : overbeek 1.2 my($type,$genome_region,$sc,$scratch) = @$instance;
97 : overbeek 1.3 my $key = $genome_region;
98 :     $key =~ s/^(.*)_\d+(_\d+)$/$1$2/;
99 :     $scratch->{fid} = $location_to_peg{$key} if ($location_to_peg{$key});
100 : overbeek 1.2 if (! &been_seen($fig,\%seen,$genome_region))
101 :     {
102 :     &mark_seen(\%seen,$genome_region);
103 :     my $genome = &genome_of_instance($genome_region);
104 :     open(TMP,">>$evidenceD/$genome")
105 :     || die "could not open $evidenceD/$genome";
106 : overbeek 1.3 print TMP join("\t",($type,$genome_region,$sc,map { $_ => $scratch->{$_}}
107 :     grep { ! ref($scratch->{$_}) }
108 :     sort keys(%$scratch)
109 :     )
110 :     ),"\n";
111 : overbeek 1.2 close(TMP);
112 :     }
113 : overbeek 1.1 }
114 :     }
115 :     }
116 :     }
117 :     }
118 :     }
119 :    
120 :     sub genome_of_instance {
121 :     my($genome_region) = @_;
122 :    
123 :     if ($genome_region =~ /^(\d+\.\d+):/)
124 :     {
125 :     return $1;
126 :     }
127 :     die "could not parse $genome_region";
128 :     }
129 :    
130 :     sub mark_seen {
131 :     my($seen,$genome_region) = @_;
132 :    
133 :     $seen->{$genome_region} = 1;
134 :     }
135 :    
136 :     sub been_seen {
137 :     my($fig,$seen,$genome_region) = @_;
138 :    
139 :     return $seen->{$genome_region};
140 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3