[Bio] / FigKernelScripts / make_scored_instances.pl Repository:
ViewVC logotype

View of /FigKernelScripts/make_scored_instances.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Sun Feb 24 15:21:29 2013 UTC (6 years, 8 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0912, rast_rel_2014_0729, HEAD
Changes since 1.4: +4 -4 lines
compute correspondences for a pan genome

use strict;
use FIG;
use FIGV;
use Data::Dumper;
use compare_coding;
use TBLstuff;

my $usage = "usage: make_scored_instances GenomeFile EvidenceD AlignmentsD ContigsF [HTMLdir]";
my($genomesF,$evidenceD,$ali_dir,$contigsF);

(
 ($genomesF   = shift @ARGV) && 
 ($evidenceD  = shift @ARGV) &&
 ($ali_dir    = shift @ARGV) &&
 ($contigsF   = shift @ARGV)
)
    || die $usage;

my $html = (@ARGV > 0) ? $ARGV[0] : "$evidenceD.HTML";

my @genomes    = map { $_ =~ /^(\d+\.\d+)\s+(\S+)\s*$/; [$1,$2] } `cat $genomesF`;
print STDERR "processing ",scalar @genomes," genomes\n";

open(CONTIGS,">$contigsF") || die "could not open $contigsF";
foreach my $tuple (@genomes)
{
    my($genome,$genomeD) = @$tuple;
    open(IN,"<$genomeD/contigs")
	|| die "could not open $genomeD/contigs";
    while (defined($_ = <IN>))
    {
	if ($_ =~ /^>(.*)$/)
	{
	    print CONTIGS ">$genome:$1\n";
	}
	else
	{
	    print CONTIGS $_;
	}
    }
    close(IN);
}
close(CONTIGS);
&FIG::run("$FIG_Config::ext_bin/formatdb -i $contigsF -p F");

(-d $evidenceD) || mkdir($evidenceD,0777) || die "could not make $evidenceD";

my %seen;
my $options = { html_dir => $html, scored_ali_dir => $ali_dir };
my %peg_locations;
my %location_to_peg;

my %number_contigs;
foreach my $tuple (@genomes)
{
    my($genome,$genomeD) = @$tuple;
    my $fig = new FIGV($genomeD);
    $number_contigs{$genome} = scalar $fig->contigs_of($genome);
    foreach my $peg ($fig->all_features($genome,"peg"))
    {
	my $loc = $fig->feature_location($peg);
	if ($loc =~ /^([^,]+)_(\d+)_(\d+)$/)
	{
	    my($contig,$beg,$end) = ($1,$2,$3);
	    $peg_locations{$genome}->{$peg} = [$contig,$beg,$end];
	    $location_to_peg{"$genome:$contig\_$end"} = $peg;
	}
    }
}

@genomes = map { $_->[0] }
           sort { $a->[1] <=> $b->[1] }
           map  { [$_,$number_contigs{$_} ] }
           @genomes;
              
foreach my $tuple (@genomes)
{
    my($genome,$genomeD) = @$tuple;
    my $fig = new FIGV($genomeD);
    foreach my $peg ($fig->all_features($genome,"peg"))
    {
	my $loc = $fig->feature_location($peg);
	if ($loc =~ /^([^,]+)_(\d+)_(\d+)$/)
	{
	    my($contig,$beg,$end) = ($1,$2,$3);
	    if (! &been_seen($fig,\%seen,"$genome:$contig\_$beg\_$end"))
	    {
		my $translation = $fig->get_translation($peg);
		if ($translation)
		{
		    my $prot = [$peg,'',$translation];
		    my @scored_instances = 
			&compare_coding::scored_protein_starts($contigsF,$prot,$options);
		    foreach my $instance (@scored_instances)
		    {
			my($type,$genome_region,$sc,$scratch) = @$instance;
			my $key = $genome_region;
			$key =~ s/^(.*)_\d+(_\d+)$/$1$2/;
			$scratch->{fid} = $location_to_peg{$key} if ($location_to_peg{$key});
			if (! &been_seen($fig,\%seen,$genome_region))
			{
			    &mark_seen(\%seen,$genome_region);
			    my $genome = &genome_of_instance($genome_region);
			    open(TMP,">>$evidenceD/$genome")
				|| die "could not open $evidenceD/$genome";
			    print TMP join("\t",($type,$genome_region,$sc,map { $_ => $scratch->{$_}}
					                                  grep { ! ref($scratch->{$_}) }
                                                                          sort keys(%$scratch)
                                                )
				          ),"\n";
			    close(TMP);
			}
		    }
		}
	    }
	}
    }
}

sub genome_of_instance {
    my($genome_region) = @_;
    
    if ($genome_region =~ /^(\d+\.\d+):/)
    {
	return $1;
    }
    die "could not parse $genome_region";
}

sub mark_seen {
    my($seen,$genome_region) = @_;

    $seen->{$genome_region} = 1;
}

sub been_seen {
    my($fig,$seen,$genome_region) = @_;

    return $seen->{$genome_region};
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3