[Bio] / FigKernelScripts / condense_blast.pl Repository:
ViewVC logotype

View of /FigKernelScripts/condense_blast.pl

Parent Directory | Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Dec 5 18:56:37 2005 UTC (13 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.1: +17 -0 lines
Add license words.

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


# usage: condense_blast [MaxE] < blast.output
#
# Reads BLAST text output from STDIN.  The first input line is remembered as
# the report header; every later line identical to it starts a new report.
# The lines of each report are handed to process(), which prints one
# comma-separated record per reported match.
#
# MaxE is the largest expectation value to report (default 1.0e-5).
#
# Exit status is 1 when the last non-header line read does not end in
# "bits)" -- presumably the closing statistics line of a complete BLAST
# run, so its absence suggests truncated input (TODO confirm against the
# BLAST version in use).

use strict;

$| = 1;    # unbuffered STDOUT

# $maxE stays a package variable because process() reads it directly.
# Test for "defined and non-empty" rather than truthiness so that an explicit
# cutoff of 0 is honoured instead of being silently replaced by the default.
our $maxE = shift @ARGV;
if (!defined($maxE) || ($maxE eq '')) { $maxE = 1.0e-5; }

my $hdr = <STDIN>;
defined($hdr) || die "empty input file";

my $saved_line;        # most recent line read that was not a report header
my $line = $hdr;
while (defined($line))
{
    # Collect everything up to (not including) the next header line or EOF.
    my @lines = ();
    while (defined($line = <STDIN>) && ($line ne $hdr))
    {
        push(@lines, $line);
        $saved_line = $line;
    }
    process(\@lines);
}

# Input consisting of the header line alone leaves $saved_line undefined;
# that is reported as a failure too.
if (!defined($saved_line) || ($saved_line !~ /bits\)\n/s)) { exit(1); }

# process(\@lines)
#
# Condense the BLAST text report for a single query into CSV records.
#
# Parameter:
#   $lines - reference to an array of report lines (newlines intact).  Lines
#            are shifted off as they are parsed, so the caller's array is
#            modified.
#
# Output (to the currently selected filehandle), one line per match segment:
#   query,query_len,subject,subject_len,percent_identity,expect,
#   query_start,query_end,subject_start,subject_end
# A segment is printed only when its expectation value is <= the global
# $maxE, and it is not a match of the query against itself starting at the
# same position in both sequences.
#
# Returns without output when no "Query=" line is present.  Dies with
# "could not get query length" when a "Query=" line is found but no
# "(N letters)" line follows it.
sub process {
    my($lines) = @_;
    my($query,$qlen,$id,$len,$expect,$iden,$qs,$ss,$qe,$se);

    local $_;    # the parser works through $_; do not clobber the caller's copy

    # Skip forward to the "Query= <id>" line.
    while (defined($_ = shift @$lines) && ($_ !~ /^Query=\s*/)) {}

    if ($_ && ($_ =~ /^Query\=\s*(\S+)/))
    {
        $query = $1;

        # The query length is on a following "   (1,234 letters)" line.
        while (defined($_ = shift @$lines) && ($_ !~ /^\s+\(([,0-9]+)\s+letters/)) {}
        ($_ && ($_ =~ /^\s+\(([,0-9]+)\s+letters/)) || die "could not get query length";
        $qlen = $1;
        $qlen =~ s/,//g;    # strip thousands separators
    }
    else
    {
        return;
    }

    # Skip the summary section; each subject's alignments start with ">id".
    while (defined($_ = shift @$lines) && ($_ !~ /^\>/)) {}
    while ($_ && ($_ =~ /^\>(\S+)/))
    {
        $id = $1;
        while ((@$lines > 0) && defined($_ = shift @$lines) &&
               ($_ !~ /^\s+Length\s*\=\s*[1-9]/)) {} # flush title continuations
        if ((@$lines > 0) && defined($_) && ($_ =~ /^\s+Length\s*\=\s*([,0-9]+)/))
        {
            $len = $1;
            $len =~ s/,//g;

            # Advance to the first "Score" line of this subject (or to the
            # next subject if there is none).
            while ((@$lines > 0) && defined($_ = shift @$lines) && (($_ !~ /^\>/) && ($_ !~ /^\s*Score /))) {}

            # One iteration per "Score = ..., Expect = ..." segment.
            while (defined($_) && ($_ =~ /^\s*Score .*Expect[^=]*=\s*([0-9\.e+-]+)/))
            {
                $expect = $1;
                # A value written as a bare exponent ("e-100") is given a
                # mantissa so it compares numerically.
                if ($expect =~ /^e/) { $expect = "1.0" . $expect; }

                # The line after "Score" is expected to be "Identities = n/m (p%)".
                $_ = shift @$lines;
                if (defined($_) && ($_ =~ /^\s*Identities\s*=\s*\d+\/\d+\s*\((\d+)\%/))
                {
                    $iden = $1;
                }
                else
                {
                    $iden = -1;
                }

                # -1 marks "start coordinate not seen yet" for this segment.
                $qs = -1; $ss = -1;
                while (defined($_ = shift @$lines) && ($_ !~ /^\>/) && ($_ !~ /^Query/) && ($_ !~ /^\s*Score /)) {}

                # Alignment rows come in triples: Query line, match line,
                # Sbjct line.  The first row gives the start coordinates and
                # the last row seen gives the end coordinates.
                while (defined($_) && ($_ =~ /^Query:\s*([0-9]+) .* ([0-9]+)$/))
                {
                    if ($qs == -1) { $qs = $1; }
                    $qe = $2;
                    ($_ = shift @$lines);    # match line, ignored
                    ($_ = shift @$lines);    # Sbjct line
                    if (defined($_) && ($_ =~ /^Sbjct:\s*([0-9]+) .* ([0-9]+)$/))
                    {
                        if ($ss == -1) { $ss = $1; }
                        $se = $2;
                    }
                    while (defined($_ = shift @$lines) && ($_ !~ /^\>/) && ($_ !~ /^Query/) &&
                           ($_ !~ /^\s*Score /)) {}
                }

                if (($expect <= $maxE) && (($query ne $id) || ($qs != $ss)))
                {
                    print "$query,$qlen,$id,$len,$iden,$expect,$qs,$qe,$ss,$se\n";
                }
            }
        }
        else
        {
            # No usable "Length =" line: the report is truncated.  When the
            # ">" header was the last line available, $_ was never advanced,
            # so the enclosing loop would test the same line forever without
            # this exit.
            last;
        }
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3