[Bio] / FigKernelScripts / TestLocations.pl Repository:
ViewVC logotype

View of /FigKernelScripts/TestLocations.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Thu May 8 22:49:19 2008 UTC (11 years, 9 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.7: +6 -6 lines
Converted to use new name for Contrain method.

#!/usr/bin/perl -w
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


=head1 Location Test

C<TestLocations> [I<options>]

This method performs rudimentary testing of the location object.

The currently-supported command-line options are as follows.

=over 4

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2.

=back

=cut

use strict;
use Tracer;
use Cwd;
use File::Copy;
use File::Path;
use BasicLocation;
use FullLocation;
use FIG;

# Get the command-line options.
my ($options, @parameters) = Tracer::ParseCommand({ trace => 2 }, @ARGV);
# Set up tracing.
my $traceLevel = $options->{trace};
TSetup("$traceLevel errors Tracer Location BasicLocation FullLocation FBasicLocation BBasicLocation", "TEXT");

# Construct a location from its values.
my $basicLoc = BasicLocation->new("BLUE", 400, "_", 301);
# Verify the values.
Assert($basicLoc->Contig eq "BLUE") || Confess("Invalid contig value.");
Assert($basicLoc->Begin == 400) || Confess("Invalid begin location.");
Assert($basicLoc->Dir eq "-") || Confess("Invalid direction.");
Assert($basicLoc->EndPoint == 301) || Confess("Invalid end point.");
Assert($basicLoc->Length == 100) || Confess("Invalid length.");
# Create a vanilla location object.
my $loc = BasicLocation->new("RED_10_400");
# Verify its values.
Assert($loc->Contig eq "RED") || Confess("Invalid contig value.");
Assert($loc->Begin == 10) || Confess("Invalid begin location.");
Assert($loc->Dir eq "+") || Confess("Invalid direction.");
Assert($loc->EndPoint == 400) || Confess("Invalid end point.");
Assert($loc->Length == 391) || Confess("Invalid length.");
# Add some augments.
$loc->{fid} = "fig|pinky.1.peg.3";
$loc->{index} = 2;
# Get the augment string.
my $result = $loc->AugmentString;
Assert($result eq "RED_10+391(fid = fig|pinky.1.peg.3, index = 2)") || Confess("Invalid augment result.");
# Convert it back.
my $newLoc = BasicLocation->new($result);
# Make a copy.
my $newerLoc = BasicLocation->new($loc);
# Construct a duplicate.
my $dupLoc = BasicLocation->new("RED", 10, "+", 391, { fid => $loc->{fid}, index => $loc->{index} });
# Verify equality.
for my $key (keys %{$loc}) {
    if (defined $loc->{key}) {
        Assert($loc->{$key} eq $newLoc->{$key}) || Confess("Conversion error on key $key for new location.");
        Assert($loc->{$key} eq $newerLoc->{$key}) || Confess("Conversion error on key $key for newer location.");
        Assert($loc->{$key} eq $dupLoc->{$key}) || Confess("Conversion error on key $key for duplicate location.");
    }
}
# Verify the match function.
Assert(! BasicLocation::Matches($basicLoc, $loc)) || Confess("Basic and vanilla locations match.");
Assert(BasicLocation::Matches($loc, $newLoc)) || Confess("Vanilla and new locations do not match.");
Trace("Successful basic location test!") if T(2);

# Get a FIG object.
my $fig = new FIG();
{
    # Create a simple, forward full location.
    my $bigLoc = FullLocation->new($fig, "83333.1", "NC_000913_190_255");
    # Test GetBounds.
    my ($bounds, $loc1, $locN) = $bigLoc->GetBounds;
    Assert($bounds->SeedString eq "NC_000913_190_255") || Confess("Bounds location failure.");
    Assert($loc1->SeedString eq "NC_000913_190_255") || Confess("Bounds loc1 failure.");
    Assert($locN->SeedString eq "NC_000913_190_255") || Confess("Bounds locN failure.");
    # Test the Constrain method.
    my $constrain = $bigLoc->ConstrainPoint(-10);
    Assert($constrain == 1) || Confess("Left constrain failed.");
    $constrain = $bigLoc->ConstrainPoint(100000);
    Assert($constrain == 100000) || Confess("Middle constrain failed.");
    $constrain = $bigLoc->ConstrainPoint(14639222);
    Assert($constrain == 4639221) || Confess("Right constrain failed.");
    # Test the Contig and DNA methods.
    my $baseContig = $bigLoc->Contig;
    Assert($baseContig eq "NC_000913") || Confess("Invalid base contig.");
    my $baseDir = $bigLoc->Dir;
    Assert($baseDir eq '+') || Confess("Base direction invalid.");
    my $dna = $bigLoc->DNA;
    my $testDna = $fig->dna_seq("83333.1", "NC_000913_190_255");
    Assert($dna eq $testDna) || Confess("DNA sequence invalid.");
    my $translation = $bigLoc->Translation;
    Assert($translation eq FIG::translate($dna)) || Confess("Translation failed.");
    my $find = $bigLoc->Search("att|tac", 190, '-', 1000);
    my $find3 = $find + 2;
    my $foundDNA = $fig->dna_seq("83333.1", "NC_000913_${find}_$find3");
    Assert($foundDNA eq "att" || $foundDNA eq "tac") || Confess("Incorrect result from search 1.");
    $find = $bigLoc->Search("tta|cta", 255, '+', 1000);
    $find3 = $find + 2;
    $foundDNA = $fig->dna_seq("83333.1", "NC_000913_${find}_$find3");
    Assert($foundDNA eq "tta" || $foundDNA eq "cta") || Confess("Incorrect result from search 2.");
    my $first = $bigLoc->ExtremeCodon('first');
    my $last = $bigLoc->ExtremeCodon('last');
    my $contig_len = $fig->contig_ln("83333.1", "NC_000913");
    Trace("First codon is $first, last is $last.") if T(2);
    Assert($first - 3 <= 0) || Confess("First codon is not far enough.");
    Assert($first >= 1) || Confess("First codon is too far.");
    Assert(($bigLoc->Begin - $first) % 3 == 0) || Confess("First codon is mis-aligned.");
    Assert($last + 3 > $contig_len) || Confess("Last codon is not far enough.");
    Assert($last <= $contig_len) || Confess("Last codon is too far.");
    Assert(($last + 1 - $bigLoc->EndPoint) % 3 == 0) || Confess("Last codon is mis-aligned.");
    Trace("Successful singleton location test!") if T(2);
}
{
    # Create a multiple, backward full location.
    my $bigLoc = FullLocation->new($fig, "83333.1", "NC_000913_255_200, NC_000913_190_100");
    # Test GetBounds.
    my ($bounds, $loc1, $locN) = $bigLoc->GetBounds;
    Assert($bounds->SeedString eq "NC_000913_255_100") || Confess("Bounds location failure.");
    Assert($loc1->SeedString eq "NC_000913_255_200") || Confess("Bounds loc1 failure.");
    Assert($locN->SeedString eq "NC_000913_190_100") || Confess("Bounds locN failure.");
    # Test the Constrain method.
    my $constrain = $bigLoc->ConstrainPoint(-10);
    Assert($constrain == 1) || Confess("Left constrain failed.");
    $constrain = $bigLoc->ConstrainPoint(100000);
    Assert($constrain == 100000) || Confess("Middle constrain failed.");
    $constrain = $bigLoc->ConstrainPoint(14639222);
    Assert($constrain == 4639221) || Confess("Right constrain failed.");
    # Test the Contig and DNA methods.
    my $baseContig = $bigLoc->Contig;
    Assert($baseContig eq "NC_000913") || Confess("Invalid base contig.");
    my $baseDir = $bigLoc->Dir;
    Assert($baseDir eq '-') || Confess("Base direction invalid.");
    my $dna = $bigLoc->DNA;
    my $testDna = $fig->dna_seq("83333.1", "NC_000913_255_200", "NC_000913_190_100");
    Assert($dna eq $testDna) || Confess("DNA sequence invalid.");
    my $translation = $bigLoc->Translation;
    Assert($translation eq FIG::translate($dna)) || Confess("Translation failed.");
    my $find = $bigLoc->Search("att", 100, '+', 1000);
    my $find3 = $find - 2;
    my $foundDNA = $fig->dna_seq("83333.1", "NC_000913_${find}_$find3");
    Assert($foundDNA eq "att") || Confess("Incorrect result from search 3.");
    $find = $bigLoc->Search("tac|cca", 255, '-', 1000);
    $find3 = $find - 2;
    $foundDNA = $fig->dna_seq("83333.1", "NC_000913_${find}_$find3");
    Assert($foundDNA eq "tac" | $foundDNA eq "cca") || Confess("Incorrect result from search 4.");
    my $first = $bigLoc->ExtremeCodon('first');
    my $last = $bigLoc->ExtremeCodon('last');
    my $contig_len = $fig->contig_ln("83333.1", "NC_000913");
    Trace("First codon is $first, last is $last.") if T(2);
    Assert($last - 3 <= 0) || Confess("Last codon is not far enough.");
    Assert($last >= 1) || Confess("Last codon is too far.");
    my $bigEnd = $bigLoc->EndPoint;
    Assert(($bigEnd - $last) % 3 == 0) || Confess("Last codon is mis-aligned.");
    Assert($first + 3 > $contig_len) || Confess("First codon is not far enough.");
    Assert($first <= $contig_len) || Confess("First codon is too far.");
    Assert(($first + 1 - $bigLoc->Begin) % 3 == 0) || Confess("First codon is mis-aligned.");
    Trace("Successful multiple location test!") if T(2);
}
{
    my $peg    = "fig|169963.1.peg.2107";
    
    #real:     NC_003210_2197206_2196382
    my $loc    = "NC_003210_2197197_2196391";
    my $genome = "169963.1";
    my $tran   = "KWRGICGSLLIALGITQLYSFISAVIGYFNAEENSFVFVWNYWMLLFFGVGLFIIGFAFMRKESFRLASIIGVMCFVLFQGFSVYYYQLRVLSKLEYTQPFEWSGTLLCILGLLVLIALLIGPIFQAKEIQADQAWKTKWRYAAGVFSLLGAVTSVFAAVTIFRQLHSDNIKEGYLFTTALDGYFACFMAVIFLLVVIFSWRKVSYLLVGILMGAAFILLTNYLSVTNWIDFAKENLSITFGSNEREVFGMQFLMGASAFLSSIFGYIA";
    
    my @others = ("fig|272626.1.peg.2207","fig|265669.1.peg.2128","fig|267409.1.peg.1143","fig|267410.1.peg.444");
    
    my ($new_loc,$new_tran) = &pick_gene_boundaries($fig,$genome,$loc,$tran);
    print &Dumper($new_loc,$new_tran);
}

sub pick_gene_boundaries { 
    my($fig,$genome,$loc,$tran) = @_;
    # No need to parse location: the constructor does it for us.
    my $full_loc = new FullLocation($fig,$genome,$loc,$tran);
    # Search upstream. "PrevPoint" returns the point before the begin point.
    my $leftStop   = $full_loc->Search("taa|tga|tag", $full_loc->PrevPoint ,"-", 9000);
    Trace("Left stop point is $leftStop with codon " . $full_loc->Codon($leftStop) . ".") if T(2);
    # Search downstream from the point found.
    my $firstStart = $full_loc->Search("atg|gtg|ttg", $leftStop, "+", 9000);
    Trace("First start point is $firstStart with codon " . $full_loc->Codon($firstStart) . ".") if T(2);
    # Search downstream from the end point. "NextPoint" returns the point on the other
    # side of the end.
    my $rightStop  = $full_loc->Search("taa|tga|tag", $full_loc->NextPoint, "+", 9000);
    Trace("New stop point is $rightStop with codon " . $full_loc->Codon($rightStop) . ".") if T(2);
    # The "Adjusted" function automatically knows whether to add or subtract.
    $rightStop  = $full_loc->Adjusted($rightStop, 2);
    # Extend the location.
    $full_loc->Extend($firstStart, $full_loc->Adjusted($rightStop, -3));
    # Return the updated location string and the translation.
    return ($full_loc->SeedString, $full_loc->Translation);
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3