[Bio] / FigKernelScripts / prep_viz.pl Repository:
ViewVC logotype

View of /FigKernelScripts/prep_viz.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Dec 5 18:56:38 2005 UTC (14 years, 2 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.1: +17 -0 lines
Add license words.

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use FIG;
my $fig = new FIG;

$usage = "prep_viz  aligned_dna_pro  objects_file_with_starts_called";
#  produces fasta and markups for visualization (fasta seqs are turncated)

(($aligned_dna_pro_filename = shift) && ($called_starts_filename = shift))
    || die $usage;

open(ALIGNEDSEQS,"<$aligned_dna_pro_filename") || die "failed to open aligned";
open(CALLEDSTARTS,"<$called_starts_filename") || die "failed to open starts";

open(FASTAVIZ,">fasta.viz");
open(MARKUPVIZ,">markup.viz");

while (defined($line = <ALIGNEDSEQS>))
{
    $line =~ />(\S+)/;    # dna id
    $dnaid = $1;
    $dnaid =~ /^DNA_(\S+)/;
    $id = $1;
    $dnaseq = <ALIGNEDSEQS>;
    chomp $dnaseq;
    $dnaseqs{$id} = $dnaseq;
    $line = <ALIGNEDSEQS>;
    $line =~ />(\S+)/;    # pro id
    # $proid = $1;
    $proseq = <ALIGNEDSEQS>;
    chomp $proseq;
    $proseqs{$id} = $proseq;
}

$leftmost_pos = 999999;
$rightmost_pos = 0;

$/ = "///\n";
while (defined($obj = <CALLEDSTARTS>))
{
    chomp $obj;
    if ($obj =~ /^(.*?\n)\/\/\n(.*)/s)
    {
        ($hdr,$start_data) = ($1,$2);
        $kv = &load_kv($hdr);
        $id      = $kv->{'ID'};
        $old_start_pos = $kv->{'OLD_START_POS'};
        $new_start_pos = $kv->{'NEW_START_POS'};
    }
    else
    {
        print STDERR "FAILED to get data for $obj\n";
        next;
    }
    $aligned_old_start_pos = [ &map_to_aligned($old_start_pos,$id,\%dnaseqs),
                               &map_to_aligned($old_start_pos+1,$id,\%dnaseqs),
                               &map_to_aligned($old_start_pos+2,$id,\%dnaseqs) ];
    $aligned_new_start_pos = [ &map_to_aligned($new_start_pos,$id,\%dnaseqs),
                               &map_to_aligned($new_start_pos+1,$id,\%dnaseqs),
                               &map_to_aligned($new_start_pos+2,$id,\%dnaseqs) ];
    if ($aligned_old_start_pos->[-1] > $rightmost_pos)
    {
        $rightmost_pos = $aligned_old_start_pos->[-1];
        $rightmost_id = $id;
    }
    if ($aligned_old_start_pos->[0] < $leftmost_pos)
    {
        $leftmost_pos = $aligned_old_start_pos->[0];
    }
    if ($aligned_new_start_pos->[-1] > $rightmost_pos)
    {
        $rightmost_pos = $aligned_new_start_pos->[-1];
        $rightmost_id = $id;
    }
    if ($aligned_new_start_pos->[0] < $leftmost_pos)
    {
        $leftmost_pos = $aligned_new_start_pos->[0];
    }
    @starts = split(/\n\/\/\n/,$start_data);
    $aligned_old_rbs_pos{$id} = [];
    $aligned_new_rbs_pos{$id} = [];
    foreach $poss_start (@starts)
    {
	$kv2        = &load_kv($poss_start);
	$start      = $kv2->{'START'};
	if (exists($kv2->{'RBS_LEN'}))
	{
	    $rbs_len = $kv2->{'RBS_LEN'};
	    $rbs_pos = $kv2->{'RBS_POS'};    # negative offset from start
            $rbs_pos = $start + $rbs_pos;    # adding a negative value
	    if ($rbs_len > 0)
	    {
		if ($start == $old_start_pos)
		{
                    $aligned_old_rbs_len{$id} = $rbs_len;
                    for ($i=0; $i < $rbs_len; $i++)
                    {
                        push(@{$aligned_old_rbs_pos{$id}},
                             &map_to_aligned($rbs_pos+$i,$id,\%dnaseqs));
                    }
		    if ($aligned_old_rbs_pos{$id}[0] < $leftmost_pos)
		    {
			$leftmost_pos = $aligned_old_rbs_pos{$id}->[0];
		    }
		}
		elsif ($start == $new_start_pos)
		{
                    $aligned_new_rbs_len{$id} = $rbs_len;
                    for ($i=0; $i < $rbs_len; $i++)
                    {
                        push(@{$aligned_new_rbs_pos{$id}},
                             &map_to_aligned($rbs_pos+$i,$id,\%dnaseqs));
                    }
		    if ($aligned_new_rbs_pos{$id}[0] < $leftmost_pos)
		    {
			$leftmost_pos = $aligned_new_rbs_pos{$id}->[0];
		    }
		}
	    }
	}
    }
    print "OSP=", join(' ',@$aligned_old_start_pos), "\n"  if ($id =~ /1485/);
    print "NSP=", join(' ',@$aligned_new_start_pos), "\n"  if ($id =~ /1485/);
    $objs{$id} = [$aligned_old_start_pos,$aligned_new_start_pos];
}
$/ = "\n";

print "LM=$leftmost_pos\n";
print "RM=$rightmost_pos RMID=$rightmost_id\n";

$leftmost_pos -= 30  if ($leftmost_pos > 30);
$rightmost_pos += 6;
$len_subseq = $rightmost_pos - $leftmost_pos;

@sorted_objs = sort { $objs{$a}->[1]->[0] <=> $objs{$b}->[1]->[0] } keys(%objs);  # starts
for $id (@sorted_objs)
{
    print "OSP=", join(' ',@{$objs{$id}->[0]}), "\n"  if ($id =~ /1485/);
    print "NSP=", join(' ',@{$objs{$id}->[1]}), "\n"  if ($id =~ /1485/);
}

for $id (@sorted_objs)
{
    $aliases_and_genome = &aliases_and_genome($fig,$id);
    @aligned_old_start_pos = @{$objs{$id}->[0]};
    @aligned_new_start_pos = @{$objs{$id}->[1]};
    print "OSP=", join(' ',@aligned_old_start_pos), "\n"  if ($id =~ /1485/);
    print "NSP=", join(' ',@aligned_new_start_pos), "\n"  if ($id =~ /1485/);
    $trunc_dna{$id} = substr($dnaseqs{$id},$leftmost_pos-1,$len_subseq);
    $trunc_pro{$id} = substr($proseqs{$id},$leftmost_pos-1,$len_subseq);
    print FASTAVIZ ">DNA_$id $aliases_and_genome\n", $trunc_dna{$id}, "\n";
    print FASTAVIZ ">PRO_$id $aliases_and_genome\n", $trunc_pro{$id}, "\n";

    print MARKUPVIZ ">DNA_$id\n";
    if($aligned_old_start_pos[0] == $aligned_new_start_pos[0])
    {
        for ($i=0; $i < 3; $i++)
        {
            $pos = $aligned_old_start_pos[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos green\n";
        }
    }
    else
    {
        for ($i=0; $i < 3; $i++)
        {
            $pos = $aligned_old_start_pos[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos orange\n";
        }
        for ($i=0; $i < 3; $i++)
        {
            $pos = $aligned_new_start_pos[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos red\n";
        }
    }
    if ($aligned_old_rbs_len{$id})
    {
        print STDERR "ORBS LEN=$aligned_old_rbs_len{$id} ID=$id\n";
        for ($i=0; $i < $aligned_old_rbs_len{$id}; $i++)
        {
            $pos = $aligned_old_rbs_pos{$id}->[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos blue\n";
        }
    }
    if ($aligned_new_rbs_len{$id})
    {
        for ($i=0; $i < $aligned_new_rbs_len{$id}; $i++)
        {
            $pos = $aligned_new_rbs_pos{$id}->[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos blue\n";
        }
    }

    print MARKUPVIZ ">PRO_$id\n";
    if($aligned_old_start_pos[0] == $aligned_new_start_pos[0])
    {
        for ($i=0; $i < 3; $i++)
        {
            $pos = $aligned_old_start_pos[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos green\n";
        }
    }
    else
    {
        for ($i=0; $i < 3; $i++)
        {
            $pos = $aligned_old_start_pos[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos orange\n";
        }
        for ($i=0; $i < 3; $i++)
        {
            $pos = $aligned_new_start_pos[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos red\n";
        }
    }
    if ($aligned_old_rbs_len{$id})
    {
        for ($i=0; $i < $aligned_old_rbs_len{$id}; $i++)
        {
            $pos = $aligned_old_rbs_pos{$id}->[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos blue\n";
        }
    }
    if ($aligned_new_rbs_len{$id})
    {
        for ($i=0; $i < $aligned_new_rbs_len{$id}; $i++)
        {
            $pos = $aligned_new_rbs_pos{$id}->[$i] - $leftmost_pos + 1;
            print MARKUPVIZ "$pos $pos blue\n";
        }
    }
}


sub load_kv {
    my($obj) = @_;
    my($kv,$kvH);

    $kvH = {};
    foreach $kv (split(/\n/,$obj))
    {
        if ($kv =~ /^([^=]+)=(.*)$/)
        {
            $kvH->{$1} = $2;
        }
    }
    return $kvH;
}

sub map_to_aligned {
    my($start_pos,$id,$dnaseqs) = @_;
    my($ntcnt,$aligned_start_pos,$i,$dnaseq,@dnaseq);

    $dnaseq = $dnaseqs->{$id};
    @dnaseq = split('',$dnaseq);
    $ntcnt = 0;
    $aligned_start_pos = 0;
    for ($i=0; $ntcnt < ($start_pos+30)  &&  $i < @dnaseq; $i++)
    {
	if ($dnaseq[$i] =~ /[A-Z]/)
        {
            $ntcnt++;
        }
        $aligned_start_pos++;
    }
    return $aligned_start_pos;
}

sub aliases_and_genome {
    my($fig,$peg) = @_;

    my $gs = $fig->org_of($peg);
    my @aliases = grep { $_ =~ /^([NXYZA]P_[0-9\.]+)|(uni\|)|(sp\|)/ } $fig->feature_aliases($peg);
    return " " . join(",",@aliases) . " " . $gs;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3