[Bio] / FigKernelScripts / build_initial_objects_for_start_predictions.pl Repository:
ViewVC logotype

View of /FigKernelScripts/build_initial_objects_for_start_predictions.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Thu May 12 20:38:37 2005 UTC (14 years, 6 months ago) by overbeek
Branch: MAIN
CVS Tags: merge-trunktag-bobdev_news-2, Root-bobdev_news, merge-bobdev_news-1, merge-trunktag-bobdev_news-1, merge-bodev_news-3, merge-bobdev_news-2, merge-trunktag-bodev_news-3
Branch point for: Branch-bobdev_news
Changes since 1.2: +6 -2 lines
add tool to generate gene blocks for variation analysis

use FIG;
my $fig = new FIG;

$usage = "usage: build_initial_objects_for_start_predictions < pegs_in_cluster > initial_objects";

@pegs = <STDIN>;
chomp @pegs;
# print "NPEGS=", scalar(@pegs), "\n";
for ($i=0; $i < @pegs; $i++)  # strip off all but peg ids at front
{
    if ($pegs[$i] =~ /(\S+)\t.*/)
    {
        $pegs[$i] = $1;
    }
}

foreach $peg (@pegs)
{
    $genome = $fig->genome_of($peg);
    $loc = $fig->feature_location($peg);
    # print "$len{$peg}\t$peg\t$loc\n";
    if ($loc =~ /,/)
    {
        print STDERR "skipping $peg - do not handle complex locs\n";
        next;
    }

    ($contig,$begin,$end) = $fig->boundaries_of($loc);
    if ($begin < $end)
    {
        $l = sprintf("%s_%d_%d",$contig,1,$begin-1);
	$end += 3;
    }
    else
    {
        $l = sprintf("%s_%d_%d",$contig,$begin+1,$fig->contig_ln($genome,$contig));
	$end -= 3;
    }

    $loc = join("_",($contig,$begin,$end));
    $seq = uc $fig->dna_seq($genome,$loc);
    if (! $seq)
    {
	print STDERR &Dumper($peg,$loc,$seq); die "could not get sequence for $peg";
    }

#   The following hack handles uncertainty about whether SEED gives back the STOP or not
    if (substr($seq,-6,3) =~ /TAA|TAG|TGA/i)
    {
	$end = ($begin < $end) ? $end-3 : $end+3;
	$seq = substr($seq,0,length($seq) - 3);
    }
    elsif (substr($seq,-3,3) !~ /TAA|TAG|TGA/i)
    {
	die "BAD STOP: $peg $genome $loc $seq\n";
    }

    $loc = join("_",($contig,$begin,$end));


    # BACK INTO THE STOP FROM START OF PEG
    $pre_peg = uc $fig->dna_seq($genome,$l);
    $found = 0;
    $len_pre_peg = length($pre_peg);

    for ($i=$len_pre_peg-3; $i > 0 && ! $found; $i-=3)
    {
        $stop =  substr($pre_peg,$i,3);
        if ($stop eq "TAG"  ||  $stop eq "TAA"  ||  $stop eq "TGA")
        {
            # print "FOUND $stop for $l\n";
            $orf = substr($pre_peg,$i+3) . $seq;
            if (($i-27) > 0)
            {
                $pre_orf = substr($pre_peg,$i-27,30);
		my $gs = $fig->org_of($peg);
	        my @aliases = grep { $_ =~ /^([NXYZA]P_[0-9\.]+)|(uni\|)|(sp\|)/ } $fig->feature_aliases($peg);
                print "ID=$peg\n";
	        print "GENOME=$gs\n";
	        if (@aliases > 0)
		{
		    print "ALIASES=",join(",",@aliases),"\n";
		}
                print "CONTIG=$contig\n";
                #####print "BEGIN=$begin\n";
                if ($begin < $end)
                {
                    print "BEGIN=", $begin-($len_pre_peg-$i-3), "\n";
                }
                else
                {
                    print "BEGIN=", $begin+($len_pre_peg-$i-3), "\n";
                }
                print "END=$end\n";
                print "SEQ=$orf\n";
                print "PREFIX=$pre_orf\n";
                print "OLD_START_POS=", $len_pre_peg-$i-2, "\n";
                print "///\n";
            	$found = 1;
            }
            else
            {
                print STDERR "skipping $peg - not enough prefix before orf\n";
            }
            last;
        }
    }
    if ( ! $found)
    {
        print STDERR "DID NOT FIND STOP BEFORE $peg  $contig $begin $end $l\n";
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3