[Bio] / FigKernelPackages / SeedUtils.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/SeedUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.41, Fri Jul 30 19:04:13 2010 UTC revision 1.42, Tue Aug 10 19:36:09 2010 UTC
# Line 22  Line 22 
22  package SeedUtils;  package SeedUtils;
23  use BerkTable;  use BerkTable;
24  use DB_File;  use DB_File;
25    use Carp;
26    
27  #  #
28  # In case we are running in a SEED, pull in the FIG_Config  # In case we are running in a SEED, pull in the FIG_Config
# Line 532  Line 533 
533      }      }
534  }  }
535    
536    =head3 extract_seq
537    
538     $seq = &SeedUtils::extract_seq($contigs,$loc)
539    
540    This is just a little utility routine that I have found convenient.  It assumes that
541    $contigs is a hash that contains IDs as keys and sequences as values.  $loc must be of the
542    form
543    
544           Contig_Beg_End
545    
546    where Contig is the ID of one of the sequences; Beg and End give the coordinates of the sought
547    subsequence.  If Beg > End, it is assumed that you want the reverse complement of the subsequence.
548    This routine plucks out the subsequence for you.
549    
550    =cut
551    
552    sub extract_seq {
553        my($contigs,$loc) = @_;
554        my($contig,$beg,$end,$contig_seq);
555        my($plus,$minus);
556    
557        $plus = $minus = 0;
558        my $strand = "";
559        my @loc = split(/,/,$loc);
560        my @seq = ();
561        foreach $loc (@loc)
562        {
563            if ($loc =~ /^\S+_(\d+)_(\d+)$/)
564            {
565                if ($1 < $2)
566                {
567                    $plus++;
568                }
569                elsif ($2 < $1)
570                {
571                    $minus++;
572                }
573            }
574        }
575        if ($plus > $minus)
576        {
577            $strand = "+";
578        }
579        elsif ($plus < $minus)
580        {
581            $strand = "-";
582        }
583    
584        foreach $loc (@loc)
585        {
586            if ($loc =~ /^(\S+)_(\d+)_(\d+)$/)
587            {
588                ($contig,$beg,$end) = ($1,$2,$3);
589    
590                my $len = length($contigs->{$contig});
591                if (!$len)
592                {
593                    carp "Undefined or zero-length contig $contig";
594                    return "";
595                }
596    
597                if (($beg > $len) || ($end > $len))
598                {
599                    carp "Region $loc out of bounds (contig len=$len)";
600                }
601                else
602                {
603                    if (($beg < $end) || (($beg == $end) && ($strand eq "+")))
604                    {
605                        push(@seq,substr($contigs->{$contig},$beg-1,($end+1-$beg)));
606                    }
607                    else
608                    {
609                        $strand = "-";
610                        push(@seq,&reverse_comp(substr($contigs->{$contig},$end-1,($beg+1-$end))));
611                    }
612                }
613            }
614        }
615        return join("",@seq);
616    }
617    
618    
619  =head3 genome_of  =head3 genome_of
620    

Legend:
Removed from v.1.41  
changed lines
  Added in v.1.42

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3