[Bio] / FigKernelScripts / svr_cut_domain.pl Repository:
ViewVC logotype

View of /FigKernelScripts/svr_cut_domain.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Oct 27 18:44:56 2010 UTC (9 years, 5 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
cut domains from sequences

use strict;
use Data::Dumper;
use Carp;

#
# This is a SAS Component
#


=head1 svr_cut_domain

Clip domains out of a set of protein sequences

------

Example:

    svr_cut_domain domain_desc < fasta > fasta.of.domains

would read a 3-column, tab-separated table (domain_desc) in which each
line contains [ID,Begin,End].  Begin and End are 1-based, inclusive
positions in the protein sequence named by ID.  The complete protein
sequences are read from STDIN.  A fasta file of the extracted domains
is written to STDOUT.

------

=cut

use SeedUtils;

# Usage text printed when the domain-description file argument is missing.
my $usage = "usage: svr_cut_domain DomainDesc < protein_seqs.fasta > domain_seqs.fasta";

# A missing argument is a usage error.  Test with defined/length rather than
# truthiness so that a file literally named "0" is still accepted.
(defined($ARGV[0]) && length($ARGV[0])) || die "$usage\n";

# An unreadable file is a different failure from bad usage: report the path
# and the system error instead of hiding them behind the usage text.
# DOM stays a bareword handle because the domain loop below reads from <DOM>.
open(DOM, "<", $ARGV[0]) || die "could not open '$ARGV[0]': $!\n";

# Maps each sequence ID (the first whitespace-delimited token of a FASTA
# header line) to its sequence with all whitespace removed.
my %seqs;
{
    # Read STDIN one FASTA record at a time by splitting on "\n>".  The
    # separator is localized so it cannot leak into later line-oriented reads.
    local $/ = "\n>";
    while (defined(my $record = <STDIN>))
    {
	chomp $record;    # drop the trailing "\n>" that ended this record

	my ($id, $seq) = _parse_fasta_record($record);

	# Records with no header/newline structure (e.g. blank lines before
	# the first ">") carry no sequence; skip them rather than storing an
	# entry under an undefined ID.
	next unless defined $id;

	# Use exists so a duplicate is caught even when the first occurrence
	# had an empty sequence.
	if (exists $seqs{$id}) { die "$id occurs multiple times in the input collection" }
	$seqs{$id} = $seq;
    }
}

# Split one FASTA record into (id, sequence).
#   $record - record text; a leading ">" is optional because the record
#             separator consumes it for every record after the first.
# Returns (id, sequence-without-whitespace), or an empty list when the
# record has no "header line + newline" structure.
sub _parse_fasta_record
{
    my ($record) = @_;
    if ($record =~ /^>?(\S+)[^\n]*\n(.*)/s)
    {
	my ($id, $seq) = ($1, $2);
	$seq =~ s/\s//g;
	return ($id, $seq);
    }
    return;
}

# Read the domain description table line by line.  Each line must be
# "ID<TAB>Begin<TAB>End"; for every line the requested region of the stored
# sequence is written to STDOUT as a FASTA record headed by the ID.
$/ = "\n";
while (defined(my $line = <DOM>))
{
    # On a failed match the list assignment leaves all three undefined.
    my ($id, $begin, $end) = $line =~ /^(\S+)\t(\d+)\t(\d+)\s*$/;

    my $domain;
    if (defined($id) && defined($seqs{$id}) && defined($domain = _cut_domain($seqs{$id}, $begin, $end)))
    {
	print ">$id\n$domain\n";
    }
    else
    {
	# Malformed line, unknown ID, or coordinates outside the sequence.
	die "bad input: $line";
    }
}

# Extract the region [$begin, $end] (1-based, inclusive) from $seq.
#   $seq   - full sequence string
#   $begin - first position of the domain, must be >= 1
#   $end   - last position of the domain, must be >= $begin
# Returns the domain string, or undef when the coordinates are invalid.
# An $end past the end of the sequence is tolerated and the result is
# truncated at the sequence end.
sub _cut_domain
{
    my ($seq, $begin, $end) = @_;

    # Begin 0 would become substr offset -1, which counts from the END of
    # the string and silently returns the wrong residues.
    return if $begin < 1;

    # End < Begin would hand substr a zero or negative length; a negative
    # length means "stop that many characters before the end", again
    # yielding a bogus region instead of an error.
    return if $end < $begin;

    # Nothing to extract when the domain starts past the last residue.
    return if $begin > length($seq);

    return substr($seq, $begin - 1, $end - $begin + 1);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3