[Bio] / FigKernelScripts / svr_just_ends.pl Repository:
ViewVC logotype

View of /FigKernelScripts/svr_just_ends.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.3 - (download) (as text) (annotate)
Sat Feb 12 21:26:02 2011 UTC (8 years, 9 months ago) by redwards
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.2: +1 -1 lines
increasing min length to 2000 so that you have two ends!

use strict;
use Data::Dumper;
use Carp;
use gjoseqlib;

# This is a SAS Component

=head1 svr_just_ends

Clip off the ends of a set of contigs

=head2 Introduction


For example, to keep 500 characters from each end of every contig:

    svr_just_ends -ln 500 < contigs > just.ends

=head2 Command-Line Arguments

The program is invoked using

    svr_just_ends [-ln=N] < contigs > clipped.ends

where B<contigs> is a fasta file (usually containing DNA)

=over 4

=item -ln=N

Take N characters from each end of every contig (1000 is the default).

=back

=head2 Output

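A fasta file of ends of contigs.  Each end is written as its own sequence,
and the IDs will be of the form

    Contig_1_N  for the first N characters (e.g., contig1_1_1000), or
    Contig_x_y  for the last N characters, where y is the length of the
                contig and x = y - N + 1 (e.g., contig2_232_1231)

A contig that is too short to supply two ends is written out whole, with an
ID of the form Contig_1_y.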


=pod

=cut

# The two POD directives above are deliberate: they close the documentation
# section that precedes this code, so that perl does not swallow the
# statements below as documentation text.  Where that section is already
# closed, they merely open and close an empty one, which is harmless.

use warnings;
use Getopt::Long;

# Number of characters to keep from each end of a contig (-ln=N).
my $ln    = 1000;
my $usage = "svr_just_ends [-ln=N]< contigs > clipped.ends\n";

# Reject unparsable options, and lengths that cannot describe an end.
# The exit is unconditional (it no longer depends on the print succeeding)
# and the status is non-zero so that calling scripts can detect the failure.
my $rc = GetOptions( "ln=i" => \$ln );
if (! $rc || $ln < 1)
{
    print STDERR $usage;
    exit 1;
}

# read_fasta is called without arguments, exactly as before; going by the
# usage line it is expected to read the fasta data from standard input.
# Each entry is unpacked below as [ id, (ignored field), sequence ].
my @contigs = gjoseqlib::read_fasta();

foreach my $tuple (@contigs)
{
    my ($id, undef, $seq) = @$tuple;
    my $contig_ln = length($seq);

    if ($contig_ln >= 2 * $ln)
    {
        # Long enough for two non-overlapping ends of $ln characters each.
        # IDs carry 1-based, inclusive coordinates: id_1_N for the leading
        # end and id_(len-N+1)_len for the trailing end.
        my $id1  = join("_", $id, 1, $ln);
        my $seq1 = substr($seq, 0, $ln);
        my $id2  = join("_", $id, $contig_ln - ($ln - 1), $contig_ln);
        my $seq2 = substr($seq, $contig_ln - $ln, $ln);
        print ">$id1\n$seq1\n";
        print ">$id2\n$seq2\n";
    }
    else
    {
        # Too short to yield two separate ends: emit the contig unclipped,
        # labelled with its full coordinate range.
        my $whole_id = join("_", $id, 1, $contig_ln);
        print ">$whole_id\n$seq\n";
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3