[Bio] / FigMetagenomeTools / renumber_based_on_fasta.pl Repository:
ViewVC logotype

View of /FigMetagenomeTools/renumber_based_on_fasta.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Fri Mar 16 20:51:25 2007 UTC (12 years, 9 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Changes since 1.1: +0 -1 lines
initial tweaks

#!/usr/bin/perl -w

# renumber some sequences based on what is in the fasta file. We have renumbered the 454 sequences, and then lost them all in a harddrive failure
# but I have the fa (just not the qual files)

use Rob;

my $rob=new Rob;

my $usage=<<EOF;
$0 
-f new fasta file
-o original fasta file
-q original quality file
-x fasta ouptut file
-y quality output file

EOF

my ($fa, $of, $qf, $qo, $fo);
while (@ARGV)
{
 my $t=shift;
 if ($t eq "-f") {$fa=shift}
 elsif ($t eq "-o") {$of=shift}
 elsif ($t eq "-q") {$qf=shift}
 elsif ($t eq "-y") {$qo=shift}
 elsif ($t eq "-x") {$fo=shift}
}
die $usage unless ($fa && $of && $qf && $qo && $fo);
my $fasta=$rob->read_fasta($fa);
my $seq;
map {$fasta->{$_}=~s/\s+//g; $fasta->{$_}=uc($fasta->{$_}); $seq->{$fasta->{$_}}=$_} keys %$fasta;

my $orifa=$rob->read_fasta($of);
my $oriqf=$rob->read_fasta($qf, 1);


open(FA, ">$fo") || die "Can't open $fo for writing";
open(QU, ">$qo") || die "Can't open $qo for writing";

my %seen; my $skip; my $kept;
map 
{
 if ($seen{$orifa->{$_}}) {$skip++}
 else
 {
  $seen{$orifa->{$_}}=1;
  $orifa->{$_}=~s/\s+//g;
  $orifa->{$_}=uc($orifa->{$_});
  $kept++;
  if ($seq->{$orifa->{$_}})
  {
   print FA ">", $seq->{$orifa->{$_}}, "\n", $orifa->{$_}, "\n";
   print QU ">", $seq->{$orifa->{$_}}, "\n", $oriqf->{$_}, "\n";
  }
  else 
  {
   print STDERR "$_ not found!\n";
  }
 }
} keys %$orifa;

  
  
print STDERR "$kept sequences were kept (seen was: ", scalar(keys %seen), " and $skip sequences were ignored\n";
 

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3