[Bio] / Babel / bin / m5tools.pl Repository:
ViewVC logotype

View of /Babel/bin/m5tools.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Aug 3 19:10:18 2011 UTC (8 years, 11 months ago) by tharriso
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_release_3_1_1
*** empty log message ***

use strict;
use warnings;

use Getopt::Long;
use Data::Dumper;

use Babel::lib::M5NR;

# Command-line state. Each variable is bound to the option of the same
# purpose in the GetOptions call below.
my $verbose = 0;     # --verbose / --noverbose (not consulted in the code visible here)
my $id      = '';    # --id: file name or comma separated list of protein ids
my $md5     = '';    # --md5: file name or comma separated list of md5sums
my $seq     = '';    # --sequence: protein sequence to turn into an md5sum
my $source  = '';    # --source: restrict annotation to one source
my $option  = '';    # --option: requested output type, a key of $options
my $help    = 0;     # --help: print usage and exit

# Output types accepted by --option.
my $options = {sequence => 1, annotation => 1};

# GetOptions returns false when the command line contains an unknown or
# malformed option (it has already warned on STDERR by then). Raising $help
# in that case sends the run through the usage check further down instead of
# silently continuing with default values.
GetOptions(
    "verbose!"   => \$verbose,
    "id=s"       => \$id,
    "md5=s"      => \$md5,
    "sequence=s" => \$seq,
    "source=s"   => \$source,
    "option=s"   => \$option,
    "help!"      => \$help,
) or $help = 1;

# Build the M5NR object with an explicit class-method call; the indirect
# "new M5NR" form depends on how perl parses the bareword and is discouraged.
my $m5nr = M5NR->new;

# Stop early when the object's dbh accessor returns nothing true.
unless ($m5nr->dbh) {
  print STDERR "Unable to retrieve the M5NR database.\n";
  exit 1;
}

# Print usage and stop when help was requested, or when the run has neither a
# recognised --option value nor a --sequence to work on.
if ($help or ! ($options->{$option} or $seq)) {
  help($options, $m5nr);
  exit 1;
}

# Column headings for annotation tables. A "Source" heading is appended only
# when no single --source was requested.
my $hdr = ["ID", "MD5", "Function", "Organism"];
unless ($source) { push @$hdr, "Source"; }

# Pick the operation from the supplied options.
#
# The sequence/md5 results have no heading row, so undef is passed as the
# heading. Passing the row list as the only argument would bind it to
# output()'s heading parameter and print "ARRAY(0x...)" instead of the data.
if ($seq) {
  # --sequence: print what sequence2md5 returns for the given sequence.
  output( undef, [[$m5nr->sequence2md5($seq)]] );
}
elsif ($md5 && ($option eq 'sequence')) {
  # --md5 with --option sequence: look up sequences for the md5sums.
  my $md5s = list_from_input($md5);
  output( undef, [[$m5nr->md5s2sequences($md5s)]] );
}
elsif ($md5 && ($option eq 'annotation')) {
  # --md5 with --option annotation: annotation rows, optionally for one source.
  my $md5s = list_from_input($md5);
  my $sets = $source ? $m5nr->md5s2sets4source($md5s, $source)
                     : $m5nr->md5s2sets($md5s);
  output($hdr, $sets);
}
elsif ($id && ($option eq 'sequence')) {
  # --id with --option sequence: look up sequences for the ids.
  my $ids = list_from_input($id);
  output( undef, [[$m5nr->ids2sequences($ids)]] );
}
elsif ($id && ($option eq 'annotation')) {
  # --id with --option annotation: map the ids to md5s (first element of each
  # ids2md5s row), fetch the annotation rows for those md5s, then keep only
  # rows whose first column is one of the ids that were asked for.
  my %ids  = map { ($_ => 1) } @{ list_from_input($id) };
  my %md5s = map { ($_->[0] => 1) } @{ $m5nr->ids2md5s([keys %ids]) };
  my $sets = $source ? $m5nr->md5s2sets4source([keys %md5s], $source)
                     : $m5nr->md5s2sets([keys %md5s]);
  my @data = grep { exists $ids{$_->[0]} } @$sets;
  output($hdr, \@data);
}
else {
  # No usable combination of options: show usage.
  help($options, $m5nr);
  exit 1;
}

# list_from_input($input)
#
# Turn a command-line value into a reference to a list of unique entries.
#   $input - either the name of a non-empty file holding one entry per line,
#            or a comma separated list of entries.
# Returns an array reference with duplicates removed; entries keep the order
# in which they were first seen.
# Dies if a file that passed the size test cannot be opened.
#
# The file is read with three-argument open rather than by interpolating the
# name into a shell command, so names containing spaces or shell
# metacharacters are read correctly and cannot execute anything.
sub list_from_input {
  my ($input) = @_;

  my @list;
  if (-s $input) {
    open(my $fh, '<', $input) or die "Unable to read $input: $!\n";
    @list = <$fh>;
    close $fh;
    chomp @list;
  }
  else {
    @list = split(/,/, $input);
  }

  # Drop repeats without going through hash-key order, which is random.
  my %seen;
  return [ grep { !$seen{$_}++ } @list ];
}

# output($hdr, $rows)   or   output($rows)
#
# Print a tab separated table on the currently selected output handle.
#   $hdr  - array reference of column headings; undef or an empty list
#           suppresses the heading line.
#   $rows - array reference of rows, each row an array reference of cells.
# When called with a single argument, that argument is taken as $rows and no
# heading is printed. Without this, the row list would be bound to $hdr and
# printed as "ARRAY(0x...)".
# Undefined cells are printed as empty strings.
sub output {
  my ($hdr, $rows) = @_;

  if (@_ == 1) {
    ($hdr, $rows) = (undef, $hdr);
  }

  if ($hdr && @$hdr) {
    print join("\t", @$hdr) . "\n";
  }
  foreach my $row (@{ $rows || [] }) {
    print join("\t", map { defined $_ ? $_ : '' } @$row) . "\n";
  }
}

# help($options, $m5nr)
#
# Print the usage message to STDERR.
#   $options - hash reference whose keys are the accepted --option values.
#   $m5nr    - object whose sources method returns a hash reference keyed by
#              source name, each value a hash with a 'type' entry.
# Option names and source names are listed in sorted order so the message is
# the same on every run. Sources whose type is 'rna' are left out; a source
# with no type is listed.
sub help {
    my ($options, $m5nr) = @_ ;

    my $smap = $m5nr->sources;
    my $opts = join(", ", sort keys %$options);
    my $srcs = join(", ", sort grep { ($smap->{$_}{type} || '') ne 'rna' } keys %$smap);

    print STDERR qq(Usage: $0
  --id        <protein ids>      file or comma separated list of protein ids
  --md5       <md5sums>          file or comma separated list of md5sums
  --sequence  <aa sequence>      protein sequence, returns md5sum of sequence
  --source    <source name>      source for annotation, default is all
  --option    <output option>    output type, one of: $opts
  --verbose                      verbose output
  --help                         show this

  Sources: $srcs
);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3