[Bio] / DomReg / scripts / putRiboswitchesIntoDB.pl Repository:
ViewVC logotype

View of /DomReg/scripts/putRiboswitchesIntoDB.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri Feb 2 16:28:08 2007 UTC (13 years ago) by bartels
Branch: MAIN
CVS Tags: HEAD
*** empty log message ***

#!/usr/bin/env /home/bartels/FIGdisk/env/cee/bin/perl

use strict;
use warnings;
use lib "../../PPO/";
use lib "../";
use DBMaster;
use FIG;

# Driver: scan the directory given as the first command line argument for
# "<type>.<something>.summary" and "<type>.<something>.detail" files, parse
# each of them, and hand the parsed data for every <type> to
# createRiboswitchObjects.
my $dir = shift;
if ( !defined( $dir ) ) {
  die "usage: $0 <directory containing .summary and .detail files>\n";
}

my $dbmaster = DBMaster->new( 'DomReg' );

# $ribohashofhashes->{ <type> }->{ 'ribohash' }    : result of parseSummaryFile
# $ribohashofhashes->{ <type> }->{ 'riboseqhash' } : result of parseDetailFile
my $ribohashofhashes;

# Read the whole listing first so the directory handle can be released
# before any file is parsed.
opendir( my $dh, $dir ) or die "could not open directory $dir: $!\n";
my @entries = readdir( $dh );
closedir( $dh );

foreach my $fl ( @entries ) {

  my $filename = $dir.'/'.$fl;

  # Everything before the second-to-last dot-separated component names the
  # riboswitch type; the patterns are kept exactly as they were.
  if ( $fl =~ /(.*)\..*\.summary/ ) {
    my $k = $1;
    $ribohashofhashes->{ $k }->{ 'ribohash' } = parseSummaryFile( $dbmaster, $filename );
  }
  elsif ( $fl =~ /(.*)\..*\.detail/ ) {
    my $k = $1;
    $ribohashofhashes->{ $k }->{ 'riboseqhash' } = parseDetailFile( $dbmaster, $filename );
  }

}

# Sorted so that repeated runs process the types in the same order.
foreach my $k ( sort keys %$ribohashofhashes ) {
  createRiboswitchObjects( $dbmaster,
                           $ribohashofhashes->{ $k }->{ 'ribohash' },
                           $ribohashofhashes->{ $k }->{ 'riboseqhash' },
                           $k );
}



# Parse a ".detail" file into a hash of sequences.
#
# Parameters:
#   $dbmaster - accepted for symmetry with the other parsers; not used here
#   $file     - path of the detail file to read
#
# Returns a hash reference (undef when no record was found) of the form
#   { "<genome><start><stop>" => { 'sequence' => <line following "SEQ:"> } }
# where <genome> comes from the most recent "Genome: NC_..." line and
# <start>/<stop> from the most recent "Location: <strand>, <a>-<b>" line.
# For a strand containing "minus" the coordinates are ordered so that
# start >= stop.  The sequence line is stored as read, including its line
# terminator.
#
# Dies when the file cannot be opened.
sub parseDetailFile {

  my ( $dbmaster, $file ) = @_;

  my $riboseqhash;

  # Three-argument open with a lexical handle: the file name is never
  # interpreted as a mode, and the handle is private to this call.
  open ( my $fh, '<', $file ) or die "cannot open file $file: $!\n";

  my ( $genome, $start, $stop );

  while ( my $line = <$fh> ) {
    if ( $line =~ /Genome\: (NC\_\d+)/ ) {
      $genome = $1;
    }
    elsif ( $line =~ /Location\: (\w+)\, (\d+)\-(\d+)/ ) {
      my $plusminus = $1;
      ( $start, $stop ) = ( $2, $3 );
      if ( $plusminus =~ /minus/ && $start < $stop ) {
        ( $start, $stop ) = ( $stop, $start );
      }
    }
    elsif ( $line =~ /^SEQ\:/ ) {
      my $sequence = <$fh>;

      # A "SEQ:" marker on the very last line has no sequence; storing
      # undef here would wipe out the sequence of the preceding record.
      last if !defined( $sequence );

      # Without genome and location there is no usable key.
      if ( !defined( $genome ) || !defined( $start ) || !defined( $stop ) ) {
        print STDERR "skipping sequence at line $. of $file: no genome/location seen yet\n";
        next;
      }

      $riboseqhash->{ $genome.$start.$stop }->{ 'sequence' } = $sequence;
    }
  }

  close $fh;

  return $riboseqhash;

}

# Parse a tab-separated ".summary" file into a hash of riboswitch records.
#
# Parameters:
#   $dbmaster - accepted for symmetry with the other parsers; not used here
#   $file     - path of the summary file to read
#
# The first line of the file is skipped.  From every following line these
# columns (counted from 0) are used:
#   1 -> 'organismGI'
#   2 -> 'organism'   (one leading blank removed)
#   6 -> 'start' / 'stop', taken from the first "<digits>-<digits>"
#   7 -> 'giNumber'   (one leading blank removed)
#   8 -> 'distance'
#
# Returns a hash reference (undef when no record was found), keyed by
# "<organismGI><start><stop>", whose values are hash references with the
# fields listed above.  Lines without a start-stop location are skipped:
# they cannot form a key and would otherwise end up as records with
# undefined coordinates.
#
# Dies when the file cannot be opened.
sub parseSummaryFile {

  my ( $dbmaster, $file ) = @_;

  my $ribohash;

  # Three-argument open with a lexical handle: the file name is never
  # interpreted as a mode, and the handle is private to this call.
  open ( my $fh, '<', $file ) or die "cannot open file $file: $!\n";

  # The first line is not data.
  my $header = <$fh>;

  while ( my $line = <$fh> ) {

    my @dat = split( /\t/, $line );

    my ( $start, $stop );
    if ( defined( $dat[6] ) && $dat[6] =~ /(\d+)\-(\d+)/ ) {
      ( $start, $stop ) = ( $1, $2 );
    }

    if ( !defined( $start ) ) {
      # Blank lines are dropped silently; anything else is reported.
      if ( $line =~ /\S/ ) {
        print STDERR "skipping line $. of $file: no start-stop location in column 7\n";
      }
      next;
    }

    my $objhash = { 'organismGI' => $dat[1],
                    'organism'   => $dat[2],
                    'start'      => $start,
                    'stop'       => $stop,
                    'giNumber'   => $dat[7],
                    'distance'   => $dat[8] };

    # These two columns may carry one leading blank.
    foreach my $field ( 'organism', 'giNumber' ) {
      if ( defined( $objhash->{ $field } ) && $objhash->{ $field } =~ /^ (.*)/ ) {
        $objhash->{ $field } = $1;
      }
    }

    $ribohash->{ $dat[1].$start.$stop } = $objhash;
  }

  close $fh;
  return $ribohash;

}

# Store the riboswitches of one riboswitch type through $dbmaster.
#
# Parameters:
#   $dbmaster    - object providing the RiboType and Riboswitch accessors
#   $ribohash    - hash reference as returned by parseSummaryFile
#   $riboseqhash - hash reference as returned by parseDetailFile; looked up
#                  with the same keys as $ribohash
#   $k           - name of the riboswitch type
#
# The RiboType named $k is fetched, or created when none exists.  For every
# entry of $ribohash a Riboswitch with the same start, stop and organism is
# looked up; only when none is found is a new one created, with its contig
# obtained from getContig.
#
# Always returns 1.
sub createRiboswitchObjects {

  my ( $dbmaster, $ribohash, $riboseqhash, $k ) = @_;

  my $knowntypes = $dbmaster->RiboType->get_objects( { 'name' => $k } );
  my $ribotype = $knowntypes->[0];
  unless ( defined( $ribotype ) ) {
    $ribotype = $dbmaster->RiboType->create( { 'name' => $k } );
  }

  foreach my $key ( keys %$ribohash ) {

    my $entry = $ribohash->{ $key };

    my $existing = $dbmaster->Riboswitch->get_objects( { 'start'    => $entry->{ 'start' },
                                                         'stop'     => $entry->{ 'stop' },
                                                         'organism' => $entry->{ 'organism' } } );

    # Already stored: nothing to do for this entry.
    next if defined( $existing->[0] );

    my ( $contig ) = getContig( $dbmaster, $entry->{ 'organism' }, $entry->{ 'giNumber' } );

    $dbmaster->Riboswitch->create( { 'start'        => $entry->{ 'start' },
                                     'stop'         => $entry->{ 'stop' },
                                     'organism'     => $entry->{ 'organism' },
                                     'organismGI'   => $entry->{ 'organismGI' },
                                     'distance'     => $entry->{ 'distance' },
                                     'sequence'     => $riboseqhash->{ $key }->{ 'sequence' },
                                     'downstreamGI' => $entry->{ 'giNumber' },
                                     'contig'       => $contig,
                                     'RiboType'     => $ribotype,
                                   } );
  }

  return 1;

}

# Ask $fig for the genus/species name and the contig of one feature id.
# The contig is everything before the trailing "_<digits>_<digits>" of the
# feature location; it is undef when the location does not have that form.
sub _organismAndContigOfPeg {

  my ( $fig, $peg ) = @_;

  my $taxid = $fig->genome_of( $peg );
  my $genuspecies = $fig->genus_species( $taxid );

  my $contig;
  my $contig_fl = $fig->feature_location( $peg );
  if ( defined( $contig_fl ) && $contig_fl =~ /(.*)\_\d+\_\d+/ ) {
    $contig = $1;
  }

  return ( $genuspecies, $contig );

}

# Find the contig of the gene $giGene in the organism $organism.
#
# Parameters:
#   $dbmaster - not used here
#   $organism - genus/species name the hit has to belong to
#   $giGene   - identifier handed to FIG's search_index
#
# Returns the list ( $contig, 1 ) on success and ( undef, 0 ) otherwise.
#
# The hits returned by search_index are examined in order until one is
# found whose genus/species name equals $organism.  As before, the lookup
# is abandoned when the very first hit has no name or no parsable location.
# A matching later hit without a parsable location now yields ( undef, 0 )
# instead of the contig left over from an earlier, non-matching hit.
sub getContig {

  my ( $dbmaster, $organism, $giGene ) = @_;

  # Nothing to search for, or nothing to compare against.
  if ( !defined( $giGene ) || !defined( $organism ) ) {
    return ( undef, 0 );
  }

  my $fig = FIG->new;
  my ( $peg_index_data ) = $fig->search_index( $giGene );

  for ( my $i = 0; ; $i++ ) {

    my $peg = $peg_index_data->[$i]->[0];
    if ( !defined( $peg ) ) {
      # Ran out of hits without a match.
      print STDERR "Nothing found for $organism\n";
      return ( undef, 0 );
    }

    # Fresh values for every hit, so nothing carries over from the last one.
    my ( $genuspecies, $contig ) = _organismAndContigOfPeg( $fig, $peg );

    # Progress output: "JJ" marks the first hit, "KK" every later one.
    my $tag  = ( $i == 0 ) ? 'JJ' : 'KK';
    my $name = defined( $genuspecies ) ? $genuspecies : '';
    print STDERR "$tag$name$tag and $tag$organism$tag\n";

    if ( $i == 0 && !( defined( $genuspecies ) && defined( $contig ) ) ) {
      return ( undef, 0 );
    }

    next if !defined( $genuspecies ) || $genuspecies ne $organism;

    return defined( $contig ) ? ( $contig, 1 ) : ( undef, 0 );
  }

}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3