[Bio] / FigMetagenomeTools / renumber_sequences.pl Repository:
ViewVC logotype

Diff of /FigMetagenomeTools/renumber_sequences.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Mon Feb 19 17:15:26 2007 UTC revision 1.2, Fri Mar 16 20:51:25 2007 UTC
# Line 4  Line 4 
4    
5  use strict;  use strict;
6  use DBI;  use DBI;
 use lib "$ENV{HOME}/MIG/Modules";  
7  use Rob;  use Rob;
8    
9  my $dbh=DBI->connect("DBI:mysql:sequences", "rob", "forestry") or die "Can't connect to database sequences\n";  my @opts;
10    
11    my $dbh=DBI->connect($FIG_Config::metagenome_dsn, $FIG_Config::metagenome_dbuser, $FIG_Config::metagenome_dbpass,
12                        { RaiseError => 1 })
13            or die "Can't connect to database sequences\n";
14    
15    
16  my $usage=<<EOF;  my $usage=<<EOF;
17    
18  $0  $0
19  -f fasta file  -f fasta file
20  -q quality file  -q quality file
21    -g genome-name
22    -h host-name
23    -p assembled-path
24  OPTIONAL  OPTIONAL
25    
26  -n number of genome. If this is provided, then the data will not be added to the database, and only the files will be written.  -n number of genome. If this is provided, then the data will not be added to the database, and only the files will be written.
27    
28  EOF  EOF
29    
30  my ($fastaf, $qualf, $genomeno);  my ($fastaf, $qualf, $genomeno, $genome_name, $genome_host, $asm_path);
31  while (@ARGV) {  while (@ARGV) {
32   my $t=shift;   my $t=shift;
33   if ($t eq "-f") {$fastaf=shift}   if ($t eq "-f") {$fastaf=shift}
34   elsif ($t eq "-q") {$qualf=shift}   elsif ($t eq "-q") {$qualf=shift}
35   elsif ($t eq "-n") {$genomeno=shift}   elsif ($t eq "-n") {$genomeno=shift}
36     elsif ($t eq "-g") {$genome_name = shift}
37     elsif ($t eq "-h") {$genome_host = shift}
38     elsif ($t eq "-p") {$asm_path = shift}
39  }  }
40    
41    $dbh->begin_work();
42    
43  unless ($qualf) {$qualf = $fastaf.".qual"}  unless ($qualf) {$qualf = $fastaf.".qual"}
44    
45  die $usage unless (-e $fastaf && -e $qualf);  die $usage unless (-e $fastaf && -e $qualf);
# Line 51  Line 62 
62  }  }
63  else  else
64  {  {
65  # set the information for the genome table      # set the information for the genome table#
66        #
67        # Assume that if the genome name was passed on the command line that genome_host
68        # was also passed (meaning that an undefined value for genome_host means it is not defined,
69        # not that we need to ask the user).
70        #
71    
72        my $ask_user;
73        if (!defined($genome_name))
74        {
75            $ask_user++;
76          print "This is the information for the Genome table in the sequences database\nPlease enter the name of the genome:\n";          print "This is the information for the Genome table in the sequences database\nPlease enter the name of the genome:\n";
77          my $name=<STDIN>;          $genome_name = <STDIN>;
78          chomp($name);          chomp($genome_name);
79          my $exc=$dbh->prepare("select count from genome where name like '$name'");      }
80          $exc->execute || die $dbh->errstr;      my $res = $dbh->selectall_arrayref("select count from genome where name = ?", undef, $genome_name);
81          ($count)=$exc->fetchrow_array;      if (@$res)
82          die "The genome $name already appears in the database as number $count\n" if ($count);      {
83            die "The genome $genome_name already appears in the database as number $count\n";
84        }
85    
86        $count = $res->[0]->[0];
87    
88        if ($ask_user)
89        {
90          print "If the sequence is for a phage, and the host is known, please enter the host (or leave blank):\n";          print "If the sequence is for a phage, and the host is known, please enter the host (or leave blank):\n";
91          my $host=<STDIN>;          my $genome_host=<STDIN>;
92          chomp($host);          chomp($genome_host);
93    
94          print "If the sequence has been assembled, please enter the path to the assembled sequence (or leave blank):\n";          print "If the sequence has been assembled, please enter the path to the assembled sequence (or leave blank):\n";
95          my $path=<STDIN>;          my $asm_path=<STDIN>;
96          chomp($path);          chomp($asm_path);
97        }
98    
99          $exc=$dbh->prepare("insert into genome (count, name, host, assembled_sequence) values ('', '$name', '$host', '$path')");      $dbh->do("insert into genome (name, host, assembled_sequence) values (?, ?, ?)", undef,
100          $exc->execute || die $dbh->errstr;               $genome_name, $genome_host, $asm_path);
101          $exc=$dbh->prepare("select count from genome where name like '$name'");  
102          $exc->execute || die $dbh->errstr;      my $res = $dbh->selectcol_arrayref("select count from genome where name = ?", undef, $genome_name);
103          ($count)=$exc->fetchrow_array;      $res or die $dbh->errstr;
104        $count = $res->[0];
105    
106        $res = $dbh->selectcol_arrayref("select max(count) from DNA");
107        $res or die $dbh->errstr;
108        $max = $res->[0];
109    
         $exc=$dbh->prepare("select count from DNA");  
         $exc->execute || die $dbh->errstr;  
         $max=0;  
         while (my @ret=$exc->fetchrow_array) {$max=$ret[0] if ($ret[0] > $max)}  
110          $max++;          $max++;
111    
112          print "The new genome is number $count and the first sequence will be $max\nReading and dereplicating the fasta file\n";          print "The new genome is number $count and the first sequence will be $max\nReading and dereplicating the fasta file\n";
# Line 99  Line 128 
128  open (FA, ">$count.fa") || die "Can't open $count.fa for writing\n";  open (FA, ">$count.fa") || die "Can't open $count.fa for writing\n";
129  open (QU, ">$count.qual") || die "Can't open $count.qual for writing\n";  open (QU, ">$count.qual") || die "Can't open $count.qual for writing\n";
130    
131    my $exc=$dbh->prepare(qq(insert into DNA (count, genome, original_name, trace_file, sequence_file, quality_file)
132                             values (?, ?, ?, ?, ?, ?)));
133    
134  foreach my $s (keys %$fasta)  foreach my $s (keys %$fasta)
135  {  {
136          unless ($qual->{$s}) {print STDERR "No quality for $s. Skipped\n"; next}          unless ($qual->{$s}) {print STDERR "No quality for $s. Skipped\n"; next}
# Line 106  Line 138 
138          print QU ">$max\n", $qual->{$s}, "\n";          print QU ">$max\n", $qual->{$s}, "\n";
139          unless ($genomeno)          unless ($genomeno)
140          {          {
141                  my $exc=$dbh->prepare("insert into DNA (count, genome, original_name, trace_file, sequence_file, quality_file) values              $exc->execute($max, $count, $s, '', "$count.fa", "$count.qual")  || die $dbh->errstr;
                                 ($max, '$count', '$s', '', '$count.fa', '$count.qual')");  
                 $exc->execute || die $dbh->errstr;  
142          }          }
143          $max++;          $max++;
144  }  }
145    
146    $dbh->commit();

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3