[Bio] / FigMetagenomeTools / renumber_sequences.pl Repository:
ViewVC logotype

Diff of /FigMetagenomeTools/renumber_sequences.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Mon Feb 19 17:15:26 2007 UTC revision 1.3, Sat Apr 14 20:56:12 2007 UTC
# Line 4  Line 4 
4    
5  use strict;  use strict;
6  use DBI;  use DBI;
 use lib "$ENV{HOME}/MIG/Modules";  
7  use Rob;  use Rob;
8    
9  my $dbh=DBI->connect("DBI:mysql:sequences", "rob", "forestry") or die "Can't connect to database sequences\n";  my @opts;
10    
11    my $dbh=DBI->connect($FIG_Config::metagenome_dsn, $FIG_Config::metagenome_dbuser, $FIG_Config::metagenome_dbpass,
12                        { RaiseError => 1 })
13            or die "Can't connect to database sequences\n";
14    
15    
16  my $usage=<<EOF;  my $usage=<<EOF;
17    
18  $0  $0
19  -f fasta file  -f fasta file
20  -q quality file  -q quality file
21    -g genome-name
22    -h host-name
23    -p assembled-path
24  OPTIONAL  OPTIONAL
25    
26  -n number of genome. If this is provided, then the data will not be added to the database, and only the files will be written.  -n number of genome. If this is provided, then the data will not be added to the database, and only the files will be written.
27    
28  EOF  EOF
29    
30  my ($fastaf, $qualf, $genomeno);  my ($fastaf, $qualf, $genomeno, $genome_name, $genome_host, $asm_path);
31  while (@ARGV) {  while (@ARGV) {
32   my $t=shift;   my $t=shift;
33   if ($t eq "-f") {$fastaf=shift}   if ($t eq "-f") {$fastaf=shift}
34   elsif ($t eq "-q") {$qualf=shift}   elsif ($t eq "-q") {$qualf=shift}
35   elsif ($t eq "-n") {$genomeno=shift}   elsif ($t eq "-n") {$genomeno=shift}
36     elsif ($t eq "-g") {$genome_name = shift}
37     elsif ($t eq "-h") {$genome_host = shift}
38     elsif ($t eq "-p") {$asm_path = shift}
39  }  }
40    
41  unless ($qualf) {$qualf = $fastaf.".qual"}  unless ($qualf) {$qualf = $fastaf.".qual"}
42    
43  die $usage unless (-e $fastaf && -e $qualf);  die $usage unless (-e $fastaf && -e $qualf);
44  my ($count, $max);  my ($count, $max);
45    
46    $dbh->begin_work();
47    
48  if ($genomeno)  if ($genomeno)
49  {  {
50          $count=$genomeno;          $count=$genomeno;
# Line 51  Line 63 
63  }  }
64  else  else
65  {  {
66  # set the information for the genome table      # set the information for the genome table#
67        #
68        # Assume that if the genome name was passed on the command line that genome_host
69        # was also passed (meaning that an undefined value for genome_host means it is not defined,
70        # not that we need to ask the user).
71        #
72    
73        my $ask_user;
74        if (!defined($genome_name))
75        {
76            $ask_user++;
77          print "This is the information for the Genome table in the sequences database\nPlease enter the name of the genome:\n";          print "This is the information for the Genome table in the sequences database\nPlease enter the name of the genome:\n";
78          my $name=<STDIN>;          $genome_name = <STDIN>;
79          chomp($name);          chomp($genome_name);
80          my $exc=$dbh->prepare("select count from genome where name like '$name'");      }
81          $exc->execute || die $dbh->errstr;      my $res = $dbh->selectall_arrayref("select count from genome where name = ?", undef, $genome_name);
82          ($count)=$exc->fetchrow_array;      if (@$res)
83          die "The genome $name already appears in the database as number $count\n" if ($count);      {
84            die "The genome $genome_name already appears in the database as number $count\n";
85        }
86    
87        $count = $res->[0]->[0];
88    
89        if ($ask_user)
90        {
91          print "If the sequence is for a phage, and the host is known, please enter the host (or leave blank):\n";          print "If the sequence is for a phage, and the host is known, please enter the host (or leave blank):\n";
92          my $host=<STDIN>;          my $genome_host=<STDIN>;
93          chomp($host);          chomp($genome_host);
94    
95          print "If the sequence has been assembled, please enter the path to the assembled sequence (or leave blank):\n";          print "If the sequence has been assembled, please enter the path to the assembled sequence (or leave blank):\n";
96          my $path=<STDIN>;          my $asm_path=<STDIN>;
97          chomp($path);          chomp($asm_path);
98        }
99    
100          $exc=$dbh->prepare("insert into genome (count, name, host, assembled_sequence) values ('', '$name', '$host', '$path')");      $dbh->do("insert into genome (name, host, assembled_sequence) values (?, ?, ?)", undef,
101          $exc->execute || die $dbh->errstr;               $genome_name, $genome_host, $asm_path);
102          $exc=$dbh->prepare("select count from genome where name like '$name'");  
103          $exc->execute || die $dbh->errstr;      my $res = $dbh->selectcol_arrayref("select count from genome where name = ?", undef, $genome_name);
104          ($count)=$exc->fetchrow_array;      $res or die $dbh->errstr;
105        $count = $res->[0];
106    
107        $res = $dbh->selectcol_arrayref("select max(count) from DNA");
108        $res or die $dbh->errstr;
109        $max = $res->[0];
110    
         $exc=$dbh->prepare("select count from DNA");  
         $exc->execute || die $dbh->errstr;  
         $max=0;  
         while (my @ret=$exc->fetchrow_array) {$max=$ret[0] if ($ret[0] > $max)}  
111          $max++;          $max++;
112    
113          print "The new genome is number $count and the first sequence will be $max\nReading and dereplicating the fasta file\n";          print "The new genome is number $count and the first sequence will be $max\nReading and dereplicating the fasta file\n";
114  }  }
115    $dbh->commit();
116    
117  # read and de replicate the fasta file  # read and de replicate the fasta file
118  my $fasta=Rob->read_fasta($fastaf);  my $fasta=Rob->read_fasta($fastaf);
# Line 99  Line 130 
130  open (FA, ">$count.fa") || die "Can't open $count.fa for writing\n";  open (FA, ">$count.fa") || die "Can't open $count.fa for writing\n";
131  open (QU, ">$count.qual") || die "Can't open $count.qual for writing\n";  open (QU, ">$count.qual") || die "Can't open $count.qual for writing\n";
132    
133    my $exc=$dbh->prepare(qq(insert into DNA (count, genome, original_name, trace_file, sequence_file, quality_file)
134                             values (?, ?, ?, ?, ?, ?)));
135    
136  foreach my $s (keys %$fasta)  foreach my $s (keys %$fasta)
137  {  {
138          unless ($qual->{$s}) {print STDERR "No quality for $s. Skipped\n"; next}          unless ($qual->{$s}) {print STDERR "No quality for $s. Skipped\n"; next}
# Line 106  Line 140 
140          print QU ">$max\n", $qual->{$s}, "\n";          print QU ">$max\n", $qual->{$s}, "\n";
141          unless ($genomeno)          unless ($genomeno)
142          {          {
143                  my $exc=$dbh->prepare("insert into DNA (count, genome, original_name, trace_file, sequence_file, quality_file) values              $exc->execute($max, $count, $s, '', "$count.fa", "$count.qual")  || die $dbh->errstr;
                                 ($max, '$count', '$s', '', '$count.fa', '$count.qual')");  
                 $exc->execute || die $dbh->errstr;  
144          }          }
145          $max++;          $max++;
146  }  }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3