# -*- perl -*-
#
# load_features -- (re)build the "features" and "ext_alias" database tables
# from the per-genome Features/<type>/tbl files.
#
# usage: load_features [G1 G2 G3 ... ]
#
# With no arguments both tables are dropped, recreated, loaded for every
# genome reported by $fig->genomes, indexed and vacuumed; external aliases
# are additionally inferred from the global peg.synonyms file.
#
# With genome arguments only the rows belonging to those genomes are deleted
# and reloaded; the tables and their indexes are left in place.

use strict;
use warnings;

use FIG;
use DBrtns;

my $fig = FIG->new;

my $temp_dir      = "$FIG_Config::temp";
my $organisms_dir = "$FIG_Config::organisms";

# Bulk-load files.  The process id keeps concurrent runs from colliding.
my $feat_file  = "$temp_dir/tmpfeat$$";
my $alias_file = "$temp_dir/tmpalias$$";

# Genome ids from the command line are interpolated into SQL text below, so
# refuse anything that could terminate the quoted literal.  This runs before
# any file or table is touched.
foreach my $genome (@ARGV) {
    if ($genome =~ /['\\]/) {
        die "genome id \"$genome\" contains a quote or backslash and cannot be used in SQL\n";
    }
}

open(my $rel_fh, '>', $feat_file)
    or die "could not open $temp_dir/tmpfeat$$: $!";

# Alias rows can be produced more than once (from peg.synonyms and from the
# tbl files), so they are piped through "sort -u" on their way to disk.
open(my $alias_fh, '|-', "sort -u > $alias_file")
    or die "could not open $temp_dir/tmpalias$$: $!";

my $dbf = $fig->{_dbf};
my @genomes;

if (@ARGV == 0) {
    $dbf->drop_table( tbl => "features" );
    $dbf->drop_table( tbl => "ext_alias" );

    $dbf->create_table( tbl  => 'ext_alias',
                        flds => "id varchar(32), alias varchar(32), genome varchar(16)"
                      );

    # The two supported back ends differ only in the type used for the
    # location column.  For any other dbms value no features table is created
    # here (same as before).
    my $dbms = defined($FIG_Config::dbms) ? $FIG_Config::dbms : "";
    if ($dbms eq "Pg") {
        $dbf->create_table( tbl  => "features",
                            flds => "id varchar(32), idN INTEGER, type varchar(16),genome varchar(16),"
                                  . "location varchar(5000),"
                                  . "contig varchar(96), minloc INTEGER, maxloc INTEGER,"
                                  . "aliases TEXT"
                          );
    }
    elsif ($dbms eq "mysql") {
        $dbf->create_table( tbl  => "features",
                            flds => "id varchar(32), idN INTEGER, type varchar(16),genome varchar(16),"
                                  . "location TEXT,"
                                  . "contig varchar(96), minloc INTEGER, maxloc INTEGER,"
                                  . "aliases TEXT"
                          );
    }

    @genomes = $fig->genomes;

    # Here we extract external aliases from the peg.synonyms table, when they
    # can be inferred accurately.
    #
    # Each line is "<id>,<n> TAB <id>,<n>;<id>,<n>;...".  The number is
    # presumably the sequence length -- TODO confirm against the file's
    # producer.  A non-fig id is written as an alias of a fig peg when the
    # line holds exactly one fig peg, or when the two numbers are equal.
    my $syn_file = "$FIG_Config::global/peg.synonyms";
    open(my $syn_fh, '<', $syn_file)
        or die "could not open $FIG_Config::global/peg.synonyms: $!";

    while (defined(my $line = <$syn_fh>)) {
        chomp $line;
        my ($principal, $others) = split(/\t/, $line);

        my @entries = grep { defined } ($principal, defined($others) ? split(/;/, $others) : ());

        # Keep only well-formed "id,number" entries; a failed match must not
        # contribute stale or undefined captures.
        my @ids = map { /^([^,]+),(\d+)/ ? [$1, $2] : () } @entries;

        my (@fig, @nonfig);
        foreach my $entry (@ids) {
            if ($entry->[0] =~ /^fig\|/) { push(@fig, $entry); }
            else                         { push(@nonfig, $entry); }
        }

        foreach my $fig_entry (@fig) {
            my ($peg, $peg_ln) = @$fig_entry;
            my $genome = FIG::genome_of($peg);
            foreach my $other (@nonfig) {
                if ((@fig == 1) || ($peg_ln == $other->[1])) {
                    print {$alias_fh} "$peg\t$other->[0]\t$genome\n";
                }
            }
        }
    }
    close($syn_fh);
}
else {
    @genomes = @ARGV;
    foreach my $genome (@genomes) {
        $dbf->SQL("DELETE FROM features WHERE ( genome = \'$genome\' )");
        $dbf->SQL("DELETE FROM ext_alias WHERE ( genome = \'$genome\' )");
    }
}

# Feature ids listed in changed.location.features, with the number of times
# each is listed.  That many leading tbl rows for the id are skipped below.
# The file is optional: if it cannot be opened nothing is skipped.
my %skips_left;
if (open(my $chg_fh, '<', "$FIG_Config::global/changed.location.features")) {
    while (defined(my $line = <$chg_fh>)) {
        if ($line =~ /^(fig\|\d+\.\d+\.[a-zA-Z]+\.\d+)/) {
            $skips_left{$1}++;
        }
    }
    close($chg_fh);
}

foreach my $genome (@genomes) {
    my $features_dir = "$organisms_dir/$genome/Features";

    opendir(my $feat_dh, $features_dir)
        or die "could not open $genome/Features: $!";
    # Feature types are the purely alphabetic directory entries.
    my @types = grep { /^[a-zA-Z]+$/ } readdir($feat_dh);
    closedir($feat_dh);

    foreach my $type (@types) {
        my $tbl_file = "$features_dir/$type/tbl";

        # Missing, empty or unreadable tbl files are skipped silently.
        next unless (-s $tbl_file);
        open(my $tbl_fh, '<', $tbl_file) or next;

        # print STDERR "loading $genome/Features/$type/tbl\n";
        while (defined(my $line = <$tbl_fh>)) {
            chomp $line;   # chomp, not chop: a final line without "\n" keeps its last character
            my ($id, $loc, @aliases) = split(/\t/, $line);

            next unless $id;

            # check for obsolete entries due to location changes
            if ($skips_left{$id}) {
                $skips_left{$id}--;
                next;
            }

            # Declared per row so a feature without a usable location cannot
            # inherit the contig of the previous row.
            my ($contig, $minloc, $maxloc);
            if ($loc) {
                $loc =~ s/\s+$//;
                ($contig, $minloc, $maxloc) = FIG::boundaries_of($loc);
                if ($minloc && $maxloc && ($minloc > $maxloc)) {
                    ($minloc, $maxloc) = ($maxloc, $minloc);
                }
            }

            if (! $contig) {
                $loc = $contig = $minloc = $maxloc = "";
            }

            my $aliases = "";
            if (@aliases > 0) {
                $aliases = join(",", grep { /\S/ } @aliases);
                foreach my $alias (@aliases) {
                    # Only aliases with a recognised external prefix go into
                    # ext_alias.  "sp|" and "tr|" are separate alternatives.
                    if ($alias =~ /^(?:NP_|gi\||sp\||tr\||kegg\||uni\|)/) {
                        print {$alias_fh} "$id\t$alias\t$genome\n";
                    }
                }
            }

            $minloc ||= 0;
            $maxloc ||= 0;

            # Rows that would overflow the column widths, or whose id has no
            # trailing number for idN, are dropped.
            if ((length($loc) < 5000) && (length($contig) < 96) && (length($id) < 32)
                && ($id =~ /(\d+)$/)) {
                my $id_number = $1;
                print {$rel_fh} "$id\t$id_number\t$type\t$genome\t$loc\t$contig\t$minloc\t$maxloc\t$aliases\n";
            }
        }
        close($tbl_fh);
    }
}

# Buffered write errors only surface at close, and closing the pipe waits for
# "sort -u" to finish; both must succeed before the files are bulk-loaded.
close($rel_fh)
    or die "could not close $feat_file: $!";
close($alias_fh)
    or die "sort -u > $alias_file failed (exit status $?): $!";

$dbf->load_table( tbl => "features",  file => $feat_file );
$dbf->load_table( tbl => "ext_alias", file => $alias_file );

if (@ARGV == 0) {
    # Indexes are only built after a full reload, when the tables were
    # recreated.  Each entry is [ index name, table, indexed fields ].
    my @indexes = (
        [ "ext_alias_alias_ix",  "ext_alias", "alias" ],
        [ "ext_alias_genome_ix", "ext_alias", "genome" ],
        [ "ext_alias_id_ix",     "ext_alias", "id" ],
        [ "features_id_ix",      "features",  "id" ],
        [ "features_org_ix",     "features",  "genome" ],
        [ "features_type_ix",    "features",  "type" ],
        [ "features_beg_ix",     "features",  "genome,contig,minloc" ],
    );
    foreach my $index (@indexes) {
        my ($idx, $tbl, $flds) = @$index;
        $dbf->create_index( idx  => $idx,
                            tbl  => $tbl,
                            type => "btree",
                            flds => $flds
                          );
    }
    $dbf->vacuum_it("features");
}

unlink($feat_file);
unlink($alias_file);