[Bio] / FigKernelScripts / load_attributes.pl Repository:
ViewVC logotype

View of /FigKernelScripts/load_attributes.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Fri Mar 4 06:22:10 2005 UTC (15 years, 1 month ago) by redwards
Branch: MAIN
Changes since 1.3: +85 -39 lines
Big updates to FIG.pm for the addition of methods to properly handle attributes. Also modified genome_statistics to take advantage of these new methods. load_attributes will load genomes and other attributes but doesn't not yet use the new methods (but should), and Attributes.html is some docs.

use FIG;
my $fig = new FIG;

# there are two hidden utilities here. If you call the file:
# Filename		Output
# load_attributes 	nothing
# load_attributes -v	information about those organisms that we add information about
# load_attributes -vv	information about all organisms


my $verbose=shift; # a hidden command.

my $dbf = $fig->{_dbf};
$dbf->drop_table( tbl => "attribute" );
$dbf->create_table( tbl => 'attribute',
		    flds => "fid varchar(64), tag varchar(64), val text, url text"
		    );

my(%kv,$genome);
foreach $genome ($fig->genomes)
{
   # I have rewritten this to allow the following things:
   	# 1. Attributes for genomes are now available in $FIG_Config::organisms/$genome/Attributes
	# 2. Attributes for features (not just pegs) are now available in $FIG_Config::organisms/$genome/Features/*/Attributes
	# 3. in 2, above, we don't know what * is. It should minimally be pegs and tRNA's but there is no reason it is not
	#	something else.

    undef %kv;
    my $stderr; # error messages that show progress, but I only want to print if we have something interesting to say
    # The files that we will read for this genome are stored in @allfiles.
    my @allfiles;
    
    # Find the genome attributes files.
    $stderr .= "Reading from ". $fig->genus_species($genome). " ($genome)\n";
    
    my $dir = "$FIG_Config::organisms/$genome/Attributes";
    if (opendir(ATTR,$dir))
    {
       my @files = grep { $_ !~ /^\./ } readdir(ATTR);
       $stderr .= "\tFound ". (scalar @files) . " genome attributes >" . (join "<>", @files) . "<\n";
       closedir(ATTR);
      
       # here is an alternate way of making assigned_attributes at the end of the list if the file exists
       # my speculation is that this is so these overwrite all other attributes, si?
       push @allfiles,  map  { $_ = "$dir/$_"} grep { $_ ne "assigned_attributes" } @files;
       push @allfiles, "$dir/assigned_attributes" if (-e "$dir/assigned_attributes"); 
    }
    
    # Now find the other attributes files
    # We should use File::Find here, but I am not sure if that is in the default distro, so I'll just write a quickie. Not as good, though
   
    opendir(FEATURES, "$FIG_Config::organisms/$genome/Features") || die "Can't open dir $FIG_Config::organisms/$genome/Features/";
    foreach my $dir (readdir(FEATURES)) {
     next if ($dir =~ /^\./);
     if (-e "$FIG_Config::organisms/$genome/Features/$dir/Attributes" && opendir(ATTR, "$FIG_Config::organisms/$genome/Features/$dir/Attributes"))
     {
 	my @files = grep { $_ !~ /^\./ } readdir(ATTR);
	closedir(ATTR);
        next unless (scalar @files); # you could have an empty attributes dir, no problem
	$stderr .= "\tFound ". (scalar @files) . " attributes files in $dir\n";
        push @allfiles,  map  { $_ = "$FIG_Config::organisms/$genome/Features/$dir/Attributes/$_" } grep { $_ ne "assigned_attributes" } @files;
	push @allfiles, "$FIG_Config::organisms/$genome/Features/$dir/Attributes/assigned_attributes" 
	    if (-e "$FIG_Config::organisms/$genome/Features/$dir/Attributes/assigned_attributes");
     }
    }


    # now read each of the files and save the data
    foreach my $file (@allfiles) 
    {
         if ((-s "$file") && open(TMPATTR,"<$file"))
	 {
             while (defined($_ = <TMPATTR>))
             {
	         if ($_ =~ /^(\S+)\t(\S+)\t(.*)$/)
		 {
	 	     if ($3)
		     {
		         $kv{"$1\t$2"} = $3;
		     }
	             else
		     {
			 delete $kv{"$1\t$2"};
		     }
		 }
	     }
	     close(TMPATTR);
	 }
    }
    if ($verbose eq "-v" && scalar keys %kv) 
    {
     print "$stderr\tWe have ", scalar keys %kv, " attributes to add for ", $fig->genus_species($genome), " ($genome)\n";
    }
    elsif ($verbose eq "-vv") {
     print "$stderr\tWe have ", scalar keys %kv, " attributes to add for ", $fig->genus_species($genome), " ($genome)\n";
    }

    if (open(TMPATTR,">$FIG_Config::temp/tmp$$"))
    {
	    my($pegK,$k,$v);
	    foreach $pegK (sort keys(%kv))
	    {
		($peg,$k) = split(/\t/,$pegK);
		$v = $kv{$pegK};
		print TMPATTR "$peg\t$k\t$v\n";
	    }
	    close(TMPATTR);
	    $dbf->load_table( tbl => "attribute",
			      file => "$FIG_Config::temp/tmp$$" );
	    unlink("$FIG_Config::temp/tmp$$");
    }
    else
    {
	    die "$FIG_Config::temp/tmp$$";
    }
}
$dbf->create_index( idx  => "attribute_fid_ix",
		    tbl  => "attribute",
		    type => "btree",
		    flds => "fid" 
		  );

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3