[Bio] / Babel / bin / load_subsystems_data.pl Repository:
ViewVC logotype

Annotation of /Babel/bin/load_subsystems_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : tharriso 1.1 #!/usr/bin/env perl
2 :    
3 :     use strict;
4 :     use warnings;
5 :    
6 :    
7 :     use Data::Dumper;
8 :     use XML::Simple;
9 :     use Getopt::Long;
10 :    
11 :     use FIG_Config;
12 :     use Babel;
13 :    
# Usage text and option defaults.  The database settings default to the values
# in FIG_Config and can be overridden one by one on the command line.
my $usage = "$0 [--verbose] [--dbtype TYPE] [--dbname NAME] [--dbuser USER] [--dbhost HOST] --subsystem SUBSYSTEM_FILE\n";
my $source_file = '';
my $verbose = '';
my $source = "SEED";
my $dbname = $FIG_Config::babel_db;
my $dbuser = $FIG_Config::babel_dbuser;
my $dbhost = $FIG_Config::babel_dbhost;
my $dbtype = $FIG_Config::babel_dbtype;

# Help request.  The pattern is anchored on purpose: an unanchored /-h/ also
# matches "--dbhost", which made the script print the usage text and quit
# whenever --dbhost happened to be the first argument.
if ( @ARGV && $ARGV[0] =~ /^--?h(?:elp)?$/ ) {
    print STDERR $usage;
    exit 0;
}

# Unparseable options are a usage error; report failure to the caller.
if ( ! GetOptions("verbose!"    => \$verbose,
                  "subsystem=s" => \$source_file,
                  'dbname:s'    => \$dbname,
                  'dbuser:s'    => \$dbuser,
                  'dbhost:s'    => \$dbhost,
                  'dbtype:s'    => \$dbtype
                 ) ) {
    print STDERR $usage;
    exit 1;
}

# The subsystem file is mandatory and must exist with non-zero size.
if ( (! $source_file) || (! -s $source_file) ) {
    print STDERR "Error: subsystem file missing or empty\n";
    print STDERR $usage;
    exit 1;
}
36 :    
# get data from subsystem file: func => [ [ step1, step2, subsys ], ... ]
print "Reading subsystem data from $source_file ... " if ($verbose);
my $ss_func2subsys = load_data_from_file($source_file, $verbose);
print "Done\n" if ($verbose);

# get Babel db handle
print "Initializing Babel DB\n" if ($verbose);
my ($dbh, $babel);
if ($dbname && $dbuser && $dbhost && $dbtype) {
    # All connection settings are available: connect explicitly (empty
    # password) and hand the handle to Babel.  Stop right here if the
    # connection fails instead of carrying an undefined handle forward.
    $dbh = DBI->connect("DBI:$dbtype:dbname=$dbname;host=$dbhost", $dbuser, '')
        or die "Can't connect to $dbtype database '$dbname' on $dbhost as $dbuser: "
             . ( defined $DBI::errstr ? $DBI::errstr : 'unknown error' ) . "\n";
    $babel = Babel->new($dbh);
}
else {
    # Some setting is missing: construct Babel without arguments and use
    # whatever handle it reports through dbh().
    $babel = Babel->new;
    $dbh = $babel->dbh();
}
53 :    
# Rebuild the subsystem table from the parsed data.  The loader returns
# func => [ ids ], the ids being the _id values of the rows it inserted.
if ($verbose) { print "Clearing current sybsystem table ... "; }
$dbh->do("truncate table ach_subsystems");
if ($verbose) { print "Done\n"; }

if ($verbose) { print "Loading subsystem table from data ... "; }
my $ss_func2ids = load_table_from_data($ss_func2subsys, $dbh);
if ($verbose) { print "Done\n"; }

# Drop and re-create the subsystem column of ach_functions, then index it.
# The three statements run in exactly this order.
if ($verbose) { print "Clearing subsystem data from functions table ... "; }
for my $statement (
    "alter table ach_functions drop column subsystem",
    "alter table ach_functions add column subsystem integer[]",
    "create index functions_subsystem on ach_functions (subsystem)",
) {
    $dbh->do($statement);
}
if ($verbose) { print "Done\n"; }
68 :    
# Fetch the function sets Babel returns for $source and index them as
# md5 => func => id (first, second and third element of each returned row).
if ($verbose) { print "Getting function sets from DB for $source ... "; }
my $md5_func_sets = {};
for my $row ( @{ $babel->get_function_set_4_source($source) } ) {
    my ($md5, $func, $id) = ( $row->[0], $row->[1], $row->[2] );
    $md5_func_sets->{$md5}->{$func} = $id;
}
if ($verbose) {
    print "Done (" . scalar(keys %$md5_func_sets) . " sets found)\n";
}

# Cross-reference subsystem functions with the Babel functions.
#   $total        - every function name seen in the Babel sets
#   $found        - the subset that also occurs in the subsystem data
#   $ssid_funcids - subsystem row id => { function id => 1 }
#   $funcid_ssids - function id      => { subsystem row id => 1 }
my $found = {};
my $total = {};
my $ssid_funcids = {};
my $funcid_ssids = {};

if ($verbose) { print "Mapping subsystems to functions ... \n"; }
for my $ach_func2id ( values %$md5_func_sets ) {
    for my $ach_f ( keys %$ach_func2id ) {
        my $ach_id = $ach_func2id->{$ach_f};
        $total->{$ach_f} = 1;

        next unless exists $ss_func2ids->{$ach_f};
        $found->{$ach_f} = 1;

        for my $ss_id ( @{ $ss_func2ids->{$ach_f} } ) {
            $funcid_ssids->{$ach_id}->{$ss_id} = 1;
            $ssid_funcids->{$ss_id}->{$ach_id} = 1;
        }
    }
}
# This summary is printed regardless of --verbose.
print "\tFound " . scalar(keys %$found) . " functions (out of " . scalar(keys %$total) . ")\n";
99 :    
# Write the collected mappings back.  Each id set is rendered as a quoted
# brace-delimited array literal, e.g. '{1,2,3}', and stored with one update
# statement per row.
if ($verbose) {
    print "Updating " . scalar(keys %$ssid_funcids) . " subsystem entries ... ";
}
for my $ssid ( keys %$ssid_funcids ) {
    my $id_list = join(",", keys %{ $ssid_funcids->{$ssid} });
    my $a_str = "'{" . $id_list . "}'";
    $dbh->do("update ach_subsystems set function = $a_str where _id = $ssid");
}
if ($verbose) { print "Done\n"; }

if ($verbose) {
    print "Updating " . scalar(keys %$funcid_ssids) . " function entries ... ";
}
for my $funcid ( keys %$funcid_ssids ) {
    my $id_list = join(",", keys %{ $funcid_ssids->{$funcid} });
    my $a_str = "'{" . $id_list . "}'";
    $dbh->do("update ach_functions set subsystem = $a_str where _id = $funcid");
}
if ($verbose) { print "Done\n\n"; }
113 :    
114 :    
# Read a tab-separated subsystem file into a lookup keyed by function name.
#
# Each line is split on tabs into: subsystem, step1, step2, function.
# Lines whose function field is missing or false are skipped.  Empty (false)
# subsystem/step1/step2 values are stored as "Unknown".
#
# Parameters:
#   $source_file - path of the file to read
#   $verbose     - accepted for call compatibility; not used in this routine
#
# Returns a hash reference:
#   function => [ [ step1, step2, subsystem ], ... ]   (one triple per line)
#
# Dies if the file cannot be opened.
sub load_data_from_file {
    my ($source_file, $verbose) = @_;

    my $funcs = {};

    # Three-argument open with a lexical handle: the path is used literally
    # (no mode characters or surrounding whitespace are interpreted) and the
    # handle cannot clash with a global FILE handle elsewhere.
    open(my $fh, '<', $source_file) or die "Can't open $source_file: $!";
    while ( my $line = <$fh> ) {
        chomp $line;
        my @fields = split(/\t/, $line);

        # A wrong field count is reported, but the line is not skipped for
        # that reason alone; it is dropped below only when it has no function.
        unless (scalar @fields == 4) {
            print STDERR "Error: bad line '$line'\n";
        }

        my ($subsys, $step1, $step2, $func) = @fields;
        next if (! $func);

        push @{ $funcs->{$func} },
            [ $step1 || "Unknown", $step2 || "Unknown", $subsys || "Unknown" ];
    }
    close($fh);

    return $funcs;
}
139 :    
# Insert one ach_subsystems row per distinct (step1, step2, subsystem) triple
# and report which row ids belong to each function.
#
# Parameters:
#   $func2subsys - hash reference: function => [ [ step1, step2, subsystem ], ... ]
#   $dbh         - database handle; its quote() and do() methods are used
#
# Returns a hash reference: function => [ _id, ... ], with one id pushed per
# triple listed for that function.
#
# Row ids start at 1 and are assigned in order of first appearance while
# walking the functions in sorted order, so repeated runs over the same data
# produce the same ids.
sub load_table_from_data {
    my ($func2subsys, $dbh) = @_;

    # step1 => step2 => subsystem => _id.  The nested lookup keeps the three
    # fields apart; gluing them into one string without a separator would let
    # different triples (e.g. "ab","c" and "a","bc") share a single row id.
    my %row_id_of;
    my $funcs   = {};
    my $next_id = 1;

    foreach my $func ( sort keys %$func2subsys ) {
        foreach my $set ( @{ $func2subsys->{$func} } ) {
            my ($step1, $step2, $subsys) = @$set;

            my $id = $row_id_of{$step1}{$step2}{$subsys};
            unless (defined $id) {
                # First time this triple is seen: create its table row.
                my $qstep1  = $dbh->quote($step1);
                my $qstep2  = $dbh->quote($step2);
                my $qsubsys = $dbh->quote($subsys);
                $dbh->do("insert into ach_subsystems (_id, step1, step2, subsystem) values ($next_id, $qstep1, $qstep2, $qsubsys)");

                $id = $next_id;
                $row_id_of{$step1}{$step2}{$subsys} = $id;
                $next_id += 1;
            }
            push @{ $funcs->{$func} }, $id;
        }
    }
    return $funcs;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3