[Bio] / Babel / bin / load_subsystems_data.pl Repository:
ViewVC logotype

Annotation of /Babel/bin/load_subsystems_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : tharriso 1.1 #!/usr/bin/env perl
2 :    
3 :     use strict;
4 :     use warnings;
5 :    
6 :    
7 :     use Data::Dumper;
8 :     use XML::Simple;
9 :     use Getopt::Long;
10 :    
11 :     use FIG_Config;
12 : tharriso 1.2 use Babel::lib::Babel;
13 : tharriso 1.1
# Driver: reads a subsystem hierarchy file, rebuilds the ach_subsystems
# table from it, and cross-links subsystem rows with the function rows
# that Babel reports for the source named in $source.

# Usage summary, printed for --help and for any argument error.
my $usage = "$0 [--verbose] [--dbtype TYPE] [--dbname NAME] [--dbuser USER] [--dbhost HOST] --subsystem SUBSYSTEM_FILE\n";
my $source_file = '';
my $verbose     = '';
my $help        = '';
my $source      = "SEED";

# Connection settings default to the FIG_Config values and may each be
# overridden on the command line.
my $dbname = $FIG_Config::babel_db;
my $dbuser = $FIG_Config::babel_dbuser;
my $dbhost = $FIG_Config::babel_dbhost;
my $dbtype = $FIG_Config::babel_dbtype;

# Help is parsed as a real option. The earlier test matched /-h/ anywhere
# in the first argument, so "--dbhost HOST" given first printed the usage
# text and exited instead of running.
if ( ! GetOptions("verbose!"    => \$verbose,
                  "help|h"      => \$help,
                  "subsystem=s" => \$source_file,
                  'dbname:s'    => \$dbname,
                  'dbuser:s'    => \$dbuser,
                  'dbhost:s'    => \$dbhost,
                  'dbtype:s'    => \$dbtype
                 ) ) {
    # Invalid options are an error, so report a non-zero exit status.
    print STDERR $usage;
    exit 1;
}
if ($help) {
    print STDERR $usage;
    exit 0;
}
# The subsystem file is mandatory and must exist with non-zero size.
if ( (! $source_file) || (! -s $source_file) ) {
    print STDERR $usage;
    exit 1;
}

# get data from subsystem file: func => [ [ step1, step2, subsys ], ... ]
print "Reading subsystem data from $source_file ... " if ($verbose);
my $ss_func2subsys = load_data_from_file($source_file, $verbose);
print "Done\n" if ($verbose);

# get Babel db handle
print "Initializing Babel DB\n" if ($verbose);
my ($dbh, $babel);
if ($dbname && $dbuser && $dbhost && $dbtype) {
    # NOTE(review): DBI is not loaded explicitly by this script; presumably
    # Babel::lib::Babel pulls it in -- confirm.
    $dbh = DBI->connect("DBI:$dbtype:dbname=$dbname;host=$dbhost", $dbuser, '')
        or die "Can't connect to $dbtype database '$dbname' on $dbhost as $dbuser: $DBI::errstr\n";
    $babel = Babel::lib::Babel->new($dbh);
}
else {
    # Same class as the branch above; the old "new Babel" named a package
    # this script never loads.
    # NOTE(review): assumes the constructor opens its own connection when
    # called without a handle -- confirm.
    $babel = Babel::lib::Babel->new();
    $dbh   = $babel->dbh();
}

# load subsystem table: func => [ ids ]
print "Clearing current subsystem table ... " if ($verbose);
$dbh->do("truncate table ach_subsystems");
print "Done\n" if ($verbose);

print "Loading subsystem table from data ... " if ($verbose);
my $ss_func2ids = load_table_from_data($ss_func2subsys, $dbh);
print "Done\n" if ($verbose);

# Reset the subsystem links on the functions table by dropping and
# re-creating the column, then indexing the fresh column.
print "Clearing subsystem data from functions table ... " if ($verbose);
$dbh->do("alter table ach_functions drop column subsystem");
$dbh->do("alter table ach_functions add column subsystem integer[]");
$dbh->do("create index functions_subsystem on ach_functions (subsystem)");
print "Done\n" if ($verbose);

# get SEED functions from Babel: md5 => func => id
print "Getting function sets from DB for $source ... " if ($verbose);
my $md5_func_sets = {};
foreach my $row ( @{ $babel->get_function_set_4_source($source) } ) {
    $md5_func_sets->{ $row->[0] }->{ $row->[1] } = $row->[2];
}
print "Done (" . scalar(keys %$md5_func_sets) . " sets found)\n" if ($verbose);

# map subsystem functions to babel SEED functions
my $found        = {};   # normalised function names that matched a subsystem function
my $total        = {};   # every normalised function name seen
my $ssid_funcids = {};   # subsystem id => { function id => 1 }
my $funcid_ssids = {};   # function id  => { subsystem id => 1 }

print "Mapping subsystems to functions ... \n" if ($verbose);
foreach my $md5 ( keys %$md5_func_sets ) {
    my $ach_func2id = $md5_func_sets->{$md5};

    while ( my ($ach_f, $ach_id) = each %$ach_func2id ) {
        # Normalise the name before lookup: remove the first "[SS]" tag
        # and surrounding whitespace.
        $ach_f =~ s/\[SS\]//;
        $ach_f =~ s/^\s+//;
        $ach_f =~ s/\s+$//;
        if ( exists $ss_func2ids->{$ach_f} ) {
            foreach my $ss_id ( @{ $ss_func2ids->{$ach_f} } ) {
                $funcid_ssids->{$ach_id}->{$ss_id} = 1;
                $ssid_funcids->{$ss_id}->{$ach_id} = 1;
            }
            $found->{$ach_f} = 1;
        }
        $total->{$ach_f} = 1;
    }
}
print "\tFound " . scalar(keys %$found) . " functions (out of " . scalar(keys %$total) . ")\n";

# Store each id set as an array literal of the form '{1,2,3}'.
print "Updating " . scalar(keys %$ssid_funcids) . " subsystem entries ... " if ($verbose);
while ( my ($ssid, $funcids) = each %$ssid_funcids ) {
    my $a_str = "\'{" . join(",", keys %$funcids) . "}\'";
    $dbh->do("update ach_subsystems set function = $a_str where _id = $ssid");
}
print "Done\n" if ($verbose);

print "Updating " . scalar(keys %$funcid_ssids) . " function entries ... " if ($verbose);
while ( my ($funcid, $ssids) = each %$funcid_ssids ) {
    my $a_str = "\'{" . join(",", keys %$ssids) . "}\'";
    $dbh->do("update ach_functions set subsystem = $a_str where _id = $funcid");
}
print "Done\n\n" if ($verbose);

117 :    
# Read a tab-separated subsystem file and group its hierarchy rows by
# function name.
#
# Each line is expected to hold four tab-separated fields in this order:
# subsystem, step1, step2, function.
#
# Parameters:
#   $source_file - path of the file to read
#   $verbose     - accepted for interface compatibility; not used here
#
# Returns a hash reference: function => [ [ step1, step2, subsystem ], ... ]
# Dies if the file cannot be opened.
sub load_data_from_file {
    my ($source_file, $verbose) = @_;

    my $funcs = {};

    # Three-argument open with a lexical handle: the path is used literally
    # (no mode characters or surrounding whitespace are interpreted), and
    # the failure reason is included in the message.
    open(my $fh, '<', $source_file) or die "Can't open $source_file: $!";
    while ( my $line = <$fh> ) {
        chomp $line;
        my @fields = split(/\t/, $line);

        # A line with an unexpected field count is reported but still
        # processed: it is dropped below only when it has no function
        # field, otherwise its first four fields are used.
        unless (scalar @fields == 4) {
            print STDERR "Error: bad line '$line'\n";
        }

        my ($subsys, $step1, $step2, $func) = @fields;
        next if (! $func);

        # Empty hierarchy levels are recorded as "Unknown".
        $subsys = $subsys || "Unknown";
        $step1  = $step1  || "Unknown";
        $step2  = $step2  || "Unknown";
        push @{ $funcs->{$func} }, [ $step1, $step2, $subsys ];
    }
    close($fh);

    return $funcs;
}
142 :    
# Insert one ach_subsystems row per distinct (step1, step2, subsystem)
# hierarchy and report which row ids each function belongs to.
#
# Parameters:
#   $func2subsys - hash ref: function => [ [ step1, step2, subsystem ], ... ]
#   $dbh         - database handle used for the inserts
#
# Returns a hash reference: function => [ _id, ... ], with one entry per
# hierarchy listed for that function, in the order listed.
# Dies if the insert statement cannot be prepared or executed.
sub load_table_from_data {
    my ($func2subsys, $dbh) = @_;

    # Already inserted hierarchies, keyed level by level. Nested keys keep
    # ("ab", "c") and ("a", "bc") apart, which a plain concatenation of the
    # three names does not.
    my $insert = {};
    my $funcs  = {};
    my $i      = 1;   # next _id to assign

    # Prepared once and executed with bound values, so quoting is left to
    # the driver and the statement is not rebuilt for every row.
    my $sth = $dbh->prepare("insert into ach_subsystems (_id, step1, step2, subsystem) values (?, ?, ?, ?)")
        or die "Can't prepare insert into ach_subsystems: " . $dbh->errstr;

    # Sorted iteration makes the id assignment reproducible between runs.
    foreach my $func ( sort keys %$func2subsys ) {
        foreach my $set ( @{ $func2subsys->{$func} } ) {
            my ($step1, $step2, $subsys) = @$set;

            unless ( exists $insert->{$step1}{$step2}{$subsys} ) {
                # Stop on failure rather than hand out an id for a row
                # that was never stored.
                $sth->execute($i, $step1, $step2, $subsys)
                    or die "Can't insert subsystem row $i: " . $sth->errstr;
                $insert->{$step1}{$step2}{$subsys} = $i;
                $i += 1;
            }
            push @{ $funcs->{$func} }, $insert->{$step1}{$step2}{$subsys};
        }
    }
    return $funcs;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3