[Bio] / FigKernelPackages / P2Pupdate.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/P2Pupdate.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.38, Fri Sep 17 15:39:59 2004 UTC revision 1.39, Mon Dec 6 18:26:56 2004 UTC
# Line 995  Line 995 
995          version => $version,          version => $version,
996          exchangable => $exc,          exchangable => $exc,
997          curator => $curator,          curator => $curator,
998            curation_log => $info[3],
999          fig => $fig,          fig => $fig,
1000      };      };
1001    
# Line 1062  Line 1063 
1063      open($fh, "<$self->{file}") or die "Cannot open $self->{file}: $!\n";      open($fh, "<$self->{file}") or die "Cannot open $self->{file}: $!\n";
1064    
1065      #      #
1066      # Skip intro section      # Skip intro section - we already read this information in the constructor.
1067      #      #
1068    
1069      while (<$fh>)      while (<$fh>)
# Line 1186  Line 1187 
1187  }  }
1188    
1189  #  #
1190  # Analyze this subsystem for compatibility with this SEED install.  # Compute or load from cache the PEG translations for this subsystem.
 #  
 # Returns three lists:  
 #  
 # A major conflict list, consisting of tuples  
 # [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs] where $ss_peg  
 # is the peg in the subsystem being analyzied, and $ss_func is  
 # its assigned function in that subsystem. $loc_peg is the peg  
 # in the local SEED, and $loc_func its local assignment. $subs is  
 # the list of pairs [$subsystem_name, $role] denoting the subsystem(s)  
 # that $loc_peg particpates in.  
 #  
 # A conflict is flagged if the local function is different than  
 # the one being imported, and if the local peg is in a subsystem.  
 #  
 # A minor conflict list, consisting of tuples [$ss_peg, $ss_func, $loc_peg, $loc_func].  
 #  
 #  
 # The second list is a list of subsystem pegs that do not have  
 # a local equivalent. Each entry is a triple  
 # [peg, orgname, function].  
1191  #  #
1192    sub ensure_peg_translations
 sub analyze  
1193  {  {
1194      my($self) = @_;      my($self) = @_;
     my $fig = $self->{fig};  
1195    
1196      #      #
1197      # First we map the PEGs in this subsystem to PEGs in the      # First we map the PEGs in this subsystem to PEGs in the
# Line 1222  Line 1201 
1201      # and a hash of peg->sequence as the second argument.      # and a hash of peg->sequence as the second argument.
1202      #      #
1203    
1204        my $fig = $self->{fig};
1205    
1206      my %pegs;      my %pegs;
1207      my %seqs_of;      my %seqs_of;
1208    
# Line 1232  Line 1213 
1213          $seqs_of{$peg} = $self->{peg_seq}->{$peg};          $seqs_of{$peg} = $self->{peg_seq}->{$peg};
1214      }      }
1215    
1216      my $tran_peg = $fig->translate_pegs(\%pegs, \%seqs_of);      sub show_cb
1217        {
1218            print "$_[0]<p>\n";
1219        }
1220    
1221        my $cached_translation_file = "$self->{qdir}/peg_translation";
1222    
1223        my $tran_peg;
1224    
1225        if (-f $cached_translation_file and -s $cached_translation_file > 0)
1226        {
1227            #
1228            # Read the cached translations.
1229            #
1230    
1231            if (open(my $fh, "<$cached_translation_file"))
1232            {
1233                warn "Reading cached peg translations\n";
1234                $tran_peg = {};
1235                while (<$fh>)
1236                {
1237                    chomp;
1238                    my($k, $v) = split(/\t/);
1239                    $tran_peg->{$k} = $v;
1240                }
1241                close($fh);
1242            }
1243        }
1244    
1245        if (!$tran_peg)
1246        {
1247            $tran_peg = $fig->translate_pegs(\%pegs, \%seqs_of, \&show_cb);
1248    
1249      #      #
1250      # tran_peg is now a hash from subsystem_peg->local_peg      # tran_peg is now a hash from subsystem_peg->local_peg
# Line 1243  Line 1255 
1255      # for use during installation.      # for use during installation.
1256      #      #
1257    
1258            if (open(my $fh, ">$self->{qdir}/peg_translation"))
1259      {      {
         open(my $fh, ">$self->{qdir}/peg_translation");  
1260          for my $p (keys(%$tran_peg))          for my $p (keys(%$tran_peg))
1261          {          {
1262              my $tp = $tran_peg->{$p};              my $tp = $tran_peg->{$p};
# Line 1252  Line 1264 
1264          }          }
1265          close($fh);          close($fh);
1266      }      }
1267        }
1268        $self->{tran_peg} = $tran_peg;
1269        return $tran_peg;
1270    }
1271    
1272    #
1273    # Analyze this subsystem for compatibility with this SEED install.
1274    #
1275    # Returns three lists:
1276    #
1277    # A major conflict list, consisting of tuples
1278    # [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs] where $ss_peg
1279    # is the peg in the subsystem being analyzied, and $ss_func is
1280    # its assigned function in that subsystem. $loc_peg is the peg
1281    # in the local SEED, and $loc_func its local assignment. $subs is
1282    # the list of pairs [$subsystem_name, $role] denoting the subsystem(s)
1283    # that $loc_peg particpates in.
1284    #
1285    # A conflict is flagged if the local function is different than
1286    # the one being imported, and if the local peg is in a subsystem.
1287    #
1288    # A minor conflict list, consisting of tuples [$ss_peg, $ss_func, $loc_peg, $loc_func].
1289    #
1290    #
1291    # The second list is a list of subsystem pegs that do not have
1292    # a local equivalent. Each entry is a triple
1293    # [peg, orgname, function].
1294    #
1295    
1296    sub analyze
1297    {
1298        my($self) = @_;
1299        my $fig = $self->{fig};
1300    
1301        my $tran_peg = $self->ensure_peg_translations();
1302    
1303      #      #
1304      # Now we walk the PEGs, determining a) which are missing      # Now we walk the PEGs, determining a) which are missing
1305      # in the local SEED, and b) which have a conflicting assignment.      # in the local SEED, and b) which have a conflicting assignment.
1306      #      #
1307        #
1308        # We also need to determine if this assignment will cause
1309        # pegs to be filled into subsystem roles that were not
1310        # otherwise going to be added.
1311        #
1312        # To enable this, we determine from the subsystem index
1313        # the list all roles that are present in subsystems on
1314        # this SEED.
1315        #
1316    
1317        my $sub_name = $self->name();
1318        my $subsystem_roles = $fig->subsystem_roles();
1319    
1320        my(@conflict, $minor_conflict, $missing);
1321    
     my($conflict, $minor_conflict, $missing);  
     $conflict = [];  
1322      $missing = [];      $missing = [];
1323    
1324        print "Determining conflicts...<p>\n";
1325    
1326      for my $pegent (@{$self->{pegs}})      for my $pegent (@{$self->{pegs}})
1327      {      {
1328          my($ss_peg, undef, $ss_org, $ss_func) = @$pegent;          my($ss_peg, undef, $ss_org, $ss_func) = @$pegent;
1329    
1330            #
1331            # If this peg has a local translation, determine if
1332            # the associated assignment conflicts with a local assignment.
1333            #
1334            # One type of conflict occurs when the new assignment would cause
1335            # the peg to be removed from a subsystem. This occurs when the
1336            # new functional assignment is different from the current
1337            # assignment, and the peg is already in a subsystem.
1338            #
1339            # Another type of conflict occurs when the new assignment
1340            # for a peg matches a role that exists in a locally-installed
1341            # subsystem. This will cause the peg to be added to a
1342            # subsystem upon refill of that subsystem.
1343            #
1344            # It is possible for both the above conditions to hold,
1345            # in which case a peg would be moved out of one
1346            # subsystem into another.
1347            #
1348            # We denote these cases in the generated conflict list by
1349            # annotating the entry with the list of subsystems from which
1350            # the peg would be removed if the assignment were to be
1351            # accepted, and the list of subsystems to which the
1352            # peg would be added.
1353            #
1354    
1355          if (my $loc_peg = $tran_peg->{$ss_peg})          if (my $loc_peg = $tran_peg->{$ss_peg})
1356          {          {
1357                my $subs_removed = [];
1358                my $subs_added = [];
1359    
1360                #
1361                # Determine what our locally-assigned function is, and what
1362                # subsystem this peg appears in.
1363                #
1364    
1365              my $loc_func = $fig->function_of($loc_peg);              my $loc_func = $fig->function_of($loc_peg);
             my @subs = $fig->subsystems_for_peg($loc_peg);  
1366    
1367              #              #
1368              # If the functions don't match, it's a conflict.              # If the functions don't match, it's a conflict.
1369              # If the local function is in a subsystem, it's a major              # If the local function is in a subsystem, it's a major
1370              # conflict. If it's not, it's a minor conflict.              # conflict. If it's not, it's a minor conflict.
1371              #              #
1372                # We actually let the major/minor determination be done by
1373                # the analysis display code, since the difference is only in whether
1374                # there are subsystems.
1375                #
1376    
1377              if ($loc_func ne $ss_func)              if ($loc_func ne $ss_func)
1378              {              {
1379                  push(@$conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, [@subs]]);  
1380                    #
1381                    # If the function defined in the new subsystem is different than
1382                    # the current function, we mark a conflict. Along with the conflict
1383                    # we include a list of the subsystems in which the local peg
1384                    # is included.
1385                    #
1386                    # We use the subsystems_for_peg method to determine local subsystems
1387                    # associated with a peg. It returns a list of pairs [subsystem, rolename].
1388                    #
1389    
1390                    #
1391                    # What if we are loading a new version of an existing subsystem, and
1392                    # a role name has changed?
1393                    #
1394                    # In this case, $loc_func ne $ss_func, $loc_peg will appear in the local copy of
1395                    # the subsystem we are loading, and hence as a candidate for removal from that subsystem.
1396                    # This may be thought of as a spurious message, and leads me to want to remove
1397                    # such warnings (if I'm updating a subsystem, I can expect that the pegs in that
1398                    # subsystem will change).
1399                    #
1400    
1401                    my @removed = $fig->subsystems_for_peg($loc_peg);
1402                    push(@$subs_removed, grep { $_->[0] ne $sub_name } @removed);
1403    
1404                    #
1405                    # We also check to see if the new function is present
1406                    # as a role in any local subsystem. If it is, then when that subsystem
1407                    # is refilled, this peg will appear in it.
1408                    #
1409                    # This is also
1410                    #
1411    
1412                    if (my $loc_ss = $subsystem_roles->{$ss_func})
1413                    {
1414                        push(@$subs_added, @$loc_ss);
1415              }              }
1416    
1417                    push(@conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs_removed, $subs_added]);
1418                }
1419    
1420          }          }
1421          else          else
1422          {          {
# Line 1288  Line 1424 
1424          }          }
1425      }      }
1426    
1427        my $conflict = [sort { @{$b->[4]} + @{$b->[5]} <=> @{$a->[4]} + @{$a->[5]}  } @conflict];
1428    
1429        return ($conflict, $missing);
1430    }
1431    
1432    sub read_cached_analysis
1433    {
1434        my($self) = @_;
1435    
1436        my $cfile = "$self->{qdir}/conflicts";
1437        my $mfile = "$self->{qdir}/missing";
1438    
1439        my($conflict, $missing);
1440        $conflict = [];
1441        $missing = [];
1442    
1443        if (open(my $fh, "<$cfile"))
1444        {
1445    
1446            while (<$fh>)
1447            {
1448                chomp;
1449    
1450                my($ss_peg, $ss_func, $loc_peg, $loc_func) = split(/\t/);
1451    
1452                my $subs_removed = <$fh>;
1453                my $subs_added = <$fh>;
1454    
1455                chomp($subs_removed);
1456                chomp($subs_added);
1457    
1458                my @subs_removed_raw = split(/\t/, $subs_removed);
1459                my $subs_added_list = [split(/\t/, $subs_added)];
1460    
1461                my $subs_removed_list = [];
1462    
1463                while (@subs_removed_raw)
1464                {
1465                    my($v1, $v2, @rest) = @subs_removed_raw;
1466                    @subs_removed_raw = @rest;
1467                    push(@$subs_removed_list, [$v1, $v2]);
1468                }
1469    
1470                push(@$conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs_removed_list, $subs_added_list]);
1471            }
1472        }
1473    
1474        if (open(my $fh, "<$mfile"))
1475        {
1476    
1477            while (<$fh>)
1478            {
1479                chomp;
1480    
1481                my(@a) = split(/\t/);
1482                push(@$missing, [@a]);
1483            }
1484        }
1485    
1486      return ($conflict, $missing);      return ($conflict, $missing);
1487  }  }
1488    
1489    #
1490    # Install this subsystem.
1491    #
1492    # $dont_assign is a list of local PEGs that should not have their assignments overwritten.
1493    #
1494    # We return a list of for-the-installer messages that will be presented when the install completes.
1495    #
1496    sub install
1497    {
1498        my($self, $dont_assign) = @_;
1499    
1500        my @messages;
1501    
1502        my $fig = $self->{fig};
1503        my $subsystems_dir = "$FIG_Config::data/Subsystems";
1504    
1505        my $sub_name = $self->name();
1506        $sub_name =~ s/ /_/g;
1507        my $sub_dir = "$subsystems_dir/$sub_name";
1508        my $ver = $self->version();
1509    
1510        #
1511        # First check to see if we already have this subsystem installed.
1512        #
1513    
1514        if (-d $sub_dir and (my $cur_ver = $fig->subsystem_version($sub_name)) >= $ver)
1515        {
1516            warn "Not importing $sub_name: current version $cur_ver >= imported version $ver";
1517        }
1518    
1519        warn "Importing $sub_name version $ver\n";
1520        push(@messages, "Importing $sub_name version $ver\n");
1521    
1522        if (! -d $sub_dir)
1523        {
1524            mkdir($sub_dir, 0777) or die "Cannot mkdir $sub_dir: $!";
1525        }
1526    
1527        #
1528        # Write the header/meta information.
1529        #
1530    
1531        my $fh;
1532        my $imported_from = "???";
1533    
1534        open($fh, ">$sub_dir/VERSION") or die "Cannot open $sub_dir/VERSION for writing: $!";
1535        print $fh "$ver\n";
1536        close($fh);
1537        chmod(0666, "$sub_dir/VERSION");
1538    
1539        open($fh, ">$sub_dir/EXCHANGABLE") or die "Cannot open $sub_dir/EXCHANGABLE for writing: $!";
1540        print $fh $self->exchangable() . "\n";
1541        close($fh);
1542        chmod(0666, "$sub_dir/EXCHANGABLE");
1543    
1544        open($fh, ">$sub_dir/curation.log") or die "Cannot open $sub_dir/curation.logt for writing: $!";
1545        print $fh "$self->{curation_log}\n";
1546        my $time = time;
1547        print $fh "$time\t$imported_from\timported_from\n";
1548        close($fh);
1549        chmod(0666, "$sub_dir/curation.log");
1550    
1551        open($fh, ">$sub_dir/notes") or die "Cannot open $sub_dir/notes for writing: $!";
1552        print $fh $self->{notes_txt} . "\n";
1553        close($fh);
1554        chmod(0666, "$sub_dir/notes");
1555    
1556        my $tran_peg = $self->ensure_peg_translations();
1557    
1558        #
1559        # We can start writing the spreadsheet.
1560        #
1561    
1562        my $ssa_fh;
1563        open($ssa_fh, ">$sub_dir/spreadsheet") or die "Cannot open $sub_dir/spreadsheet for writing: $!";
1564    
1565        #
1566        # Start with the roles and subsets.
1567        #
1568    
1569        print $ssa_fh $self->{role_text};
1570        print $ssa_fh "//\n";
1571    
1572        print $ssa_fh $self->{subsets_text};
1573        print $ssa_fh "//\n";
1574    
1575        for my $g (@{$self->{genomes}})
1576        {
1577            my $gobj = $self->{genome_objs}->{$g};
1578            my ($trans_genome, @row) = $gobj->translate($tran_peg);
1579    
1580            if ($trans_genome)
1581            {
1582                print $ssa_fh join("\t", $trans_genome, $gobj->{variant}, @row), "\n";
1583            }
1584        }
1585    
1586        close($ssa_fh);
1587    
1588        #
1589        # The subsystem itself is now in place.
1590        #
1591        # Enter the new assignments, saving the old assignments in the  spool dir.
1592        #
1593    
1594        my $now = time;
1595    
1596        my $old_funcs_fh;
1597        open($old_funcs_fh, ">>$self->{qdir}/old_assignments.$now");
1598    
1599        my $curator = $self->curator();
1600    
1601        for my $pegent (@{$self->{pegs}})
1602        {
1603            my($peg, $aliases, $org, $func) = @$pegent;
1604            my $tpeg = $tran_peg->{$peg};
1605    
1606            if (!$tpeg)
1607            {
1608                warn "Couldn't translate $peg (from $org)\n";
1609                push(@messages, "Couldn't translate $peg (from $org)");
1610                next;
1611            }
1612    
1613            my $old = $fig->function_of($tpeg);
1614    
1615            if ($old ne $func)
1616            {
1617                print $old_funcs_fh "$tpeg\t$old\t$curator\t$func\n";
1618                $fig->add_annotation($tpeg, $curator,
1619                                     "Assigning function $func based on installation of subsystem $self->{name}");
1620    
1621                if ($curator =~ /master:(.*)/)
1622                {
1623                    my $user = $1;
1624                    $fig->assign_function($tpeg, "master", $func, "");
1625                    $fig->add_annotation($tpeg, $user, "Set master function  to\n$func\n");
1626                }
1627                else
1628                {
1629                    $fig->assign_function($tpeg, $curator, $func, "");
1630                    $fig->add_annotation($tpeg, $curator, "Set function  to\n$func\n");
1631                }
1632            }
1633            else
1634            {
1635                # print "$tpeg already has assignment $func\n";
1636            }
1637        }
1638        close($old_funcs_fh);
1639        return @messages;
1640    }
1641    
1642  sub name  sub name
1643  {  {
# Line 1317  Line 1664 
1664      return $self->{curator};      return $self->{curator};
1665  }  }
1666    
1667    sub analysis_complete
1668    {
1669        my($self) = @_;
1670    
1671        return -f "$self->{qdir}/analysis_complete";
1672    }
1673    
1674    sub analysis_jobid
1675    {
1676        my($self) = @_;
1677    
1678        my $jid_file = "$self->{qdir}/analysis_jobid";
1679    
1680        return &FIG::file_head($jid_file, 1);
1681    }
1682    
1683  package GenomeObj;  package GenomeObj;
1684    
1685    use strict;
1686    use Data::Dumper;
1687    
1688    #
1689    # A genomeobj is a small datatype that holds the data in a row of a
1690    # spreadsheet file.
1691    #
1692    
1693  sub new  sub new
1694  {  {
1695      my($class, $subfile, $fig, $genome, $variant, $items) = @_;      my($class, $subfile, $fig, $genome, $variant, $items) = @_;
# Line 1334  Line 1705 
1705    
1706  }  }
1707    
1708    #
1709    # Translate this row to a new context.
1710    #
1711    # $trans_peg is a hash mapping from spreadsheet PEG to local PEG
1712    #
1713    sub translate
1714    {
1715        my($self, $trans_peg) = @_;
1716        my $fig = $self->{fig};
1717    
1718        my $genome = $self->{genome};
1719    
1720        my $parsed_items = [];
1721        $self->{parsed_items} = $parsed_items;
1722        my $trans_items = [];
1723        $self->{trans_items} = $trans_items;
1724    
1725        #
1726        # Hash of genomes seen in this row.
1727        my %genomes;
1728    
1729        for my $item (@{$self->{items}})
1730        {
1731            my $l = [ map { $_ eq '' ? undef : "fig|$genome.peg.$_" } split(/,/, $item) ];
1732            my $t = [ map { $trans_peg->{$_} } @$l ];
1733    
1734            push(@$parsed_items, $l);
1735            push(@$trans_items, $t);
1736    
1737            #
1738            # Count the genomes that are seen in the translated pegs.
1739            #
1740    
1741            for my $tpeg (@$t)
1742            {
1743                my $tg = $fig->genome_of($tpeg);
1744                $genomes{$tg}++ if $tg ne "";
1745            }
1746    
1747        }
1748    
1749        #
1750        # Now determine the dominant organism for this translated row.
1751        #
1752    
1753        my @orgs = sort { $genomes{$b} <=> $genomes{$a} } keys(%genomes);
1754    
1755        # print "@{$self->{items}}\n";
1756        # print join(" ", map { "$_: $genomes{$_} " } @orgs ), "\n";
1757    
1758        unless (@orgs == 1          # Single organism
1759            or
1760            (@orgs > 1 and $genomes{$orgs[0]} > (2 * $genomes{$orgs[1]})) # First org has > 2x the second org
1761            )
1762        {
1763            warn "Could not determine translation for $genome\n";
1764            return undef;
1765        }
1766    
1767        #
1768        # The dominant organism is the first in the list.
1769        #
1770    
1771        my $dom = $orgs[0];
1772    
1773        #
1774        # Run through the translated pegs, and remove the ones that are
1775        # not in the dominant organism.
1776        #
1777    
1778        my @res;
1779        for my $i (0..@$trans_items - 1)
1780        {
1781            my $t = $trans_items->[$i];
1782    
1783            my @nt;
1784            for my $peg (@$t)
1785            {
1786                if ($peg =~ /^fig\|(\d+\.\d+)\.peg\.(\d+)$/)
1787                {
1788                    if ($1 eq $dom)
1789                    {
1790                        push(@nt, $2);
1791                    }
1792                }
1793            }
1794            push(@res, join(",", @nt));
1795        }
1796        return $dom, @res;
1797    }
1798    
1799  1  1

Legend:
Removed from v.1.38  
changed lines
  Added in v.1.39

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3