[Bio] / FigKernelPackages / P2Pupdate.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/P2Pupdate.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.32, Wed Jun 16 14:13:35 2004 UTC revision 1.33, Mon Jun 28 20:10:11 2004 UTC
# Line 1  Line 1 
1  package P2Pupdate;  package P2Pupdate;
2    
3    use strict;
4    
5  use FIG_Config;  use FIG_Config;
6  use FIG;  use FIG;
7  use Carp;  use Carp;
# Line 328  Line 330 
330      {      {
331          if ($line =~ /^(\S+)\s+\=/)          if ($line =~ /^(\S+)\s+\=/)
332          {          {
333              $var = $1;              my $var = $1;
334              $varQ = quotemeta $var;              my $varQ = quotemeta $var;
335    
336              for ($i=0; ($i < $#new) && ($new[$i] !~ /^$varQ\s+\=/); $i++) {}              for ($i=0; ($i < $#new) && ($new[$i] !~ /^$varQ\s+\=/); $i++) {}
337              if ($i == $#new)              if ($i == $#new)
# Line 390  Line 392 
392      if (open(ANNOTATIONS,">$file"))      if (open(ANNOTATIONS,">$file"))
393      {      {
394          my @annotations = sort { $a->[0] cmp $b->[0] } $fig->annotations_made($genomes,$who,$date);          my @annotations = sort { $a->[0] cmp $b->[0] } $fig->annotations_made($genomes,$who,$date);
395          foreach $x (@annotations)          foreach my $x (@annotations)
396          {          {
397              print ANNOTATIONS join("\n",@$x),"\n///\n";              print ANNOTATIONS join("\n",@$x),"\n///\n";
398          }          }
399          print ANNOTATIONS "//\n";          print ANNOTATIONS "//\n";
400    
401          foreach $x (@annotations)          foreach my $x (@annotations)
402          {          {
403              $peg = $x->[0];              my $peg = $x->[0];
404              my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);              my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
405              print ANNOTATIONS join("\t",($peg,join(",",@aliases),$fig->genus_species($fig->genome_of($peg)),scalar $fig->function_of($peg))) . "\n";              print ANNOTATIONS join("\t",($peg,join(",",@aliases),$fig->genus_species($fig->genome_of($peg)),scalar $fig->function_of($peg))) . "\n";
406          }          }
407          print ANNOTATIONS "//\n";          print ANNOTATIONS "//\n";
408    
409          foreach $x (@annotations)          foreach my $x (@annotations)
410          {          {
411                my $peg;
412              ($peg,undef) = @$x;              ($peg,undef) = @$x;
413              my $seq = $fig->get_translation($peg);              my $seq = $fig->get_translation($peg);
414              &FIG::display_id_and_seq($peg,\$seq,\*ANNOTATIONS);              &FIG::display_id_and_seq($peg,\$seq,\*ANNOTATIONS);
# Line 439  Line 442 
442      $/ = "\n//\n";      $/ = "\n//\n";
443      if (defined($line = <IN>))      if (defined($line = <IN>))
444      {      {
445            my(@annotations);
446    
447          $line =~ s/\n\/\/\n/\n/s;          $line =~ s/\n\/\/\n/\n/s;
448          $line =~ s/\n\/\/\/\n//s;          $line =~ s/\n\/\/\/\n//s;
449          @ann = split(/\n\/\/\/\n/,$line);          @ann = split(/\n\/\/\/\n/,$line);
# Line 483  Line 488 
488          if (-d "$fig_disk/BackupAnnotations") { system "rm -rf $fig_disk/BackupAnnotations" }          if (-d "$fig_disk/BackupAnnotations") { system "rm -rf $fig_disk/BackupAnnotations" }
489          mkdir("$fig_disk/BackupAnnotations",0777);          mkdir("$fig_disk/BackupAnnotations",0777);
490          mkdir("$fig_disk/BackupAnnotations/New",0777);          mkdir("$fig_disk/BackupAnnotations/New",0777);
491          for ($i=0; ($i < @annotation); $i++)          my $i;
492            for ($i=0; ($i < @annotations); $i++)
493          {          {
494              if (($i == 0) || ($fig->genome_of($annotations[$i]->[0]) ne $fig->genome_of($annotations[$i-1]->[0])))              if (($i == 0) || ($fig->genome_of($annotations[$i]->[0]) ne $fig->genome_of($annotations[$i-1]->[0])))
495              {              {
# Line 540  Line 546 
546      opendir(TMP,"$fig_disk/BackupAnnotations") || die "could not open $fig_disk/BackupAnnotations";      opendir(TMP,"$fig_disk/BackupAnnotations") || die "could not open $fig_disk/BackupAnnotations";
547      my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);      my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);
548      closedir(TMP);      closedir(TMP);
549      foreach $genome (@genomes)      foreach my $genome (@genomes)
550      {      {
551          unlink("$fig_disk/FIG/Data/Organisms/$genome/annotations");          unlink("$fig_disk/FIG/Data/Organisms/$genome/annotations");
552          &FIG::run("cp $fig_disk/BackupAnnotations/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");          &FIG::run("cp $fig_disk/BackupAnnotations/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");
# Line 576  Line 582 
582      if (open(ASSIGNMENTS,">$file"))      if (open(ASSIGNMENTS,">$file"))
583      {      {
584          print ASSIGNMENTS "$user\t$who\t$date\n";          print ASSIGNMENTS "$user\t$who\t$date\n";
585          my @assignments = sort { $a->[0] cmp $b->[0] } $fig->assignments_made($genomes,$who,$date);          my @assignments = sort { $a->[0] cmp $b->[0] } $fig->assignments_made_full($genomes,$who,$date);
586          foreach $x (@assignments)          foreach $x (@assignments)
587          {          {
588              print ASSIGNMENTS join("\t",@$x),"\n";              my($peg, $function, $adate, $awho) = @$x;
589                print ASSIGNMENTS join("\t", $peg, $function),"\n";
590          }          }
591          print ASSIGNMENTS "//\n";          print ASSIGNMENTS "//\n";
592    
# Line 587  Line 594 
594          {          {
595              ($peg,undef) = @$x;              ($peg,undef) = @$x;
596              my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);              my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
597              print ASSIGNMENTS join("\t",($peg,join(",",@aliases),$fig->genus_species($fig->genome_of($peg)),scalar $fig->function_of($peg))) . "\n";  
598                my $alias_txt = join(",",@aliases);
599                my $gs_txt = $fig->genus_species($fig->genome_of($peg));
600                my $func_txt = scalar $fig->function_of($peg);
601    
602                print ASSIGNMENTS join("\t",($peg,
603                                             $alias_txt,
604                                             $gs_txt,
605                                             $func_txt)) . "\n";
606          }          }
607          print ASSIGNMENTS "//\n";          print ASSIGNMENTS "//\n";
608    
# Line 781  Line 796 
796      &FIG::run("$FIG_Config::bin/import_subsystems master last_release < $package");      &FIG::run("$FIG_Config::bin/import_subsystems master last_release < $package");
797  }  }
798    
799    package SubsystemFile;
800    
801    use Data::Dumper;
802    use strict;
803    
804    sub new
805    {
806        my($class, $qdir, $file, $fig) = @_;
807        my(@info);
808    
809        @info = FIG::file_head($file, 4);
810        if (!@info)
811        {
812            warn "Cannot open $file\n";
813            return undef;
814        }
815    
816        chomp(@info);
817    
818        my $name = $info[0];
819        my $version = $info[1];
820        my $exc = $info[2];
821    
822        my @c = split(/\t/, $info[3]);
823    
824        my $curator = $c[1];
825    
826        my $self = {
827            qdir => $qdir,
828            file => $file,
829            name => $name,
830            version => $version,
831            exchangable => $exc,
832            curator => $curator,
833            fig => $fig,
834        };
835    
836        return bless($self, $class);
837    
838    }
839    
840    #
841    # Load the export file into internal data structures.
842    #
843    # It's structured as
844    #
845    # name
846    # version
847    # exchangable
848    # creation date <tab> curator <tab> "started"
849    # //
850    # roles
851    # //
852    # subsets
853    # //
854    # spreadsheet
855    # //
856    # assignments
857    # //
858    # sequences
859    # //
860    # notes
861    # //
862    #
863    # Subsections:
864    #
865    # roles:
866    #
867    #    abbr <tab> role-name
868    #
869    # subsets has meaning to the acutal subsystems, but we'll use it as a string.
870    #
871    # spreadsheet:
872    #
873    #    genome <tab> variant <tab> items
874    #
875    # Where items is tab-separated columns, each of which is comma-separated peg number in the genome
876    #
877    # assignments:
878    #
879    #  fid <tab> aliases <tab> organism <tab> function
880    #
881    # sequences:
882    #
883    #  list of fasta's
884    #
885    # notes:
886    #
887    #  plain text
888    #
889    sub load
890    {
891        my($self) = @_;
892    
893        my $fig = $self->{fig};
894    
895        my($fh);
896    
897        open($fh, "<$self->{file}") or die "Cannot open $self->{file}: $!\n";
898    
899        #
900        # Skip intro section
901        #
902    
903        while (<$fh>)
904        {
905            chomp;
906            last if m,^//,;
907        }
908    
909        #
910        # Read the roles.
911        #
912    
913    
914        my $nroles;
915    
916        while (<$fh>)
917        {
918            last if m,^//,;
919    
920            $self->{role_text} .= $_;
921            chomp $_;
922    
923            my($abbr, $role) = split(/\t/);
924    
925            warn "Have role $role\n";
926    
927            push(@{$self->{roles}}, $role);
928            push(@{$self->{abbrs}}, $abbr);
929    
930            $nroles++;
931        }
932    
933        #
934        # Read in subsets as a string.
935        #
936    
937        while (<$fh>)
938        {
939            last if m,^//,;
940    
941            $self->{subsets_text} .= $_;
942        }
943    
944        #
945        # Read the spreadsheet.
946        #
947    
948        while (<$fh>)
949        {
950            last if m,^//,;
951    
952            $self->{spreadsheet_text} .= $_;
953    
954            chomp;
955    
956            my($genome, $variant, @items) = split(/\t/, $_, $nroles + 2);
957    
958            push(@{$self->{genomes}}, $genome);
959    
960            my $gobj = GenomeObj->new($self, $fig, $genome, $variant, [@items]);
961    
962            $self->{genome_objs}->{$genome} = $gobj;
963        }
964    
965        #
966        # Read PEG info
967        #
968    
969        while (<$fh>)
970        {
971            last if m,^//,;
972    
973            chomp;
974    
975            my ($peg, $aliases, $org, $func) = split(/\t/);
976    
977            push(@{$self->{pegs}}, [$peg, $aliases, $org, $func]);
978        }
979    
980        #
981        # Read sequence info
982        #
983    
984        my($cur, $cur_peg);
985    
986        while (<$fh>)
987        {
988            if (/^>(fig\|\d+\.\d+\.peg\.\d+)/)
989            {
990                if ($cur)
991                {
992                    $cur =~ s/\s+//gs;
993                    $self->{peg_seq}->{$cur_peg} = $cur;
994                }
995                $cur_peg = $1;
996                $cur = '';
997            }
998            elsif (m,^//,)
999            {
1000                $cur =~ s/\s+//gs;
1001                $self->{peg_seq}->{$cur_peg} = $cur;
1002                last;
1003            }
1004            else
1005            {
1006                $cur .= $_;
1007            }
1008        }
1009    
1010        #
1011        # Read notes as a string
1012        #
1013    
1014        while (<$fh>)
1015        {
1016            last if m,^//,;
1017    
1018            $self->{notes_txt} .= $_;
1019        }
1020    
1021    }
1022    
1023    #
1024    # Analyze this subsystem for compatibility with this SEED install.
1025    #
1026    # Returns three lists:
1027    #
1028    # A major conflict list, consisting of tuples
1029    # [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs] where $ss_peg
1030    # is the peg in the subsystem being analyzied, and $ss_func is
1031    # its assigned function in that subsystem. $loc_peg is the peg
1032    # in the local SEED, and $loc_func its local assignment. $subs is
1033    # the list of pairs [$subsystem_name, $role] denoting the subsystem(s)
1034    # that $loc_peg particpates in.
1035    #
1036    # A conflict is flagged if the local function is different than
1037    # the one being imported, and if the local peg is in a subsystem.
1038    #
1039    # A minor conflict list, consisting of tuples [$ss_peg, $ss_func, $loc_peg, $loc_func].
1040    #
1041    #
1042    # The second list is a list of subsystem pegs that do not have
1043    # a local equivalent. Each entry is a triple
1044    # [peg, orgname, function].
1045    #
1046    
1047    sub analyze
1048    {
1049        my($self) = @_;
1050        my $fig = $self->{fig};
1051    
1052        #
1053        # First we map the PEGs in this subsystem to PEGs in the
1054        # local SEED.
1055        #
1056        # translate_pegs requires a hash of peg->[aliases] as the first argument,
1057        # and a hash of peg->sequence as the second argument.
1058        #
1059    
1060        my %pegs;
1061        my %seqs_of;
1062    
1063        for my $pegent (@{$self->{pegs}})
1064        {
1065            my($peg, $aliases, $org, $func) = @$pegent;
1066            $pegs{$peg} = [$aliases, $org, $func];
1067            $seqs_of{$peg} = $self->{peg_seq}->{$peg};
1068        }
1069    
1070        my $tran_peg = $fig->translate_pegs(\%pegs, \%seqs_of);
1071    
1072        #
1073        # tran_peg is now a hash from subsystem_peg->local_peg
1074        #
1075    
1076        #
1077        # Write the translations out to a file in the queue directory
1078        # for use during installation.
1079        #
1080    
1081        {
1082            open(my $fh, ">$self->{qdir}/peg_translation");
1083            for my $p (keys(%$tran_peg))
1084            {
1085                my $tp = $tran_peg->{$p};
1086                print $fh "$p\t$tp\n";
1087            }
1088            close($fh);
1089        }
1090    
1091        #
1092        # Now we walk the PEGs, determining a) which are missing
1093        # in the local SEED, and b) which have a conflicting assignment.
1094        #
1095    
1096        my($conflict, $minor_conflict, $missing);
1097        $conflict = [];
1098        $missing = [];
1099    
1100        for my $pegent (@{$self->{pegs}})
1101        {
1102            my($ss_peg, undef, $ss_org, $ss_func) = @$pegent;
1103    
1104            if (my $loc_peg = $tran_peg->{$ss_peg})
1105            {
1106                my $loc_func = $fig->function_of($loc_peg);
1107                my @subs = $fig->subsystems_for_peg($loc_peg);
1108    
1109                #
1110                # If the functions don't match, it's a conflict.
1111                # If the local function is in a subsystem, it's a major
1112                # conflict. If it's not, it's a minor conflict.
1113                #
1114    
1115                if ($loc_func ne $ss_func)
1116                {
1117                    push(@$conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, [@subs]]);
1118                }
1119            }
1120            else
1121            {
1122                push(@$missing, [$ss_peg, $ss_org, $ss_func]);
1123            }
1124        }
1125    
1126        return ($conflict, $missing);
1127    }
1128    
1129    
1130    sub name
1131    {
1132        my($self) = @_;
1133        return $self->{name};
1134    }
1135    
1136    
1137    sub version
1138    {
1139        my($self) = @_;
1140        return $self->{version};
1141    }
1142    
1143    sub exchangable
1144    {
1145        my($self) = @_;
1146        return $self->{exchangable};
1147    }
1148    
1149    sub curator
1150    {
1151        my($self) = @_;
1152        return $self->{curator};
1153    }
1154    
1155    package GenomeObj;
1156    
1157    sub new
1158    {
1159        my($class, $subfile, $fig, $genome, $variant, $items) = @_;
1160    
1161        my $self = {
1162            fig => $fig,
1163            subfile => $subfile,
1164            genome => $genome,
1165            variant => $variant,
1166            items => $items,
1167        };
1168        return bless($self, $class);
1169    
1170    }
1171    
1172    
1173  1  1

Legend:
Removed from v.1.32  
changed lines
  Added in v.1.33

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3