[Bio] / FigKernelScripts / make_fam_tabs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/make_fam_tabs.pl

Parent Directory | Revision Log | View Patch

revision 1.2, Mon Aug 1 18:52:58 2005 UTC revision 1.3, Mon Nov 28 04:31:07 2005 UTC
# Line 20  Line 20 
20  $cid = 1;  $cid = 1;
21  while (defined($_ = <SYN>))  while (defined($_ = <SYN>))
22  {  {
23      chop;      chomp;
24      @prots = split(/\t/,$_);      @prots = split(/\t/,$_);
25      foreach $prot (@prots)      foreach $prot (@prots)
26      {      {
# Line 29  Line 29 
29      }      }
30      $cid++;      $cid++;
31  }  }
 close(TAB1);  
32    
33    
34  foreach $source (@specific)  foreach $source (@specific)
35  {  {
36        next unless (-d "$sources/$source"); # don't try and process it if it is not a directory!
37      open(IDMAP,"<$sources/$source/id.map") || die "could not open $sources/$source/id.map";      open(IDMAP,"<$sources/$source/id.map") || die "could not open $sources/$source/id.map";
38      while (defined($_ = <IDMAP>))      while (defined($_ = <IDMAP>))
39      {      {
40          chop;          chomp;
41          ($fam,$lid) = split(/\t/,$_);          ($fam,$lid) = split(/\t/,$_);
42          if ($cid = $to_cid{$lid})          if (!$to_cid{$lid})
         {  
             print TAB2 "$fam\t$cid\n";  
         }  
         else  
43          {          {
44              print STDERR "could not handle $_\n";           # we don't have a synonym for this protein
45             # we can either ignore it or just give it a unique id. Let's do that.
46             $to_cid{$lid}=$cid;
47             $cid++;
48             # and write it to the file
49             print TAB1 "$prot\t$cid\n";
50          }          }
51            print TAB2 "$fam\t$to_cid{$lid}\n";
52      }      }
53      close(IDMAP);      close(IDMAP);
54    
# Line 54  Line 56 
56      {      {
57          while (defined($_ = <FAMFUNC>))          while (defined($_ = <FAMFUNC>))
58          {          {
59              if (($_ =~ /^(\S+)\t(\S[^\t]*\S)$/) && (length($2) < 256))              # ignore things that begin with a # or things that are just white space
60              {              # some files also have the last line with just a | on it. This should just be skipped
61                  print TAB3 $_;              next if (/^\s+$/ || /^\s*\#/ || /^\|/);
62              }              chomp;
63              else              # this was originally written with this regexp
64              {              # but there were problems when the function was not defined and also with some functions
65                  print STDERR "BAD: $_";              # that had leading or trailing spaces. These are removed now.
66              }              #if (($_ =~ /^(\S+)\t(\S[^\t]*\S)$/) && (length($2) < 256))
67    
68                if ($_ !~ /\t/) {print STDERR "Not enough columns in $sources/$source/family.funcs at $_\n"; next}
69                my @line=split /\t/;
70                if ($#line > 1) {print STDERR "Too many columns in $sources/$source/family.funcs at $_\n"; next}
71    
72                # trim leading and trailing spaces
73                $line[1] =~ s/^\s+//; $line[1] =~ s/\s*$//;
74                $line[0] =~ s/^\s+//; $line[0] =~ s/\s*$//;
75    
76                # define the function if it is unknown
77                unless (defined $line[1]) {$line[1] = "unknown"}
78                print TAB3 join("\t", @line), "\n";
79    
80          }          }
81          close(FAMFUNC);          close(FAMFUNC);
82      }      }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3