[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11, Fri Apr 8 20:22:23 2005 UTC revision 1.31, Sat May 6 22:04:12 2006 UTC
# Line 1  Line 1 
1    #
2    # Copyright (c) 2003-2006 University of Chicago and Fellowship
3    # for Interpretations of Genomes. All Rights Reserved.
4    #
5    # This file is part of the SEED Toolkit.
6    #
7    # The SEED Toolkit is free software. You can redistribute
8    # it and/or modify it under the terms of the SEED Toolkit
9    # Public License.
10    #
11    # You should have received a copy of the SEED Toolkit Public License
12    # along with this program; if not write to the University of Chicago
13    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14    # Genomes at veronika@thefig.info or download a copy from
15    # http://www.theseed.org/LICENSE.TXT.
16    #
17    
18  # -*- perl -*-  # -*- perl -*-
19    
20  =pod  =pod
21    
22  =head1  =head1 RAE Library
23    
24   Some routines and things that Rob uses. Please feel free to use at will and incorporate into   Some routines and things that Rob uses. Please feel free to use at will and incorporate into
25   your own code or move them into FIG.pm or elsewhere.   your own code or move them into FIG.pm or elsewhere.
26    
27     For questions about this email RobE@theFIG.info
28    
29  =cut  =cut
30    
31  package raelib;  package raelib;
32  use strict;  use strict;
33    use Bio::SeqIO;
34    use Bio::Seq;
35    use Bio::SeqFeature::Generic;
36    
37    # we don't know whether the Spreadsheet::WriteExcel methods are available on all systems, and even on the CI systems they are currently in my shared directory
38    # so we use an eval and set the boolean if we are cool.
39    my $useexcel;
40    BEGIN {
41        use lib '/home/seed/Rob/perl/lib/perl5/site_perl/5.8.5/';
42        eval "use Spreadsheet::WriteExcel";
43        unless ($@) {$useexcel=1}
44    }
45    
46  use FIG;  use FIG;
47  my $fig=new FIG;  my $fig=new FIG;
48    
# Line 21  Line 53 
53  =cut  =cut
54    
55  sub new {  sub new {
56   my $self=shift;   my ($class)=@_;
57   return $self   my $self={};
58     return bless $self, $class;
59  }  }
60    
61    
# Line 70  Line 103 
103    
104  =head2 pirsfcorrespondence  =head2 pirsfcorrespondence
105    
106   Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written.  Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written. This is so that we can easily go back and forth.
  This is so that we can easily go back and forth.  
107    
108   The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat   The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat
109    
# Line 80  Line 112 
112     to      : file to write information to     to      : file to write information to
113     verbose : report on progress     verbose : report on progress
114    
115    Note that if the from filename ends in .gz it assumed to be a gzipped file and will be opened accordingly.
116    
117   Returns the number of lines in the pirsinfo file that were read.   Returns the number of lines in the pirsinfo file that were read.
118    
119  =cut  =cut
# Line 90  Line 124 
124    print STDERR "File $from does not exist as called in $0\n";    print STDERR "File $from does not exist as called in $0\n";
125    return 0;    return 0;
126   }   }
127     if ($from =~ /\.gz$/) {
128      open(IN, "|gunzip -c $from") || die "Can't open $from using a gunzip pipe";
129     }
130     else {
131   open (IN, $from) || die "Can't open $from";   open (IN, $from) || die "Can't open $from";
132     }
133   open (OUT, ">$to") || die "Can't write to $to";   open (OUT, ">$to") || die "Can't write to $to";
134   my $linecount;   my $linecount;
135   while (<IN>) {   while (<IN>) {
136    $linecount++;    $linecount++;
137    unless ($linecount % 10000) {print STDERR "Correspondence of $linecount lines calculated\n"}    if ($verbose && !($linecount % 10000))  {print STDERR "Parsed $linecount lines\n"}
138    if (/^>/) {print OUT; next}    if (/^>/) {print OUT; next}
139    chomp;    chomp;
140    my $done;    foreach my $peg ($self->swiss_pir_ids($_)) {
   foreach my $peg ($fig->by_alias("uni|$_")) {  
141     print OUT $_, "\t", $peg, "\n";     print OUT $_, "\t", $peg, "\n";
    $done=1;  
142    }    }
   unless ($done) {print OUT $_, "\t\n"}  
143   }   }
144   close IN;   close IN;
145   close OUT;   close OUT;
146   return $linecount;   return $linecount;
147  }  }
148    
149    =head2 uniprotcorrespondence
150    
151    Generate a correspondence table between uniprot knowledge base IDs and FIG ID's.
152    
153    The uniprot KB file is in the form:  UniProtKB_Primary_Accession | UniProtKB_ID | Section | Protein Name
154    
155     This method takes three arguments:
156       from    : uniprotKB file
157       to      : file to write information to
158       verbose : report on progress
159    
160    Note that if the from filename ends in .gz it assumed to be a gzipped file and will be opened accordingly.
161    
162     Returns the number of lines in the uniprotkb file that were read.
163    
164    =cut
165    
166    sub uniprotcorrespondence {
167     my ($self, $from, $to, $verbose)=@_;
168     unless (-e $from) {
169      print STDERR "File $from does not exist as called in $0\n";
170      return 0;
171     }
172     if ($from =~ /\.gz$/) {
173      open(IN, "|gunzip -c $from") || die "Can't open $from using a gunzip pipe";
174     }
175     else {
176      open (IN, $from) || die "Can't open $from";
177     }
178     open (OUT, ">$to") || die "Can't write to $to";
179     my $linecount;
180     while (<IN>) {
181      chomp;
182      $linecount++;
183      if ($verbose && !($linecount % 10000))  {print STDERR "Parsed $linecount lines\n"}
184      my @line=split /\s+\|\s+/;
185      my $added;
186      foreach my $peg ($self->swiss_pir_ids($line[0])) {
187       print OUT "$_ | $peg\n";
188       $added=1;
189      }
190      unless ($added) {print OUT "$_\n"}
191     }
192     close IN;
193     close OUT;
194     return $linecount;
195    }
196    
197    =head2 prositecorrespondence
198    
199    Generate a correspondence table between prosite and seed using sp id's and seed ids.
200    
201    The SwissProt prosite file is from ftp://ca.expasy.org/databases/prosite/release_with_updates/prosite.dat and is in horrible swiss prot format, so we'll parse out those things that we need and put them in the file
202    
203    The output file will have the following columns:
204    
205    prosite family accession number, prosite family name, family type, swiss-prot protein id, fig protein id.
206    
207    The family type is one of rule, pattern, or matrix. Right now (Prosite Release 19.2 of 24-May-2005) there are 4 rules, 1322 patterns, and 521 matrices.
208    
209     This method takes three arguments:
210       from    : prosite file
211       to      : file to write information to
212       verbose : report on progress
213    
214    Note that if the from filename ends in .gz it assumed to be a gzipped file and will be opened accordingly.
215    
216     Returns the number of lines in the prosite file that were read.
217    
218    =cut
219    
220    sub prositecorrespondence {
221     my ($self, $from, $to, $verbose)=@_;
222     unless (-e $from) {
223      print STDERR "File $from does not exist as called in $0\n";
224      return 0;
225     }
226     if ($from =~ /\.gz$/) {
227      open(IN, "|gunzip -c $from") || die "Can't open $from using a gunzip pipe";
228     }
229     else {
230      open (IN, $from) || die "Can't open $from";
231     }
232     open (OUT, ">$to") || die "Can't write to $to";
233     my $linecount;
234     my ($famac, $famname, $famtype)=('','','');
235     while (<IN>) {
236      chomp;
237      $linecount++;
238      if ($verbose && !($linecount % 10000))  {print STDERR "Parsed $linecount lines\n"}
239      if (m#//#) {($famac, $famname, $famtype)=('','',''); next}
240      elsif (m/^ID\s*(.*?);\s*(\S+)/) {($famname, $famtype)=($1, $2); next}
241      elsif (m/^AC\s*(\S+)/) {$famac=$1; $famac =~ s/\;\s*$//; next}
242      next unless (m/^DR/); # ignore all the other crap in the prosite file for now. Note we might, at some point, want to grab all that, but that is for another time.
243      #
244      # this is the format of the DR lines:
245      # DR   P11460, FATB_VIBAN , T; P40409, FEUA_BACSU , T; P37580, FHUD_BACSU , T;
246      s/^DR\s*//;
247      foreach my $piece (split /\s*\;\s*/, $_) {
248       my ($acc, $nam, $unk)=split /\s*\,\s*/, $piece;
249       foreach my $fig ($self->swiss_pir_ids($acc)) {
250        print OUT join "\t", ($famac, $famname, $famtype, $acc, $fig), "\n";
251       }
252      }
253     }
254    }
255    
256    =head2 swiss_pir_ids()
257    
258    SwissProt/PIR have lots of ID's that we want to get, usually in this order - uni --> tr --> sp. This routine will map swissprot/pir ids to fig id's, and return an array of FIG id's that match the ID.
259    
260    =cut
261    
262    sub swiss_pir_ids {
263     my ($self, $id)=@_;
264     return () unless ($id);
265     $id =~ s/^\s+//; $id =~ s/\s+$//; # trim off the whitespace
266    
267     my @return=($fig->by_alias("uni|$id"));
268     return @return if ($return[0]);
269    
270     @return=($fig->by_alias("tr|$id"));
271     return @return if ($return[0]);
272    
273     @return=($fig->by_alias("sp|$id"));
274     return @return if ($return[0]);
275    
276     return ();
277    }
278    
279  =head2 ss_by_id  =head2 ss_by_id
280    
# Line 244  Line 409 
409    
410  This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.  This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.
411    
412  use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple);  use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple, $default);
413    
414  multiple selections will only be set if $multiple is true  multiple selections will only be set if $multiple is true
415    
416    default will set a default to override (maybe) korgs
417    
418  =cut  =cut
419    
420  sub scrolling_org_list {  sub scrolling_org_list {
421   my ($self, $cgi, $multiple)=@_;   my ($self, $cgi, $multiple, $default)=@_;
422   unless ($multiple) {$multiple=0}   unless ($multiple) {$multiple=0}
423    
424   my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );   my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
# Line 301  Line 468 
468    
469   foreach (@$orgs) {   foreach (@$orgs) {
470     my $gs = $fig->genus_species($_);     my $gs = $fig->genus_species($_);
471     my $gc=scalar $fig->all_contigs($_);     my $gc=$fig->number_of_contigs($_);
472     $label->{$_} = "$gs ($_) [$gc contigs]";     $label->{$_} = "$gs ($_) [$gc contigs]";
473    }    }
474    
# Line 317  Line 484 
484                                          -labels   => $label,                                          -labels   => $label,
485                                          -size     => 10,                                          -size     => 10,
486                                          -multiple => $multiple,                                          -multiple => $multiple,
487                                            -default  => $default,
488                                        ), $cgi->br,                                        ), $cgi->br,
489                    "$n_genomes genomes shown ",                    "$n_genomes genomes shown ",
490                    $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,                    $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
# Line 327  Line 495 
495                    "</TD>",                    "</TD>",
496                    "   </TR>\n",                    "   </TR>\n",
497                    "</TABLE>\n",                    "</TABLE>\n",
                   $cgi->hr  
498          );          );
499  }  }
500    
501    
502    =head2 scrolling_subsys_list
503    
504    Create a scrolling list of all subsystems. Just like scrolling_org_list, this will make the list and allow you to select multiples.
505    
506    use like this
507    
508    push @$html, $raelib->scrolling_subsys_list($cgi, $multiple);
509    
510    =cut
511    
512    sub scrolling_subsys_list {
513     my ($self, $cgi, $multiple)=@_;
514     $multiple=0 unless (defined $multiple);
515     my @ss=sort {uc($a) cmp uc($b)} $fig->all_subsystems();
516     my $label;
517     # generate labels for the list
518     foreach my $s (@ss) {my $k=$s; $k =~ s/\_/ /g; $k =~ s/  / /g; $k =~ s/\s+$//; $label->{$s}=$k}
519     return $cgi->scrolling_list(
520      -name    => 'subsystems',
521      -values  => \@ss,
522      -labels  => $label,
523      -size    => 10,
524      -multiple=> $multiple,
525     );
526    }
527    
528    =head2 subsys_names_for_display
529    
530    Return a list of subsystem names for display. This will take a list as an argument and return a nice clean list for display.
531    
532    $raelib->subsys_names_for_display(@ss);
533    or
534    $raelib->subsys_names_for_display($fig->all_subsystems());
535    
536    =cut
537    
538    sub subsys_names_for_display {
539     my ($self, @ss)=@_;
540     foreach (@ss) {s/\_/ /g; 1 while (s/  / /g); s/\s+$//}
541     return @ss;
542    }
543    
544    =head2 GenBank
545    
546     This object will take a genome number and return a Bio::Seq::RichSeq object that has the whole genome
547     in GenBank format. This should be a nice way of getting some data out, but will probably be quite slow
548     at building the object.
549    
550     Note that you need to call this with the genome name and the contig. This will then go through that contig.
551    
552     Something like this should work
553    
554     foreach my $contig ($fig->all_contigs($genome)) {
555      my $seqobj=FIGRob->GenBank($genome, $contig);
556      # process the contig
557     }
558    
559    =cut
560    
561    sub GenBank {
562     my ($self, $genome, $contig)=@_;
563     my $gs=$fig->genus_species($genome);
564     return unless ($gs);
565     unless ($contig) {
566      print STDERR "You didn't provide a contig for $gs. I think that was a mistake. Sorry\n";
567      return;
568     }
569     my $len=$fig->contig_ln($genome, $contig);
570     unless ($len) {
571      print STDERR "$contig from $gs doesn't appear to have a length. Is it right?\n";
572      return;
573     }
574    
575    
576     # first find all the pegs ...
577     my $features; # all the features in the genome
578     my $allpegs; # all the pegs
579     my $translation; # all the protein sequences
580     foreach my $peg ($fig->pegs_of($genome)) {
581      my @location=$fig->feature_location($peg);
582      my $func=$fig->function_of($peg);
583      foreach my $loc (@location) {
584       $loc =~ /^(.*)\_(\d+)\_(\d+)$/;
585       my ($cg, $start, $stop)=($1, $2, $3);
586       next unless ($cg eq $contig);
587       # save this information for later
588       $features->{'peg'}->{$loc}=$func;
589       $allpegs->{'peg'}->{$loc}=$peg;
590       $translation->{'peg'}->{$loc}=$fig->get_translation($peg);
591      }
592     }
593     # ... and all the RNAs
594     foreach my $peg ($fig->rnas_of($genome)) {
595      my @location=$fig->feature_location($peg);
596      my $func=$fig->function_of($peg);
597      foreach my $loc (@location) {
598       $loc =~ /^(.*)\_(\d+)\_(\d+)$/;
599       my ($cg, $start, $stop)=($1, $2, $3);
600       next unless ($cg eq $contig);
601       # save this information for later
602       $features->{'rna'}->{$loc}=$func;
603       $allpegs->{'rna'}->{$loc}=$peg;
604      }
605     }
606    
607    
608     # now get all the contigs out
609     my $seq=$fig->dna_seq($genome, $contig."_1_".$len);
610     my $description = "Contig $contig from " . $fig->genus_species($genome);
611     my $sobj=Bio::Seq->new(
612              -seq              =>  $seq,
613              -id               =>  $contig,
614              -desc             =>  $description,
615              -accession_number =>  $genome
616              );
617     foreach my $prot (keys %{$features->{'peg'}}) {
618       $prot =~ /^(.*)\_(\d+)\_(\d+)$/;
619       my ($cg, $start, $stop)=($1, $2, $3);
620       my $strand=1;
621       if ($stop < $start) {
622        ($stop, $start)=($start, $stop);
623        $strand=-1;
624     }
625    
626     my $feat=Bio::SeqFeature::Generic->new(
627            -start         =>  $start,
628            -end           =>  $stop,
629            -strand        =>  $strand,
630            -primary       =>  'CDS',
631            -display_name  =>  $allpegs->{'peg'}->{$prot},
632            -source_tag    =>  'the SEED',
633            -tag           =>
634                           {
635                           db_xref     =>   $allpegs->{'peg'}->{$prot},
636                           note        =>  'Generated by the Fellowship for the Interpretation of Genomes',
637                           function    =>  $features->{'peg'}->{$prot},
638                           translation =>  $translation->{'peg'}->{$prot}
639                          }
640           );
641    
642       $sobj->add_SeqFeature($feat);
643     }
644    
645     foreach my $prot (keys %{$features->{'rna'}}) {
646       $prot =~ /^(.*)\_(\d+)\_(\d+)$/;
647       my ($cg, $start, $stop)=($1, $2, $3);
648       my $strand=1;
649       if ($stop < $start) {
650        ($stop, $start)=($start, $stop);
651        $strand=-1;
652       }
653    
654       my $feat=Bio::SeqFeature::Generic->new(
655            -start         =>  $start,
656            -end           =>  $stop,
657            -strand        =>  $strand,
658            -primary       =>  'RNA',
659            -source_tag    =>  'the SEED',
660            -display_name  =>  $allpegs->{'rna'}->{$prot},
661            -tag           =>
662                          {
663                           db_xref     =>   $allpegs->{'rna'}->{$prot},
664                           note        =>  'Generated by the Fellowship for the Interpretation of Genomes',
665                           function    =>  $features->{'rna'}->{$prot},
666                          }
667           );
668    
669      $sobj->add_SeqFeature($feat);
670     }
671     return $sobj;
672    }
673    
674    =head2 best_hit
675    
676     Returns the FIG id of the single best hit to a peg
677    
678     eg
679    
680     my $bh=$fr->best_hit($peg);
681     print 'function is ', scalar $fig->function_of($bh);
682    
683    =cut
684    
685    sub best_hit {
686     my ($self, $peg)=@_;
687     return unless ($peg);
688    
689     my ($maxN, $maxP)=(1, 1e-5);
690     my @sims=$fig->sims($peg, $maxN, $maxP, 'fig');
691     return ${$sims[0]}[1];
692    }
693    
694    
695    =head1 read_fasta
696    
697    Read a fasta format file and return a reference to a hash with the data. The key is the ID and the value is the sequence. If you supply the optional keep comments then the comments (anything after the first white space are returned as a sepaarte hash).
698    
699    Usage:
700    my $fasta=$raelib->read_fasta($file);
701    my ($fasta, $comments)=$raelib->read_fasta($file, 1);
702    
703    =cut
704    
705    sub read_fasta {
706     my ($self, $file, $keepcomments)=@_;
707     open (IN, $file) || die "Can't open $file";
708     my %f; my $t; my $s; my %c;
709     while (<IN>) {
710      chomp;
711      if (/^>/) {
712       if ($s) {
713        $f{$t}=$s;
714        undef $s;
715       }
716       s/^>(\S+)\s*//;
717       $t=$1;
718       $c{$t}=$_ if ($_);
719      }
720      else {$s .= $_}
721     }
722     $f{$t}=$s;
723     if ($keepcomments) {return (\%f, \%c)}
724     else {return \%f}
725    }
726    
727    =head1 rc
728    
729    Reverse complement. It's too easy.
730    
731    =cut
732    
733    sub rc {
734     my ($self, $seq)=@_;
735     $seq=~tr/GATCgatc/CTAGctag/;
736     $seq = reverse $seq;
737     return $seq;
738    }
739    
740    
741    =head2 cookies
742    
743    Handle cookies. This method will get and set the value of the FIG cookie. Cookies are name/value pairs that are stored on the users computer. We then retrieve them using this method. The cookies are passed in as a reference to a hash, and the method returns a tuple of the cookie that can be passed to the browser and a reference to a hash with the data.
744    
745    If you do not pass any arguments the whole cookie will be returned.
746    
747    Use as follows:
748    
749    ($cookie, $data) = raelib->cookie($cgi, \%data);
750    
751    You do not need to pass in any data, in that case you will just get the cookie back
752    
753    Underneath, I create a single cookie called FIG which stores all the information. The names and value pairs are stored using = to join name to value and ; to concatenate. This way we can create a single cookie with all the data. I am using the FIG::clean_attribute_key method to remove unwanted characters from the name/value pairs, so don't use them.
754    
755    Note that for the moment I have put this routine here since it needs to maintain the state of the cookie (i.e. it needs to know what $self is). It should really be in HTML.pm but that is not, as far as I can tell, maintaining states?
756    
757    =cut
758    
759    sub cookie {
760     my ($self, $cgi, $input)=@_;
761     return unless ($cgi);
762     $self->{'cookie'}=$cgi->cookie(-name=>"FIG") unless ($self->{'cookie'});
763    
764     # first, create a hash from the existing cookie data
765     my $cookie;
766     map {
767      my ($kname, $kvalue)=split /\=/, $_;
768      $cookie->{$kname}=$kvalue;
769     } split /\;/, $self->{'cookie'};
770    
771     if ($input)
772     {
773      # add the values that were passed in
774      map {$cookie->{FIG->clean_attribute_key($_)}=$input->{$_}} keys %$input;
775      # put everything back together and set the cookie
776      my $newcookie=join ";", map {$_ . "=" . $cookie->{$_}} keys %$cookie;
777      $self->{'cookie'}=$cgi->cookie(-name=>"FIG", -value=>$newcookie, -expires=>'+1y');
778     }
779    
780     return ($self->{'cookie'}, $cookie);
781    }
782    
783    
784    
785    =head1 commify
786    
787    Put commas in numbers. I think this comes straight from the perl cookbook and is very useful for nice displays
788    
789    =cut
790    
791    sub commify {
792        my($self,$n) = @_;
793        my(@n) = ();
794        my($i);
795    
796        for ($i = (length($n) - 3); ($i > 0); $i -= 3)
797        {
798            unshift(@n,",",substr($n,$i,3));
799        }
800        unshift(@n,substr($n,0,$i+3));
801        return join("",@n);
802    }
803    
804    
805    =head1 tab2excel
806    
807    This is experimental as of May, 2006.
808    
809    There are a couple of perl modules that allow you to write to excel files, and so I am trying out the idea of taking our standard $tab table respresentation that is used in HTML.pm and making an excel file that people could download. It seems like that would be a great tool for them to have.
810    
811    At the moment the excel modules are in my shared space on the CI machines, and so won't work in every seed installation. Therefore the $useexcel boolean is set at compile time if we successfully load the module.
812    
813    The issues are:
814        1. creating the excel file
815        2. reading through @$tab and presenting the data
816        3. Checking @$tab because each element can be a reference to an array with color or formatting information
817    
818    Formatting
819    
820    A separate set of formats must be created for each color and font combination since the formats are applied at the end of the processing of the file.
821    
822    
823    Usage:
824        Note the usage is ALMOST the same as make_table, but not quite. First, options is a reference to a hash rather than the hash itself
825        and second, the additional option "filename" that is the filename to be written;
826    
827        $url = $raelib->tab2excel($col_hdrs, $tab, $title, $options, "filename");
828    
829        The filename will be created in $FIG_Config::temp. The extension .xls will be added to the filename if it is not present.
830    
831    Returns:
832        A link to the file in the format
833            <p><a href="...">filename</a> [Download Excel file]</p>
834    
835    
836    =cut
837    
838    sub tab2excel {
839        my($self, $col_hdrs, $tab, $title, $options, $filename)=@_;
840    
841        return "<p>Couldn't load Spreadsheet::WriteExcel</p>\n" unless ($useexcel);
842        $filename =~ s/^.*\///; # remove any path information. We are going to only write to FIG_Config::temp
843        unless ($filename =~ /\.xls$/) {$filename .=".xls"}
844        my $excelfile=$FIG_Config::temp."/$filename";
845    
846        # Each excel file consists of the file, and then of worksheets from within the file. These are the tabs at the bottom of the screen
847        # that can be added with "Insert->new worksheet" from the menus.
848        # Create a new workbook called simple.xls and add a worksheet
849        my $workbook  = Spreadsheet::WriteExcel->new($excelfile);
850        $workbook->set_tempdir($FIG_Config::temp); # you don't have to do this, but it may speed things up and reduce memory load.
851        my $worksheet = $workbook->add_worksheet($title);
852    
853        # define some excel colors in our hash. Then if the table calls for more colors we'll add them later.
854        # this is just to set the defaults for a couple of obvious ones so that we don't bother using custom colors for them
855        my $excelcolor= {"#000000" => 1, "#FFFFFF" => 2, "#FF0000" => 3, "#00FF00" => 4, "#0000FF" => 5, "#FFFF00" => 6, "#FF00FF" => 7, "#00FFFF" => 8};
856    
857        # The general syntax for output to an excel file is write($row, $column, $value, $format). Note that row and
858        # column are zero indexed
859    
860        # look through the options and see what the formating issues are
861        my $border = defined $options->{border} ? $options->{border} : 0;
862        my $format;
863        $format->{default} = $workbook->add_format(border=>$border, size=>12);
864    
865    
866        # write the column headers
867        # define a new format that is bold
868        $format->{header} = $workbook->add_format();
869        $format->{header}->copy($format->{default});
870        $format->{header}->set_bold();
871    
872        for my $i (0 .. $#$col_hdrs)
873        {
874            $worksheet->write(0, $i, $col_hdrs->[$i], $format->{header});
875        }
876    
877        # now loop through the table and write them out. Remember to break on array refs
878        my $row_idx=1;
879        my $col_idx=0;
880        foreach my $row (@$tab)
881        {
882            foreach my $cell (@$row)
883            {
884                my $useformat=$format->{default};
885                if (ref($cell) eq "ARRAY")
886                {
887                    ($cell, $useformat, $format, $excelcolor)=$self->parse_cell($cell, $format, $excelcolor, $workbook);
888                }
889    
890                $cell=$self->clean_excel_cell($cell);
891                # this is a botch, but in some circumstances we need to split the cell out. e.g. if it is a URL
892                # in this case we have a reference to an array, and we'll use  a slight modification on the process
893                if (ref($cell) eq "ARRAY") {$worksheet->write($row_idx, $col_idx, @$cell, $useformat)}
894                else {$worksheet->write($row_idx, $col_idx, $cell, $useformat)}
895    
896                # increment to the next column
897                $col_idx++;
898            }
899            # new line, and start of line
900            $row_idx++;
901            $col_idx=0;
902        }
903    
904        #finally close and write the table
905        $workbook->close();
906    
907    
908        # now generate the link to return
909        my $size=(stat($excelfile))[7];
910        $size=int($size/1000);
911        my $link="<p><a href=\"".$fig->temp_url."/$filename\">$filename</a> [Download table in Excel format. $size kb]</p>\n";
912        return $link;
913    }
914    
915    
916    
917    
918    
919    
920    
921    
922    
923    =head2 parse_cell()
924    
925    A method to take the cell from the table where there is some formatting information and figure out what we know. Return the data and the format.
926    
927    Requires the cell and the current $format.
928    
929    When applied to <td> the default formats that we'll deal with at the moment are
930         align=
931         background-color=
932         color=
933         bgcolor=
934    
935    Colors are funky in excel because it only has a limited palette. We rename colors as needed, and then save those so that we can use them again. We're only allowed 55 colors in excel (numbered 8..63). Because its a little stupid to mess with black and white and so on, I ignore those, and also start renumbering at color number 20, giving us 43 different colors.
936    
937    The reference to the hash excelcolor has the custom excel colors stored in it for a few colors, and others are added to it.
938    
939    =cut
940    
941    sub parse_cell {
942        my ($self, $arr, $format, $excelcolor, $workbook)=@_;
943        return ($arr, $format->{default}) unless (ref($arr) eq "ARRAY");
944        my ($cell, $tag)=@$arr;
945        $tag =~ s/\'/"/g; # this just makes it easier to parse the things like align='center' and align="center" that are both valid
946    
947        # we are going to define a series of formats that we can apply, this will have  a key that is
948        # center.bgcolor.fgcolor. Then if we already have that, we can use it, if not, we'll define it
949    
950        my ($center, $bgcolor, $fgcolor)=(undef, undef, undef);
951    
952        if ($tag =~ /align\=\"(.*?)\"/i) {$center=$1}
953        if ($tag =~ /background-color\=\"(.*?)\"/i || $tag =~ /bgcolor\=\"(.*?)\"/i)
954        {
955            my $color=$1;
956            if (!$excelcolor->{$color})
957            {
958                # find out the last custom color used and increment it
959                my $max=19; # we are not going to use a color less than 20
960                foreach my $k (keys %$excelcolor) {($k > $max) ? ($max=$k) :1}
961                $max++;
962                $excelcolor->{$color}=$workbook->set_custom_color($max, $color);
963            }
964            $bgcolor=$excelcolor->{$color};
965        }
966        elsif ($tag =~ /color\=\"(.*?)\"/i)
967        {
968            my $color=$1;
969            if (!$excelcolor->{$color})
970            {
971                # find out the last custom color used and increment it
972                my $max=19; # we are not going to use a color less than 20
973                foreach my $k (keys %$excelcolor) {($k > $max) ? ($max=$k) :1}
974                $max++;
975                $excelcolor->{$color}=$workbook->set_custom_color($max, $color);
976            }
977            $fgcolor=$excelcolor->{$color};
978        }
979    
980        if (!defined $format->{$center.$bgcolor.$fgcolor})
981        {
982            $format->{$center.$bgcolor.$fgcolor}=$workbook->add_format();
983            $format->{$center.$bgcolor.$fgcolor}->copy($format->{default});
984            $center && $format->{$center.$bgcolor.$fgcolor}->set_align($center);
985            $bgcolor && $format->{$center.$bgcolor.$fgcolor}->set_bg_color($bgcolor);
986            $fgcolor && $format->{$center.$bgcolor.$fgcolor}->set_color($fgcolor);
987        }
988    
989        return ($cell, $format->{$center.$bgcolor.$fgcolor}, $format, $excelcolor);
990    }
991    
992    
993    =head1 clean_excel_cell
994    
995    Process the cells to remove &nbsp; and also convert relative URLs to full URLs
996    
997    =cut
998    
999    sub clean_excel_cell {
1000        my ($self, $cell)=@_;
1001        if ($cell =~ /^\s*\&nbsp\;\s*$/) {$cell=undef} # ignore white space
1002        if ($cell =~ /\<a href=.(.*?).>(.*)<\/a>/)
1003        {
1004            # this is tricky because if the cell is a url then we need two separate things, the url and the link name
1005            my ($url, $link)=($1, $2);
1006            unless ($url =~ /^http/) {$url=$FIG_Config::cgi_url."/$url"}
1007            $cell=[$url, $link];
1008        }
1009        return $cell;
1010    }
1011    
1012    
1013    
1014    
1015  1;  1;
1016    

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.31

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3