[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.30, Fri Feb 10 00:39:30 2006 UTC revision 1.31, Sat May 6 22:04:12 2006 UTC
# Line 24  Line 24 
24   Some routines and things that Rob uses. Please feel free to use at will and incorporate into   Some routines and things that Rob uses. Please feel free to use at will and incorporate into
25   your own code or move them into FIG.pm or elsewhere.   your own code or move them into FIG.pm or elsewhere.
26    
27     For questions about this email RobE@theFIG.info
28    
29  =cut  =cut
30    
31  package raelib;  package raelib;
# Line 31  Line 33 
33  use Bio::SeqIO;  use Bio::SeqIO;
34  use Bio::Seq;  use Bio::Seq;
35  use Bio::SeqFeature::Generic;  use Bio::SeqFeature::Generic;
36    
37    # we don't know whether the Spreadsheet::WriteExcel methods are available on all systems, and even on the CI systems they are currently in my shared directory
38    # so we use an eval and set the boolean if we are cool.
39    my $useexcel;
40    BEGIN {
41        use lib '/home/seed/Rob/perl/lib/perl5/site_perl/5.8.5/';
42        eval "use Spreadsheet::WriteExcel";
43        unless ($@) {$useexcel=1}
44    }
45    
46  use FIG;  use FIG;
47  my $fig=new FIG;  my $fig=new FIG;
48    
# Line 790  Line 802 
802  }  }
803    
804    
805    =head1 tab2excel
806    
807    This is experimental as of May, 2006.
808    
809    There are a couple of perl modules that allow you to write to excel files, and so I am trying out the idea of taking our standard $tab table respresentation that is used in HTML.pm and making an excel file that people could download. It seems like that would be a great tool for them to have.
810    
811    At the moment the excel modules are in my shared space on the CI machines, and so won't work in every seed installation. Therefore the $useexcel boolean is set at compile time if we successfully load the module.
812    
813    The issues are:
814        1. creating the excel file
815        2. reading through @$tab and presenting the data
816        3. Checking @$tab because each element can be a reference to an array with color or formatting information
817    
818    Formatting
819    
820    A separate set of formats must be created for each color and font combination since the formats are applied at the end of the processing of the file.
821    
822    
823    Usage:
824        Note the usage is ALMOST the same as make_table, but not quite. First, options is a reference to a hash rather than the hash itself
825        and second, the additional option "filename" that is the filename to be written;
826    
827        $url = $raelib->tab2excel($col_hdrs, $tab, $title, $options, "filename");
828    
829        The filename will be created in $FIG_Config::temp. The extension .xls will be added to the filename if it is not present.
830    
831    Returns:
832        A link to the file in the format
833            <p><a href="...">filename</a> [Download Excel file]</p>
834    
835    
836    =cut
837    
838    sub tab2excel {
839        my($self, $col_hdrs, $tab, $title, $options, $filename)=@_;
840    
841        return "<p>Couldn't load Spreadsheet::WriteExcel</p>\n" unless ($useexcel);
842        $filename =~ s/^.*\///; # remove any path information. We are going to only write to FIG_Config::temp
843        unless ($filename =~ /\.xls$/) {$filename .=".xls"}
844        my $excelfile=$FIG_Config::temp."/$filename";
845    
846        # Each excel file consists of the file, and then of worksheets from within the file. These are the tabs at the bottom of the screen
847        # that can be added with "Insert->new worksheet" from the menus.
848        # Create a new workbook called simple.xls and add a worksheet
849        my $workbook  = Spreadsheet::WriteExcel->new($excelfile);
850        $workbook->set_tempdir($FIG_Config::temp); # you don't have to do this, but it may speed things up and reduce memory load.
851        my $worksheet = $workbook->add_worksheet($title);
852    
853        # define some excel colors in our hash. Then if the table calls for more colors we'll add them later.
854        # this is just to set the defaults for a couple of obvious ones so that we don't bother using custom colors for them
855        my $excelcolor= {"#000000" => 1, "#FFFFFF" => 2, "#FF0000" => 3, "#00FF00" => 4, "#0000FF" => 5, "#FFFF00" => 6, "#FF00FF" => 7, "#00FFFF" => 8};
856    
857        # The general syntax for output to an excel file is write($row, $column, $value, $format). Note that row and
858        # column are zero indexed
859    
860        # look through the options and see what the formating issues are
861        my $border = defined $options->{border} ? $options->{border} : 0;
862        my $format;
863        $format->{default} = $workbook->add_format(border=>$border, size=>12);
864    
865    
866        # write the column headers
867        # define a new format that is bold
868        $format->{header} = $workbook->add_format();
869        $format->{header}->copy($format->{default});
870        $format->{header}->set_bold();
871    
872        for my $i (0 .. $#$col_hdrs)
873        {
874            $worksheet->write(0, $i, $col_hdrs->[$i], $format->{header});
875        }
876    
877        # now loop through the table and write them out. Remember to break on array refs
878        my $row_idx=1;
879        my $col_idx=0;
880        foreach my $row (@$tab)
881        {
882            foreach my $cell (@$row)
883            {
884                my $useformat=$format->{default};
885                if (ref($cell) eq "ARRAY")
886                {
887                    ($cell, $useformat, $format, $excelcolor)=$self->parse_cell($cell, $format, $excelcolor, $workbook);
888                }
889    
890                $cell=$self->clean_excel_cell($cell);
891                # this is a botch, but in some circumstances we need to split the cell out. e.g. if it is a URL
892                # in this case we have a reference to an array, and we'll use  a slight modification on the process
893                if (ref($cell) eq "ARRAY") {$worksheet->write($row_idx, $col_idx, @$cell, $useformat)}
894                else {$worksheet->write($row_idx, $col_idx, $cell, $useformat)}
895    
896                # increment to the next column
897                $col_idx++;
898            }
899            # new line, and start of line
900            $row_idx++;
901            $col_idx=0;
902        }
903    
904        #finally close and write the table
905        $workbook->close();
906    
907    
908        # now generate the link to return
909        my $size=(stat($excelfile))[7];
910        $size=int($size/1000);
911        my $link="<p><a href=\"".$fig->temp_url."/$filename\">$filename</a> [Download table in Excel format. $size kb]</p>\n";
912        return $link;
913    }
914    
915    
916    
917    
918    
919    
920    
921    
922    
923    =head2 parse_cell()
924    
925    A method to take the cell from the table where there is some formatting information and figure out what we know. Return the data and the format.
926    
927    Requires the cell and the current $format.
928    
929    When applied to <td> the default formats that we'll deal with at the moment are
930         align=
931         background-color=
932         color=
933         bgcolor=
934    
935    Colors are funky in excel because it only has a limited palette. We rename colors as needed, and then save those so that we can use them again. We're only allowed 55 colors in excel (numbered 8..63). Because its a little stupid to mess with black and white and so on, I ignore those, and also start renumbering at color number 20, giving us 43 different colors.
936    
937    The reference to the hash excelcolor has the custom excel colors stored in it for a few colors, and others are added to it.
938    
939    =cut
940    
941    sub parse_cell {
942        my ($self, $arr, $format, $excelcolor, $workbook)=@_;
943        return ($arr, $format->{default}) unless (ref($arr) eq "ARRAY");
944        my ($cell, $tag)=@$arr;
945        $tag =~ s/\'/"/g; # this just makes it easier to parse the things like align='center' and align="center" that are both valid
946    
947        # we are going to define a series of formats that we can apply, this will have  a key that is
948        # center.bgcolor.fgcolor. Then if we already have that, we can use it, if not, we'll define it
949    
950        my ($center, $bgcolor, $fgcolor)=(undef, undef, undef);
951    
952        if ($tag =~ /align\=\"(.*?)\"/i) {$center=$1}
953        if ($tag =~ /background-color\=\"(.*?)\"/i || $tag =~ /bgcolor\=\"(.*?)\"/i)
954        {
955            my $color=$1;
956            if (!$excelcolor->{$color})
957            {
958                # find out the last custom color used and increment it
959                my $max=19; # we are not going to use a color less than 20
960                foreach my $k (keys %$excelcolor) {($k > $max) ? ($max=$k) :1}
961                $max++;
962                $excelcolor->{$color}=$workbook->set_custom_color($max, $color);
963            }
964            $bgcolor=$excelcolor->{$color};
965        }
966        elsif ($tag =~ /color\=\"(.*?)\"/i)
967        {
968            my $color=$1;
969            if (!$excelcolor->{$color})
970            {
971                # find out the last custom color used and increment it
972                my $max=19; # we are not going to use a color less than 20
973                foreach my $k (keys %$excelcolor) {($k > $max) ? ($max=$k) :1}
974                $max++;
975                $excelcolor->{$color}=$workbook->set_custom_color($max, $color);
976            }
977            $fgcolor=$excelcolor->{$color};
978        }
979    
980        if (!defined $format->{$center.$bgcolor.$fgcolor})
981        {
982            $format->{$center.$bgcolor.$fgcolor}=$workbook->add_format();
983            $format->{$center.$bgcolor.$fgcolor}->copy($format->{default});
984            $center && $format->{$center.$bgcolor.$fgcolor}->set_align($center);
985            $bgcolor && $format->{$center.$bgcolor.$fgcolor}->set_bg_color($bgcolor);
986            $fgcolor && $format->{$center.$bgcolor.$fgcolor}->set_color($fgcolor);
987        }
988    
989        return ($cell, $format->{$center.$bgcolor.$fgcolor}, $format, $excelcolor);
990    }
991    
992    
993    =head1 clean_excel_cell
994    
995    Process the cells to remove &nbsp; and also convert relative URLs to full URLs
996    
997    =cut
998    
999    sub clean_excel_cell {
1000        my ($self, $cell)=@_;
1001        if ($cell =~ /^\s*\&nbsp\;\s*$/) {$cell=undef} # ignore white space
1002        if ($cell =~ /\<a href=.(.*?).>(.*)<\/a>/)
1003        {
1004            # this is tricky because if the cell is a url then we need two separate things, the url and the link name
1005            my ($url, $link)=($1, $2);
1006            unless ($url =~ /^http/) {$url=$FIG_Config::cgi_url."/$url"}
1007            $cell=[$url, $link];
1008        }
1009        return $cell;
1010    }
1011    
1012    
1013    
1014    

Legend:
Removed from v.1.30  
changed lines
  Added in v.1.31

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3