1package XML::SAXDriver::Excel; 2 3use strict; 4 5use Spreadsheet::ParseExcel; 6 7use base qw(XML::SAX::Base); 8use vars qw($VERSION $NS_SAXDriver_Excel); 9$VERSION = '0.06'; 10$NS_SAXDriver_Excel = 'http://xmlns.perl.org/sax/XML::SAXDriver::Excel'; 11 12sub _parse_bytestream 13{ 14 my $self = shift; 15 my $stream = shift; 16 17 die ("Cannot use String to parse a binary Excel file. You can only use a file by setting SystemId"); 18 19 $self->_parse_Excel(\$stream); 20} 21sub _parse_string 22{ 23 my $self = shift; 24 my $stream = shift; 25 26 ### This is experimental due to the binary streams 27 #require IO::String; 28 #$io = IO::String->new($var); 29 #$self->_parse_Excel($io); 30 31 die ("Cannot use String to parse a binary Excel file. You can only use a file by setting SystemId"); 32 33 #my @strings = split("\n", $self->{ParseOptions}->{Source}{String}); 34 #$self->_parse_Excel(\@strings); 35} 36sub _parse_systemid 37{ 38 my $self = shift; 39 my $path = shift; 40 41 $self->_init(); 42 $self->{ParseOptions}->{Parser}->Parse($path); 43 $self->_end(); 44 45} 46 47sub _init { 48 my $self = shift; 49 50 ### Reset vars before parsing 51 $self->{_row} = []; ## Used to push row values per row 52 $self->{_row_num} = -1; ## Set at -1 since rows are counted from 0 53 $self->{_last_row_num} = 0; ## Used to save the last row value received 54 $self->{_last_col} = 0; 55 56 $self->{ParseOptions}->{Parser} ||= Spreadsheet::ParseExcel->new(CellHandler => \&cb_routine, Object => $self, NotSetCell => 1); 57 58 $self->{ParseOptions}->{Headings_Handler} ||= \&_normalize_heading; 59 60 $self->{_document} = {}; 61 $self->{ParseOptions}->{Handler}->start_document($self->{_document}); 62 $self->xml_decl($self->{ParseOptions}->{Declaration}); 63 my $pm_excel = $self->_create_node( 64 Prefix => 'SAXDriver::Excel', 65 NamespaceURI => $NS_SAXDriver_Excel, 66 ); 67 $self->start_prefix_mapping($pm_excel); 68 $self->end_prefix_mapping($pm_excel); 69 70 $self->{_doc_element} = { 71 Name => $self->{ParseOptions}->{File_Tag} || "records", 72 Attributes => {}, 73 }; 74 75 $self->{ParseOptions}->{Handler}->start_element($self->{_doc_element}); 76} 77 78 ## Parse file or string 79 80 81sub _end 82{ 83 my $self = shift; 84 _print_xml_finish($self); 85 86 ### Reset vars after parsing 87 $self->{_row} = []; ## Used to push row values per row 88 $self->{_row_num} = -1; ## Set at -1 since rows are counted from 0 89 $self->{_last_row_num} = 0; ## Used to save the last row value received 90 91 $self->{ParseOptions}->{Handler}->end_element($self->{_doc_element}); 92 93 return $self->{ParseOptions}->{Handler}->end_document($self->{_document}); 94 95} 96 97sub cb_routine($$$$$$) 98{ 99 my ($self, $oBook, $iSheet, $iRow, $iCol, $oCell) = @_; 100 101 my $oWkS = $oBook->{Worksheet}[$iSheet]; 102 103 $self->{ParseOptions}->{Col_Headings} ||= []; 104 105if ($iCol < $oWkS->{MaxCol}) 106 { 107 108 if ($self->{_last_col} > $iCol) 109 { 110 while ($self->{_last_col} < $oWkS->{MaxCol}) 111 { 112 push(@{$self->{_row}}, undef); 113 $self->{_last_col}++; 114 } 115 _print_xml(@_); 116 } 117 118 if ($self->{_last_col} < $iCol) 119 { 120 while ($self->{_last_col} < $iCol) 121 { 122 push(@{$self->{_row}}, undef); 123 $self->{_last_col}++; 124 } 125 } 126 127 push(@{$self->{_row}}, $oCell->Value()); 128 $self->{_last_row_num} = $iRow; 129 $self->{_last_col}++; 130 return; 131 132 133 } 134 135 push(@{$self->{_row}}, $oCell->Value());# if $flag == 0; 136 137 _print_xml(@_); 138 return; 139 140} 141 142sub _normalize_heading ### Default if no Headings_Handler is provided 143{ 144 my $heading= shift; 145 my $sub_char = shift || '_'; 146 $heading =~ s/^\s//g; 147 $heading =~ s/\s$//g; 148 $heading =~ s/^([^a-zA-Z|^_|^:])/$sub_char/g; ### We used to also replace the xml in the beginning, but I took it of per recommendation of Michael Rodriguez. 149 $heading =~ s/[^a-zA-Z|^-|^.|^0-9|^:]/$sub_char/g; 150 return $heading; 151} 152 153 154sub _print_xml 155{ 156 my ($self, $oBook, $iSheet, $iRow, $iCol, $oCell) = @_; ### Remember self is passed through the Spreadsheet::ParseExcel object 157 158 my $oWkS = $oBook->{Worksheet}[$iSheet]; 159 160 $self->{_last_row_num} = $iRow; 161 162 163 $self->{_last_col} = 0; 164 my $temp_row = $oCell->Value(); 165 $self->{_row_num} = $self->{_last_row_num}; 166 167 168 if (!@{$self->{ParseOptions}->{Col_Headings}} && !$self->{ParseOptions}->{Dynamic_Col_Headings}) 169 { 170 my $i = 1; 171 @{$self->{ParseOptions}->{Col_Headings}} = map { "column" . $i++ } @{$self->{_row}}; 172 } 173 elsif (!@{$self->{ParseOptions}->{Col_Headings}} && $self->{ParseOptions}->{Dynamic_Col_Headings}) 174 { 175 @{$self->{ParseOptions}->{Col_Headings}} = map { $self->{ParseOptions}->{Headings_Handler}->($_, $self->{ParseOptions}->{SubChar}); } @{$self->{_row}}; 176 $self->{_row} = []; ### Clear the @$row array 177 return; ### So that it does not print the column headings as the content of the first node. 178 } 179 180 181 my $el = { 182 Name => $self->{ParseOptions}->{Parent_Tag} || "record", 183 Attributes => {}, 184 }; 185 186 $self->{ParseOptions}->{Handler}->start_element($el); 187 188 for (my $i = 0; $i <= $#{$self->{ParseOptions}->{Col_Headings}}; $i++) { 189 my $column = { Name => $self->{ParseOptions}->{Col_Headings}->[$i], Attributes => {} }; 190 191 $self->{ParseOptions}->{Handler}->start_element($column); 192 $self->{ParseOptions}->{Handler}->characters({Data => $self->{_row}->[$i]}); 193 $self->{ParseOptions}->{Handler}->end_element($column); 194 } 195 196 $self->{ParseOptions}->{Handler}->end_element($el); 197 198 $self->{_row} = []; ### Clear $row and start the new row processing 199 200} 201 202sub _print_xml_finish 203{ 204 my $self = shift; 205 206 while (@{$self->{_row}} < 9) 207 { 208 push(@{$self->{_row}}, undef); 209 } 210 211 my $el = { 212 Name => $self->{ParseOptions}->{Parent_Tag} || "record", 213 Attributes => {}, 214 }; 215 216 $self->{ParseOptions}->{Handler}->start_element($el); 217 218 for (my $i = 0; $i <= $#{$self->{ParseOptions}->{Col_Headings}}; $i++) { 219 my $column = { Name => $self->{ParseOptions}->{Col_Headings}->[$i], Attributes => {} }; 220 221 $self->{ParseOptions}->{Handler}->start_element($column); 222 $self->{ParseOptions}->{Handler}->characters({Data => $self->{_row}->[$i]}); 223 $self->{ParseOptions}->{Handler}->end_element($column); 224 } 225 226 $self->{ParseOptions}->{Handler}->end_element($el); 227} 228 229sub _create_node { 230 shift; 231 # this may check for a factory later 232 return {@_}; 233} 234 2351; 236__END__ 237 238 239 240 241=head1 NAME 242 243 XML::SAXDriver::Excel - SAXDriver for converting Excel files to XML 244 245=head1 SYNOPSIS 246 247 use XML::SAXDriver::Excel; 248 my $driver = XML::SAXDriver::Excel->new(%attr); 249 $driver->parse(%attr); 250 251=head1 DESCRIPTION 252 253 XML::SAXDriver::Excel was developed as a complement to 254 XML::Excel, though it provides a SAX interface, for 255 gained performance and efficiency, to Excel files. 256 Specific object attributes and handlers are set to 257 define the behavior of the parse() method. It does 258 not matter where you define your attributes. If they 259 are defined in the new() method, they will apply to 260 all parse() calls. You can override in any call to 261 parse() and it will remain local to that function call 262 and not effect the rest of the object. 263 264=head1 XML::SAXDriver::Excel properties 265 266 Source - (Reference to a String, ByteStream, SystemId) 267 268 String - **currently not supported** Contains literal Excel data. 269 Ex (Source => {String => $foo}) 270 271 ByteStream - **currently not supported** Contains a filehandle reference. 272 Ex. (Source => {ByteStream => \*STDIN}) 273 274 SystemId - Contains the path to the file containing 275 the Excel data. Ex (Source => {SystemId => '../excel/foo.xls'}) 276 277 278 Handler - Contains the object to be used as a XML print handler 279 280 DTDHandler - Contains the object to be used as a XML DTD handler. 281 ****There is no DTD support available at this time. 282 I'll make it available in the next version.**** 283 284 SubChar - Specifies the character(s) to use to substitute illegal chars in xml tag names, that 285 will be generated from the first row, but setting the Dynamic_Col_Headings. 286 287 Col_Headings - Reference to the array of column names to be used for XML tag names. 288 289 Dynamic_Col_Headings - Should be set if you want the XML tag names generated dynamically 290 from the first row in Excel file. **Make sure that the number of columns 291 in your first row is equal to the largest row in the document. You 292 don't generally have to worry about if you are submitting valid Excel 293 data, where each row will have the same number of columns, even if 294 they are empty. 295 296 Headings_Handler - Should be used along with Dynamic_Col_Headings to provide a heading 297 normalization handler, to conform the headings to the XML 1.0 298 specifications. If not provided, a default will be used that only 299 works with ASCII chars, therefore any other character sets need to 300 provide a custom handler! The handler sub will be passed the heading 301 string as the first argument. 302 303=head1 AUTHOR 304 305Ilya Sterin (isterin@cpan.org) 306 307=head1 SEE ALSO 308 309XML::Excel 310Spreadsheet::ParseExcel 311 312=cut 313