=head1 NAME

PDL::IO::FastRaw -- A simple, fast and convenient io format for PerlDL.

=head1 VERSION

This documentation refers to PDL::IO::FastRaw version 0.0.3.

=head1 SYNOPSIS

 use PDL;
 use PDL::IO::FastRaw;

 writefraw($pdl,"fname");         # write a raw file

 $pdl2 = readfraw("fname");       # read a raw file
 $pdl2 = PDL->readfraw("fname");

 $pdl3 = mapfraw("fname2",{ReadOnly => 1}); # mmap a file, don't read yet

 $pdl4 = maptextfraw("fname3",{...}); # map a text file into a 1-D pdl.


=head1 DESCRIPTION

This is a very simple and fast io format for PerlDL.
The disk data consists of two files, a header metadata file
in ASCII and a binary file consisting simply of consecutive
bytes, shorts or whatever.

It is hoped that this will not only make for a simple PerlDL module
for saving and retrieving these files but also make it easy
for other programs to use these files.

The format of the ASCII header is simply

 <typeid>
 <ndims>
 <dim0> <dim1> ...

You should probably stick with the default header name.  You may want
to specify your own header, however, such as when you have a large
collection of data files with identical dimensions and data types.
Under these circumstances, simply specify the C<Header> option in the
options hash.

The binary files are in general
NOT interchangeable between different architectures since the binary
file is simply dumped from the memory region of the piddle.
This is what makes the approach efficient.

It is also possible to mmap the file which can give a large
speedup in certain situations as well as save a lot of memory
by using a disk file as virtual memory. When a file is mapped,
parts of it are read only as they are accessed in the memory
(or as the kernel decides: if you are reading the pages in order,
it may well preread some for you).

Note that memory savings and copy-on-write are operating-system
dependent - see Core.xs and your operating system documentation
for exact semantics of whatever. Basically, if you write to a
mmapped file without C<ReadOnly>, the change will be reflected
in the file immediately. C<ReadOnly> doesn't really make it impossible
to write to the piddle but maps the memory privately so the file
will not be changed when you change the piddle. Be aware though
that mmapping a 40Mb file without C<ReadOnly> spends no virtual
memory but with C<ReadOnly> it does reserve 40Mb.

=head2 Example: Converting ASCII to raw

You have a whole slew of data files in ASCII from an experiment
that you ran in your lab.  You're still tweaking the analysis
and plots, so you'd like if your data could load as fast as
possible.  Eventually you'll read the data into your scripts
using C<readfraw>, but the first thing you might do is create
a script that converts all the data files to raw files:

 #!/usr/local/bin/perl
 # Assumes that the data files end with a .asc or .dat extension
 # and saves the raw file output with a .bdat extension.
 # call with
 #  >./convert_to_raw.pl file1.dat file2.dat ...
 # or
 #  >./convert_to_raw.pl *.dat

 use PDL;
 use PDL::IO::FastRaw;    # for saving raw files
 use PDL::IO::Misc;       # for reading ASCII files with rcols
 for (@ARGV) {            # run through the entire supplied list of file names
     ($newName = $_) =~ s/\.(asc|dat)/.bdat/;
     print "Saving contents of $_ to $newName\n";
     $data = rcols($_);
     writefraw($data, $newName);
 }


=head2 Example: readfraw

Now that you've gotten your data into a raw file format, you can
start working on your analysis scripts.  If your scripts used C<rcols>
in the past, the reading portion of the script should go much,
much faster now:

 #!/usr/local/bin/perl
 # My plotting script.
 # Assume I've specified the files to plot on the command line like
 #  >./plot_script.pl file1.bdat file2.bdat ...
 # or
 #  >./plot_script.pl *.bdat

 use PDL;
 use PDL::IO::FastRaw;
 for (@ARGV) {            # run through the entire supplied list of file names
     $data = readfraw($_);
     my_plot_func($data);
 }

=head2 Example: Custom headers

In the first example, I allow C<writefraw> to use the standard header
file name, which would be C<file.bdat.hdr>.  However, I often measure
time series that have identical length, so all of those header files
are redundant.  To fix that, I simply pass the Header option to the
C<writefraw> command.  A modified script would look like this:

 #!/usr/local/bin/perl
 # Assumes that the data files end with a .asc or .dat extension
 # and saves the raw file output with a .bdat extension.
 # call with
 #  >./convert_to_raw.pl [-hHeaderFile] <fileglob> [-hHeaderFile] <fileglob> ...

 use PDL;
 use PDL::IO::FastRaw;    # for saving raw files
 use PDL::IO::Misc;       # for reading ASCII files with rcols
 my $header_file = undef;
 CL_OPTION: while($_ = shift @ARGV) { # run through the entire list of command-line options
     if(/-h(.*)/) {
         $header_file = $1;
         next CL_OPTION;
     }
     ($newName = $_) =~ s/\.(asc|dat)/.bdat/;
     print "Saving contents of $_ to $newName\n";
     $data = rcols($_);
     writefraw($data, $newName, {Header => $header_file});
 }

Modifying the read script is left as an exercise for the reader.  :]


=head2 Example: Using mapfraw

Sometimes you'll want to use C<mapfraw> rather than the read/write
functions.  In fact, the original author of the module doesn't
use the read/write functions anymore, preferring to always use
C<mapfraw>.  How would you go about doing this?

Assuming you've already saved your data into the raw format, the
only change you would have to make to the script in example 2 would
be to change the call to C<readfraw> to C<mapfraw>.  That's it.
You will probably see differences in performance, though I (David
Mertens) couldn't tell you about them because I haven't played
around with C<mapfraw> much myself.

What if you eschew the use of C<writefraw> and prefer to only use
C<mapfraw>?  How would you save your data to a raw format?  In that
case, you would have to create a C<mapfraw> piddle with the correct
dimensions first using

 $piddle_on_hd = mapfraw('fname', {Creat => 1, Dims => [dim1, dim2, ...]});

Note that you must specify the dimensions and you must tell
C<mapfraw> to create the new piddle for you by setting the
C<Creat> option to a true value, not C<Create> (note the missing
final 'e').


=head1 FUNCTIONS

=head2 readfraw

=for ref

Read a raw format binary file

=for usage

 $pdl2 = readfraw("fname");
 $pdl2 = PDL->readfraw("fname");
 $pdl2 = readfraw("fname", {Header => 'headerfname'});

=for options

The C<readfraw> command
supports the following option:

=over 8

=item Header

Specify the header file name.

=back

An exception is thrown if the data file or the header cannot be read,
or if the data file holds fewer bytes than the header calls for.

=head2 writefraw

=for ref

Write a raw format binary file

=for usage

 writefraw($pdl,"fname");
 writefraw($pdl,"fname", {Header => 'headerfname'});

=for options

The C<writefraw> command
supports the following option:

=over 8

=item Header

Specify the header file name.

=back

=head2 mapfraw

=for ref

Memory map a raw format binary file (see the module docs also)

=for usage

 $pdl3 = mapfraw("fname2",{ReadOnly => 1});

=for options

The C<mapfraw> command
supports the following options (not all combinations make sense):

=over 8

=item Dims, Datatype

If creating a new file or if you want to specify your own header
data for the file, you can give an array reference and a scalar,
respectively.  When C<Dims> is given without C<Datatype>, the data
type defaults to double.

=item Creat

Create the file. Also writes out a header for the file.

=item Trunc

Set the file size. Automatically enabled with C<Creat>. NOTE: This also
clears the file to all zeroes.

=item ReadOnly

Disallow writing to the file.

=item Header

Specify the header file name.

=back

=head2 maptextfraw

=for ref

Memory map a text file (see the module docs also).

Note that this function maps the raw format so if you are
using an operating system which does strange things to e.g.
line delimiters upon reading a text file, you get the raw (binary)
representation.

The file doesn't really need to be text but it is just mapped
as one large binary chunk.

This function is just a convenience wrapper which first C<stat>s
the file and sets the dimensions and datatype.

=for usage

 $pdl4 = maptextfraw("fname", {options});

=for options

The options other than Dims, Datatype of C<mapfraw> are
supported.

=head1 BUGS

Should be documented better. C<writefraw> and C<readfraw> should
also have options (the author nowadays only uses C<mapfraw> ;)

=head1 AUTHOR

Copyright (C) Tuomas J. Lukka 1997.
All rights reserved. There is no warranty. You are allowed
to redistribute this software / documentation under certain
conditions. For details, see the file COPYING in the PDL
distribution. If this file is separated from the PDL distribution,
the copyright notice should be included in the file.


=cut

package PDL::IO::FastRaw;

use strict;
use warnings;

## use version; our $VERSION = qv('0.0.3');
our $VERSION = '0.000003';
$VERSION = eval $VERSION;

# True when File::Map (>= 0.57) could be loaded.  Declared without an
# initialiser on purpose: an initialiser would run at run time and wipe
# out the value computed in the BEGIN block below.
our $have_file_map;

BEGIN {
    $have_file_map = 0;

    # String eval is required here: File::Map is optional and a plain
    # "use" would make it a hard compile-time dependency.
    eval "use File::Map 0.57 qw(:all)";
    $have_file_map = 1 unless $@;
}

require Exporter;
use PDL::Core '';
use PDL::Exporter;
use FileHandle;

our @ISA         = qw/PDL::Exporter/;
our @EXPORT_OK   = qw/writefraw readfraw mapfraw maptextfraw/;
our %EXPORT_TAGS = (Func => [@EXPORT_OK]);

# Exported functions

*writefraw = \&PDL::writefraw;
sub readfraw    { PDL->readfraw(@_) }
sub mapfraw     { PDL->mapfraw(@_) }
sub maptextfraw { PDL->maptextfraw(@_) }

# _read_frawhdr($name, $opts)
#
# Read the three-line ASCII header that accompanies the raw data file
# $name.  The header file is $opts->{Header} when given (and true),
# otherwise "$name.hdr".
#
# Returns a hash reference with the keys Type (type id as read), Dims
# (array reference of dimensions) and NDims (number of dimensions).
# Barfs if the header cannot be opened, has fewer than three lines, or
# if the dimension count does not match the dimension list.
sub _read_frawhdr {
    my ($name, $opts) = @_;
    my $hname = $opts->{Header} || "$name.hdr";

    # Three-argument open: the file name is never interpreted as a mode.
    open my $h, '<', $hname
        or barf "Couldn't open '$hname' for reading: $!";
    my ($tid, $ndims, $str) = map { scalar <$h> } 1 .. 3;
    close $h;

    # The third line may be empty (zero-dimensional data) but must exist.
    barf("Format error in '$hname'") if !defined $str;

    # Tolerate stray whitespace and foreign line endings in the header.
    for ($tid, $ndims) {
        s/^\s+//;
        s/\s+$//;
    }

    my @dims = split ' ', $str;
    if ($ndims !~ /^\d+$/ or @dims != $ndims) {
        barf("Format error reading fraw header file '$hname'");
    }
    return {
        Type  => $tid,
        Dims  => \@dims,
        NDims => $ndims,
    };
}

# _writefrawhdr($pdl, $name, $opts)
#
# Write the ASCII header describing $pdl (data type, number of
# dimensions, dimension list - one item per line) to $opts->{Header},
# or to "$name.hdr" when no header name is given.
#
# Returns 1.  Barfs if the header cannot be opened or fully written.
sub _writefrawhdr {
    my ($pdl, $name, $opts) = @_;
    my $hname = $opts->{Header} || "$name.hdr";

    open my $h, '>', $hname
        or barf "Couldn't open '$hname' for writing: $!";
    my $header = join "\n",
        $pdl->get_datatype, $pdl->getndims, join(' ', $pdl->dims);
    print {$h} "$header\n"
        or barf "Couldn't write to '$hname': $!";

    # Buffered write errors only surface at close time.
    close $h
        or barf "Couldn't write to '$hname': $!";
    return 1;
}

# PDL::writefraw($pdl, $name, $opts)
#
# Write the header for $pdl, then dump its data buffer unchanged into
# the binary file $name.  Returns 1; barfs on any I/O failure.
sub PDL::writefraw {
    my ($pdl, $name, $opts) = @_;
    _writefrawhdr($pdl, $name, $opts);

    open my $d, '>', $name
        or barf "Couldn't open '$name' for writing: $!";
    binmode $d;
    print {$d} ${ $pdl->get_dataref }
        or barf "Couldn't write to '$name': $!";
    close $d
        or barf "Couldn't write to '$name': $!";
    return 1;
}

# PDL::readfraw($class, $name, $opts)
#
# Build a piddle of the type and dimensions given by the header and fill
# its data buffer from the binary file $name.  Barfs if either file
# cannot be opened, on a read error, or if the data file is too short.
sub PDL::readfraw {
    my $class = shift;
    my ($name, $opts) = @_;

    # The data file is opened before the header is parsed, so that a
    # missing data file is reported first.
    open my $d, '<', $name
        or barf "Couldn't open '$name' for reading: $!";
    binmode $d;

    my $hdr = _read_frawhdr($name, $opts);
    my $pdl = $class->zeroes(PDL::Type->new($hdr->{Type}), @{ $hdr->{Dims} });

    my $dref      = $pdl->get_dataref;
    my $remaining = length $$dref;
    my $offset    = 0;

    # sysread may legitimately return fewer bytes than requested, so keep
    # reading until the buffer is full or the file ends.
    while ($remaining > 0) {
        my $chunk;
        my $got = sysread $d, $chunk, $remaining;
        barf "Couldn't read from '$name': $!" unless defined $got;
        last if $got == 0;

        # Replace exactly the bytes received so that the buffer keeps its
        # full length even after a partial read.
        substr($$dref, $offset, $got, $chunk);
        $offset    += $got;
        $remaining -= $got;
    }
    close $d;

    barf "Couldn't read enough data from '$name'" if $remaining > 0;

    $pdl->upd_data();
    return $pdl;
}

# PDL::mapfraw($class, $name, $opts)
#
# Memory-map the raw file $name as a piddle.  The type and dimensions
# come from $opts->{Dims} / $opts->{Datatype} when Dims is given
# (Datatype then defaults to double), otherwise from the header file.
# With $opts->{Creat} the header is written after the file is mapped.
sub PDL::mapfraw {
    my $class = shift;
    my ($name, $opts) = @_;

    my $hdr;
    if ($opts->{Dims}) {
        my $datatype = $opts->{Datatype};

        # The type constants are not imported into this package, so the
        # default has to be named with its full package.
        $datatype = $PDL::Types::PDL_D unless defined $datatype;
        $hdr = {
            Type  => $datatype,
            Dims  => $opts->{Dims},
            NDims => scalar @{ $opts->{Dims} },
        };
    }
    else {
        $hdr = _read_frawhdr($name, $opts);
    }

    # Total size of the mapping in bytes: element size times all dims.
    my $nbytes = PDL::Core::howbig($hdr->{Type});
    $nbytes *= $_ for @{ $hdr->{Dims} };

    my $pdl = $class->zeroes(PDL::Type->new($hdr->{Type}));
    $pdl->setdims($hdr->{Dims});

    # Both back ends are called with the same positional arguments.
    my @map_args = (
        $name,
        $nbytes,
        1,
        ($opts->{ReadOnly} ? 0 : 1),
        ($opts->{Creat} ? 1 : 0),
        (0644),
        ($opts->{Creat} || $opts->{Trunc} ? 1 : 0),
    );

    if ($have_file_map and not defined($PDL::force_use_mmap_code)) {
        $pdl->set_data_by_file_map(@map_args);
    }
    else {
        warn "mapfraw: direct mmap support will be deprecated, please install File::Map\n";
        $pdl->set_data_by_mmap(@map_args);
    }

    if ($opts->{Creat}) {
        _writefrawhdr($pdl, $name, $opts);
    }
    return $pdl;
}

# PDL::maptextfraw($class, $name, $opts)
#
# Convenience wrapper around mapfraw: map $name as a one-dimensional
# byte piddle whose length is the size of the file as reported by stat.
# The caller's option hash is copied, never modified.
sub PDL::maptextfraw {
    my ($class, $name, $opts) = @_;

    # If stat fails the size is unknown; fall back to a zero-length
    # dimension rather than passing undef on.
    my $size = (stat $name)[7];
    $size = 0 unless defined $size;

    my %map_opts = (
        %{ $opts || {} },
        Dims     => [$size],
        Datatype => $PDL::Types::PDL_B,
    );
    return PDL::mapfraw($class, $name, \%map_opts);
}

1;