1 /******************************************************************************
2  * $Id: rdataset.cpp 27942 2014-11-11 00:57:41Z rouault $
3  *
4  * Project:  R Format Driver
5  * Purpose:  Read/write R stats package object format.
6  * Author:   Frank Warmerdam, warmerdam@pobox.com
7  *
8  ******************************************************************************
9  * Copyright (c) 2009, Frank Warmerdam <warmerdam@pobox.com>
10  * Copyright (c) 2009-2010, Even Rouault <even dot rouault at mines-paris dot org>
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a
13  * copy of this software and associated documentation files (the "Software"),
14  * to deal in the Software without restriction, including without limitation
15  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
16  * and/or sell copies of the Software, and to permit persons to whom the
17  * Software is furnished to do so, subject to the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included
20  * in all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  ****************************************************************************/
30 
31 #include "gdal_pam.h"
32 #include "cpl_string.h"
33 #include "../raw/rawdataset.h"
34 
35 CPL_CVSID("$Id: rdataset.cpp 27942 2014-11-11 00:57:41Z rouault $");
36 
37 CPL_C_START
38 void    GDALRegister_R(void);
39 CPL_C_END
40 
41 GDALDataset *
42 RCreateCopy( const char * pszFilename, GDALDataset *poSrcDS,
43              int bStrict, char ** papszOptions,
44              GDALProgressFunc pfnProgress, void * pProgressData );
45 
46 #define R_NILSXP        0
47 #define R_LISTSXP       2
48 #define R_CHARSXP       9
49 #define R_INTSXP        13
50 #define R_REALSXP       14
51 #define R_STRSXP        16
52 
53 /************************************************************************/
54 /* ==================================================================== */
55 /*                               RDataset                               */
56 /* ==================================================================== */
57 /************************************************************************/
58 
59 class RDataset : public GDALPamDataset
60 {
61     friend class RRasterBand;
62     VSILFILE       *fp;
63     int         bASCII;
64     CPLString   osLastStringRead;
65 
66     vsi_l_offset nStartOfData;
67 
68     double     *padfMatrixValues;
69 
70     const char *ASCIIFGets();
71     int         ReadInteger();
72     double      ReadFloat();
73     const char *ReadString();
74     int         ReadPair( CPLString &osItemName, int &nItemType );
75 
76   public:
77                 RDataset();
78                 ~RDataset();
79 
80     static GDALDataset  *Open( GDALOpenInfo * );
81     static int          Identify( GDALOpenInfo * );
82 };
83 
84 /************************************************************************/
85 /* ==================================================================== */
86 /*                            RRasterBand                               */
87 /* ==================================================================== */
88 /************************************************************************/
89 
90 class RRasterBand : public GDALPamRasterBand
91 {
92     friend class RDataset;
93 
94     const double *padfMatrixValues;
95 
96   public:
97 
98                 RRasterBand( RDataset *, int, const double * );
99                 ~RRasterBand();
100 
101     virtual CPLErr          IReadBlock( int, int, void * );
102 };
103 
104 /************************************************************************/
105 /*                            RRasterBand()                             */
106 /************************************************************************/
107 
RRasterBand(RDataset * poDS,int nBand,const double * padfMatrixValues)108 RRasterBand::RRasterBand( RDataset *poDS, int nBand,
109                           const double *padfMatrixValues )
110 {
111     this->poDS = poDS;
112     this->nBand = nBand;
113     this->padfMatrixValues = padfMatrixValues;
114 
115     eDataType = GDT_Float64;
116 
117     nBlockXSize = poDS->nRasterXSize;
118     nBlockYSize = 1;
119 }
120 
121 /************************************************************************/
122 /*                            ~RRasterBand()                            */
123 /************************************************************************/
124 
~RRasterBand()125 RRasterBand::~RRasterBand()
126 {
127 }
128 
129 /************************************************************************/
130 /*                             IReadBlock()                             */
131 /************************************************************************/
132 
IReadBlock(CPL_UNUSED int nBlockXOff,int nBlockYOff,void * pImage)133 CPLErr RRasterBand::IReadBlock( CPL_UNUSED int nBlockXOff,
134                                 int nBlockYOff,
135                                 void * pImage )
136 {
137     memcpy( pImage, padfMatrixValues + nBlockYOff * nBlockXSize,
138             nBlockXSize * 8 );
139     return CE_None;
140 }
141 
142 /************************************************************************/
143 /* ==================================================================== */
144 /*                              RDataset()                              */
145 /* ==================================================================== */
146 /************************************************************************/
147 
148 /************************************************************************/
149 /*                              RDataset()                              */
150 /************************************************************************/
151 
RDataset()152 RDataset::RDataset()
153 {
154     fp = NULL;
155     padfMatrixValues = NULL;
156 }
157 
158 /************************************************************************/
159 /*                             ~RDataset()                              */
160 /************************************************************************/
161 
~RDataset()162 RDataset::~RDataset()
163 {
164     FlushCache();
165     CPLFree(padfMatrixValues);
166 
167     if( fp )
168         VSIFCloseL( fp );
169 }
170 
171 /************************************************************************/
172 /*                             ASCIIFGets()                             */
173 /*                                                                      */
174 /*      Fetch one line from an ASCII source into osLastStringRead.      */
175 /************************************************************************/
176 
ASCIIFGets()177 const char *RDataset::ASCIIFGets()
178 
179 {
180     char chNextChar;
181 
182     osLastStringRead.resize(0);
183 
184     do
185     {
186         chNextChar = '\n';
187         VSIFReadL( &chNextChar, 1, 1, fp );
188         if( chNextChar != '\n' )
189             osLastStringRead += chNextChar;
190     } while( chNextChar != '\n' && chNextChar != '\0' );
191 
192     return osLastStringRead;
193 }
194 
195 /************************************************************************/
196 /*                            ReadInteger()                             */
197 /************************************************************************/
198 
ReadInteger()199 int RDataset::ReadInteger()
200 
201 {
202     if( bASCII )
203     {
204         return atoi(ASCIIFGets());
205     }
206     else
207     {
208         GInt32  nValue;
209 
210         if( VSIFReadL( &nValue, 4, 1, fp ) != 1 )
211             return -1;
212         CPL_MSBPTR32( &nValue );
213 
214         return nValue;
215     }
216 }
217 
218 /************************************************************************/
219 /*                             ReadFloat()                              */
220 /************************************************************************/
221 
ReadFloat()222 double RDataset::ReadFloat()
223 
224 {
225     if( bASCII )
226     {
227         return CPLAtof(ASCIIFGets());
228     }
229     else
230     {
231         double  dfValue;
232 
233         if( VSIFReadL( &dfValue, 8, 1, fp ) != 1 )
234             return -1;
235         CPL_MSBPTR64( &dfValue );
236 
237         return dfValue;
238     }
239 }
240 
241 /************************************************************************/
242 /*                             ReadString()                             */
243 /************************************************************************/
244 
ReadString()245 const char *RDataset::ReadString()
246 
247 {
248     if( ReadInteger() % 256 != R_CHARSXP )
249     {
250         osLastStringRead = "";
251         return "";
252     }
253 
254     size_t nLen = ReadInteger();
255 
256     char *pachWrkBuf = (char *) VSIMalloc(nLen);
257     if (pachWrkBuf == NULL)
258     {
259         osLastStringRead = "";
260         return "";
261     }
262     if( VSIFReadL( pachWrkBuf, 1, nLen, fp ) != nLen )
263     {
264         osLastStringRead = "";
265         CPLFree( pachWrkBuf );
266         return "";
267     }
268 
269     if( bASCII )
270     {
271         /* suck up newline and any extra junk */
272         ASCIIFGets();
273     }
274 
275     osLastStringRead.assign( pachWrkBuf, nLen );
276     CPLFree( pachWrkBuf );
277 
278     return osLastStringRead;
279 }
280 
281 /************************************************************************/
282 /*                              ReadPair()                              */
283 /************************************************************************/
284 
ReadPair(CPLString & osObjName,int & nObjCode)285 int RDataset::ReadPair( CPLString &osObjName, int &nObjCode )
286 
287 {
288     nObjCode = ReadInteger();
289     if( nObjCode == 254 )
290         return TRUE;
291 
292     if( (nObjCode % 256) != R_LISTSXP )
293     {
294         CPLError( CE_Failure, CPLE_OpenFailed,
295                   "Did not find expected object pair object." );
296         return FALSE;
297     }
298 
299     int nPairCount = ReadInteger();
300     if( nPairCount != 1 )
301     {
302         CPLError( CE_Failure, CPLE_OpenFailed,
303                   "Did not find expected pair count of 1." );
304         return FALSE;
305     }
306 
307 /* -------------------------------------------------------------------- */
308 /*      Read the object name.                                           */
309 /* -------------------------------------------------------------------- */
310     const char *pszName = ReadString();
311     if( pszName == NULL || pszName[0] == '\0' )
312         return FALSE;
313 
314     osObjName = pszName;
315 
316 /* -------------------------------------------------------------------- */
317 /*      Confirm that we have a numeric matrix object.                   */
318 /* -------------------------------------------------------------------- */
319     nObjCode = ReadInteger();
320 
321     return TRUE;
322 }
323 
324 /************************************************************************/
325 /*                              Identify()                              */
326 /************************************************************************/
327 
Identify(GDALOpenInfo * poOpenInfo)328 int RDataset::Identify( GDALOpenInfo *poOpenInfo )
329 {
330     if( poOpenInfo->nHeaderBytes < 50 )
331         return FALSE;
332 
333 /* -------------------------------------------------------------------- */
334 /*      If the extension is .rda and the file type is gzip              */
335 /*      compressed we assume it is a gziped R binary file.              */
336 /* -------------------------------------------------------------------- */
337     if( memcmp(poOpenInfo->pabyHeader,"\037\213\b",3) == 0
338         && EQUAL(CPLGetExtension(poOpenInfo->pszFilename),"rda") )
339         return TRUE;
340 
341 /* -------------------------------------------------------------------- */
342 /*      Is this an ASCII or XDR binary R file?                          */
343 /* -------------------------------------------------------------------- */
344     if( !EQUALN((const char *)poOpenInfo->pabyHeader,"RDA2\nA\n",7)
345         && !EQUALN((const char *)poOpenInfo->pabyHeader,"RDX2\nX\n",7) )
346         return FALSE;
347 
348     return TRUE;
349 }
350 
351 /************************************************************************/
352 /*                                Open()                                */
353 /************************************************************************/
354 
Open(GDALOpenInfo * poOpenInfo)355 GDALDataset *RDataset::Open( GDALOpenInfo * poOpenInfo )
356 {
357     if( !Identify( poOpenInfo ) )
358         return NULL;
359 
360 /* -------------------------------------------------------------------- */
361 /*      Confirm the requested access is supported.                      */
362 /* -------------------------------------------------------------------- */
363     if( poOpenInfo->eAccess == GA_Update )
364     {
365         CPLError( CE_Failure, CPLE_NotSupported,
366                   "The R driver does not support update access to existing"
367                   " datasets.\n" );
368         return NULL;
369     }
370 
371 /* -------------------------------------------------------------------- */
372 /*      Do we need to route the file through the decompression          */
373 /*      machinery?                                                      */
374 /* -------------------------------------------------------------------- */
375     CPLString osAdjustedFilename;
376 
377     if( memcmp(poOpenInfo->pabyHeader,"\037\213\b",3) == 0 )
378         osAdjustedFilename = "/vsigzip/";
379 
380     osAdjustedFilename += poOpenInfo->pszFilename;
381 
382 /* -------------------------------------------------------------------- */
383 /*      Establish this as a dataset and open the file using VSI*L.      */
384 /* -------------------------------------------------------------------- */
385     RDataset *poDS = new RDataset();
386 
387     poDS->fp = VSIFOpenL( osAdjustedFilename, "r" );
388     if( poDS->fp == NULL )
389     {
390         delete poDS;
391         return NULL;
392     }
393 
394     poDS->bASCII = EQUALN((const char *)poOpenInfo->pabyHeader,"RDA2\nA\n",7);
395 
396 /* -------------------------------------------------------------------- */
397 /*      Confirm this is a version 2 file.                               */
398 /* -------------------------------------------------------------------- */
399     VSIFSeekL( poDS->fp, 7, SEEK_SET );
400     if( poDS->ReadInteger() != R_LISTSXP )
401     {
402         delete poDS;
403         CPLError( CE_Failure, CPLE_OpenFailed,
404                   "It appears %s is not a version 2 R object file after all!",
405                   poOpenInfo->pszFilename );
406         return NULL;
407     }
408 
409 /* -------------------------------------------------------------------- */
410 /*      Skip the version values.                                        */
411 /* -------------------------------------------------------------------- */
412     poDS->ReadInteger();
413     poDS->ReadInteger();
414 
415 /* -------------------------------------------------------------------- */
416 /*      Confirm we have a numeric vector object in a pairlist.          */
417 /* -------------------------------------------------------------------- */
418     CPLString osObjName;
419     int nObjCode;
420 
421     if( !poDS->ReadPair( osObjName, nObjCode ) )
422     {
423         delete poDS;
424         return NULL;
425     }
426 
427     if( nObjCode % 256 != R_REALSXP )
428     {
429         delete poDS;
430         CPLError( CE_Failure, CPLE_OpenFailed,
431                   "Failed to find expected numeric vector object." );
432         return NULL;
433     }
434 
435     poDS->SetMetadataItem( "R_OBJECT_NAME", osObjName );
436 
437 /* -------------------------------------------------------------------- */
438 /*      Read the count.                                                 */
439 /* -------------------------------------------------------------------- */
440     int nValueCount = poDS->ReadInteger();
441 
442     poDS->nStartOfData = VSIFTellL( poDS->fp );
443 
444 /* -------------------------------------------------------------------- */
445 /*      Read/Skip ahead to attributes.                                  */
446 /* -------------------------------------------------------------------- */
447     if( poDS->bASCII )
448     {
449         poDS->padfMatrixValues = (double*) VSIMalloc2( nValueCount, sizeof(double) );
450         if (poDS->padfMatrixValues == NULL)
451         {
452             CPLError(CE_Failure, CPLE_AppDefined,
453                      "Cannot allocate %d doubles", nValueCount);
454             delete poDS;
455             return NULL;
456         }
457         for( int iValue = 0; iValue < nValueCount; iValue++ )
458             poDS->padfMatrixValues[iValue] = poDS->ReadFloat();
459     }
460     else
461     {
462         VSIFSeekL( poDS->fp, 8 * nValueCount, SEEK_CUR );
463     }
464 
465 /* -------------------------------------------------------------------- */
466 /*      Read pairs till we run out, trying to find a few items that     */
467 /*      have special meaning to us.                                     */
468 /* -------------------------------------------------------------------- */
469     poDS->nRasterXSize = poDS->nRasterYSize = 0;
470     int nBandCount = 0;
471 
472     while( poDS->ReadPair( osObjName, nObjCode ) && nObjCode != 254 )
473     {
474         if( osObjName == "dim" && nObjCode % 256 == R_INTSXP )
475         {
476             int nCount = poDS->ReadInteger();
477             if( nCount == 2 )
478             {
479                 poDS->nRasterXSize = poDS->ReadInteger();
480                 poDS->nRasterYSize = poDS->ReadInteger();
481                 nBandCount = 1;
482             }
483             else if( nCount == 3 )
484             {
485                 poDS->nRasterXSize = poDS->ReadInteger();
486                 poDS->nRasterYSize = poDS->ReadInteger();
487                 nBandCount = poDS->ReadInteger();
488             }
489             else
490             {
491                 CPLError( CE_Failure, CPLE_AppDefined,
492                           "R 'dim' dimension wrong." );
493                 delete poDS;
494                 return NULL;
495             }
496         }
497         else if( nObjCode % 256 == R_REALSXP )
498         {
499             int nCount = poDS->ReadInteger();
500             while( nCount-- > 0 && !VSIFEofL(poDS->fp) )
501                 poDS->ReadFloat();
502         }
503         else if( nObjCode % 256 == R_INTSXP )
504         {
505             int nCount = poDS->ReadInteger();
506             while( nCount-- > 0 && !VSIFEofL(poDS->fp) )
507                 poDS->ReadInteger();
508         }
509         else if( nObjCode % 256 == R_STRSXP )
510         {
511             int nCount = poDS->ReadInteger();
512             while( nCount-- > 0 && !VSIFEofL(poDS->fp) )
513                 poDS->ReadString();
514         }
515         else if( nObjCode % 256 == R_CHARSXP )
516         {
517             poDS->ReadString();
518         }
519     }
520 
521     if( poDS->nRasterXSize == 0 )
522     {
523         delete poDS;
524         CPLError( CE_Failure, CPLE_AppDefined,
525                   "Failed to find dim dimension information for R dataset." );
526         return NULL;
527     }
528 
529     if (!GDALCheckDatasetDimensions(poDS->nRasterXSize, poDS->nRasterYSize) ||
530         !GDALCheckBandCount(nBandCount, TRUE))
531     {
532         delete poDS;
533         return NULL;
534     }
535 
536     if( nValueCount
537         < ((GIntBig) nBandCount) * poDS->nRasterXSize * poDS->nRasterYSize )
538     {
539         CPLError( CE_Failure, CPLE_AppDefined,
540                   "Not enough pixel data." );
541         delete poDS;
542         return NULL;
543     }
544 
545 /* -------------------------------------------------------------------- */
546 /*      Create the raster band object(s).                               */
547 /* -------------------------------------------------------------------- */
548     for( int iBand = 0; iBand < nBandCount; iBand++ )
549     {
550         GDALRasterBand *poBand;
551 
552         if( poDS->bASCII )
553             poBand = new RRasterBand( poDS, iBand+1,
554                                       poDS->padfMatrixValues + iBand * poDS->nRasterXSize * poDS->nRasterYSize );
555         else
556             poBand = new RawRasterBand( poDS, iBand+1, poDS->fp,
557                                         poDS->nStartOfData
558                                         + poDS->nRasterXSize*poDS->nRasterYSize*8*iBand,
559                                         8, poDS->nRasterXSize * 8,
560                                         GDT_Float64, !CPL_IS_LSB,
561                                         TRUE, FALSE );
562 
563         poDS->SetBand( iBand+1, poBand );
564     }
565 
566 /* -------------------------------------------------------------------- */
567 /*      Initialize any PAM information.                                 */
568 /* -------------------------------------------------------------------- */
569     poDS->SetDescription( poOpenInfo->pszFilename );
570     poDS->TryLoadXML();
571 
572 /* -------------------------------------------------------------------- */
573 /*      Check for overviews.                                            */
574 /* -------------------------------------------------------------------- */
575     poDS->oOvManager.Initialize( poDS, poOpenInfo->pszFilename );
576 
577     return( poDS );
578 }
579 
580 /************************************************************************/
581 /*                        GDALRegister_R()                              */
582 /************************************************************************/
583 
GDALRegister_R()584 void GDALRegister_R()
585 
586 {
587     GDALDriver  *poDriver;
588 
589     if( GDALGetDriverByName( "R" ) == NULL )
590     {
591         poDriver = new GDALDriver();
592 
593         poDriver->SetDescription( "R" );
594         poDriver->SetMetadataItem( GDAL_DCAP_RASTER, "YES" );
595         poDriver->SetMetadataItem( GDAL_DMD_LONGNAME,
596                                    "R Object Data Store" );
597         poDriver->SetMetadataItem( GDAL_DMD_HELPTOPIC,
598                                    "frmt_r.html" );
599         poDriver->SetMetadataItem( GDAL_DMD_EXTENSION, "rda" );
600         poDriver->SetMetadataItem( GDAL_DMD_CREATIONDATATYPES, "Float32" );
601         poDriver->SetMetadataItem( GDAL_DMD_CREATIONOPTIONLIST,
602 "<CreationOptionList>"
603 "   <Option name='ASCII' type='boolean' description='For ASCII output, default NO'/>"
604 "   <Option name='COMPRESS' type='boolean' description='Produced Compressed output, default YES'/>"
605 "</CreationOptionList>" );
606 
607         poDriver->SetMetadataItem( GDAL_DCAP_VIRTUALIO, "YES" );
608 
609         poDriver->pfnOpen = RDataset::Open;
610         poDriver->pfnIdentify = RDataset::Identify;
611         poDriver->pfnCreateCopy = RCreateCopy;
612 
613         GetGDALDriverManager()->RegisterDriver( poDriver );
614     }
615 }
616