1 /**********************************************************************
2  *
3  * Project:  CPL - Common Portability Library
4  * Purpose:  Implement VSI large file api for HDFS
5  * Author:   James McClain, <jmcclain@azavea.com>
6  *
7  **********************************************************************
8  * Copyright (c) 2010-2015, Even Rouault <even dot rouault at spatialys.com>
9  * Copyright (c) 2018, Azavea
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 //! @cond Doxygen_Suppress
31 
#include <string>

#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>

#if !defined(_MSC_VER)
#include <unistd.h>
#endif

#include <cerrno>
#include <climits>
#include <cstring>

#include "cpl_port.h"
#include "cpl_vsi.h"

#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_vsi_virtual.h"
51 
52 CPL_CVSID("$Id: cpl_vsil_hdfs.cpp a044c83f8091becdd11e27be6e9c08d0d3478126 2021-02-24 11:38:17 +0100 Even Rouault $")
53 
54 #ifdef HDFS_ENABLED
55 
56 #include "hdfs.h"
57 
58 
59 /************************************************************************/
60 /* ==================================================================== */
61 /*                        VSIHdfsHandle                               */
62 /* ==================================================================== */
63 /************************************************************************/
64 
/*
 * Evaluate `expr` exactly once while file descriptor 2 (stderr) is
 * temporarily redirected to /dev/null.
 *
 * If stderr cannot be duplicated, /dev/null cannot be opened, or the
 * redirection itself fails, whatever descriptors were obtained are closed
 * and `expr` is still evaluated, just without the redirection.
 *
 * The body is wrapped in do { ... } while (0) so that `SILENCE(x);` forms a
 * single statement and can be used in an unbraced if/else.  As a
 * consequence `expr` must not rely on `break` or `continue` reaching an
 * enclosing loop.
 */
#define SILENCE(expr) do {\
  const int hOldStderr = dup(2);\
  const int hNewStderr = open("/dev/null", O_WRONLY);\
\
  if ((hOldStderr != -1) && (hNewStderr != -1) && (dup2(hNewStderr, 2) != -1)) {\
      close(hNewStderr);\
      expr;\
      dup2(hOldStderr, 2);\
      close(hOldStderr);\
  }\
  else {\
    if (hOldStderr != -1) close(hOldStderr);\
    if (hNewStderr != -1) close(hNewStderr);\
    expr;\
  }\
} while (0)
81 
82 class VSIHdfsHandle final : public VSIVirtualHandle
83 {
84   private:
85     CPL_DISALLOW_COPY_ASSIGN(VSIHdfsHandle)
86 
87     hdfsFile poFile = nullptr;
88     hdfsFS poFilesystem = nullptr;
89     std::string oFilename;
90     bool bEOF = false;
91 
92   public:
93 #if __cplusplus >= 201103L
94      static constexpr const char * VSIHDFS = "/vsihdfs/";
95 #else
96      static const char * VSIHDFS = "/vsihdfs/";
97 #endif
98      VSIHdfsHandle(hdfsFile poFile,
99                    hdfsFS poFilesystem,
100                    const char * pszFilename,
101                    bool bReadOnly);
102     ~VSIHdfsHandle() override;
103 
104     int Seek(vsi_l_offset nOffset, int nWhence) override;
105     vsi_l_offset Tell() override;
106     size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override;
107     size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override;
108     vsi_l_offset Length();
109     int Eof() override;
110     int Flush() override;
111     int Close() override;
112 };
113 
VSIHdfsHandle(hdfsFile _poFile,hdfsFS _poFilesystem,const char * pszFilename,bool)114 VSIHdfsHandle::VSIHdfsHandle(hdfsFile _poFile,
115                              hdfsFS _poFilesystem,
116                              const char * pszFilename,
117                              bool /*_bReadOnly*/)
118   : poFile(_poFile), poFilesystem(_poFilesystem), oFilename(pszFilename)
119 {}
120 
~VSIHdfsHandle()121 VSIHdfsHandle::~VSIHdfsHandle()
122 {
123   Close();
124 }
125 
Seek(vsi_l_offset nOffset,int nWhence)126 int VSIHdfsHandle::Seek(vsi_l_offset nOffset, int nWhence)
127 {
128   bEOF = false;
129   switch(nWhence) {
130   case SEEK_SET:
131     return hdfsSeek(poFilesystem, poFile, nOffset);
132   case SEEK_CUR:
133     return hdfsSeek(poFilesystem,
134                     poFile,
135                     nOffset + Tell());
136   case SEEK_END:
137     return hdfsSeek(poFilesystem,
138                     poFile,
139                     static_cast<tOffset>(Length()) - nOffset);
140   default:
141     return -1;
142   }
143 }
144 
145 vsi_l_offset
Tell()146 VSIHdfsHandle::Tell()
147 {
148   return hdfsTell(poFilesystem, poFile);
149 }
150 
151 size_t
Read(void * pBuffer,size_t nSize,size_t nMemb)152 VSIHdfsHandle::Read(void *pBuffer, size_t nSize, size_t nMemb)
153 {
154   if (nSize == 0 || nMemb == 0)
155     return 0;
156 
157   size_t bytes_wanted = nSize * nMemb;
158   size_t bytes_read = 0;
159 
160   while (bytes_read < bytes_wanted)
161     {
162       tSize bytes = 0;
163       size_t bytes_to_request = bytes_wanted - bytes_read;
164 
165       // The `Read` function can take 64-bit arguments for its
166       // read-request size, whereas `hdfsRead` may only take a 32-bit
167       // argument.  If the former requests an amount larger than can
168       // be encoded in a signed 32-bit number, break the request into
169       // 2GB batches.
170       bytes = hdfsRead(poFilesystem, poFile,
171                        static_cast<char*>(pBuffer) + bytes_read,
172                        bytes_to_request > INT_MAX ? INT_MAX : bytes_to_request);
173 
174       if (bytes > 0) {
175         bytes_read += bytes;
176       }
177       if (bytes == 0) {
178         bEOF = true;
179         return bytes_read/nSize;
180       }
181       else if (bytes < 0) {
182         bEOF = false;
183         return 0;
184       }
185     }
186 
187   return bytes_read/nSize;
188 }
189 
190 size_t
Write(const void *,size_t,size_t)191 VSIHdfsHandle::Write(const void *, size_t, size_t)
192 {
193   CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
194   return -1;
195 }
196 
197 vsi_l_offset
Length()198 VSIHdfsHandle::Length()
199 {
200   hdfsFileInfo * poInfo = hdfsGetPathInfo(poFilesystem, oFilename.c_str());
201   if (poInfo != nullptr) {
202     tOffset nSize = poInfo->mSize;
203     hdfsFreeFileInfo(poInfo, 1);
204     return static_cast<vsi_l_offset>(nSize);
205   }
206   return -1;
207 }
208 
209 int
Eof()210 VSIHdfsHandle::Eof()
211 {
212   return bEOF;
213 }
214 
215 int
Flush()216 VSIHdfsHandle::Flush()
217 {
218   return hdfsFlush(poFilesystem, poFile);
219 }
220 
221 int
Close()222 VSIHdfsHandle::Close()
223 {
224   int retval = 0;
225 
226   if (poFilesystem != nullptr && poFile != nullptr)
227     retval = hdfsCloseFile(poFilesystem, poFile);
228   poFile = nullptr;
229   poFilesystem = nullptr;
230 
231   return retval;
232 }
233 
234 class VSIHdfsFilesystemHandler final : public VSIFilesystemHandler
235 {
236   private:
237     CPL_DISALLOW_COPY_ASSIGN(VSIHdfsFilesystemHandler)
238 
239     hdfsFS poFilesystem = nullptr;
240     CPLMutex *hMutex = nullptr;
241 
242   public:
243     VSIHdfsFilesystemHandler();
244     ~VSIHdfsFilesystemHandler() override;
245 
246     void EnsureFilesystem();
247     VSIVirtualHandle *Open(const char *pszFilename,
248                            const char *pszAccess,
249                            bool bSetError,
250                            CSLConstList /* papszOptions */) override;
251     int Stat(const char *pszFilename,
252              VSIStatBufL *pStatBuf,
253              int nFlags) override;
254     int Unlink(const char *pszFilename) override;
255     int Mkdir(const char *pszDirname, long nMode) override;
256     int Rmdir(const char *pszDirname) override;
257     char ** ReadDir(const char *pszDirname) override;
258     int Rename(const char *oldpath, const char *newpath) override;
259 
260 };
261 
VSIHdfsFilesystemHandler()262 VSIHdfsFilesystemHandler::VSIHdfsFilesystemHandler()
263 {}
264 
~VSIHdfsFilesystemHandler()265 VSIHdfsFilesystemHandler::~VSIHdfsFilesystemHandler()
266 {
267   if(hMutex != nullptr) {
268     CPLDestroyMutex(hMutex);
269     hMutex = nullptr;
270   }
271 
272   if (poFilesystem != nullptr)
273     hdfsDisconnect(poFilesystem);
274   poFilesystem = nullptr;
275 }
276 
277 void
EnsureFilesystem()278 VSIHdfsFilesystemHandler::EnsureFilesystem() {
279   CPLMutexHolder oHolder(&hMutex);
280   if (poFilesystem == nullptr)
281     poFilesystem = hdfsConnect("default", 0);
282 }
283 
284 VSIVirtualHandle *
Open(const char * pszFilename,const char * pszAccess,bool,CSLConstList)285 VSIHdfsFilesystemHandler::Open( const char *pszFilename,
286                                 const char *pszAccess,
287                                 bool,
288                                 CSLConstList /* papszOptions */ )
289 {
290   EnsureFilesystem();
291 
292   if (strchr(pszAccess, 'w') != nullptr || strchr(pszAccess, 'a') != nullptr) {
293     CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
294     return nullptr;
295   }
296 
297   if (strncmp(pszFilename, VSIHdfsHandle::VSIHDFS, strlen(VSIHdfsHandle::VSIHDFS)) != 0) {
298     return nullptr;
299   }
300   else {
301     const char * pszPath = pszFilename + strlen(VSIHdfsHandle::VSIHDFS);
302 
303     // Open HDFS file, sending Java stack traces to /dev/null.
304     hdfsFile poFile = nullptr;
305     SILENCE(poFile = hdfsOpenFile(poFilesystem, pszPath, O_RDONLY, 0, 0, 0));
306 
307     if (poFile != nullptr) {
308       VSIHdfsHandle * poHandle = new VSIHdfsHandle(poFile, poFilesystem, pszPath, true);
309       return poHandle;
310     }
311   }
312   return nullptr;
313 }
314 
315 int
Stat(const char * pszeFilename,VSIStatBufL * pStatBuf,int)316 VSIHdfsFilesystemHandler::Stat( const char *pszeFilename, VSIStatBufL *pStatBuf, int)
317 {
318   EnsureFilesystem();
319 
320   hdfsFileInfo * poInfo = hdfsGetPathInfo(poFilesystem, pszeFilename + strlen(VSIHdfsHandle::VSIHDFS));
321 
322   if (poInfo != nullptr) {
323     pStatBuf->st_dev = static_cast<dev_t>(0);                               /* ID of device containing file */
324     pStatBuf->st_ino = static_cast<ino_t>(0);                               /* inode number */
325     switch(poInfo->mKind) {                                                 /* protection */
326     case tObjectKind::kObjectKindFile:
327       pStatBuf->st_mode = S_IFREG;
328       break;
329     case tObjectKind::kObjectKindDirectory:
330       pStatBuf->st_mode = S_IFDIR;
331       break;
332     default:
333       CPLError(CE_Failure, CPLE_AppDefined, "Unrecognized object kind");
334     }
335     pStatBuf->st_nlink = static_cast<nlink_t>(0);                           /* number of hard links */
336     pStatBuf->st_uid = getuid();                                            /* user ID of owner */
337     pStatBuf->st_gid = getgid();                                            /* group ID of owner */
338     pStatBuf->st_rdev = static_cast<dev_t>(0);                              /* device ID (if special file) */
339     pStatBuf->st_size = static_cast<off_t>(poInfo->mSize);                  /* total size, in bytes */
340     pStatBuf->st_blksize = static_cast<blksize_t>(poInfo->mBlockSize);      /* blocksize for filesystem I/O */
341     pStatBuf->st_blocks = static_cast<blkcnt_t>((poInfo->mBlockSize>>9)+1); /* number of 512B blocks allocated */
342     pStatBuf->st_atime = static_cast<time_t>(poInfo->mLastAccess);          /* time of last access */
343     pStatBuf->st_mtime = static_cast<time_t>(poInfo->mLastMod);             /* time of last modification */
344     pStatBuf->st_ctime = static_cast<time_t>(poInfo->mLastMod);             /* time of last status change */
345     hdfsFreeFileInfo(poInfo, 1);
346     return 0;
347   }
348 
349   return -1;
350 }
351 
352 int
Unlink(const char *)353 VSIHdfsFilesystemHandler::Unlink(const char *)
354 {
355   CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
356   return -1;
357 }
358 
359 int
Mkdir(const char *,long)360 VSIHdfsFilesystemHandler::Mkdir(const char *, long)
361 {
362   CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
363   return -1;
364 }
365 
366 int
Rmdir(const char *)367 VSIHdfsFilesystemHandler::Rmdir(const char *)
368 {
369   CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
370   return -1;
371 }
372 
373 char **
ReadDir(const char * pszDirname)374 VSIHdfsFilesystemHandler::ReadDir(const char *pszDirname)
375 {
376   EnsureFilesystem();
377 
378   int mEntries = 0;
379   hdfsFileInfo * paoInfo = hdfsListDirectory(poFilesystem, pszDirname + strlen(VSIHdfsHandle::VSIHDFS), &mEntries);
380   char ** retval = nullptr;
381 
382   if (paoInfo != nullptr) {
383     CPLStringList aosNames;
384     for (int i = 0; i < mEntries; ++i)
385       aosNames.AddString(paoInfo[i].mName);
386     retval = aosNames.StealList();
387     hdfsFreeFileInfo(paoInfo, mEntries);
388     return retval;
389   }
390   return nullptr;
391 }
392 
393 int
Rename(const char *,const char *)394 VSIHdfsFilesystemHandler::Rename(const char *, const char *)
395 {
396   CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
397   return -1;
398 }
399 
400 #endif
401 
402 //! @endcond
403 
404 #ifdef HDFS_ENABLED
405 
406 /************************************************************************/
407 /*                       VSIInstallHdfsHandler()                        */
408 /************************************************************************/
409 
410 /**
411  * \brief Install /vsihdfs/ file system handler (requires JVM and HDFS support)
412  *
413  * @since GDAL 2.4.0
414  */
VSIInstallHdfsHandler()415 void VSIInstallHdfsHandler()
416 {
417     VSIFileManager::InstallHandler(VSIHdfsHandle::VSIHDFS, new VSIHdfsFilesystemHandler);
418 }
419 
420 #else
421 
422 /************************************************************************/
423 /*                       VSIInstallHdfsHandler()                        */
424 /************************************************************************/
425 
/**
 * \brief Install /vsihdfs/ file system handler (non-functional stub)
 *
 * Used when HDFS support is not compiled in; calling it does nothing.
 *
 * @since GDAL 2.4.0
 */
void VSIInstallHdfsHandler()
{
    // Not supported.
}
435 
436 #endif
437