1 /**********************************************************************
2 *
3 * Project: CPL - Common Portability Library
4 * Purpose: Implement VSI large file api for HDFS
5 * Author: James McClain, <jmcclain@azavea.com>
6 *
7 **********************************************************************
8 * Copyright (c) 2010-2015, Even Rouault <even dot rouault at spatialys.com>
9 * Copyright (c) 2018, Azavea
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 ****************************************************************************/
29
30 //! @cond Doxygen_Suppress
31
32 #include <string>
33
34 #include <fcntl.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37
38 #if !defined(_MSC_VER)
39 #include <unistd.h>
40 #endif
41
42 #include <cstring>
43 #include <climits>
44
45 #include "cpl_port.h"
46 #include "cpl_vsi.h"
47
48 #include "cpl_conv.h"
49 #include "cpl_error.h"
50 #include "cpl_vsi_virtual.h"
51
52 CPL_CVSID("$Id: cpl_vsil_hdfs.cpp a044c83f8091becdd11e27be6e9c08d0d3478126 2021-02-24 11:38:17 +0100 Even Rouault $")
53
54 #ifdef HDFS_ENABLED
55
56 #include "hdfs.h"
57
58
59 /************************************************************************/
60 /* ==================================================================== */
61 /* VSIHdfsHandle */
62 /* ==================================================================== */
63 /************************************************************************/
64
/**
 * Evaluate `expr` while file descriptor 2 (stderr) is redirected to
 * /dev/null, then put the previous stderr back.
 *
 * If stderr cannot be saved or redirected (dup(), open() or dup2() fails),
 * whichever descriptor was obtained is closed and `expr` is still evaluated,
 * just without being silenced. `expr` is evaluated exactly once either way.
 *
 * The body is wrapped in do { ... } while (0) so that `SILENCE(x);` is a
 * single statement and can be used safely in an unbraced if/else.
 *
 * NOTE(review): descriptor 2 is shared by the whole process, so stderr
 * output from other threads is presumably discarded too while `expr` runs.
 */
#define SILENCE(expr) do {\
    const int hOldStderr = dup(2);\
    const int hNewStderr = open("/dev/null", O_WRONLY);\
    if ((hOldStderr != -1) && (hNewStderr != -1) && (dup2(hNewStderr, 2) != -1)) {\
        close(hNewStderr);\
        expr;\
        dup2(hOldStderr, 2);\
        close(hOldStderr);\
    }\
    else {\
        if (hOldStderr != -1) close(hOldStderr);\
        if (hNewStderr != -1) close(hNewStderr);\
        expr;\
    }\
} while (0)
81
82 class VSIHdfsHandle final : public VSIVirtualHandle
83 {
84 private:
85 CPL_DISALLOW_COPY_ASSIGN(VSIHdfsHandle)
86
87 hdfsFile poFile = nullptr;
88 hdfsFS poFilesystem = nullptr;
89 std::string oFilename;
90 bool bEOF = false;
91
92 public:
93 #if __cplusplus >= 201103L
94 static constexpr const char * VSIHDFS = "/vsihdfs/";
95 #else
96 static const char * VSIHDFS = "/vsihdfs/";
97 #endif
98 VSIHdfsHandle(hdfsFile poFile,
99 hdfsFS poFilesystem,
100 const char * pszFilename,
101 bool bReadOnly);
102 ~VSIHdfsHandle() override;
103
104 int Seek(vsi_l_offset nOffset, int nWhence) override;
105 vsi_l_offset Tell() override;
106 size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override;
107 size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override;
108 vsi_l_offset Length();
109 int Eof() override;
110 int Flush() override;
111 int Close() override;
112 };
113
VSIHdfsHandle(hdfsFile _poFile,hdfsFS _poFilesystem,const char * pszFilename,bool)114 VSIHdfsHandle::VSIHdfsHandle(hdfsFile _poFile,
115 hdfsFS _poFilesystem,
116 const char * pszFilename,
117 bool /*_bReadOnly*/)
118 : poFile(_poFile), poFilesystem(_poFilesystem), oFilename(pszFilename)
119 {}
120
~VSIHdfsHandle()121 VSIHdfsHandle::~VSIHdfsHandle()
122 {
123 Close();
124 }
125
Seek(vsi_l_offset nOffset,int nWhence)126 int VSIHdfsHandle::Seek(vsi_l_offset nOffset, int nWhence)
127 {
128 bEOF = false;
129 switch(nWhence) {
130 case SEEK_SET:
131 return hdfsSeek(poFilesystem, poFile, nOffset);
132 case SEEK_CUR:
133 return hdfsSeek(poFilesystem,
134 poFile,
135 nOffset + Tell());
136 case SEEK_END:
137 return hdfsSeek(poFilesystem,
138 poFile,
139 static_cast<tOffset>(Length()) - nOffset);
140 default:
141 return -1;
142 }
143 }
144
145 vsi_l_offset
Tell()146 VSIHdfsHandle::Tell()
147 {
148 return hdfsTell(poFilesystem, poFile);
149 }
150
151 size_t
Read(void * pBuffer,size_t nSize,size_t nMemb)152 VSIHdfsHandle::Read(void *pBuffer, size_t nSize, size_t nMemb)
153 {
154 if (nSize == 0 || nMemb == 0)
155 return 0;
156
157 size_t bytes_wanted = nSize * nMemb;
158 size_t bytes_read = 0;
159
160 while (bytes_read < bytes_wanted)
161 {
162 tSize bytes = 0;
163 size_t bytes_to_request = bytes_wanted - bytes_read;
164
165 // The `Read` function can take 64-bit arguments for its
166 // read-request size, whereas `hdfsRead` may only take a 32-bit
167 // argument. If the former requests an amount larger than can
168 // be encoded in a signed 32-bit number, break the request into
169 // 2GB batches.
170 bytes = hdfsRead(poFilesystem, poFile,
171 static_cast<char*>(pBuffer) + bytes_read,
172 bytes_to_request > INT_MAX ? INT_MAX : bytes_to_request);
173
174 if (bytes > 0) {
175 bytes_read += bytes;
176 }
177 if (bytes == 0) {
178 bEOF = true;
179 return bytes_read/nSize;
180 }
181 else if (bytes < 0) {
182 bEOF = false;
183 return 0;
184 }
185 }
186
187 return bytes_read/nSize;
188 }
189
190 size_t
Write(const void *,size_t,size_t)191 VSIHdfsHandle::Write(const void *, size_t, size_t)
192 {
193 CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
194 return -1;
195 }
196
197 vsi_l_offset
Length()198 VSIHdfsHandle::Length()
199 {
200 hdfsFileInfo * poInfo = hdfsGetPathInfo(poFilesystem, oFilename.c_str());
201 if (poInfo != nullptr) {
202 tOffset nSize = poInfo->mSize;
203 hdfsFreeFileInfo(poInfo, 1);
204 return static_cast<vsi_l_offset>(nSize);
205 }
206 return -1;
207 }
208
209 int
Eof()210 VSIHdfsHandle::Eof()
211 {
212 return bEOF;
213 }
214
215 int
Flush()216 VSIHdfsHandle::Flush()
217 {
218 return hdfsFlush(poFilesystem, poFile);
219 }
220
221 int
Close()222 VSIHdfsHandle::Close()
223 {
224 int retval = 0;
225
226 if (poFilesystem != nullptr && poFile != nullptr)
227 retval = hdfsCloseFile(poFilesystem, poFile);
228 poFile = nullptr;
229 poFilesystem = nullptr;
230
231 return retval;
232 }
233
234 class VSIHdfsFilesystemHandler final : public VSIFilesystemHandler
235 {
236 private:
237 CPL_DISALLOW_COPY_ASSIGN(VSIHdfsFilesystemHandler)
238
239 hdfsFS poFilesystem = nullptr;
240 CPLMutex *hMutex = nullptr;
241
242 public:
243 VSIHdfsFilesystemHandler();
244 ~VSIHdfsFilesystemHandler() override;
245
246 void EnsureFilesystem();
247 VSIVirtualHandle *Open(const char *pszFilename,
248 const char *pszAccess,
249 bool bSetError,
250 CSLConstList /* papszOptions */) override;
251 int Stat(const char *pszFilename,
252 VSIStatBufL *pStatBuf,
253 int nFlags) override;
254 int Unlink(const char *pszFilename) override;
255 int Mkdir(const char *pszDirname, long nMode) override;
256 int Rmdir(const char *pszDirname) override;
257 char ** ReadDir(const char *pszDirname) override;
258 int Rename(const char *oldpath, const char *newpath) override;
259
260 };
261
VSIHdfsFilesystemHandler()262 VSIHdfsFilesystemHandler::VSIHdfsFilesystemHandler()
263 {}
264
~VSIHdfsFilesystemHandler()265 VSIHdfsFilesystemHandler::~VSIHdfsFilesystemHandler()
266 {
267 if(hMutex != nullptr) {
268 CPLDestroyMutex(hMutex);
269 hMutex = nullptr;
270 }
271
272 if (poFilesystem != nullptr)
273 hdfsDisconnect(poFilesystem);
274 poFilesystem = nullptr;
275 }
276
277 void
EnsureFilesystem()278 VSIHdfsFilesystemHandler::EnsureFilesystem() {
279 CPLMutexHolder oHolder(&hMutex);
280 if (poFilesystem == nullptr)
281 poFilesystem = hdfsConnect("default", 0);
282 }
283
284 VSIVirtualHandle *
Open(const char * pszFilename,const char * pszAccess,bool,CSLConstList)285 VSIHdfsFilesystemHandler::Open( const char *pszFilename,
286 const char *pszAccess,
287 bool,
288 CSLConstList /* papszOptions */ )
289 {
290 EnsureFilesystem();
291
292 if (strchr(pszAccess, 'w') != nullptr || strchr(pszAccess, 'a') != nullptr) {
293 CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
294 return nullptr;
295 }
296
297 if (strncmp(pszFilename, VSIHdfsHandle::VSIHDFS, strlen(VSIHdfsHandle::VSIHDFS)) != 0) {
298 return nullptr;
299 }
300 else {
301 const char * pszPath = pszFilename + strlen(VSIHdfsHandle::VSIHDFS);
302
303 // Open HDFS file, sending Java stack traces to /dev/null.
304 hdfsFile poFile = nullptr;
305 SILENCE(poFile = hdfsOpenFile(poFilesystem, pszPath, O_RDONLY, 0, 0, 0));
306
307 if (poFile != nullptr) {
308 VSIHdfsHandle * poHandle = new VSIHdfsHandle(poFile, poFilesystem, pszPath, true);
309 return poHandle;
310 }
311 }
312 return nullptr;
313 }
314
315 int
Stat(const char * pszeFilename,VSIStatBufL * pStatBuf,int)316 VSIHdfsFilesystemHandler::Stat( const char *pszeFilename, VSIStatBufL *pStatBuf, int)
317 {
318 EnsureFilesystem();
319
320 hdfsFileInfo * poInfo = hdfsGetPathInfo(poFilesystem, pszeFilename + strlen(VSIHdfsHandle::VSIHDFS));
321
322 if (poInfo != nullptr) {
323 pStatBuf->st_dev = static_cast<dev_t>(0); /* ID of device containing file */
324 pStatBuf->st_ino = static_cast<ino_t>(0); /* inode number */
325 switch(poInfo->mKind) { /* protection */
326 case tObjectKind::kObjectKindFile:
327 pStatBuf->st_mode = S_IFREG;
328 break;
329 case tObjectKind::kObjectKindDirectory:
330 pStatBuf->st_mode = S_IFDIR;
331 break;
332 default:
333 CPLError(CE_Failure, CPLE_AppDefined, "Unrecognized object kind");
334 }
335 pStatBuf->st_nlink = static_cast<nlink_t>(0); /* number of hard links */
336 pStatBuf->st_uid = getuid(); /* user ID of owner */
337 pStatBuf->st_gid = getgid(); /* group ID of owner */
338 pStatBuf->st_rdev = static_cast<dev_t>(0); /* device ID (if special file) */
339 pStatBuf->st_size = static_cast<off_t>(poInfo->mSize); /* total size, in bytes */
340 pStatBuf->st_blksize = static_cast<blksize_t>(poInfo->mBlockSize); /* blocksize for filesystem I/O */
341 pStatBuf->st_blocks = static_cast<blkcnt_t>((poInfo->mBlockSize>>9)+1); /* number of 512B blocks allocated */
342 pStatBuf->st_atime = static_cast<time_t>(poInfo->mLastAccess); /* time of last access */
343 pStatBuf->st_mtime = static_cast<time_t>(poInfo->mLastMod); /* time of last modification */
344 pStatBuf->st_ctime = static_cast<time_t>(poInfo->mLastMod); /* time of last status change */
345 hdfsFreeFileInfo(poInfo, 1);
346 return 0;
347 }
348
349 return -1;
350 }
351
352 int
Unlink(const char *)353 VSIHdfsFilesystemHandler::Unlink(const char *)
354 {
355 CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
356 return -1;
357 }
358
359 int
Mkdir(const char *,long)360 VSIHdfsFilesystemHandler::Mkdir(const char *, long)
361 {
362 CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
363 return -1;
364 }
365
366 int
Rmdir(const char *)367 VSIHdfsFilesystemHandler::Rmdir(const char *)
368 {
369 CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
370 return -1;
371 }
372
373 char **
ReadDir(const char * pszDirname)374 VSIHdfsFilesystemHandler::ReadDir(const char *pszDirname)
375 {
376 EnsureFilesystem();
377
378 int mEntries = 0;
379 hdfsFileInfo * paoInfo = hdfsListDirectory(poFilesystem, pszDirname + strlen(VSIHdfsHandle::VSIHDFS), &mEntries);
380 char ** retval = nullptr;
381
382 if (paoInfo != nullptr) {
383 CPLStringList aosNames;
384 for (int i = 0; i < mEntries; ++i)
385 aosNames.AddString(paoInfo[i].mName);
386 retval = aosNames.StealList();
387 hdfsFreeFileInfo(paoInfo, mEntries);
388 return retval;
389 }
390 return nullptr;
391 }
392
393 int
Rename(const char *,const char *)394 VSIHdfsFilesystemHandler::Rename(const char *, const char *)
395 {
396 CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
397 return -1;
398 }
399
400 #endif
401
402 //! @endcond
403
404 #ifdef HDFS_ENABLED
405
406 /************************************************************************/
407 /* VSIInstallHdfsHandler() */
408 /************************************************************************/
409
410 /**
411 * \brief Install /vsihdfs/ file system handler (requires JVM and HDFS support)
412 *
413 * @since GDAL 2.4.0
414 */
VSIInstallHdfsHandler()415 void VSIInstallHdfsHandler()
416 {
417 VSIFileManager::InstallHandler(VSIHdfsHandle::VSIHDFS, new VSIHdfsFilesystemHandler);
418 }
419
420 #else
421
422 /************************************************************************/
423 /* VSIInstallHdfsHandler() */
424 /************************************************************************/
425
426 /**
427 * \brief Install /vsihdfs/ file system handler (non-functional stub)
428 *
429 * @since GDAL 2.4.0
430 */
void VSIInstallHdfsHandler()
{
    // Built without HDFS_ENABLED: there is no handler to register.
}
435
436 #endif
437