1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "nacl_io/httpfs/http_fs_node.h"
6 
7 #include <assert.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <string.h>
11 
12 #include <ppapi/c/pp_errors.h>
13 
14 #include "nacl_io/httpfs/http_fs.h"
15 #include "nacl_io/http_status_codes.h"
16 #include "nacl_io/kernel_handle.h"
17 #include "nacl_io/osinttypes.h"
18 
19 #if defined(WIN32)
20 #define snprintf _snprintf
21 #endif
22 
23 namespace nacl_io {
24 
25 namespace {
26 
// A ranged read may be answered with the complete entity when the server does
// not honor the Range header. The bytes preceding the requested offset then
// have to be drained into a scratch buffer; this is the upper bound, in bytes,
// on the size of that scratch buffer.
const int MAX_READ_BUFFER_SIZE = 1 << 16;
31 
ParseHeaders(const char * headers,int32_t headers_length)32 StringMap_t ParseHeaders(const char* headers, int32_t headers_length) {
33   enum State {
34     FINDING_KEY,
35     SKIPPING_WHITESPACE,
36     FINDING_VALUE,
37   };
38 
39   StringMap_t result;
40   std::string key;
41   std::string value;
42 
43   State state = FINDING_KEY;
44   const char* start = headers;
45   for (int i = 0; i < headers_length; ++i) {
46     switch (state) {
47       case FINDING_KEY:
48         if (headers[i] == ':') {
49           // Found key.
50           key.assign(start, &headers[i] - start);
51           key = NormalizeHeaderKey(key);
52           state = SKIPPING_WHITESPACE;
53         }
54         break;
55 
56       case SKIPPING_WHITESPACE:
57         if (headers[i] == ' ') {
58           // Found whitespace, keep going...
59           break;
60         }
61 
62         // Found a non-whitespace, mark this as the start of the value.
63         start = &headers[i];
64         state = FINDING_VALUE;
65 
66         // NOTE: Avoid fallthrough as it produces a warning on newer compilers,
67         // but can't easily be silenced by the older NaCl compilers.
68         //
69         // Fallthrough to start processing value without incrementing i.
70         goto finding_value;
71 
72       finding_value:
73       case FINDING_VALUE:
74         if (headers[i] == '\n') {
75           // Found value.
76           value.assign(start, &headers[i] - start);
77           result[key] = value;
78           start = &headers[i + 1];
79           state = FINDING_KEY;
80         }
81         break;
82     }
83   }
84 
85   return result;
86 }
87 
ParseContentLength(const StringMap_t & headers,off_t * content_length)88 bool ParseContentLength(const StringMap_t& headers, off_t* content_length) {
89   StringMap_t::const_iterator iter = headers.find("Content-Length");
90   if (iter == headers.end())
91     return false;
92 
93   *content_length = strtoull(iter->second.c_str(), NULL, 10);
94   return true;
95 }
96 
ParseContentRange(const StringMap_t & headers,off_t * read_start,off_t * read_end,off_t * entity_length)97 bool ParseContentRange(const StringMap_t& headers,
98                        off_t* read_start,
99                        off_t* read_end,
100                        off_t* entity_length) {
101   StringMap_t::const_iterator iter = headers.find("Content-Range");
102   if (iter == headers.end())
103     return false;
104 
105   // The key should look like "bytes ##-##/##" or "bytes ##-##/*". The last
106   // value is the entity length, which can potentially be * (i.e. unknown).
107   off_t read_start_int;
108   off_t read_end_int;
109   off_t entity_length_int;
110   int result = sscanf(iter->second.c_str(),
111                       "bytes %" SCNi64 "-%" SCNi64 "/%" SCNi64,
112                       &read_start_int,
113                       &read_end_int,
114                       &entity_length_int);
115 
116   // The Content-Range header specifies an inclusive range: e.g. the first ten
117   // bytes is "bytes 0-9/*". Convert it to a half-open range by incrementing
118   // read_end.
119   if (result == 2) {
120     if (read_start)
121       *read_start = read_start_int;
122     if (read_end)
123       *read_end = read_end_int + 1;
124     if (entity_length)
125       *entity_length = 0;
126     return true;
127   } else if (result == 3) {
128     if (read_start)
129       *read_start = read_start_int;
130     if (read_end)
131       *read_end = read_end_int + 1;
132     if (entity_length)
133       *entity_length = entity_length_int;
134     return true;
135   }
136 
137   return false;
138 }
139 
140 // Maps an HTTP |status_code| onto the appropriate errno code.
HTTPStatusCodeToErrno(int status_code)141 int HTTPStatusCodeToErrno(int status_code) {
142   switch (status_code) {
143     case STATUSCODE_OK:
144     case STATUSCODE_PARTIAL_CONTENT:
145       return 0;
146     case STATUSCODE_FORBIDDEN:
147       return EACCES;
148     case STATUSCODE_NOT_FOUND:
149       return ENOENT;
150   }
151   if (status_code >= 400 && status_code < 500)
152     return EINVAL;
153   return EIO;
154 }
155 
156 }  // namespace
157 
SetCachedSize(off_t size)158 void HttpFsNode::SetCachedSize(off_t size) {
159   has_cached_size_ = true;
160   stat_.st_size = size;
161 }
162 
FSync()163 Error HttpFsNode::FSync() {
164   return EACCES;
165 }
166 
GetDents(size_t offs,struct dirent * pdir,size_t count,int * out_bytes)167 Error HttpFsNode::GetDents(size_t offs,
168                            struct dirent* pdir,
169                            size_t count,
170                            int* out_bytes) {
171   *out_bytes = 0;
172   return EACCES;
173 }
174 
GetStat(struct stat * stat)175 Error HttpFsNode::GetStat(struct stat* stat) {
176   AUTO_LOCK(node_lock_);
177   return GetStat_Locked(stat);
178 }
179 
Read(const HandleAttr & attr,void * buf,size_t count,int * out_bytes)180 Error HttpFsNode::Read(const HandleAttr& attr,
181                        void* buf,
182                        size_t count,
183                        int* out_bytes) {
184   *out_bytes = 0;
185 
186   AUTO_LOCK(node_lock_);
187   if (cache_content_) {
188     if (cached_data_.empty()) {
189       Error error = DownloadToCache();
190       if (error)
191         return error;
192     }
193 
194     return ReadPartialFromCache(attr, buf, count, out_bytes);
195   }
196 
197   return DownloadPartial(attr, buf, count, out_bytes);
198 }
199 
FTruncate(off_t size)200 Error HttpFsNode::FTruncate(off_t size) {
201   return EACCES;
202 }
203 
Write(const HandleAttr & attr,const void * buf,size_t count,int * out_bytes)204 Error HttpFsNode::Write(const HandleAttr& attr,
205                         const void* buf,
206                         size_t count,
207                         int* out_bytes) {
208   // TODO(binji): support POST?
209   *out_bytes = 0;
210   return EACCES;
211 }
212 
GetSize(off_t * out_size)213 Error HttpFsNode::GetSize(off_t* out_size) {
214   *out_size = 0;
215 
216   // TODO(binji): This value should be cached properly; i.e. obey the caching
217   // headers returned by the server.
218   AUTO_LOCK(node_lock_);
219   struct stat statbuf;
220   Error error = GetStat_Locked(&statbuf);
221   if (error)
222     return error;
223 
224   *out_size = stat_.st_size;
225   return 0;
226 }
227 
HttpFsNode(Filesystem * filesystem,const std::string & url,bool cache_content)228 HttpFsNode::HttpFsNode(Filesystem* filesystem,
229                        const std::string& url,
230                        bool cache_content)
231     : Node(filesystem),
232       url_(url),
233       buffer_(NULL),
234       buffer_len_(0),
235       cache_content_(cache_content),
236       has_cached_size_(false) {
237   // http nodes are read-only by default
238   SetMode(S_IRALL);
239 }
240 
~HttpFsNode()241 HttpFsNode::~HttpFsNode() {
242   free(buffer_);
243 }
244 
GetStat_Locked(struct stat * stat)245 Error HttpFsNode::GetStat_Locked(struct stat* stat) {
246   // Assume we need to 'HEAD' if we do not know the size, otherwise, assume
247   // that the information is constant.  We can add a timeout if needed.
248   HttpFs* filesystem = static_cast<HttpFs*>(filesystem_);
249   if (!has_cached_size_ || !filesystem->cache_stat_) {
250     StringMap_t headers;
251     ScopedResource loader(filesystem_->ppapi());
252     ScopedResource request(filesystem_->ppapi());
253     ScopedResource response(filesystem_->ppapi());
254     int32_t statuscode;
255     StringMap_t response_headers;
256     const char* method = "HEAD";
257 
258     if (filesystem->is_blob_url_) {
259       // Blob URLs do not support HEAD requests, but do give the content length
260       // in their response headers. We issue a single-byte GET request to
261       // retrieve the content length.
262       method = "GET";
263       headers["Range"] = "bytes=0-0";
264     }
265 
266     Error error = OpenUrl(method,
267                           &headers,
268                           &loader,
269                           &request,
270                           &response,
271                           &statuscode,
272                           &response_headers);
273     if (error)
274       return error;
275 
276     off_t entity_length;
277     if (ParseContentRange(response_headers, NULL, NULL, &entity_length)) {
278       SetCachedSize(static_cast<off_t>(entity_length));
279     } else if (ParseContentLength(response_headers, &entity_length)) {
280       SetCachedSize(static_cast<off_t>(entity_length));
281     } else if (cache_content_) {
282       // The server didn't give a content length; download the data to memory
283       // via DownloadToCache, which will also set stat_.st_size;
284       error = DownloadToCache();
285       if (error)
286         return error;
287     } else {
288       // The user doesn't want to cache content, but we didn't get a
289       // "Content-Length" header. Read the entire entity, and throw it away.
290       // Don't use DownloadToCache, as that will still allocate enough memory
291       // for the entire entity.
292       off_t bytes_read;
293       error = DownloadToTemp(&bytes_read);
294       if (error)
295         return error;
296 
297       SetCachedSize(bytes_read);
298     }
299 
300     stat_.st_atime = 0;  // TODO(binji): Use "Last-Modified".
301     stat_.st_mtime = 0;
302     stat_.st_ctime = 0;
303 
304     SetType(S_IFREG);
305   }
306 
307   // Fill the stat structure if provided
308   if (stat)
309     *stat = stat_;
310 
311   return 0;
312 }
313 
OpenUrl(const char * method,StringMap_t * request_headers,ScopedResource * out_loader,ScopedResource * out_request,ScopedResource * out_response,int32_t * out_statuscode,StringMap_t * out_response_headers)314 Error HttpFsNode::OpenUrl(const char* method,
315                           StringMap_t* request_headers,
316                           ScopedResource* out_loader,
317                           ScopedResource* out_request,
318                           ScopedResource* out_response,
319                           int32_t* out_statuscode,
320                           StringMap_t* out_response_headers) {
321   // Clear all out parameters.
322   *out_statuscode = 0;
323   out_response_headers->clear();
324 
325   // Assume lock_ is already held.
326   PepperInterface* ppapi = filesystem_->ppapi();
327 
328   HttpFs* mount_http = static_cast<HttpFs*>(filesystem_);
329   out_request->Reset(
330       mount_http->MakeUrlRequestInfo(url_, method, request_headers));
331   if (!out_request->pp_resource())
332     return EINVAL;
333 
334   URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
335   URLResponseInfoInterface* response_interface =
336       ppapi->GetURLResponseInfoInterface();
337   VarInterface* var_interface = ppapi->GetVarInterface();
338 
339   out_loader->Reset(loader_interface->Create(ppapi->GetInstance()));
340   if (!out_loader->pp_resource())
341     return EINVAL;
342 
343   int32_t result = loader_interface->Open(out_loader->pp_resource(),
344                                           out_request->pp_resource(),
345                                           PP_BlockUntilComplete());
346   if (result != PP_OK)
347     return PPERROR_TO_ERRNO(result);
348 
349   out_response->Reset(
350       loader_interface->GetResponseInfo(out_loader->pp_resource()));
351   if (!out_response->pp_resource())
352     return EINVAL;
353 
354   // Get response statuscode.
355   PP_Var statuscode = response_interface->GetProperty(
356       out_response->pp_resource(), PP_URLRESPONSEPROPERTY_STATUSCODE);
357 
358   if (statuscode.type != PP_VARTYPE_INT32)
359     return EINVAL;
360 
361   *out_statuscode = statuscode.value.as_int;
362 
363   // Only accept OK or Partial Content.
364   Error error = HTTPStatusCodeToErrno(*out_statuscode);
365   if (error)
366     return error;
367 
368   // Get response headers.
369   PP_Var response_headers_var = response_interface->GetProperty(
370       out_response->pp_resource(), PP_URLRESPONSEPROPERTY_HEADERS);
371 
372   uint32_t response_headers_length;
373   const char* response_headers_str =
374       var_interface->VarToUtf8(response_headers_var, &response_headers_length);
375 
376   *out_response_headers =
377       ParseHeaders(response_headers_str, response_headers_length);
378 
379   var_interface->Release(response_headers_var);
380 
381   return 0;
382 }
383 
DownloadToCache()384 Error HttpFsNode::DownloadToCache() {
385   StringMap_t headers;
386   ScopedResource loader(filesystem_->ppapi());
387   ScopedResource request(filesystem_->ppapi());
388   ScopedResource response(filesystem_->ppapi());
389   int32_t statuscode;
390   StringMap_t response_headers;
391   Error error = OpenUrl("GET",
392                         &headers,
393                         &loader,
394                         &request,
395                         &response,
396                         &statuscode,
397                         &response_headers);
398   if (error)
399     return error;
400 
401   off_t content_length = 0;
402   if (ParseContentLength(response_headers, &content_length)) {
403     cached_data_.resize(content_length);
404     int real_size;
405     error = ReadResponseToBuffer(
406         loader, cached_data_.data(), content_length, &real_size);
407     if (error)
408       return error;
409 
410     SetCachedSize(real_size);
411     cached_data_.resize(real_size);
412     return 0;
413   }
414 
415   int bytes_read;
416   error = ReadEntireResponseToCache(loader, &bytes_read);
417   if (error)
418     return error;
419 
420   SetCachedSize(bytes_read);
421   return 0;
422 }
423 
ReadPartialFromCache(const HandleAttr & attr,void * buf,int count,int * out_bytes)424 Error HttpFsNode::ReadPartialFromCache(const HandleAttr& attr,
425                                        void* buf,
426                                        int count,
427                                        int* out_bytes) {
428   *out_bytes = 0;
429   off_t size = cached_data_.size();
430 
431   if (attr.offs + count > size)
432     count = size - attr.offs;
433 
434   if (count <= 0)
435     return 0;
436 
437   memcpy(buf, &cached_data_.data()[attr.offs], count);
438   *out_bytes = count;
439   return 0;
440 }
441 
DownloadPartial(const HandleAttr & attr,void * buf,off_t count,int * out_bytes)442 Error HttpFsNode::DownloadPartial(const HandleAttr& attr,
443                                   void* buf,
444                                   off_t count,
445                                   int* out_bytes) {
446   *out_bytes = 0;
447 
448   StringMap_t headers;
449 
450   char buffer[100];
451   // Range request is inclusive: 0-99 returns 100 bytes.
452   snprintf(&buffer[0],
453            sizeof(buffer),
454            "bytes=%" PRIi64 "-%" PRIi64,
455            attr.offs,
456            attr.offs + count - 1);
457   headers["Range"] = buffer;
458 
459   ScopedResource loader(filesystem_->ppapi());
460   ScopedResource request(filesystem_->ppapi());
461   ScopedResource response(filesystem_->ppapi());
462   int32_t statuscode;
463   StringMap_t response_headers;
464   Error error = OpenUrl("GET",
465                         &headers,
466                         &loader,
467                         &request,
468                         &response,
469                         &statuscode,
470                         &response_headers);
471   if (error) {
472     if (statuscode == STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE) {
473       // We're likely trying to read past the end. Return 0 bytes.
474       *out_bytes = 0;
475       return 0;
476     }
477 
478     return error;
479   }
480 
481   off_t read_start = 0;
482   if (statuscode == STATUSCODE_OK) {
483     // No partial result, read everything starting from the part we care about.
484     off_t content_length;
485     if (ParseContentLength(response_headers, &content_length)) {
486       if (attr.offs >= content_length)
487         return EINVAL;
488 
489       // Clamp count, if trying to read past the end of the file.
490       if (attr.offs + count > content_length) {
491         count = content_length - attr.offs;
492       }
493     }
494   } else if (statuscode == STATUSCODE_PARTIAL_CONTENT) {
495     // Determine from the headers where we are reading.
496     off_t read_end;
497     off_t entity_length;
498     if (ParseContentRange(
499             response_headers, &read_start, &read_end, &entity_length)) {
500       if (read_start > attr.offs || read_start > read_end) {
501         // If this error occurs, the server is returning bogus values.
502         return EINVAL;
503       }
504 
505       // Clamp count, if trying to read past the end of the file.
506       count = std::min(read_end - read_start, count);
507     } else {
508       // Partial Content without Content-Range. Assume that the server gave us
509       // exactly what we asked for. This can happen even when the server
510       // returns 200 -- the cache may return 206 in this case, but not modify
511       // the headers.
512       read_start = attr.offs;
513     }
514   }
515 
516   if (read_start < attr.offs) {
517     // We aren't yet at the location where we want to start reading. Read into
518     // our dummy buffer until then.
519     int bytes_to_read = attr.offs - read_start;
520     int bytes_read;
521     error = ReadResponseToTemp(loader, bytes_to_read, &bytes_read);
522     if (error)
523       return error;
524 
525     // Tried to read past the end of the entity.
526     if (bytes_read < bytes_to_read) {
527       *out_bytes = 0;
528       return 0;
529     }
530   }
531 
532   return ReadResponseToBuffer(loader, buf, count, out_bytes);
533 }
534 
DownloadToTemp(off_t * out_bytes)535 Error HttpFsNode::DownloadToTemp(off_t* out_bytes) {
536   StringMap_t headers;
537   ScopedResource loader(filesystem_->ppapi());
538   ScopedResource request(filesystem_->ppapi());
539   ScopedResource response(filesystem_->ppapi());
540   int32_t statuscode;
541   StringMap_t response_headers;
542   Error error = OpenUrl("GET",
543                         &headers,
544                         &loader,
545                         &request,
546                         &response,
547                         &statuscode,
548                         &response_headers);
549   if (error)
550     return error;
551 
552   off_t content_length = 0;
553   if (ParseContentLength(response_headers, &content_length)) {
554     *out_bytes = content_length;
555     return 0;
556   }
557 
558   return ReadEntireResponseToTemp(loader, out_bytes);
559 }
560 
ReadEntireResponseToTemp(const ScopedResource & loader,off_t * out_bytes)561 Error HttpFsNode::ReadEntireResponseToTemp(const ScopedResource& loader,
562                                            off_t* out_bytes) {
563   *out_bytes = 0;
564 
565   const int kBytesToRead = MAX_READ_BUFFER_SIZE;
566   buffer_ = (char*)realloc(buffer_, kBytesToRead);
567   assert(buffer_);
568   if (!buffer_) {
569     buffer_len_ = 0;
570     return ENOMEM;
571   }
572   buffer_len_ = kBytesToRead;
573 
574   while (true) {
575     int bytes_read;
576     Error error =
577         ReadResponseToBuffer(loader, buffer_, kBytesToRead, &bytes_read);
578     if (error)
579       return error;
580 
581     *out_bytes += bytes_read;
582 
583     if (bytes_read < kBytesToRead)
584       return 0;
585   }
586 }
587 
ReadEntireResponseToCache(const ScopedResource & loader,int * out_bytes)588 Error HttpFsNode::ReadEntireResponseToCache(const ScopedResource& loader,
589                                             int* out_bytes) {
590   *out_bytes = 0;
591   const int kBytesToRead = MAX_READ_BUFFER_SIZE;
592 
593   while (true) {
594     // Always recalculate the buf pointer because it may have moved when
595     // cached_data_ was resized.
596     cached_data_.resize(*out_bytes + kBytesToRead);
597     void* buf = cached_data_.data() + *out_bytes;
598 
599     int bytes_read;
600     Error error = ReadResponseToBuffer(loader, buf, kBytesToRead, &bytes_read);
601     if (error)
602       return error;
603 
604     *out_bytes += bytes_read;
605 
606     if (bytes_read < kBytesToRead) {
607       // Shrink the cached data buffer to the correct size.
608       cached_data_.resize(*out_bytes);
609       return 0;
610     }
611   }
612 }
613 
ReadResponseToTemp(const ScopedResource & loader,int count,int * out_bytes)614 Error HttpFsNode::ReadResponseToTemp(const ScopedResource& loader,
615                                      int count,
616                                      int* out_bytes) {
617   *out_bytes = 0;
618 
619   if (buffer_len_ < count) {
620     int new_len = std::min(count, MAX_READ_BUFFER_SIZE);
621     buffer_ = (char*)realloc(buffer_, new_len);
622     assert(buffer_);
623     if (!buffer_) {
624       buffer_len_ = 0;
625       return ENOMEM;
626     }
627     buffer_len_ = new_len;
628   }
629 
630   int bytes_left = count;
631   while (bytes_left > 0) {
632     int bytes_to_read = std::min(bytes_left, buffer_len_);
633     int bytes_read;
634     Error error = ReadResponseToBuffer(
635         loader, buffer_, bytes_to_read, &bytes_read);
636     if (error)
637       return error;
638 
639     if (bytes_read == 0)
640       return 0;
641 
642     bytes_left -= bytes_read;
643     *out_bytes += bytes_read;
644   }
645 
646   return 0;
647 }
648 
ReadResponseToBuffer(const ScopedResource & loader,void * buf,int count,int * out_bytes)649 Error HttpFsNode::ReadResponseToBuffer(const ScopedResource& loader,
650                                        void* buf,
651                                        int count,
652                                        int* out_bytes) {
653   *out_bytes = 0;
654 
655   PepperInterface* ppapi = filesystem_->ppapi();
656   URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
657 
658   char* out_buffer = static_cast<char*>(buf);
659   int bytes_to_read = count;
660   while (bytes_to_read > 0) {
661     int bytes_read =
662         loader_interface->ReadResponseBody(loader.pp_resource(),
663                                            out_buffer,
664                                            bytes_to_read,
665                                            PP_BlockUntilComplete());
666 
667     if (bytes_read == 0) {
668       // This is not an error -- it may just be that we were trying to read
669       // more data than exists.
670       *out_bytes = count - bytes_to_read;
671       return 0;
672     }
673 
674     if (bytes_read < 0)
675       return PPERROR_TO_ERRNO(bytes_read);
676 
677     assert(bytes_read <= bytes_to_read);
678     bytes_to_read -= bytes_read;
679     out_buffer += bytes_read;
680   }
681 
682   *out_bytes = count;
683   return 0;
684 }
685 
686 }  // namespace nacl_io
687