1 /* <!-- copyright */
2 /*
3  * aria2 - The high speed download utility
4  *
5  * Copyright (C) 2006 Tatsuhiro Tsujikawa
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  *
21  * In addition, as a special exception, the copyright holders give
22  * permission to link the code of portions of this program with the
23  * OpenSSL library under certain conditions as described in each
24  * individual source file, and distribute linked combinations
25  * including the two.
26  * You must obey the GNU General Public License in all respects
27  * for all of the code used other than OpenSSL.  If you modify
28  * file(s) with this exception, you may extend this exception to your
29  * version of the file(s), but you are not obligated to do so.  If you
30  * do not wish to do so, delete this exception statement from your
31  * version.  If you delete this exception statement from all source
32  * files in the program, then also delete it here.
33  */
34 /* copyright --> */
35 #include "util.h"
36 
37 #ifdef __sun
38 // For opensolaris, just include signal.h which includes sys/signal.h
39 #  ifdef HAVE_SIGNAL_H
40 #    include <signal.h>
41 #  endif // HAVE_SIGNAL_H
42 #else    // !__sun
43 #  ifdef HAVE_SYS_SIGNAL_H
44 #    include <sys/signal.h>
45 #  endif // HAVE_SYS_SIGNAL_H
46 #  ifdef HAVE_SIGNAL_H
47 #    include <signal.h>
48 #  endif // HAVE_SIGNAL_H
49 #endif   // !__sun
50 
51 #include <sys/types.h>
52 #ifdef HAVE_PWD_H
53 #  include <pwd.h>
54 #endif // HAVE_PWD_H
55 
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <limits>
#include <ostream>
#include <sstream>
67 
68 #include "SimpleRandomizer.h"
69 #include "File.h"
70 #include "Randomizer.h"
71 #include "a2netcompat.h"
72 #include "BitfieldMan.h"
73 #include "DefaultDiskWriter.h"
74 #include "FatalException.h"
75 #include "FileEntry.h"
76 #include "A2STR.h"
77 #include "array_fun.h"
78 #include "bitfield.h"
79 #include "DownloadHandlerConstants.h"
80 #include "RequestGroup.h"
81 #include "LogFactory.h"
82 #include "Logger.h"
83 #include "Option.h"
84 #include "DownloadContext.h"
85 #include "BufferedFile.h"
86 #include "SocketCore.h"
87 #include "Lock.h"
88 
89 #include "MessageDigest.h"
90 #include "message_digest_helper.h"
91 
92 // For libc6 which doesn't define ULLONG_MAX properly because of broken limits.h
93 #ifndef ULLONG_MAX
94 #  define ULLONG_MAX 18446744073709551615ULL
95 #endif // ULLONG_MAX
96 
97 namespace aria2 {
98 
99 #ifdef __MINGW32__
100 namespace {
utf8ToWChar(wchar_t * out,size_t outLength,const char * src)101 int utf8ToWChar(wchar_t* out, size_t outLength, const char* src)
102 {
103   return MultiByteToWideChar(CP_UTF8, 0, src, -1, out, outLength);
104 }
105 } // namespace
106 
107 namespace {
wCharToUtf8(char * out,size_t outLength,const wchar_t * src)108 int wCharToUtf8(char* out, size_t outLength, const wchar_t* src)
109 {
110   return WideCharToMultiByte(CP_UTF8, 0, src, -1, out, outLength, nullptr,
111                              nullptr);
112 }
113 } // namespace
114 
utf8ToWChar(const char * src)115 std::wstring utf8ToWChar(const char* src)
116 {
117   int len = utf8ToWChar(nullptr, 0, src);
118   if (len <= 0) {
119     abort();
120   }
121   auto buf = make_unique<wchar_t[]>((size_t)len);
122   len = utf8ToWChar(buf.get(), len, src);
123   if (len <= 0) {
124     abort();
125   }
126   else {
127     return buf.get();
128   }
129 }
130 
// Convenience overload: converts the contents of |src| via the
// const char* overload.
std::wstring utf8ToWChar(const std::string& src)
{
  const char* raw = src.c_str();
  return utf8ToWChar(raw);
}
135 
wCharToUtf8(const std::wstring & wsrc)136 std::string wCharToUtf8(const std::wstring& wsrc)
137 {
138   int len = wCharToUtf8(nullptr, 0, wsrc.c_str());
139   if (len <= 0) {
140     abort();
141   }
142   auto buf = make_unique<char[]>((size_t)len);
143   len = wCharToUtf8(buf.get(), len, wsrc.c_str());
144   if (len <= 0) {
145     abort();
146   }
147   else {
148     return buf.get();
149   }
150 }
151 
// Returns a copy of |src| in which every backslash is replaced with a
// forward slash.  All other characters are left untouched.
std::string toForwardSlash(const std::string& src)
{
  std::string converted(src);
  for (auto& ch : converted) {
    if (ch == '\\') {
      ch = '/';
    }
  }
  return converted;
}
159 
160 #endif // __MINGW32__
161 
162 namespace util {
163 
// Default set of characters to strip: carriage return, line feed,
// horizontal tab and space.
const char DEFAULT_STRIP_CHARSET[] = "\r\n\t ";
165 
strip(const std::string & str,const char * chars)166 std::string strip(const std::string& str, const char* chars)
167 {
168   std::pair<std::string::const_iterator, std::string::const_iterator> p =
169       stripIter(str.begin(), str.end(), chars);
170   return std::string(p.first, p.second);
171 }
172 
itos(int64_t value,bool comma)173 std::string itos(int64_t value, bool comma)
174 {
175   bool flag = false;
176   std::string str;
177   if (value < 0) {
178     if (value == INT64_MIN) {
179       if (comma) {
180         str = "-9,223,372,036,854,775,808";
181       }
182       else {
183         str = "-9223372036854775808";
184       }
185       return str;
186     }
187     flag = true;
188     value = -value;
189   }
190   str = uitos(value, comma);
191   if (flag) {
192     str.insert(str.begin(), '-');
193   }
194   return str;
195 }
196 
// Returns tv1 - tv2 in microseconds, or 0 when tv1 is earlier than
// tv2.
int64_t difftv(struct timeval tv1, struct timeval tv2)
{
  const bool earlierSecond = tv1.tv_sec < tv2.tv_sec;
  const bool earlierWithinSecond =
      tv1.tv_sec == tv2.tv_sec && tv1.tv_usec < tv2.tv_usec;
  if (earlierSecond || earlierWithinSecond) {
    return 0;
  }
  const int64_t secDelta = static_cast<int64_t>(tv1.tv_sec - tv2.tv_sec);
  return secDelta * 1000000 + tv1.tv_usec - tv2.tv_usec;
}
206 
// Returns the difference of the whole-second parts of tv1 and tv2, or
// 0 when tv1's second count is smaller.  Microseconds are ignored.
int32_t difftvsec(struct timeval tv1, struct timeval tv2)
{
  return tv1.tv_sec < tv2.tv_sec ? 0 : tv1.tv_sec - tv2.tv_sec;
}
214 
// Returns a copy of |target| with every non-overlapping occurrence of
// |oldstr| (searched left to right) replaced by |newstr|.  If |target|
// or |oldstr| is empty, |target| is returned unchanged.
std::string replace(const std::string& target, const std::string& oldstr,
                    const std::string& newstr)
{
  if (target.empty() || oldstr.empty()) {
    return target;
  }
  std::string out;
  std::string::size_type cursor = 0;
  for (std::string::size_type hit = target.find(oldstr);
       hit != std::string::npos; hit = target.find(oldstr, cursor)) {
    // Copy the untouched text before the match, then the replacement.
    out.append(target, cursor, hit - cursor);
    out.append(newstr);
    cursor = hit + oldstr.size();
  }
  // Whatever follows the last match is copied verbatim.
  out.append(target, cursor, std::string::npos);
  return out;
}
233 
// Returns true if |c| is an ASCII letter ('A'-'Z' or 'a'-'z').
bool isAlpha(const char c)
{
  const bool upper = c >= 'A' && c <= 'Z';
  const bool lower = c >= 'a' && c <= 'z';
  return upper || lower;
}
238 
// Returns true if |c| is an ASCII decimal digit ('0'-'9').
bool isDigit(const char c)
{
  return c >= '0' && c <= '9';
}
240 
isHexDigit(const char c)241 bool isHexDigit(const char c)
242 {
243   return isDigit(c) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
244 }
245 
isHexDigit(const std::string & s)246 bool isHexDigit(const std::string& s)
247 {
248   for (const auto& c : s) {
249     if (!isHexDigit(c)) {
250       return false;
251     }
252   }
253   return true;
254 }
255 
// Returns true if |c| is one of the characters this file treats as
// RFC 3986 "reserved":  : / ? # [ ] @ ! $ & ' ( ) * + , ; =
bool inRFC3986ReservedChars(const char c)
{
  switch (c) {
  case ':':
  case '/':
  case '?':
  case '#':
  case '[':
  case ']':
  case '@':
  case '!':
  case '$':
  case '&':
  case '\'':
  case '(':
  case ')':
  case '*':
  case '+':
  case ',':
  case ';':
  case '=':
    return true;
  default:
    return false;
  }
}
263 
inRFC3986UnreservedChars(const char c)264 bool inRFC3986UnreservedChars(const char c)
265 {
266   static const char unreserved[] = {'-', '.', '_', '~'};
267   return isAlpha(c) || isDigit(c) ||
268          std::find(std::begin(unreserved), std::end(unreserved), c) !=
269              std::end(unreserved);
270 }
271 
inRFC2978MIMECharset(const char c)272 bool inRFC2978MIMECharset(const char c)
273 {
274   static const char chars[] = {'!', '#', '$', '%', '&', '\'', '+',
275                                '-', '^', '_', '`', '{', '}',  '~'};
276   return isAlpha(c) || isDigit(c) ||
277          std::find(std::begin(chars), std::end(chars), c) != std::end(chars);
278 }
279 
inRFC2616HttpToken(const char c)280 bool inRFC2616HttpToken(const char c)
281 {
282   static const char chars[] = {'!', '#', '$', '%', '&', '\'', '*', '+',
283                                '-', '.', '^', '_', '`', '|',  '~'};
284   return isAlpha(c) || isDigit(c) ||
285          std::find(std::begin(chars), std::end(chars), c) != std::end(chars);
286 }
287 
inRFC5987AttrChar(const char c)288 bool inRFC5987AttrChar(const char c)
289 {
290   return inRFC2616HttpToken(c) && c != '*' && c != '\'' && c != '%';
291 }
292 
// Returns true if |c| lies in one of the two byte ranges this file
// accepts as ISO/IEC 8859-1 characters: 0x20-0x7e or 0xa0-0xff.
bool isIso8859p1(unsigned char c)
{
  if (c >= 0xa0u) {
    return true;
  }
  return c >= 0x20u && c <= 0x7eu;
}
298 
// Returns true if |c| is a space or a horizontal tab.
bool isLws(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
    return true;
  default:
    return false;
  }
}
// Returns true if |c| is a carriage return or a line feed.
bool isCRLF(const char c)
{
  switch (c) {
  case '\r':
  case '\n':
    return true;
  default:
    return false;
  }
}
301 
302 namespace {
303 
isUtf8Tail(unsigned char ch)304 bool isUtf8Tail(unsigned char ch) { return in(ch, 0x80u, 0xbfu); }
305 
// Returns true if |c| may be emitted verbatim by percentEncodeMini():
// it must lie strictly between 0x20 and 0x7f and must not be '"', '<'
// or '>'.
bool inPercentEncodeMini(const unsigned char c)
{
  if (c <= 0x20 || c >= 0x7fu) {
    return false;
  }
  // Chromium escapes following characters. Firefox4 escapes more.
  return !(c == '"' || c == '<' || c == '>');
}
312 
313 } // namespace
314 
// Returns true if |str| consists solely of byte sequences accepted by
// the checks below, which follow the UTF-8 ABNF of RFC 3629 section 4
// with one restriction: among single-byte characters only 0x20-0x7e
// and the control characters \b, \t, \n, \f and \r are accepted.  An
// empty string yields true.  A sequence truncated by the end of the
// string yields false.
//
// Note that |s| is advanced inside the short-circuit conditions, so
// each tail byte is consumed only after the preceding byte has been
// validated.
bool isUtf8(const std::string& str)
{
  for (std::string::const_iterator s = str.begin(), eos = str.end(); s != eos;
       ++s) {
    unsigned char firstChar = *s;
    // See ABNF in http://tools.ietf.org/search/rfc3629#section-4
    if (in(firstChar, 0x20u, 0x7eu) || firstChar == 0x08u || // \b
        firstChar == 0x09u ||                                // \t
        firstChar == 0x0au ||                                // \n
        firstChar == 0x0cu ||                                // \f
        firstChar == 0x0du                                   // \r
    ) {
      // UTF8-1 (without ctrl chars)
    }
    else if (in(firstChar, 0xc2u, 0xdfu)) {
      // UTF8-2
      if (++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xe0u == firstChar) {
      // UTF8-3
      // Lead byte 0xe0: the second byte is limited to 0xa0-0xbf.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0xa0u, 0xbfu) ||
          ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (in(firstChar, 0xe1u, 0xecu) || in(firstChar, 0xeeu, 0xefu)) {
      // UTF8-3
      if (++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xedu == firstChar) {
      // UTF8-3
      // Lead byte 0xed: the second byte is limited to 0x80-0x9f.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0x80u, 0x9fu) ||
          ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xf0u == firstChar) {
      // UTF8-4
      // Lead byte 0xf0: the second byte is limited to 0x90-0xbf.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0x90u, 0xbfu) ||
          ++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (in(firstChar, 0xf1u, 0xf3u)) {
      // UTF8-4
      if (++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s) ||
          ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xf4u == firstChar) {
      // UTF8-4
      // Lead byte 0xf4: the second byte is limited to 0x80-0x8f.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0x80u, 0x8fu) ||
          ++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else {
      // Any other lead byte (rejected control characters, 0x7f,
      // 0x80-0xc1, 0xf5-0xff) makes the string invalid.
      return false;
    }
  }
  return true;
}
382 
percentEncode(const unsigned char * target,size_t len)383 std::string percentEncode(const unsigned char* target, size_t len)
384 {
385   std::string dest;
386   for (size_t i = 0; i < len; ++i) {
387     if (inRFC3986UnreservedChars(target[i])) {
388       dest += target[i];
389     }
390     else {
391       dest.append(fmt("%%%02X", target[i]));
392     }
393   }
394   return dest;
395 }
396 
percentEncode(const std::string & target)397 std::string percentEncode(const std::string& target)
398 {
399   if (std::find_if_not(target.begin(), target.end(),
400                        inRFC3986UnreservedChars) == target.end()) {
401     return target;
402   }
403   return percentEncode(reinterpret_cast<const unsigned char*>(target.c_str()),
404                        target.size());
405 }
406 
percentEncodeMini(const std::string & src)407 std::string percentEncodeMini(const std::string& src)
408 {
409   if (std::find_if_not(src.begin(), src.end(), inPercentEncodeMini) ==
410       src.end()) {
411     return src;
412   }
413   std::string result;
414   for (auto c : src) {
415     if (!inPercentEncodeMini(c)) {
416       result += fmt("%%%02X", static_cast<unsigned char>(c));
417     }
418     else {
419       result += c;
420     }
421   }
422   return result;
423 }
424 
torrentPercentEncode(const unsigned char * target,size_t len)425 std::string torrentPercentEncode(const unsigned char* target, size_t len)
426 {
427   std::string dest;
428   for (size_t i = 0; i < len; ++i) {
429     if (isAlpha(target[i]) || isDigit(target[i])) {
430       dest += target[i];
431     }
432     else {
433       dest.append(fmt("%%%02X", target[i]));
434     }
435   }
436   return dest;
437 }
438 
torrentPercentEncode(const std::string & target)439 std::string torrentPercentEncode(const std::string& target)
440 {
441   return torrentPercentEncode(
442       reinterpret_cast<const unsigned char*>(target.c_str()), target.size());
443 }
444 
percentDecode(std::string::const_iterator first,std::string::const_iterator last)445 std::string percentDecode(std::string::const_iterator first,
446                           std::string::const_iterator last)
447 {
448   std::string result;
449   for (; first != last; ++first) {
450     if (*first == '%') {
451       if (first + 1 != last && first + 2 != last && isHexDigit(*(first + 1)) &&
452           isHexDigit(*(first + 2))) {
453         result +=
454             hexCharToUInt(*(first + 1)) * 16 + hexCharToUInt(*(first + 2));
455         first += 2;
456       }
457       else {
458         result += *first;
459       }
460     }
461     else {
462       result += *first;
463     }
464   }
465   return result;
466 }
467 
// Returns the lower-case hexadecimal representation of the |len|
// bytes starting at |src|; each byte yields exactly two characters,
// high nibble first.
std::string toHex(const unsigned char* src, size_t len)
{
  static const char digits[] = "0123456789abcdef";
  std::string hex;
  hex.reserve(len * 2);
  for (size_t i = 0; i < len; ++i) {
    hex += digits[src[i] >> 4];
    hex += digits[src[i] & 0x0fu];
  }
  return hex;
}
488 
toHex(const char * src,size_t len)489 std::string toHex(const char* src, size_t len)
490 {
491   return toHex(reinterpret_cast<const unsigned char*>(src), len);
492 }
493 
toHex(const std::string & src)494 std::string toHex(const std::string& src)
495 {
496   return toHex(reinterpret_cast<const unsigned char*>(src.c_str()), src.size());
497 }
498 
// Returns the numeric value (0-15) of the hexadecimal digit |ch|,
// accepting both letter cases.  Returns 255 if |ch| is not a
// hexadecimal digit.
unsigned int hexCharToUInt(unsigned char ch)
{
  if (ch >= '0' && ch <= '9') {
    return ch - '0';
  }
  if (ch >= 'a' && ch <= 'f') {
    return ch - 'a' + 10;
  }
  if (ch >= 'A' && ch <= 'F') {
    return ch - 'A' + 10;
  }
  return 255;
}
517 
secfmt(time_t sec)518 std::string secfmt(time_t sec)
519 {
520   time_t tsec = sec;
521   std::string str;
522   if (sec >= 3600) {
523     str = fmt("%" PRId64 "h", static_cast<int64_t>(sec / 3600));
524     sec %= 3600;
525   }
526   if (sec >= 60) {
527     str += fmt("%dm", static_cast<int>(sec / 60));
528     sec %= 60;
529   }
530   if (sec || tsec == 0) {
531     str += fmt("%ds", static_cast<int>(sec));
532   }
533   return str;
534 }
535 
536 namespace {
// Parses |s| as an integer in |base| using the strtol-style function
// |f| and stores the converted value in |res|.
//
// Returns true only if all of the following hold:
//   * |s| is not empty;
//   * |f| did not report a range error (errno == ERANGE);
//   * |f| consumed at least one character, i.e. a number was actually
//     found (a blank-only string is NOT parsed as 0);
//   * everything after the number, up to s.size(), is whitespace.
//
// Note that |res| is assigned the raw result of |f| before validation,
// so callers must not rely on |res| when false is returned.
template <typename T, typename F>
bool parseLong(T& res, F f, const std::string& s, int base)
{
  if (s.empty()) {
    return false;
  }
  const char* const begin = s.c_str();
  const char* const end = begin + s.size();
  char* endptr;
  errno = 0;
  res = f(begin, &endptr, base);
  if (errno == ERANGE) {
    return false;
  }
  // No conversion was performed: strto*() leaves endptr at the start.
  if (endptr == begin) {
    return false;
  }
  // Scan up to the real end of the string rather than stopping at the
  // first NUL, so an embedded NUL cannot hide trailing garbage.
  for (const char* i = endptr; i != end; ++i) {
    // isspace() requires a value representable as unsigned char;
    // passing a negative plain char is undefined behavior.
    if (!isspace(static_cast<unsigned char>(*i))) {
      return false;
    }
  }
  return true;
}
558 } // namespace
559 
parseIntNoThrow(int32_t & res,const std::string & s,int base)560 bool parseIntNoThrow(int32_t& res, const std::string& s, int base)
561 {
562   long int t;
563   if (parseLong(t, strtol, s, base) &&
564       t >= std::numeric_limits<int32_t>::min() &&
565       t <= std::numeric_limits<int32_t>::max()) {
566     res = t;
567     return true;
568   }
569   else {
570     return false;
571   }
572 }
573 
parseUIntNoThrow(uint32_t & res,const std::string & s,int base)574 bool parseUIntNoThrow(uint32_t& res, const std::string& s, int base)
575 {
576   long int t;
577   if (parseLong(t, strtol, s, base) && t >= 0 &&
578       t <= std::numeric_limits<int32_t>::max()) {
579     res = t;
580     return true;
581   }
582   else {
583     return false;
584   }
585 }
586 
parseLLIntNoThrow(int64_t & res,const std::string & s,int base)587 bool parseLLIntNoThrow(int64_t& res, const std::string& s, int base)
588 {
589   int64_t t;
590   if (parseLong(t, strtoll, s, base)) {
591     res = t;
592     return true;
593   }
594   else {
595     return false;
596   }
597 }
598 
// Parses |s| as a floating point number using strtod().  On success
// stores the value in |res| and returns true.
//
// Returns false and leaves |res| untouched if:
//   * |s| is empty;
//   * strtod() reports a range error (errno == ERANGE);
//   * strtod() consumed nothing, i.e. no number was found (a
//     blank-only string is NOT parsed as 0.0);
//   * anything other than whitespace follows the number.
bool parseDoubleNoThrow(double& res, const std::string& s)
{
  if (s.empty()) {
    return false;
  }

  const char* const begin = s.c_str();
  const char* const end = begin + s.size();

  errno = 0;
  char* endptr;
  const double d = strtod(begin, &endptr);

  if (errno == ERANGE) {
    return false;
  }

  // No conversion was performed: strtod() leaves endptr at the start.
  if (endptr == begin) {
    return false;
  }

  for (const char* i = endptr; i != end; ++i) {
    // isspace() requires a value representable as unsigned char;
    // passing a negative plain char is undefined behavior.
    if (!isspace(static_cast<unsigned char>(*i))) {
      return false;
    }
  }

  res = d;

  return true;
}
625 
parseIntSegments(const std::string & src)626 SegList<int> parseIntSegments(const std::string& src)
627 {
628   SegList<int> sgl;
629   for (std::string::const_iterator i = src.begin(), eoi = src.end();
630        i != eoi;) {
631     std::string::const_iterator j = std::find(i, eoi, ',');
632     if (j == i) {
633       ++i;
634       continue;
635     }
636     std::string::const_iterator p = std::find(i, j, '-');
637     if (p == j) {
638       int a;
639       if (parseIntNoThrow(a, std::string(i, j))) {
640         sgl.add(a, a + 1);
641       }
642       else {
643         throw DL_ABORT_EX(fmt("Bad range %s", std::string(i, j).c_str()));
644       }
645     }
646     else if (p == i || p + 1 == j) {
647       throw DL_ABORT_EX(fmt(MSG_INCOMPLETE_RANGE, std::string(i, j).c_str()));
648     }
649     else {
650       int a, b;
651       if (parseIntNoThrow(a, std::string(i, p)) &&
652           parseIntNoThrow(b, (std::string(p + 1, j)))) {
653         sgl.add(a, b + 1);
654       }
655       else {
656         throw DL_ABORT_EX(fmt("Bad range %s", std::string(i, j).c_str()));
657       }
658     }
659     if (j == eoi) {
660       break;
661     }
662     i = j + 1;
663   }
664   return sgl;
665 }
666 
667 namespace {
computeHeadPieces(std::vector<size_t> & indexes,const std::vector<std::shared_ptr<FileEntry>> & fileEntries,size_t pieceLength,int64_t head)668 void computeHeadPieces(
669     std::vector<size_t>& indexes,
670     const std::vector<std::shared_ptr<FileEntry>>& fileEntries,
671     size_t pieceLength, int64_t head)
672 {
673   if (head == 0) {
674     return;
675   }
676   for (const auto& fi : fileEntries) {
677     if (fi->getLength() == 0) {
678       continue;
679     }
680     const size_t lastIndex =
681         (fi->getOffset() + std::min(head, fi->getLength()) - 1) / pieceLength;
682     for (size_t idx = fi->getOffset() / pieceLength; idx <= lastIndex; ++idx) {
683       indexes.push_back(idx);
684     }
685   }
686 }
687 } // namespace
688 
689 namespace {
computeTailPieces(std::vector<size_t> & indexes,const std::vector<std::shared_ptr<FileEntry>> & fileEntries,size_t pieceLength,int64_t tail)690 void computeTailPieces(
691     std::vector<size_t>& indexes,
692     const std::vector<std::shared_ptr<FileEntry>>& fileEntries,
693     size_t pieceLength, int64_t tail)
694 {
695   if (tail == 0) {
696     return;
697   }
698   for (const auto& fi : fileEntries) {
699     if (fi->getLength() == 0) {
700       continue;
701     }
702     int64_t endOffset = fi->getLastOffset();
703     size_t fromIndex =
704         (endOffset - 1 - (std::min(tail, fi->getLength()) - 1)) / pieceLength;
705     const size_t toIndex = (endOffset - 1) / pieceLength;
706     while (fromIndex <= toIndex) {
707       indexes.push_back(fromIndex++);
708     }
709   }
710 }
711 } // namespace
712 
parsePrioritizePieceRange(std::vector<size_t> & result,const std::string & src,const std::vector<std::shared_ptr<FileEntry>> & fileEntries,size_t pieceLength,int64_t defaultSize)713 void parsePrioritizePieceRange(
714     std::vector<size_t>& result, const std::string& src,
715     const std::vector<std::shared_ptr<FileEntry>>& fileEntries,
716     size_t pieceLength, int64_t defaultSize)
717 {
718   std::vector<size_t> indexes;
719   std::vector<Scip> parts;
720   splitIter(src.begin(), src.end(), std::back_inserter(parts), ',', true);
721   for (const auto& i : parts) {
722     if (util::streq(i.first, i.second, "head")) {
723       computeHeadPieces(indexes, fileEntries, pieceLength, defaultSize);
724     }
725     else if (util::startsWith(i.first, i.second, "head=")) {
726       std::string sizestr(i.first + 5, i.second);
727       computeHeadPieces(indexes, fileEntries, pieceLength,
728                         std::max((int64_t)0, getRealSize(sizestr)));
729     }
730     else if (util::streq(i.first, i.second, "tail")) {
731       computeTailPieces(indexes, fileEntries, pieceLength, defaultSize);
732     }
733     else if (util::startsWith(i.first, i.second, "tail=")) {
734       std::string sizestr(i.first + 5, i.second);
735       computeTailPieces(indexes, fileEntries, pieceLength,
736                         std::max((int64_t)0, getRealSize(sizestr)));
737     }
738     else {
739       throw DL_ABORT_EX(
740           fmt("Unrecognized token %s", std::string(i.first, i.second).c_str()));
741     }
742   }
743   std::sort(indexes.begin(), indexes.end());
744   indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
745   result.insert(result.end(), indexes.begin(), indexes.end());
746 }
747 
// Converts the |len| bytes at |src| from ISO/IEC 8859-1 to UTF-8.
// Bytes below 0x80 are copied verbatim and bytes 0xa0-0xff become a
// two-byte sequence.  If any byte is in the range 0x80-0x9f, an empty
// string is returned.
std::string iso8859p1ToUtf8(const char* src, size_t len)
{
  std::string utf8;
  for (size_t i = 0; i < len; ++i) {
    const unsigned char c = src[i];
    if (c < 0x80u) {
      utf8 += c;
      continue;
    }
    if (c < 0xa0u) {
      return "";
    }
    // Lead byte is 0xc2 for 0xa0-0xbf and 0xc3 for 0xc0-0xff; the
    // trailing byte is |c| with bit 0x40 cleared.
    utf8 += (c <= 0xbfu) ? 0xc2u : 0xc3u;
    utf8 += c & (~0x40u);
  }
  return utf8;
}
773 
iso8859p1ToUtf8(const std::string & src)774 std::string iso8859p1ToUtf8(const std::string& src)
775 {
776   return iso8859p1ToUtf8(src.c_str(), src.size());
777 }
778 
779 /* Start of utf8 dfa */
780 /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
781  * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
782  *
783  * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
784  *
785  * Permission is hereby granted, free of charge, to any person
786  * obtaining a copy of this software and associated documentation
787  * files (the "Software"), to deal in the Software without
788  * restriction, including without limitation the rights to use, copy,
789  * modify, merge, publish, distribute, sublicense, and/or sell copies
790  * of the Software, and to permit persons to whom the Software is
791  * furnished to do so, subject to the following conditions:
792  *
793  * The above copyright notice and this permission notice shall be
794  * included in all copies or substantial portions of the Software.
795  *
796  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
797  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
798  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
799  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
800  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
801  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
802  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
803  * SOFTWARE.
804  */
// DFA states used by utf8dfa(): UTF8_ACCEPT is the start state and
// UTF8_REJECT is the state value 12 in the transition table below.
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

// clang-format off
static const uint8_t utf8d[] = {
    /*
     * The first part of the table (256 entries) maps bytes to character
     * classes, to reduce the size of the transition table and create
     * bitmasks.
     */
    /* 0x00-0x7f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80-0x8f */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90-0x9f */
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    /* 0xa0-0xbf */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    /* 0xc0-0xdf */
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0-0xef */
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
    /* 0xf0-0xff */
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

    /*
     * The second part is a transition table that maps a combination
     * of a state of the automaton and a character class to a state.
     * Each row below holds the 12 entries (one per character class)
     * of one state; states are multiples of 12.
     */
    /* state  0 */ 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72,
    /* state 12 */ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    /* state 24 */ 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12,
    /* state 36 */ 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,
    /* state 48 */ 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12,
    /* state 60 */ 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
    /* state 72 */ 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,
    /* state 84 */ 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,
    /* state 96 */ 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// clang-format on
1183 
utf8dfa(uint32_t * state,uint32_t * codep,uint32_t byte)1184 static uint32_t utf8dfa(uint32_t* state, uint32_t* codep, uint32_t byte)
1185 {
1186   uint32_t type = utf8d[byte];
1187 
1188   *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
1189                                    : (0xff >> type) & (byte);
1190 
1191   *state = utf8d[256 + *state + type];
1192   return *state;
1193 }
1194 
1195 /* End of utf8 dfa */
1196 
// States of the parser in parse_content_disposition().  Parsing starts
// in CD_BEFORE_DISPOSITION_TYPE.  Enumerators keep their implicit
// values 0..15.
enum content_disposition_parse_state {
  CD_BEFORE_DISPOSITION_TYPE,
  CD_AFTER_DISPOSITION_TYPE,
  CD_DISPOSITION_TYPE,
  CD_BEFORE_DISPOSITION_PARM_NAME,
  CD_AFTER_DISPOSITION_PARM_NAME,
  CD_DISPOSITION_PARM_NAME,
  CD_BEFORE_VALUE,
  CD_AFTER_VALUE,
  CD_QUOTED_STRING,
  CD_TOKEN,
  CD_BEFORE_EXT_VALUE,
  CD_CHARSET,
  CD_LANGUAGE,
  CD_VALUE_CHARS,
  CD_VALUE_CHARS_PCT_ENCODED1,
  CD_VALUE_CHARS_PCT_ENCODED2
};
1215 
// Bit flags recording which filename parameters have already been
// parsed completely.
enum content_disposition_parse_flag {
  // A plain "filename" parameter was found.
  CD_FILENAME_FOUND = 1,
  // An extended "filename*" parameter was found.
  CD_EXT_FILENAME_FOUND = 1 << 1
};
1220 
// Charset announced by an extended parameter value.  Charsets other
// than the two recognized ones map to CD_ENC_UNKNOWN.
enum content_disposition_charset {
  CD_ENC_UNKNOWN,
  CD_ENC_UTF8,
  CD_ENC_ISO_8859_1
};
1226 
parse_content_disposition(char * dest,size_t destlen,const char ** charsetp,size_t * charsetlenp,const char * in,size_t len,bool defaultUTF8)1227 ssize_t parse_content_disposition(char* dest, size_t destlen,
1228                                   const char** charsetp, size_t* charsetlenp,
1229                                   const char* in, size_t len, bool defaultUTF8)
1230 {
1231   const char *p = in, *eop = in + len, *mark_first = nullptr,
1232              *mark_last = nullptr;
1233   int state = CD_BEFORE_DISPOSITION_TYPE;
1234   int in_file_parm = 0;
1235   int flags = 0;
1236   int quoted_seen = 0;
1237   int charset = 0;
1238   /* To suppress warnings */
1239   char* dp = dest;
1240   size_t dlen = destlen;
1241   uint32_t dfa_state = UTF8_ACCEPT;
1242   uint32_t dfa_code = 0;
1243   uint8_t pctval = 0;
1244 
1245   *charsetp = nullptr;
1246   *charsetlenp = 0;
1247 
1248   for (; p != eop; ++p) {
1249     switch (state) {
1250     case CD_BEFORE_DISPOSITION_TYPE:
1251       if (inRFC2616HttpToken(*p)) {
1252         state = CD_DISPOSITION_TYPE;
1253       }
1254       else if (!isLws(*p)) {
1255         return -1;
1256       }
1257       break;
1258     case CD_AFTER_DISPOSITION_TYPE:
1259     case CD_DISPOSITION_TYPE:
1260       if (*p == ';') {
1261         state = CD_BEFORE_DISPOSITION_PARM_NAME;
1262       }
1263       else if (isLws(*p)) {
1264         state = CD_AFTER_DISPOSITION_TYPE;
1265       }
1266       else if (state == CD_AFTER_DISPOSITION_TYPE || !inRFC2616HttpToken(*p)) {
1267         return -1;
1268       }
1269       break;
1270     case CD_BEFORE_DISPOSITION_PARM_NAME:
1271       if (inRFC2616HttpToken(*p)) {
1272         mark_first = p;
1273         state = CD_DISPOSITION_PARM_NAME;
1274       }
1275       else if (!isLws(*p)) {
1276         return -1;
1277       }
1278       break;
1279     case CD_AFTER_DISPOSITION_PARM_NAME:
1280     case CD_DISPOSITION_PARM_NAME:
1281       if (*p == '=') {
1282         if (state == CD_DISPOSITION_PARM_NAME) {
1283           mark_last = p;
1284         }
1285         in_file_parm = 0;
1286         if (strieq(mark_first, mark_last, "filename*")) {
1287           if ((flags & CD_EXT_FILENAME_FOUND) == 0) {
1288             in_file_parm = 1;
1289           }
1290           else {
1291             return -1;
1292           }
1293           state = CD_BEFORE_EXT_VALUE;
1294         }
1295         else if (strieq(mark_first, mark_last, "filename")) {
1296           if (flags & CD_FILENAME_FOUND) {
1297             return -1;
1298           }
1299           if ((flags & CD_EXT_FILENAME_FOUND) == 0) {
1300             in_file_parm = 1;
1301           }
1302           state = CD_BEFORE_VALUE;
1303         }
1304         else {
1305           /* ext-token must be characters in token, followed by "*" */
1306           if (mark_first != mark_last - 1 && *(mark_last - 1) == '*') {
1307             state = CD_BEFORE_EXT_VALUE;
1308           }
1309           else {
1310             state = CD_BEFORE_VALUE;
1311           }
1312         }
1313         if (in_file_parm) {
1314           dp = dest;
1315           dlen = destlen;
1316         }
1317       }
1318       else if (isLws(*p)) {
1319         mark_last = p;
1320         state = CD_AFTER_DISPOSITION_PARM_NAME;
1321       }
1322       else if (state == CD_AFTER_DISPOSITION_PARM_NAME ||
1323                !inRFC2616HttpToken(*p)) {
1324         return -1;
1325       }
1326       break;
1327     case CD_BEFORE_VALUE:
1328       if (*p == '"') {
1329         quoted_seen = 0;
1330         state = CD_QUOTED_STRING;
1331         if (defaultUTF8) {
1332           dfa_state = UTF8_ACCEPT;
1333           dfa_code = 0;
1334         }
1335       }
1336       else if (inRFC2616HttpToken(*p)) {
1337         if (in_file_parm) {
1338           if (dlen == 0) {
1339             return -1;
1340           }
1341           else {
1342             *dp++ = *p;
1343             --dlen;
1344           }
1345         }
1346         state = CD_TOKEN;
1347       }
1348       else if (!isLws(*p)) {
1349         return -1;
1350       }
1351       break;
1352     case CD_AFTER_VALUE:
1353       if (*p == ';') {
1354         state = CD_BEFORE_DISPOSITION_PARM_NAME;
1355       }
1356       else if (!isLws(*p)) {
1357         return -1;
1358       }
1359       break;
1360     case CD_QUOTED_STRING:
1361       if (*p == '\\' && quoted_seen == 0) {
1362         quoted_seen = 1;
1363       }
1364       else if (*p == '"' && quoted_seen == 0) {
1365         if (defaultUTF8 && dfa_state != UTF8_ACCEPT) {
1366           return -1;
1367         }
1368         if (in_file_parm) {
1369           flags |= CD_FILENAME_FOUND;
1370         }
1371         state = CD_AFTER_VALUE;
1372       }
1373       else {
1374         /* TEXT which is OCTET except CTLs, but including LWS. Accept
1375            ISO-8859-1 chars, or UTF-8 if defaultUTF8 is set */
1376         quoted_seen = 0;
1377         if (defaultUTF8) {
1378           if (utf8dfa(&dfa_state, &dfa_code, (unsigned char)*p) ==
1379               UTF8_REJECT) {
1380             return -1;
1381           }
1382         }
1383         else if (!isIso8859p1(*p)) {
1384           return -1;
1385         }
1386         if (in_file_parm) {
1387           if (dlen == 0) {
1388             return -1;
1389           }
1390           else {
1391             *dp++ = *p;
1392             --dlen;
1393           }
1394         }
1395       }
1396       break;
1397     case CD_TOKEN:
1398       if (inRFC2616HttpToken(*p)) {
1399         if (in_file_parm) {
1400           if (dlen == 0) {
1401             return -1;
1402           }
1403           else {
1404             *dp++ = *p;
1405             --dlen;
1406           }
1407         }
1408       }
1409       else if (*p == ';') {
1410         if (in_file_parm) {
1411           flags |= CD_FILENAME_FOUND;
1412         }
1413         state = CD_BEFORE_DISPOSITION_PARM_NAME;
1414       }
1415       else if (isLws(*p)) {
1416         if (in_file_parm) {
1417           flags |= CD_FILENAME_FOUND;
1418         }
1419         state = CD_AFTER_VALUE;
1420       }
1421       else {
1422         return -1;
1423       }
1424       break;
1425     case CD_BEFORE_EXT_VALUE:
1426       if (*p == '\'') {
1427         /* Empty charset is not allowed */
1428         return -1;
1429       }
1430       else if (inRFC2978MIMECharset(*p)) {
1431         mark_first = p;
1432         state = CD_CHARSET;
1433       }
1434       else if (!isLws(*p)) {
1435         return -1;
1436       }
1437       break;
1438     case CD_CHARSET:
1439       if (*p == '\'') {
1440         mark_last = p;
1441         *charsetp = mark_first;
1442         *charsetlenp = mark_last - mark_first;
1443         if (strieq(mark_first, mark_last, "utf-8")) {
1444           charset = CD_ENC_UTF8;
1445           dfa_state = UTF8_ACCEPT;
1446           dfa_code = 0;
1447         }
1448         else if (strieq(mark_first, mark_last, "iso-8859-1")) {
1449           charset = CD_ENC_ISO_8859_1;
1450         }
1451         else {
1452           charset = CD_ENC_UNKNOWN;
1453         }
1454         state = CD_LANGUAGE;
1455       }
1456       else if (!inRFC2978MIMECharset(*p)) {
1457         return -1;
1458       }
1459       break;
1460     case CD_LANGUAGE:
1461       if (*p == '\'') {
1462         if (in_file_parm) {
1463           dp = dest;
1464           dlen = destlen;
1465         }
1466         state = CD_VALUE_CHARS;
1467       }
1468       else if (*p != '-' && !isAlpha(*p) && !isDigit(*p)) {
1469         return -1;
1470       }
1471       break;
1472     case CD_VALUE_CHARS:
1473       if (inRFC5987AttrChar(*p)) {
1474         if (charset == CD_ENC_UTF8) {
1475           if (utf8dfa(&dfa_state, &dfa_code, static_cast<unsigned char>(*p)) ==
1476               UTF8_REJECT) {
1477             return -1;
1478           }
1479         }
1480         if (in_file_parm) {
1481           if (dlen == 0) {
1482             return -1;
1483           }
1484           else {
1485             *dp++ = *p;
1486             --dlen;
1487           }
1488         }
1489       }
1490       else if (*p == '%') {
1491         if (in_file_parm) {
1492           if (dlen == 0) {
1493             return -1;
1494           }
1495         }
1496         pctval = 0;
1497         state = CD_VALUE_CHARS_PCT_ENCODED1;
1498       }
1499       else if (*p == ';' || isLws(*p)) {
1500         if (charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) {
1501           return -1;
1502         }
1503         if (in_file_parm) {
1504           flags |= CD_EXT_FILENAME_FOUND;
1505         }
1506         if (*p == ';') {
1507           state = CD_BEFORE_DISPOSITION_PARM_NAME;
1508         }
1509         else {
1510           state = CD_AFTER_VALUE;
1511         }
1512       }
1513       else if (!inRFC5987AttrChar(*p)) {
1514         return -1;
1515       }
1516       break;
1517     case CD_VALUE_CHARS_PCT_ENCODED1:
1518       if (isHexDigit(*p)) {
1519         pctval |= hexCharToUInt(*p) << 4;
1520         state = CD_VALUE_CHARS_PCT_ENCODED2;
1521       }
1522       else {
1523         return -1;
1524       }
1525       break;
1526     case CD_VALUE_CHARS_PCT_ENCODED2:
1527       if (isHexDigit(*p)) {
1528         pctval |= hexCharToUInt(*p);
1529         if (charset == CD_ENC_UTF8) {
1530           if (utf8dfa(&dfa_state, &dfa_code, pctval) == UTF8_REJECT) {
1531             return -1;
1532           }
1533         }
1534         else if (charset == CD_ENC_ISO_8859_1) {
1535           if (!isIso8859p1(pctval)) {
1536             return -1;
1537           }
1538         }
1539         if (in_file_parm) {
1540           *dp++ = pctval;
1541           --dlen;
1542         }
1543         state = CD_VALUE_CHARS;
1544       }
1545       else {
1546         return -1;
1547       }
1548       break;
1549     }
1550   }
1551   switch (state) {
1552   case CD_BEFORE_DISPOSITION_TYPE:
1553   case CD_AFTER_DISPOSITION_TYPE:
1554   case CD_DISPOSITION_TYPE:
1555   case CD_AFTER_VALUE:
1556   case CD_TOKEN:
1557     return destlen - dlen;
1558   case CD_VALUE_CHARS:
1559     if (charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) {
1560       return -1;
1561     }
1562     return destlen - dlen;
1563   default:
1564     return -1;
1565   }
1566 }
1567 
getContentDispositionFilename(const std::string & header,bool defaultUTF8)1568 std::string getContentDispositionFilename(const std::string& header,
1569                                           bool defaultUTF8)
1570 {
1571   std::array<char, 1_k> cdval;
1572   size_t cdvallen = cdval.size();
1573   const char* charset;
1574   size_t charsetlen;
1575   ssize_t rv =
1576       parse_content_disposition(cdval.data(), cdvallen, &charset, &charsetlen,
1577                                 header.c_str(), header.size(), defaultUTF8);
1578   if (rv == -1) {
1579     return "";
1580   }
1581 
1582   std::string res;
1583   if ((charset && strieq(charset, charset + charsetlen, "iso-8859-1")) ||
1584       (!charset && !defaultUTF8)) {
1585     res = iso8859p1ToUtf8(cdval.data(), rv);
1586   }
1587   else {
1588     res.assign(cdval.data(), rv);
1589   }
1590   if (!detectDirTraversal(res) &&
1591       res.find_first_of("/\\") == std::string::npos) {
1592     return res;
1593   }
1594   return "";
1595 }
1596 
toUpper(std::string src)1597 std::string toUpper(std::string src)
1598 {
1599   uppercase(src);
1600   return src;
1601 }
1602 
toLower(std::string src)1603 std::string toLower(std::string src)
1604 {
1605   lowercase(src);
1606   return src;
1607 }
1608 
uppercase(std::string & s)1609 void uppercase(std::string& s)
1610 {
1611   std::transform(s.begin(), s.end(), s.begin(), toUpperChar);
1612 }
1613 
lowercase(std::string & s)1614 void lowercase(std::string& s)
1615 {
1616   std::transform(s.begin(), s.end(), s.begin(), toLowerChar);
1617 }
1618 
// Maps 'a'..'z' to 'A'..'Z'; every other character is returned
// unchanged.  Does not consult the locale.
char toUpperChar(char c)
{
  const bool isLower = c >= 'a' && c <= 'z';
  return isLower ? static_cast<char>(c + ('A' - 'a')) : c;
}
1626 
// Maps 'A'..'Z' to 'a'..'z'; every other character is returned
// unchanged.  Does not consult the locale.
char toLowerChar(char c)
{
  const bool isUpper = c >= 'A' && c <= 'Z';
  return isUpper ? static_cast<char>(c + ('a' - 'A')) : c;
}
1634 
// Returns true if |name| is accepted by getaddrinfo() as a numeric
// host address.  AI_NUMERICHOST is set, so the call is asked not to
// resolve host names.
bool isNumericHost(const std::string& name)
{
  struct addrinfo* found;
  struct addrinfo hints;
  memset(&hints, 0, sizeof(hints));
  hints.ai_flags = AI_NUMERICHOST;
  hints.ai_family = AF_UNSPEC;

  const int rc = getaddrinfo(name.c_str(), nullptr, &hints, &found);
  if (rc != 0) {
    return false;
  }
  freeaddrinfo(found);
  return true;
}
1648 
#if _WIN32
namespace {
// Guards the two handler slots below, which are read from the console
// control handler and written by setGlobalSignalHandler().
static Lock win_signal_lock;

// Handlers invoked for SIGINT-like and SIGTERM-like console events;
// nullptr means "not handled".
static signal_handler_t win_int_handler = nullptr;
static signal_handler_t win_term_handler = nullptr;

// Handler that does nothing.
static void win_ign_handler(int) {}

// Console control handler: translates console events into calls of the
// registered handler with SIGINT or SIGTERM.  Returns TRUE if a
// handler was called, FALSE otherwise.
static BOOL WINAPI HandlerRoutine(DWORD ctrlType)
{
  void (*callback)(int) = nullptr;
  int signo = 0;

  switch (ctrlType) {
  case CTRL_C_EVENT:
  case CTRL_BREAK_EVENT: {
    // Handler will be called on a new/different thread.
    LockGuard guard(win_signal_lock);
    callback = win_int_handler;
    signo = SIGINT;
    break;
  }
  case CTRL_LOGOFF_EVENT:
  case CTRL_CLOSE_EVENT:
  case CTRL_SHUTDOWN_EVENT: {
    // Handler will be called on a new/different thread.
    LockGuard guard(win_signal_lock);
    callback = win_term_handler;
    signo = SIGTERM;
    break;
  }
  default:
    return FALSE;
  }

  // The guard has gone out of scope here; the handler itself is called
  // without holding it.
  if (!callback) {
    return FALSE;
  }
  callback(signo);
  return TRUE;
}
} // namespace
#endif
1693 
setGlobalSignalHandler(int sig,sigset_t * mask,signal_handler_t handler,int flags)1694 void setGlobalSignalHandler(int sig, sigset_t* mask, signal_handler_t handler,
1695                             int flags)
1696 {
1697 #if _WIN32
1698   if (sig == SIGINT || sig == SIGTERM) {
1699     // Handler will be called on a new/different thread.
1700     LockGuard lg(win_signal_lock);
1701 
1702     if (handler == SIG_DFL) {
1703       handler = nullptr;
1704     }
1705     else if (handler == SIG_IGN) {
1706       handler = win_ign_handler;
1707     }
1708     // Not yet in use: add console handler.
1709     if (handler && !win_int_handler && !win_term_handler) {
1710       ::SetConsoleCtrlHandler(HandlerRoutine, TRUE);
1711     }
1712     if (sig == SIGINT) {
1713       win_int_handler = handler;
1714     }
1715     else {
1716       win_term_handler = handler;
1717     }
1718     // No handlers set: remove.
1719     if (!win_int_handler && !win_term_handler) {
1720       ::SetConsoleCtrlHandler(HandlerRoutine, FALSE);
1721     }
1722     return;
1723   }
1724 #endif
1725 
1726 #ifdef HAVE_SIGACTION
1727   struct sigaction sigact;
1728   sigact.sa_handler = handler;
1729   sigact.sa_flags = flags;
1730   sigact.sa_mask = *mask;
1731   if (sigaction(sig, &sigact, nullptr) == -1) {
1732     auto errNum = errno;
1733     A2_LOG_ERROR(fmt("sigaction() failed for signal %d: %s", sig,
1734                      safeStrerror(errNum).c_str()));
1735   }
1736 #else
1737   if (signal(sig, handler) == SIG_ERR) {
1738     auto errNum = errno;
1739     A2_LOG_ERROR(fmt("signal() failed for signal %d: %s", sig,
1740                      safeStrerror(errNum).c_str()));
1741   }
1742 #endif // HAVE_SIGACTION
1743 }
1744 
1745 #ifndef __MINGW32__
getHomeDir()1746 std::string getHomeDir()
1747 {
1748   const char* p = getenv("HOME");
1749   if (p) {
1750     return p;
1751   }
1752 #  ifdef HAVE_PWD_H
1753   auto pw = getpwuid(geteuid());
1754   if (pw && pw->pw_dir) {
1755     return pw->pw_dir;
1756   }
1757 #  endif // HAVE_PWD_H
1758   return A2STR::NIL;
1759 }
1760 
1761 #else  // __MINGW32__
1762 
getHomeDir()1763 std::string getHomeDir()
1764 {
1765   auto p = _wgetenv(L"HOME");
1766   if (p) {
1767     return toForwardSlash(wCharToUtf8(p));
1768   }
1769   p = _wgetenv(L"USERPROFILE");
1770   if (p) {
1771     return toForwardSlash(wCharToUtf8(p));
1772   }
1773   p = _wgetenv(L"HOMEDRIVE");
1774   if (p) {
1775     std::wstring homeDir = p;
1776     p = _wgetenv(L"HOMEPATH");
1777     if (p) {
1778       homeDir += p;
1779       return toForwardSlash(wCharToUtf8(homeDir));
1780     }
1781   }
1782   return A2STR::NIL;
1783 }
1784 #endif // __MINGW32__
1785 
// Returns the value of the environment variable named
// |environmentVariable| if it is set and looks like an absolute path;
// otherwise returns |fallbackDirectory|.  "Absolute" means a leading
// '/', or on MinGW a ':' as the second character.
std::string getXDGDir(const std::string& environmentVariable,
                      const std::string& fallbackDirectory)
{
  const char* value = getenv(environmentVariable.c_str());
  if (!value) {
    return fallbackDirectory;
  }
#ifndef __MINGW32__
  const bool absolute = value[0] == '/';
#else  // __MINGW32__
  const bool absolute = value[0] && value[1] == ':';
#endif // __MINGW32__
  if (!absolute) {
    return fallbackDirectory;
  }
  return value;
}
1805 
getConfigFile()1806 std::string getConfigFile()
1807 {
1808   std::string filename = getHomeDir() + "/.aria2/aria2.conf";
1809   if (!File(filename).exists()) {
1810     filename = getXDGDir("XDG_CONFIG_HOME", getHomeDir() + "/.config") +
1811                "/aria2/aria2.conf";
1812   }
1813   return filename;
1814 }
1815 
getDHTFile(bool ipv6)1816 std::string getDHTFile(bool ipv6)
1817 {
1818   std::string filename =
1819       getHomeDir() + (ipv6 ? "/.aria2/dht6.dat" : "/.aria2/dht.dat");
1820   if (!File(filename).exists()) {
1821     filename = getXDGDir("XDG_CACHE_HOME", getHomeDir() + "/.cache") +
1822                (ipv6 ? "/aria2/dht6.dat" : "/aria2/dht.dat");
1823   }
1824   return filename;
1825 }
1826 
getRealSize(const std::string & sizeWithUnit)1827 int64_t getRealSize(const std::string& sizeWithUnit)
1828 {
1829   std::string::size_type p = sizeWithUnit.find_first_of("KMkm");
1830   std::string size;
1831   int32_t mult = 1;
1832   if (p == std::string::npos) {
1833     size = sizeWithUnit;
1834   }
1835   else {
1836     switch (sizeWithUnit[p]) {
1837     case 'K':
1838     case 'k':
1839       mult = 1_k;
1840       break;
1841     case 'M':
1842     case 'm':
1843       mult = 1_m;
1844       break;
1845     }
1846     size.assign(sizeWithUnit.begin(), sizeWithUnit.begin() + p);
1847   }
1848   int64_t v;
1849   if (!parseLLIntNoThrow(v, size) || v < 0) {
1850     throw DL_ABORT_EX(
1851         fmt("Bad or negative value detected: %s", sizeWithUnit.c_str()));
1852   }
1853   if (INT64_MAX / mult < v) {
1854     throw DL_ABORT_EX(
1855         fmt(MSG_STRING_INTEGER_CONVERSION_FAILURE, "overflow/underflow"));
1856   }
1857   return v * mult;
1858 }
1859 
abbrevSize(int64_t size)1860 std::string abbrevSize(int64_t size)
1861 {
1862   static const char* UNITS[] = {"", "Ki", "Mi", "Gi"};
1863   int64_t t = size;
1864   size_t uidx = 0;
1865   int r = 0;
1866   while (t >= static_cast<int64_t>(1_k) &&
1867          uidx + 1 < sizeof(UNITS) / sizeof(UNITS[0])) {
1868     lldiv_t d = lldiv(t, 1_k);
1869     t = d.quot;
1870     r = d.rem;
1871     ++uidx;
1872   }
1873   if (uidx + 1 < sizeof(UNITS) / sizeof(UNITS[0]) && t >= 922) {
1874     ++uidx;
1875     r = t;
1876     t = 0;
1877   }
1878   std::string res;
1879   res += itos(t, true);
1880   if (t < 10 && uidx > 0) {
1881     res += ".";
1882     res += itos(r * 10 / 1_k);
1883   }
1884   res += UNITS[uidx];
1885   return res;
1886 }
1887 
sleep(long seconds)1888 void sleep(long seconds)
1889 {
1890 #if defined(HAVE_WINSOCK2_H)
1891   ::Sleep(seconds * 1000);
1892 #elif HAVE_SLEEP
1893   ::sleep(seconds);
1894 #elif defined(HAVE_USLEEP)
1895   ::usleep(seconds * 1000000);
1896 #else
1897 #  error no sleep function is available (nanosleep?)
1898 #endif
1899 }
1900 
usleep(long microseconds)1901 void usleep(long microseconds)
1902 {
1903 #ifdef HAVE_USLEEP
1904   ::usleep(microseconds);
1905 #elif defined(HAVE_WINSOCK2_H)
1906 
1907   LARGE_INTEGER current, freq, end;
1908 
1909   static enum {
1910     GET_FREQUENCY,
1911     GET_MICROSECONDS,
1912     SKIP_MICROSECONDS
1913   } state = GET_FREQUENCY;
1914 
1915   if (state == GET_FREQUENCY) {
1916     if (QueryPerformanceFrequency(&freq))
1917       state = GET_MICROSECONDS;
1918     else
1919       state = SKIP_MICROSECONDS;
1920   }
1921 
1922   long msec = microseconds / 1000;
1923   microseconds %= 1000;
1924 
1925   if (state == GET_MICROSECONDS && microseconds) {
1926     QueryPerformanceCounter(&end);
1927 
1928     end.QuadPart += (freq.QuadPart * microseconds) / 1000000;
1929 
1930     while (QueryPerformanceCounter(&current) &&
1931            (current.QuadPart <= end.QuadPart))
1932       /* noop */;
1933   }
1934 
1935   if (msec)
1936     Sleep(msec);
1937 #else
1938 #  error no usleep function is available (nanosleep?)
1939 #endif
1940 }
1941 
mkdirs(const std::string & dirpath)1942 void mkdirs(const std::string& dirpath)
1943 {
1944   File dir(dirpath);
1945   if (!dir.mkdirs()) {
1946     int errNum = errno;
1947     if (!dir.isDir()) {
1948       throw DL_ABORT_EX3(
1949           errNum,
1950           fmt(EX_MAKE_DIR, dir.getPath().c_str(), safeStrerror(errNum).c_str()),
1951           error_code::DIR_CREATE_ERROR);
1952     }
1953   }
1954 }
1955 
convertBitfield(BitfieldMan * dest,const BitfieldMan * src)1956 void convertBitfield(BitfieldMan* dest, const BitfieldMan* src)
1957 {
1958   size_t numBlock = dest->countBlock();
1959   for (size_t index = 0; index < numBlock; ++index) {
1960     if (src->isBitSetOffsetRange((int64_t)index * dest->getBlockLength(),
1961                                  dest->getBlockLength())) {
1962       dest->setBit(index);
1963     }
1964   }
1965 }
1966 
toString(const std::shared_ptr<BinaryStream> & binaryStream)1967 std::string toString(const std::shared_ptr<BinaryStream>& binaryStream)
1968 {
1969   std::stringstream strm;
1970   char data[2048];
1971   while (1) {
1972     int32_t dataLength = binaryStream->readData(
1973         reinterpret_cast<unsigned char*>(data), sizeof(data), strm.tellp());
1974     strm.write(data, dataLength);
1975     if (dataLength == 0) {
1976       break;
1977     }
1978   }
1979   return strm.str();
1980 }
1981 
1982 #ifdef HAVE_POSIX_MEMALIGN
1983 /**
1984  * In linux 2.6, alignment and size should be a multiple of 512.
1985  */
allocateAlignedMemory(size_t alignment,size_t size)1986 void* allocateAlignedMemory(size_t alignment, size_t size)
1987 {
1988   void* buffer;
1989   int res;
1990   if ((res = posix_memalign(&buffer, alignment, size)) != 0) {
1991     throw FATAL_EXCEPTION(
1992         fmt("Error in posix_memalign: %s", util::safeStrerror(res).c_str()));
1993   }
1994   return buffer;
1995 }
1996 #endif // HAVE_POSIX_MEMALIGN
1997 
getNumericNameInfo(const struct sockaddr * sockaddr,socklen_t len)1998 Endpoint getNumericNameInfo(const struct sockaddr* sockaddr, socklen_t len)
1999 {
2000   char host[NI_MAXHOST];
2001   char service[NI_MAXSERV];
2002   int s = getnameinfo(sockaddr, len, host, NI_MAXHOST, service, NI_MAXSERV,
2003                       NI_NUMERICHOST | NI_NUMERICSERV);
2004   if (s != 0) {
2005     throw DL_ABORT_EX(
2006         fmt("Failed to get hostname and port. cause: %s", gai_strerror(s)));
2007   }
2008   return {host, sockaddr->sa_family,
2009           static_cast<uint16_t>(strtoul(service, nullptr, 10))};
2010 }
2011 
// Returns |src| with the characters <, >, &, ' and " replaced by
// their HTML entities.  All other characters are copied unchanged.
std::string htmlEscape(const std::string& src)
{
  std::string out;
  out.reserve(src.size());

  // Start of the run of plain characters not yet copied to |out|.
  std::string::size_type runStart = 0;
  for (std::string::size_type pos = 0; pos < src.size(); ++pos) {
    const char* entity = nullptr;
    switch (src[pos]) {
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '&':
      entity = "&amp;";
      break;
    case '\'':
      entity = "&#39;";
      break;
    case '"':
      entity = "&quot;";
      break;
    default:
      break;
    }
    if (!entity) {
      continue;
    }
    // Flush the pending plain run, then the entity.
    out.append(src, runStart, pos - runStart);
    out += entity;
    runStart = pos + 1;
  }
  out.append(src, runStart, std::string::npos);
  return out;
}
2045 
parseIndexPath(const std::string & line)2046 std::pair<size_t, std::string> parseIndexPath(const std::string& line)
2047 {
2048   auto p = divide(std::begin(line), std::end(line), '=');
2049   uint32_t index;
2050   if (!parseUIntNoThrow(index, std::string(p.first.first, p.first.second))) {
2051     throw DL_ABORT_EX("Bad path index");
2052   }
2053   if (p.second.first == p.second.second) {
2054     throw DL_ABORT_EX(fmt("Path with index=%u is empty.", index));
2055   }
2056   return std::make_pair(index, std::string(p.second.first, p.second.second));
2057 }
2058 
createIndexPaths(std::istream & i)2059 std::vector<std::pair<size_t, std::string>> createIndexPaths(std::istream& i)
2060 {
2061   std::vector<std::pair<size_t, std::string>> indexPaths;
2062   std::string line;
2063   while (getline(i, line)) {
2064     indexPaths.push_back(parseIndexPath(line));
2065   }
2066   return indexPaths;
2067 }
2068 
generateRandomData(unsigned char * data,size_t length)2069 void generateRandomData(unsigned char* data, size_t length)
2070 {
2071   const auto& rd = SimpleRandomizer::getInstance();
2072   return rd->getRandomBytes(data, length);
2073 }
2074 
saveAs(const std::string & filename,const std::string & data,bool overwrite)2075 bool saveAs(const std::string& filename, const std::string& data,
2076             bool overwrite)
2077 {
2078   if (!overwrite && File(filename).exists()) {
2079     return false;
2080   }
2081   std::string tempFilename = filename;
2082   tempFilename += "__temp";
2083   {
2084     BufferedFile fp(tempFilename.c_str(), BufferedFile::WRITE);
2085     if (!fp) {
2086       return false;
2087     }
2088     if (fp.write(data.data(), data.size()) != data.size()) {
2089       return false;
2090     }
2091     if (fp.close() == EOF) {
2092       return false;
2093     }
2094   }
2095   return File(tempFilename).renameTo(filename);
2096 }
2097 
// Joins |dir| and |relPath| with a single '/'.  An empty |dir| is
// treated as "." and a |dir| of exactly "/" gets no extra separator.
// On MinGW every backslash in the result is turned into a forward
// slash.
std::string applyDir(const std::string& dir, const std::string& relPath)
{
  std::string joined;
  if (dir.empty()) {
    joined = "./";
  }
  else {
    joined = dir;
    if (dir != "/") {
      joined += "/";
    }
  }
  joined += relPath;
#ifdef __MINGW32__
  std::replace(joined.begin(), joined.end(), '\\', '/');
#endif // __MINGW32__
  return joined;
}
2124 
fixTaintedBasename(const std::string & src)2125 std::string fixTaintedBasename(const std::string& src)
2126 {
2127   return escapePath(replace(src, "/", "%2F"));
2128 }
2129 
generateRandomKey(unsigned char * key)2130 void generateRandomKey(unsigned char* key)
2131 {
2132   unsigned char bytes[40];
2133   generateRandomData(bytes, sizeof(bytes));
2134   message_digest::digest(key, 20, MessageDigest::sha1().get(), bytes,
2135                          sizeof(bytes));
2136 }
2137 
2138 // Returns true is given numeric ipv4addr is in Private Address Space.
2139 //
2140 // From Section.3 RFC1918
2141 // 10.0.0.0        -   10.255.255.255  (10/8 prefix)
2142 // 172.16.0.0      -   172.31.255.255  (172.16/12 prefix)
2143 // 192.168.0.0     -   192.168.255.255 (192.168/16 prefix)
inPrivateAddress(const std::string & ipv4addr)2144 bool inPrivateAddress(const std::string& ipv4addr)
2145 {
2146   if (util::startsWith(ipv4addr, "10.") ||
2147       util::startsWith(ipv4addr, "192.168.")) {
2148     return true;
2149   }
2150   if (util::startsWith(ipv4addr, "172.")) {
2151     for (int i = 16; i <= 31; ++i) {
2152       std::string t(fmt("%d.", i));
2153       if (util::startsWith(ipv4addr.begin() + 4, ipv4addr.end(), t.begin(),
2154                            t.end())) {
2155         return true;
2156       }
2157     }
2158   }
2159   return false;
2160 }
2161 
detectDirTraversal(const std::string & s)2162 bool detectDirTraversal(const std::string& s)
2163 {
2164   if (s.empty()) {
2165     return false;
2166   }
2167   for (auto c : s) {
2168     unsigned char ch = c;
2169     if (in(ch, 0x00u, 0x1fu) || ch == 0x7fu) {
2170       return true;
2171     }
2172   }
2173   return s == "." || s == ".." || s[0] == '/' || util::startsWith(s, "./") ||
2174          util::startsWith(s, "../") || s.find("/../") != std::string::npos ||
2175          s.find("/./") != std::string::npos || s[s.size() - 1] == '/' ||
2176          util::endsWith(s, "/.") || util::endsWith(s, "/..");
2177 }
2178 
escapePath(const std::string & s)2179 std::string escapePath(const std::string& s)
2180 {
2181 // We don't escape '/' because we use it as a path separator.
2182 #ifdef __MINGW32__
2183   static const char WIN_INVALID_PATH_CHARS[] = {'"', '*', ':',  '<',
2184                                                 '>', '?', '\\', '|'};
2185 #endif // __MINGW32__
2186   std::string d;
2187   for (auto cc : s) {
2188     unsigned char c = cc;
2189     if (in(c, 0x00u, 0x1fu) || c == 0x7fu
2190 #ifdef __MINGW32__
2191         || std::find(std::begin(WIN_INVALID_PATH_CHARS),
2192                      std::end(WIN_INVALID_PATH_CHARS),
2193                      c) != std::end(WIN_INVALID_PATH_CHARS)
2194 #endif // __MINGW32__
2195     ) {
2196       d += fmt("%%%02X", c);
2197     }
2198     else {
2199       d += c;
2200     }
2201   }
2202   return d;
2203 }
2204 
inSameCidrBlock(const std::string & ip1,const std::string & ip2,size_t bits)2205 bool inSameCidrBlock(const std::string& ip1, const std::string& ip2,
2206                      size_t bits)
2207 {
2208   unsigned char s1[16], s2[16];
2209   size_t len1, len2;
2210   if ((len1 = net::getBinAddr(s1, ip1)) == 0 ||
2211       (len2 = net::getBinAddr(s2, ip2)) == 0 || len1 != len2) {
2212     return false;
2213   }
2214   if (bits == 0) {
2215     return true;
2216   }
2217   if (bits > 8 * len1) {
2218     bits = 8 * len1;
2219   }
2220   int last = (bits - 1) / 8;
2221   for (int i = 0; i < last; ++i) {
2222     if (s1[i] != s2[i]) {
2223       return false;
2224     }
2225   }
2226   unsigned char mask = bitfield::lastByteMask(bits);
2227   return (s1[last] & mask) == (s2[last] & mask);
2228 }
2229 
2230 namespace {
2231 
executeHook(const std::string & command,a2_gid_t gid,size_t numFiles,const std::string & firstFilename)2232 void executeHook(const std::string& command, a2_gid_t gid, size_t numFiles,
2233                  const std::string& firstFilename)
2234 {
2235   const std::string gidStr = GroupId::toHex(gid);
2236   const std::string numFilesStr = util::uitos(numFiles);
2237 #ifndef __MINGW32__
2238   A2_LOG_INFO(fmt("Executing user command: %s %s %s %s", command.c_str(),
2239                   gidStr.c_str(), numFilesStr.c_str(), firstFilename.c_str()));
2240   pid_t cpid = fork();
2241   if (cpid == 0) {
2242     // child!
2243     execlp(command.c_str(), command.c_str(), gidStr.c_str(),
2244            numFilesStr.c_str(), firstFilename.c_str(),
2245            reinterpret_cast<char*>(0));
2246     perror(("Could not execute user command: " + command).c_str());
2247     _exit(EXIT_FAILURE);
2248     return;
2249   }
2250 
2251   if (cpid == -1) {
2252     A2_LOG_ERROR("fork() failed. Cannot execute user command.");
2253   }
2254   return;
2255 
2256 #else // __MINGW32__
2257   PROCESS_INFORMATION pi;
2258   STARTUPINFOW si;
2259 
2260   memset(&si, 0, sizeof(si));
2261   si.cb = sizeof(STARTUPINFO);
2262   memset(&pi, 0, sizeof(pi));
2263   bool batch = util::iendsWith(command, ".bat");
2264   std::string cmdline;
2265   std::string cmdexe;
2266 
2267   // XXX batch handling, in particular quoting, correct?
2268   if (batch) {
2269     const char* p = getenv("windir");
2270     if (p) {
2271       cmdexe = p;
2272       cmdexe += "\\system32\\cmd.exe";
2273     }
2274     else {
2275       A2_LOG_INFO("Failed to get windir environment variable."
2276                   " Executing batch file will fail.");
2277       // TODO Might be useless.
2278       cmdexe = "cmd.exe";
2279     }
2280     cmdline += "/C \"";
2281   }
2282   cmdline += "\"";
2283   cmdline += command;
2284   cmdline += "\"";
2285   cmdline += " ";
2286   cmdline += gidStr;
2287   cmdline += " ";
2288   cmdline += numFilesStr;
2289   cmdline += " \"";
2290   cmdline += firstFilename;
2291   cmdline += "\"";
2292   if (batch) {
2293     cmdline += "\"";
2294   }
2295   int cmdlineLen = utf8ToWChar(nullptr, 0, cmdline.c_str());
2296   assert(cmdlineLen > 0);
2297   auto wcharCmdline = make_unique<wchar_t[]>(cmdlineLen);
2298   cmdlineLen = utf8ToWChar(wcharCmdline.get(), cmdlineLen, cmdline.c_str());
2299   assert(cmdlineLen > 0);
2300   A2_LOG_INFO(fmt("Executing user command: %s", cmdline.c_str()));
2301   DWORD rc = CreateProcessW(batch ? utf8ToWChar(cmdexe).c_str() : nullptr,
2302                             wcharCmdline.get(), nullptr, nullptr, false, 0,
2303                             nullptr, 0, &si, &pi);
2304 
2305   if (!rc) {
2306     A2_LOG_ERROR("CreateProcess() failed. Cannot execute user command.");
2307   }
2308   return;
2309 
2310 #endif
2311 }
2312 
2313 } // namespace
2314 
executeHookByOptName(const std::shared_ptr<RequestGroup> & group,const Option * option,PrefPtr pref)2315 void executeHookByOptName(const std::shared_ptr<RequestGroup>& group,
2316                           const Option* option, PrefPtr pref)
2317 {
2318   executeHookByOptName(group.get(), option, pref);
2319 }
2320 
executeHookByOptName(const RequestGroup * group,const Option * option,PrefPtr pref)2321 void executeHookByOptName(const RequestGroup* group, const Option* option,
2322                           PrefPtr pref)
2323 {
2324   const std::string& cmd = option->get(pref);
2325   if (!cmd.empty()) {
2326     const std::shared_ptr<DownloadContext> dctx = group->getDownloadContext();
2327     std::string firstFilename;
2328     size_t numFiles = 0;
2329     if (!group->inMemoryDownload()) {
2330       std::shared_ptr<FileEntry> file = dctx->getFirstRequestedFileEntry();
2331       if (file) {
2332         firstFilename = file->getPath();
2333       }
2334       numFiles = dctx->countRequestedFileEntry();
2335     }
2336     executeHook(cmd, group->getGID(), numFiles, firstFilename);
2337   }
2338 }
2339 
createSafePath(const std::string & dir,const std::string & filename)2340 std::string createSafePath(const std::string& dir, const std::string& filename)
2341 {
2342   return util::applyDir(dir,
2343                         util::isUtf8(filename)
2344                             ? util::fixTaintedBasename(filename)
2345                             : util::escapePath(util::percentEncode(filename)));
2346 }
2347 
createSafePath(const std::string & filename)2348 std::string createSafePath(const std::string& filename)
2349 {
2350   return util::isUtf8(filename)
2351              ? util::fixTaintedBasename(filename)
2352              : util::escapePath(util::percentEncode(filename));
2353 }
2354 
encodeNonUtf8(const std::string & s)2355 std::string encodeNonUtf8(const std::string& s)
2356 {
2357   return util::isUtf8(s) ? s : util::percentEncode(s);
2358 }
2359 
makeString(const char * str)2360 std::string makeString(const char* str)
2361 {
2362   if (!str) {
2363     return A2STR::NIL;
2364   }
2365   return str;
2366 }
2367 
safeStrerror(int errNum)2368 std::string safeStrerror(int errNum) { return makeString(strerror(errNum)); }
2369 
noProxyDomainMatch(const std::string & hostname,const std::string & domain)2370 bool noProxyDomainMatch(const std::string& hostname, const std::string& domain)
2371 {
2372   if (!domain.empty() && domain[0] == '.' && !util::isNumericHost(hostname)) {
2373     return util::endsWith(hostname, domain);
2374   }
2375   return hostname == domain;
2376 }
2377 
tlsHostnameMatch(const std::string & pattern,const std::string & hostname)2378 bool tlsHostnameMatch(const std::string& pattern, const std::string& hostname)
2379 {
2380   std::string::const_iterator ptWildcard =
2381       std::find(pattern.begin(), pattern.end(), '*');
2382   if (ptWildcard == pattern.end()) {
2383     return strieq(pattern.begin(), pattern.end(), hostname.begin(),
2384                   hostname.end());
2385   }
2386   std::string::const_iterator ptLeftLabelEnd =
2387       std::find(pattern.begin(), pattern.end(), '.');
2388   bool wildcardEnabled = true;
2389   // Do case-insensitive match. At least 2 dots are required to enable
2390   // wildcard match. Also wildcard must be in the left-most label.
2391   // Don't attempt to match a presented identifier where the wildcard
2392   // character is embedded within an A-label.
2393   if (ptLeftLabelEnd == pattern.end() ||
2394       std::find(ptLeftLabelEnd + 1, pattern.end(), '.') == pattern.end() ||
2395       ptLeftLabelEnd < ptWildcard || istartsWith(pattern, "xn--")) {
2396     wildcardEnabled = false;
2397   }
2398   if (!wildcardEnabled) {
2399     return strieq(pattern.begin(), pattern.end(), hostname.begin(),
2400                   hostname.end());
2401   }
2402   std::string::const_iterator hnLeftLabelEnd =
2403       std::find(hostname.begin(), hostname.end(), '.');
2404   if (!strieq(ptLeftLabelEnd, pattern.end(), hnLeftLabelEnd, hostname.end())) {
2405     return false;
2406   }
2407   // Perform wildcard match. Here '*' must match at least one
2408   // character.
2409   if (hnLeftLabelEnd - hostname.begin() < ptLeftLabelEnd - pattern.begin()) {
2410     return false;
2411   }
2412   return istartsWith(hostname.begin(), hnLeftLabelEnd, pattern.begin(),
2413                      ptWildcard) &&
2414          iendsWith(hostname.begin(), hnLeftLabelEnd, ptWildcard + 1,
2415                    ptLeftLabelEnd);
2416 }
2417 
strieq(const std::string & a,const char * b)2418 bool strieq(const std::string& a, const char* b)
2419 {
2420   return strieq(a.begin(), a.end(), b);
2421 }
2422 
strieq(const std::string & a,const std::string & b)2423 bool strieq(const std::string& a, const std::string& b)
2424 {
2425   return strieq(a.begin(), a.end(), b.begin(), b.end());
2426 }
2427 
startsWith(const std::string & a,const char * b)2428 bool startsWith(const std::string& a, const char* b)
2429 {
2430   return startsWith(a.begin(), a.end(), b);
2431 }
2432 
startsWith(const std::string & a,const std::string & b)2433 bool startsWith(const std::string& a, const std::string& b)
2434 {
2435   return startsWith(a.begin(), a.end(), b.begin(), b.end());
2436 }
2437 
istartsWith(const std::string & a,const char * b)2438 bool istartsWith(const std::string& a, const char* b)
2439 {
2440   return istartsWith(a.begin(), a.end(), b);
2441 }
2442 
istartsWith(const std::string & a,const std::string & b)2443 bool istartsWith(const std::string& a, const std::string& b)
2444 {
2445   return istartsWith(std::begin(a), std::end(a), std::begin(b), std::end(b));
2446 }
2447 
endsWith(const std::string & a,const char * b)2448 bool endsWith(const std::string& a, const char* b)
2449 {
2450   return endsWith(a.begin(), a.end(), b, b + strlen(b));
2451 }
2452 
endsWith(const std::string & a,const std::string & b)2453 bool endsWith(const std::string& a, const std::string& b)
2454 {
2455   return endsWith(a.begin(), a.end(), b.begin(), b.end());
2456 }
2457 
iendsWith(const std::string & a,const char * b)2458 bool iendsWith(const std::string& a, const char* b)
2459 {
2460   return iendsWith(a.begin(), a.end(), b, b + strlen(b));
2461 }
2462 
iendsWith(const std::string & a,const std::string & b)2463 bool iendsWith(const std::string& a, const std::string& b)
2464 {
2465   return iendsWith(a.begin(), a.end(), b.begin(), b.end());
2466 }
2467 
// Strict "less than" for C strings: true when strcmp() orders |a|
// before |b|.  Both pointers must be non-null.
bool strless(const char* a, const char* b)
{
  const int order = strcmp(a, b);
  return order < 0;
}
2469 
2470 #ifdef ENABLE_SSL
toTLSVersion(const std::string & ver)2471 TLSVersion toTLSVersion(const std::string& ver)
2472 {
2473   if (ver == A2_V_TLS11) {
2474     return TLS_PROTO_TLS11;
2475   }
2476   if (ver == A2_V_TLS12) {
2477     return TLS_PROTO_TLS12;
2478   }
2479   if (ver == A2_V_TLS13) {
2480     return TLS_PROTO_TLS13;
2481   }
2482   return TLS_PROTO_TLS12;
2483 }
2484 #endif // ENABLE_SSL
2485 
2486 #ifdef __MINGW32__
formatLastError(int errNum)2487 std::string formatLastError(int errNum)
2488 {
2489   std::array<char, 4_k> buf;
2490   if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
2491                     nullptr, errNum,
2492                     // Default language
2493                     MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US),
2494                     static_cast<LPTSTR>(buf.data()), buf.size(),
2495                     nullptr) == 0) {
2496     return "";
2497   }
2498 
2499   return buf.data();
2500 }
2501 #endif // __MINGW32__
2502 
// Sets the FD_CLOEXEC flag on |fd| while keeping its other descriptor
// flags.  Errors are ignored: if the current flags cannot be read the
// descriptor is left untouched.  Does nothing on MinGW builds.
void make_fd_cloexec(int fd)
{
#ifndef __MINGW32__
  // TODO According to the linux man page, fcntl() with F_GETFD or
  // F_SETFD does not fail with errno == EINTR.  The retry loops are
  // kept because the aria2 code base has historically checked this
  // case; they are probably unnecessary.
  int currentFlags;
  do {
    currentFlags = fcntl(fd, F_GETFD);
  } while (currentFlags == -1 && errno == EINTR);

  if (currentFlags == -1) {
    return;
  }

  int rv;
  do {
    rv = fcntl(fd, F_SETFD, currentFlags | FD_CLOEXEC);
  } while (rv == -1 && errno == EINTR);
#endif // !__MINGW32__
}
2521 
2522 #ifdef __MINGW32__
gainPrivilege(LPCTSTR privName)2523 bool gainPrivilege(LPCTSTR privName)
2524 {
2525   LUID luid;
2526   TOKEN_PRIVILEGES tp;
2527 
2528   if (!LookupPrivilegeValue(nullptr, privName, &luid)) {
2529     auto errNum = GetLastError();
2530     A2_LOG_WARN(fmt("Lookup for privilege name %s failed. cause: %s", privName,
2531                     util::formatLastError(errNum).c_str()));
2532     return false;
2533   }
2534 
2535   tp.PrivilegeCount = 1;
2536   tp.Privileges[0].Luid = luid;
2537   tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
2538 
2539   HANDLE token;
2540   if (!OpenProcessToken(GetCurrentProcess(),
2541                         TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token)) {
2542     auto errNum = GetLastError();
2543     A2_LOG_WARN(fmt("Getting process token failed. cause: %s",
2544                     util::formatLastError(errNum).c_str()));
2545     return false;
2546   }
2547 
2548   auto tokenCloser = defer(token, CloseHandle);
2549 
2550   if (!AdjustTokenPrivileges(token, FALSE, &tp, 0, NULL, NULL)) {
2551     auto errNum = GetLastError();
2552     A2_LOG_WARN(fmt("Gaining privilege %s failed. cause: %s", privName,
2553                     util::formatLastError(errNum).c_str()));
2554     return false;
2555   }
2556 
2557   // Check privilege was really gained
2558   DWORD bufsize = 0;
2559   GetTokenInformation(token, TokenPrivileges, nullptr, 0, &bufsize);
2560   if (bufsize == 0) {
2561     A2_LOG_WARN("Checking privilege failed.");
2562     return false;
2563   }
2564 
2565   auto buf = make_unique<char[]>(bufsize);
2566   if (!GetTokenInformation(token, TokenPrivileges, buf.get(), bufsize,
2567                            &bufsize)) {
2568     auto errNum = GetLastError();
2569     A2_LOG_WARN(fmt("Checking privilege failed. cause: %s",
2570                     util::formatLastError(errNum).c_str()));
2571     return false;
2572   }
2573 
2574   auto privs = reinterpret_cast<TOKEN_PRIVILEGES*>(buf.get());
2575   for (size_t i = 0; i < privs->PrivilegeCount; ++i) {
2576     auto& priv = privs->Privileges[i];
2577     if (memcmp(&priv.Luid, &luid, sizeof(luid)) != 0) {
2578       continue;
2579     }
2580     if (priv.Attributes == SE_PRIVILEGE_ENABLED) {
2581       return true;
2582     }
2583 
2584     break;
2585   }
2586 
2587   A2_LOG_WARN(fmt("Gaining privilege %s failed.", privName));
2588 
2589   return false;
2590 }
2591 #endif // __MINGW32__
2592 
2593 } // namespace util
2594 
2595 } // namespace aria2
2596