1 /* <!-- copyright */
2 /*
3 * aria2 - The high speed download utility
4 *
5 * Copyright (C) 2006 Tatsuhiro Tsujikawa
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 * In addition, as a special exception, the copyright holders give
22 * permission to link the code of portions of this program with the
23 * OpenSSL library under certain conditions as described in each
24 * individual source file, and distribute linked combinations
25 * including the two.
26 * You must obey the GNU General Public License in all respects
27 * for all of the code used other than OpenSSL. If you modify
28 * file(s) with this exception, you may extend this exception to your
29 * version of the file(s), but you are not obligated to do so. If you
30 * do not wish to do so, delete this exception statement from your
31 * version. If you delete this exception statement from all source
32 * files in the program, then also delete it here.
33 */
34 /* copyright --> */
35 #include "util.h"
36
37 #ifdef __sun
38 // For opensolaris, just include signal.h which includes sys/signal.h
39 # ifdef HAVE_SIGNAL_H
40 # include <signal.h>
41 # endif // HAVE_SIGNAL_H
42 #else // !__sun
43 # ifdef HAVE_SYS_SIGNAL_H
44 # include <sys/signal.h>
45 # endif // HAVE_SYS_SIGNAL_H
46 # ifdef HAVE_SIGNAL_H
47 # include <signal.h>
48 # endif // HAVE_SIGNAL_H
49 #endif // !__sun
50
51 #include <sys/types.h>
52 #ifdef HAVE_PWD_H
53 # include <pwd.h>
54 #endif // HAVE_PWD_H
55
56 #include <array>
57 #include <cerrno>
58 #include <cassert>
59 #include <cstring>
60 #include <cstdio>
61 #include <cstdlib>
62 #include <sstream>
63 #include <ostream>
64 #include <algorithm>
65 #include <fstream>
66 #include <iomanip>
67
68 #include "SimpleRandomizer.h"
69 #include "File.h"
70 #include "Randomizer.h"
71 #include "a2netcompat.h"
72 #include "BitfieldMan.h"
73 #include "DefaultDiskWriter.h"
74 #include "FatalException.h"
75 #include "FileEntry.h"
76 #include "A2STR.h"
77 #include "array_fun.h"
78 #include "bitfield.h"
79 #include "DownloadHandlerConstants.h"
80 #include "RequestGroup.h"
81 #include "LogFactory.h"
82 #include "Logger.h"
83 #include "Option.h"
84 #include "DownloadContext.h"
85 #include "BufferedFile.h"
86 #include "SocketCore.h"
87 #include "Lock.h"
88
89 #include "MessageDigest.h"
90 #include "message_digest_helper.h"
91
92 // For libc6 which doesn't define ULLONG_MAX properly because of broken limits.h
93 #ifndef ULLONG_MAX
94 # define ULLONG_MAX 18446744073709551615ULL
95 #endif // ULLONG_MAX
96
97 namespace aria2 {
98
99 #ifdef __MINGW32__
100 namespace {
utf8ToWChar(wchar_t * out,size_t outLength,const char * src)101 int utf8ToWChar(wchar_t* out, size_t outLength, const char* src)
102 {
103 return MultiByteToWideChar(CP_UTF8, 0, src, -1, out, outLength);
104 }
105 } // namespace
106
107 namespace {
wCharToUtf8(char * out,size_t outLength,const wchar_t * src)108 int wCharToUtf8(char* out, size_t outLength, const wchar_t* src)
109 {
110 return WideCharToMultiByte(CP_UTF8, 0, src, -1, out, outLength, nullptr,
111 nullptr);
112 }
113 } // namespace
114
utf8ToWChar(const char * src)115 std::wstring utf8ToWChar(const char* src)
116 {
117 int len = utf8ToWChar(nullptr, 0, src);
118 if (len <= 0) {
119 abort();
120 }
121 auto buf = make_unique<wchar_t[]>((size_t)len);
122 len = utf8ToWChar(buf.get(), len, src);
123 if (len <= 0) {
124 abort();
125 }
126 else {
127 return buf.get();
128 }
129 }
130
// Convenience overload: converts the UTF-8 contents of |src| (up to
// its first NUL, since the C-string overload is used) to a wide string.
std::wstring utf8ToWChar(const std::string& src)
{
  const char* const cstr = src.c_str();
  return utf8ToWChar(cstr);
}
135
wCharToUtf8(const std::wstring & wsrc)136 std::string wCharToUtf8(const std::wstring& wsrc)
137 {
138 int len = wCharToUtf8(nullptr, 0, wsrc.c_str());
139 if (len <= 0) {
140 abort();
141 }
142 auto buf = make_unique<char[]>((size_t)len);
143 len = wCharToUtf8(buf.get(), len, wsrc.c_str());
144 if (len <= 0) {
145 abort();
146 }
147 else {
148 return buf.get();
149 }
150 }
151
// Returns a copy of |src| in which every backslash is replaced by a
// forward slash.  All other characters are left untouched.
std::string toForwardSlash(const std::string& src)
{
  std::string converted(src);
  std::replace(converted.begin(), converted.end(), '\\', '/');
  return converted;
}
159
160 #endif // __MINGW32__
161
162 namespace util {
163
// Carriage return, line feed, horizontal tab and space.
// NOTE(review): presumably the default |chars| argument of strip();
// the default itself is declared outside this file -- confirm in util.h.
const char DEFAULT_STRIP_CHARSET[] = "\r\n\t ";
165
strip(const std::string & str,const char * chars)166 std::string strip(const std::string& str, const char* chars)
167 {
168 std::pair<std::string::const_iterator, std::string::const_iterator> p =
169 stripIter(str.begin(), str.end(), chars);
170 return std::string(p.first, p.second);
171 }
172
itos(int64_t value,bool comma)173 std::string itos(int64_t value, bool comma)
174 {
175 bool flag = false;
176 std::string str;
177 if (value < 0) {
178 if (value == INT64_MIN) {
179 if (comma) {
180 str = "-9,223,372,036,854,775,808";
181 }
182 else {
183 str = "-9223372036854775808";
184 }
185 return str;
186 }
187 flag = true;
188 value = -value;
189 }
190 str = uitos(value, comma);
191 if (flag) {
192 str.insert(str.begin(), '-');
193 }
194 return str;
195 }
196
// Returns tv1 - tv2 in microseconds, or 0 when tv1 is earlier than
// tv2.
int64_t difftv(struct timeval tv1, struct timeval tv2)
{
  const bool tv1Earlier =
      tv1.tv_sec < tv2.tv_sec ||
      (tv1.tv_sec == tv2.tv_sec && tv1.tv_usec < tv2.tv_usec);
  if (tv1Earlier) {
    return 0;
  }
  // Widen before scaling so the multiplication is done in 64 bits.
  const int64_t wholeSeconds = (int64_t)(tv1.tv_sec - tv2.tv_sec);
  return wholeSeconds * 1000000 + tv1.tv_usec - tv2.tv_usec;
}
206
// Returns the difference of the whole-second fields (tv1 - tv2); the
// microsecond fields are ignored.  Returns 0 when tv1.tv_sec is
// smaller than tv2.tv_sec.
int32_t difftvsec(struct timeval tv1, struct timeval tv2)
{
  return tv1.tv_sec < tv2.tv_sec ? 0 : tv1.tv_sec - tv2.tv_sec;
}
214
// Returns a copy of |target| with every non-overlapping occurrence of
// |oldstr| (searched left to right) replaced by |newstr|.  If either
// |target| or |oldstr| is empty, |target| is returned unchanged.
std::string replace(const std::string& target, const std::string& oldstr,
                    const std::string& newstr)
{
  if (target.empty() || oldstr.empty()) {
    return target;
  }
  std::string result;
  // Index of the first character of |target| not yet copied.
  std::string::size_type copied = 0;
  for (std::string::size_type hit = target.find(oldstr);
       hit != std::string::npos; hit = target.find(oldstr, copied)) {
    result.append(target, copied, hit - copied);
    result.append(newstr);
    copied = hit + oldstr.size();
  }
  // Tail after the last match (or the whole string if none matched).
  result.append(target, copied, std::string::npos);
  return result;
}
233
// Returns true if |c| is an ASCII letter ('A'-'Z' or 'a'-'z').
bool isAlpha(const char c)
{
  const bool upper = 'A' <= c && c <= 'Z';
  const bool lower = 'a' <= c && c <= 'z';
  return upper || lower;
}
238
// Returns true if |c| is an ASCII decimal digit ('0'-'9').
bool isDigit(const char c)
{
  return c >= '0' && c <= '9';
}
240
isHexDigit(const char c)241 bool isHexDigit(const char c)
242 {
243 return isDigit(c) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
244 }
245
isHexDigit(const std::string & s)246 bool isHexDigit(const std::string& s)
247 {
248 for (const auto& c : s) {
249 if (!isHexDigit(c)) {
250 return false;
251 }
252 }
253 return true;
254 }
255
// Returns true if |c| is one of the characters
//   : / ? # [ ] @ ! $ & ' ( ) * + , ; =
// (the name refers to the RFC 3986 "reserved" set).
bool inRFC3986ReservedChars(const char c)
{
  switch (c) {
  case ':':
  case '/':
  case '?':
  case '#':
  case '[':
  case ']':
  case '@':
  case '!':
  case '$':
  case '&':
  case '\'':
  case '(':
  case ')':
  case '*':
  case '+':
  case ',':
  case ';':
  case '=':
    return true;
  default:
    return false;
  }
}
263
inRFC3986UnreservedChars(const char c)264 bool inRFC3986UnreservedChars(const char c)
265 {
266 static const char unreserved[] = {'-', '.', '_', '~'};
267 return isAlpha(c) || isDigit(c) ||
268 std::find(std::begin(unreserved), std::end(unreserved), c) !=
269 std::end(unreserved);
270 }
271
inRFC2978MIMECharset(const char c)272 bool inRFC2978MIMECharset(const char c)
273 {
274 static const char chars[] = {'!', '#', '$', '%', '&', '\'', '+',
275 '-', '^', '_', '`', '{', '}', '~'};
276 return isAlpha(c) || isDigit(c) ||
277 std::find(std::begin(chars), std::end(chars), c) != std::end(chars);
278 }
279
inRFC2616HttpToken(const char c)280 bool inRFC2616HttpToken(const char c)
281 {
282 static const char chars[] = {'!', '#', '$', '%', '&', '\'', '*', '+',
283 '-', '.', '^', '_', '`', '|', '~'};
284 return isAlpha(c) || isDigit(c) ||
285 std::find(std::begin(chars), std::end(chars), c) != std::end(chars);
286 }
287
inRFC5987AttrChar(const char c)288 bool inRFC5987AttrChar(const char c)
289 {
290 return inRFC2616HttpToken(c) && c != '*' && c != '\'' && c != '%';
291 }
292
// Returns true if |c| is accepted as an ISO/IEC 8859-1 character
// here: 0x20-0x7e or 0xa0-0xff.  Bytes 0x00-0x1f and 0x7f-0x9f are
// rejected.
bool isIso8859p1(unsigned char c)
{
  if (c >= 0xa0u) {
    return true;
  }
  return c >= 0x20u && c <= 0x7eu;
}
298
// Returns true if |c| is a space or a horizontal tab.
bool isLws(const char c)
{
  return ' ' == c || '\t' == c;
}
// Returns true if |c| is a carriage return or a line feed.
bool isCRLF(const char c)
{
  return '\r' == c || '\n' == c;
}
301
302 namespace {
303
isUtf8Tail(unsigned char ch)304 bool isUtf8Tail(unsigned char ch) { return in(ch, 0x80u, 0xbfu); }
305
// Returns true if |c| may be emitted verbatim by percentEncodeMini():
// a byte in 0x21-0x7e other than '"', '<' and '>'.
bool inPercentEncodeMini(const unsigned char c)
{
  if (c <= 0x20 || c >= 0x7fu) {
    return false;
  }
  // Chromium escapes following characters. Firefox4 escapes more.
  return c != '"' && c != '<' && c != '>';
}
312
313 } // namespace
314
// Returns true if |str| consists only of byte sequences accepted by
// the checks below, which follow the UTF-8 ABNF referenced in the
// loop.  Single-byte characters are restricted to 0x20-0x7e plus
// \b, \t, \n, \f and \r; any other single byte (other control
// characters, 0x7f, a stray continuation byte, 0xc0, 0xc1, 0xf5 and
// above) makes the function return false.  An empty string yields
// true.
bool isUtf8(const std::string& str)
{
  // |s| is advanced inside the conditions below while the trailing
  // bytes of a sequence are consumed; each "++s == eos" test rejects a
  // sequence that is cut short by the end of the string.
  for (std::string::const_iterator s = str.begin(), eos = str.end(); s != eos;
       ++s) {
    unsigned char firstChar = *s;
    // See ABNF in http://tools.ietf.org/search/rfc3629#section-4
    if (in(firstChar, 0x20u, 0x7eu) || firstChar == 0x08u || // \b
        firstChar == 0x09u ||                                // \t
        firstChar == 0x0au ||                                // \n
        firstChar == 0x0cu ||                                // \f
        firstChar == 0x0du                                   // \r
    ) {
      // UTF8-1 (without ctrl chars)
    }
    else if (in(firstChar, 0xc2u, 0xdfu)) {
      // UTF8-2
      if (++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xe0u == firstChar) {
      // UTF8-3
      // Second byte limited to 0xa0-0xbf for lead byte 0xe0.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0xa0u, 0xbfu) ||
          ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (in(firstChar, 0xe1u, 0xecu) || in(firstChar, 0xeeu, 0xefu)) {
      // UTF8-3
      if (++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xedu == firstChar) {
      // UTF8-3
      // Second byte limited to 0x80-0x9f for lead byte 0xed.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0x80u, 0x9fu) ||
          ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xf0u == firstChar) {
      // UTF8-4
      // Second byte limited to 0x90-0xbf for lead byte 0xf0.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0x90u, 0xbfu) ||
          ++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (in(firstChar, 0xf1u, 0xf3u)) {
      // UTF8-4
      if (++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s) ||
          ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else if (0xf4u == firstChar) {
      // UTF8-4
      // Second byte limited to 0x80-0x8f for lead byte 0xf4.
      if (++s == eos || !in(static_cast<unsigned char>(*s), 0x80u, 0x8fu) ||
          ++s == eos || !isUtf8Tail(*s) || ++s == eos || !isUtf8Tail(*s)) {
        return false;
      }
    }
    else {
      // Not a byte that may start a sequence accepted here.
      return false;
    }
  }
  return true;
}
382
percentEncode(const unsigned char * target,size_t len)383 std::string percentEncode(const unsigned char* target, size_t len)
384 {
385 std::string dest;
386 for (size_t i = 0; i < len; ++i) {
387 if (inRFC3986UnreservedChars(target[i])) {
388 dest += target[i];
389 }
390 else {
391 dest.append(fmt("%%%02X", target[i]));
392 }
393 }
394 return dest;
395 }
396
percentEncode(const std::string & target)397 std::string percentEncode(const std::string& target)
398 {
399 if (std::find_if_not(target.begin(), target.end(),
400 inRFC3986UnreservedChars) == target.end()) {
401 return target;
402 }
403 return percentEncode(reinterpret_cast<const unsigned char*>(target.c_str()),
404 target.size());
405 }
406
percentEncodeMini(const std::string & src)407 std::string percentEncodeMini(const std::string& src)
408 {
409 if (std::find_if_not(src.begin(), src.end(), inPercentEncodeMini) ==
410 src.end()) {
411 return src;
412 }
413 std::string result;
414 for (auto c : src) {
415 if (!inPercentEncodeMini(c)) {
416 result += fmt("%%%02X", static_cast<unsigned char>(c));
417 }
418 else {
419 result += c;
420 }
421 }
422 return result;
423 }
424
torrentPercentEncode(const unsigned char * target,size_t len)425 std::string torrentPercentEncode(const unsigned char* target, size_t len)
426 {
427 std::string dest;
428 for (size_t i = 0; i < len; ++i) {
429 if (isAlpha(target[i]) || isDigit(target[i])) {
430 dest += target[i];
431 }
432 else {
433 dest.append(fmt("%%%02X", target[i]));
434 }
435 }
436 return dest;
437 }
438
torrentPercentEncode(const std::string & target)439 std::string torrentPercentEncode(const std::string& target)
440 {
441 return torrentPercentEncode(
442 reinterpret_cast<const unsigned char*>(target.c_str()), target.size());
443 }
444
percentDecode(std::string::const_iterator first,std::string::const_iterator last)445 std::string percentDecode(std::string::const_iterator first,
446 std::string::const_iterator last)
447 {
448 std::string result;
449 for (; first != last; ++first) {
450 if (*first == '%') {
451 if (first + 1 != last && first + 2 != last && isHexDigit(*(first + 1)) &&
452 isHexDigit(*(first + 2))) {
453 result +=
454 hexCharToUInt(*(first + 1)) * 16 + hexCharToUInt(*(first + 2));
455 first += 2;
456 }
457 else {
458 result += *first;
459 }
460 }
461 else {
462 result += *first;
463 }
464 }
465 return result;
466 }
467
// Returns the lowercase hexadecimal representation of the |len|
// bytes starting at |src|: two characters per byte, high nibble
// first.
std::string toHex(const unsigned char* src, size_t len)
{
  static const char digits[] = "0123456789abcdef";
  std::string out;
  out.reserve(len * 2);
  for (size_t i = 0; i < len; ++i) {
    out += digits[src[i] >> 4];
    out += digits[src[i] & 0x0fu];
  }
  return out;
}
488
toHex(const char * src,size_t len)489 std::string toHex(const char* src, size_t len)
490 {
491 return toHex(reinterpret_cast<const unsigned char*>(src), len);
492 }
493
toHex(const std::string & src)494 std::string toHex(const std::string& src)
495 {
496 return toHex(reinterpret_cast<const unsigned char*>(src.c_str()), src.size());
497 }
498
// Returns the numeric value (0-15) of the hexadecimal digit |ch|,
// accepting both upper and lower case letters.  Returns 255 if |ch|
// is not a hexadecimal digit.
unsigned int hexCharToUInt(unsigned char ch)
{
  if ('0' <= ch && ch <= '9') {
    return ch - '0';
  }
  if ('a' <= ch && ch <= 'f') {
    return ch - 'a' + 10;
  }
  if ('A' <= ch && ch <= 'F') {
    return ch - 'A' + 10;
  }
  return 255;
}
517
secfmt(time_t sec)518 std::string secfmt(time_t sec)
519 {
520 time_t tsec = sec;
521 std::string str;
522 if (sec >= 3600) {
523 str = fmt("%" PRId64 "h", static_cast<int64_t>(sec / 3600));
524 sec %= 3600;
525 }
526 if (sec >= 60) {
527 str += fmt("%dm", static_cast<int>(sec / 60));
528 sec %= 60;
529 }
530 if (sec || tsec == 0) {
531 str += fmt("%ds", static_cast<int>(sec));
532 }
533 return str;
534 }
535
namespace {
// Parses the whole of |s| as an integer in |base| using the
// strtol-style function |f| (called as f(const char*, char**, int))
// and stores the converted value in |res|.
//
// Returns true only if
//  - |s| is not empty,
//  - |f| did not set errno to ERANGE,
//  - at least one character was consumed by the conversion, and
//  - everything after the converted number, up to the real end of
//    |s|, is whitespace.
//
// |res| is assigned the return value of |f| even when false is
// returned (unless |s| is empty).
template <typename T, typename F>
bool parseLong(T& res, F f, const std::string& s, int base)
{
  if (s.empty()) {
    return false;
  }
  const char* const begin = s.c_str();
  const char* const end = begin + s.size();
  char* endptr;
  errno = 0;
  res = f(begin, &endptr, base);
  if (errno == ERANGE) {
    return false;
  }
  // strtol-family functions leave endptr at the start of the input
  // when no conversion was performed (e.g. whitespace-only input).
  if (endptr == begin) {
    return false;
  }
  // Scan to the real end of the string rather than to the first NUL,
  // so that data after an embedded NUL is not silently accepted.
  for (const char* i = endptr; i != end; ++i) {
    // isspace() requires a value representable as unsigned char.
    if (!isspace(static_cast<unsigned char>(*i))) {
      return false;
    }
  }
  return true;
}
} // namespace
559
parseIntNoThrow(int32_t & res,const std::string & s,int base)560 bool parseIntNoThrow(int32_t& res, const std::string& s, int base)
561 {
562 long int t;
563 if (parseLong(t, strtol, s, base) &&
564 t >= std::numeric_limits<int32_t>::min() &&
565 t <= std::numeric_limits<int32_t>::max()) {
566 res = t;
567 return true;
568 }
569 else {
570 return false;
571 }
572 }
573
parseUIntNoThrow(uint32_t & res,const std::string & s,int base)574 bool parseUIntNoThrow(uint32_t& res, const std::string& s, int base)
575 {
576 long int t;
577 if (parseLong(t, strtol, s, base) && t >= 0 &&
578 t <= std::numeric_limits<int32_t>::max()) {
579 res = t;
580 return true;
581 }
582 else {
583 return false;
584 }
585 }
586
parseLLIntNoThrow(int64_t & res,const std::string & s,int base)587 bool parseLLIntNoThrow(int64_t& res, const std::string& s, int base)
588 {
589 int64_t t;
590 if (parseLong(t, strtoll, s, base)) {
591 res = t;
592 return true;
593 }
594 else {
595 return false;
596 }
597 }
598
// Parses the whole of |s| as a floating point number using strtod().
//
// Returns true and stores the value in |res| only if
//  - |s| is not empty,
//  - strtod() did not set errno to ERANGE,
//  - at least one character was consumed by the conversion, and
//  - everything after the converted number, up to the end of |s|, is
//    whitespace.
//
// On failure false is returned and |res| is left untouched.
bool parseDoubleNoThrow(double& res, const std::string& s)
{
  if (s.empty()) {
    return false;
  }

  const char* const begin = s.c_str();
  const char* const end = begin + s.size();

  errno = 0;
  char* endptr;
  const auto d = strtod(begin, &endptr);

  if (errno == ERANGE) {
    return false;
  }

  // strtod() leaves endptr at the start of the input when no
  // conversion was performed (e.g. whitespace-only input).
  if (endptr == begin) {
    return false;
  }

  for (const char* i = endptr; i != end; ++i) {
    // isspace() requires a value representable as unsigned char.
    if (!isspace(static_cast<unsigned char>(*i))) {
      return false;
    }
  }

  res = d;

  return true;
}
625
parseIntSegments(const std::string & src)626 SegList<int> parseIntSegments(const std::string& src)
627 {
628 SegList<int> sgl;
629 for (std::string::const_iterator i = src.begin(), eoi = src.end();
630 i != eoi;) {
631 std::string::const_iterator j = std::find(i, eoi, ',');
632 if (j == i) {
633 ++i;
634 continue;
635 }
636 std::string::const_iterator p = std::find(i, j, '-');
637 if (p == j) {
638 int a;
639 if (parseIntNoThrow(a, std::string(i, j))) {
640 sgl.add(a, a + 1);
641 }
642 else {
643 throw DL_ABORT_EX(fmt("Bad range %s", std::string(i, j).c_str()));
644 }
645 }
646 else if (p == i || p + 1 == j) {
647 throw DL_ABORT_EX(fmt(MSG_INCOMPLETE_RANGE, std::string(i, j).c_str()));
648 }
649 else {
650 int a, b;
651 if (parseIntNoThrow(a, std::string(i, p)) &&
652 parseIntNoThrow(b, (std::string(p + 1, j)))) {
653 sgl.add(a, b + 1);
654 }
655 else {
656 throw DL_ABORT_EX(fmt("Bad range %s", std::string(i, j).c_str()));
657 }
658 }
659 if (j == eoi) {
660 break;
661 }
662 i = j + 1;
663 }
664 return sgl;
665 }
666
667 namespace {
computeHeadPieces(std::vector<size_t> & indexes,const std::vector<std::shared_ptr<FileEntry>> & fileEntries,size_t pieceLength,int64_t head)668 void computeHeadPieces(
669 std::vector<size_t>& indexes,
670 const std::vector<std::shared_ptr<FileEntry>>& fileEntries,
671 size_t pieceLength, int64_t head)
672 {
673 if (head == 0) {
674 return;
675 }
676 for (const auto& fi : fileEntries) {
677 if (fi->getLength() == 0) {
678 continue;
679 }
680 const size_t lastIndex =
681 (fi->getOffset() + std::min(head, fi->getLength()) - 1) / pieceLength;
682 for (size_t idx = fi->getOffset() / pieceLength; idx <= lastIndex; ++idx) {
683 indexes.push_back(idx);
684 }
685 }
686 }
687 } // namespace
688
689 namespace {
computeTailPieces(std::vector<size_t> & indexes,const std::vector<std::shared_ptr<FileEntry>> & fileEntries,size_t pieceLength,int64_t tail)690 void computeTailPieces(
691 std::vector<size_t>& indexes,
692 const std::vector<std::shared_ptr<FileEntry>>& fileEntries,
693 size_t pieceLength, int64_t tail)
694 {
695 if (tail == 0) {
696 return;
697 }
698 for (const auto& fi : fileEntries) {
699 if (fi->getLength() == 0) {
700 continue;
701 }
702 int64_t endOffset = fi->getLastOffset();
703 size_t fromIndex =
704 (endOffset - 1 - (std::min(tail, fi->getLength()) - 1)) / pieceLength;
705 const size_t toIndex = (endOffset - 1) / pieceLength;
706 while (fromIndex <= toIndex) {
707 indexes.push_back(fromIndex++);
708 }
709 }
710 }
711 } // namespace
712
parsePrioritizePieceRange(std::vector<size_t> & result,const std::string & src,const std::vector<std::shared_ptr<FileEntry>> & fileEntries,size_t pieceLength,int64_t defaultSize)713 void parsePrioritizePieceRange(
714 std::vector<size_t>& result, const std::string& src,
715 const std::vector<std::shared_ptr<FileEntry>>& fileEntries,
716 size_t pieceLength, int64_t defaultSize)
717 {
718 std::vector<size_t> indexes;
719 std::vector<Scip> parts;
720 splitIter(src.begin(), src.end(), std::back_inserter(parts), ',', true);
721 for (const auto& i : parts) {
722 if (util::streq(i.first, i.second, "head")) {
723 computeHeadPieces(indexes, fileEntries, pieceLength, defaultSize);
724 }
725 else if (util::startsWith(i.first, i.second, "head=")) {
726 std::string sizestr(i.first + 5, i.second);
727 computeHeadPieces(indexes, fileEntries, pieceLength,
728 std::max((int64_t)0, getRealSize(sizestr)));
729 }
730 else if (util::streq(i.first, i.second, "tail")) {
731 computeTailPieces(indexes, fileEntries, pieceLength, defaultSize);
732 }
733 else if (util::startsWith(i.first, i.second, "tail=")) {
734 std::string sizestr(i.first + 5, i.second);
735 computeTailPieces(indexes, fileEntries, pieceLength,
736 std::max((int64_t)0, getRealSize(sizestr)));
737 }
738 else {
739 throw DL_ABORT_EX(
740 fmt("Unrecognized token %s", std::string(i.first, i.second).c_str()));
741 }
742 }
743 std::sort(indexes.begin(), indexes.end());
744 indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
745 result.insert(result.end(), indexes.begin(), indexes.end());
746 }
747
// Converts the |len| bytes at |src| from ISO/IEC 8859-1 to UTF-8.
// Bytes below 0x80 are copied unchanged; bytes 0xa0-0xff become the
// corresponding two-byte UTF-8 sequence.  If a byte in 0x80-0x9f is
// encountered, an empty string is returned.
std::string iso8859p1ToUtf8(const char* src, size_t len)
{
  std::string dest;
  for (size_t i = 0; i < len; ++i) {
    const unsigned char c = src[i];
    if (c < 0x80u) {
      dest += c;
      continue;
    }
    if (c < 0xa0u) {
      // 0x80-0x9f is rejected.
      return "";
    }
    // Lead byte: 0xc2 for 0xa0-0xbf, 0xc3 for 0xc0-0xff.
    dest += (c <= 0xbfu) ? 0xc2u : 0xc3u;
    // Trail byte: clearing bit 0x40 maps 0xc0-0xff onto 0x80-0xbf and
    // leaves 0xa0-0xbf unchanged.
    dest += c & (~0x40u);
  }
  return dest;
}
773
iso8859p1ToUtf8(const std::string & src)774 std::string iso8859p1ToUtf8(const std::string& src)
775 {
776 return iso8859p1ToUtf8(src.c_str(), src.size());
777 }
778
779 /* Start of utf8 dfa */
780 /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
781 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
782 *
783 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
784 *
785 * Permission is hereby granted, free of charge, to any person
786 * obtaining a copy of this software and associated documentation
787 * files (the "Software"), to deal in the Software without
788 * restriction, including without limitation the rights to use, copy,
789 * modify, merge, publish, distribute, sublicense, and/or sell copies
790 * of the Software, and to permit persons to whom the Software is
791 * furnished to do so, subject to the following conditions:
792 *
793 * The above copyright notice and this permission notice shall be
794 * included in all copies or substantial portions of the Software.
795 *
796 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
797 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
798 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
799 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
800 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
801 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
802 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
803 * SOFTWARE.
804 */
805 #define UTF8_ACCEPT 0
806 #define UTF8_REJECT 12
807
// Lookup table used by utf8dfa(): 256 byte-to-class entries followed
// by 108 transition entries (9 rows of 12 character classes).
// clang-format off
static const uint8_t utf8d[] = {
    /*
     * The first part of the table maps bytes to character classes, to
     * reduce the size of the transition table and create bitmasks.
     * One row per 16 byte values, starting at 0x00.
     */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50 */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70 */
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 0x90 */
     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 0xa0 */
     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 0xb0 */
     8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xc0 */
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xd0 */
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /* 0xe0 */
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, /* 0xf0 */

    /*
     * The second part is a transition table that maps a combination
     * of a state of the automaton and a character class to a state.
     * One row per state (states are multiples of 12), one column per
     * character class 0-11.
     */
     0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, /* state  0 */
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, /* state 12 */
    12,  0, 12, 12, 12, 12, 12,  0, 12,  0, 12, 12, /* state 24 */
    12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, /* state 36 */
    12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, /* state 48 */
    12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, /* state 60 */
    12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, /* state 72 */
    12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, /* state 84 */
    12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, /* state 96 */
};
// clang-format on
1183
utf8dfa(uint32_t * state,uint32_t * codep,uint32_t byte)1184 static uint32_t utf8dfa(uint32_t* state, uint32_t* codep, uint32_t byte)
1185 {
1186 uint32_t type = utf8d[byte];
1187
1188 *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
1189 : (0xff >> type) & (byte);
1190
1191 *state = utf8d[256 + *state + type];
1192 return *state;
1193 }
1194
1195 /* End of utf8 dfa */
1196
// States of the state machine in parse_content_disposition().
// Enumerators are numbered consecutively from 0.
enum content_disposition_parse_state {
  CD_BEFORE_DISPOSITION_TYPE,
  CD_AFTER_DISPOSITION_TYPE,
  CD_DISPOSITION_TYPE,
  CD_BEFORE_DISPOSITION_PARM_NAME,
  CD_AFTER_DISPOSITION_PARM_NAME,
  CD_DISPOSITION_PARM_NAME,
  CD_BEFORE_VALUE,
  CD_AFTER_VALUE,
  CD_QUOTED_STRING,
  CD_TOKEN,
  CD_BEFORE_EXT_VALUE,
  CD_CHARSET,
  CD_LANGUAGE,
  CD_VALUE_CHARS,
  CD_VALUE_CHARS_PCT_ENCODED1,
  CD_VALUE_CHARS_PCT_ENCODED2
};
1215
// Bit flags OR-ed into the |flags| variable of
// parse_content_disposition() to record which filename parameters
// have been seen.
enum content_disposition_parse_flag {
  CD_FILENAME_FOUND = 1,
  CD_EXT_FILENAME_FOUND = 1 << 1
};
1220
// Charset recognised by parse_content_disposition() for an extended
// parameter value.  CD_ENC_UNKNOWN is 0.
enum content_disposition_charset {
  CD_ENC_UNKNOWN,
  CD_ENC_UTF8,
  CD_ENC_ISO_8859_1
};
1226
parse_content_disposition(char * dest,size_t destlen,const char ** charsetp,size_t * charsetlenp,const char * in,size_t len,bool defaultUTF8)1227 ssize_t parse_content_disposition(char* dest, size_t destlen,
1228 const char** charsetp, size_t* charsetlenp,
1229 const char* in, size_t len, bool defaultUTF8)
1230 {
1231 const char *p = in, *eop = in + len, *mark_first = nullptr,
1232 *mark_last = nullptr;
1233 int state = CD_BEFORE_DISPOSITION_TYPE;
1234 int in_file_parm = 0;
1235 int flags = 0;
1236 int quoted_seen = 0;
1237 int charset = 0;
1238 /* To suppress warnings */
1239 char* dp = dest;
1240 size_t dlen = destlen;
1241 uint32_t dfa_state = UTF8_ACCEPT;
1242 uint32_t dfa_code = 0;
1243 uint8_t pctval = 0;
1244
1245 *charsetp = nullptr;
1246 *charsetlenp = 0;
1247
1248 for (; p != eop; ++p) {
1249 switch (state) {
1250 case CD_BEFORE_DISPOSITION_TYPE:
1251 if (inRFC2616HttpToken(*p)) {
1252 state = CD_DISPOSITION_TYPE;
1253 }
1254 else if (!isLws(*p)) {
1255 return -1;
1256 }
1257 break;
1258 case CD_AFTER_DISPOSITION_TYPE:
1259 case CD_DISPOSITION_TYPE:
1260 if (*p == ';') {
1261 state = CD_BEFORE_DISPOSITION_PARM_NAME;
1262 }
1263 else if (isLws(*p)) {
1264 state = CD_AFTER_DISPOSITION_TYPE;
1265 }
1266 else if (state == CD_AFTER_DISPOSITION_TYPE || !inRFC2616HttpToken(*p)) {
1267 return -1;
1268 }
1269 break;
1270 case CD_BEFORE_DISPOSITION_PARM_NAME:
1271 if (inRFC2616HttpToken(*p)) {
1272 mark_first = p;
1273 state = CD_DISPOSITION_PARM_NAME;
1274 }
1275 else if (!isLws(*p)) {
1276 return -1;
1277 }
1278 break;
1279 case CD_AFTER_DISPOSITION_PARM_NAME:
1280 case CD_DISPOSITION_PARM_NAME:
1281 if (*p == '=') {
1282 if (state == CD_DISPOSITION_PARM_NAME) {
1283 mark_last = p;
1284 }
1285 in_file_parm = 0;
1286 if (strieq(mark_first, mark_last, "filename*")) {
1287 if ((flags & CD_EXT_FILENAME_FOUND) == 0) {
1288 in_file_parm = 1;
1289 }
1290 else {
1291 return -1;
1292 }
1293 state = CD_BEFORE_EXT_VALUE;
1294 }
1295 else if (strieq(mark_first, mark_last, "filename")) {
1296 if (flags & CD_FILENAME_FOUND) {
1297 return -1;
1298 }
1299 if ((flags & CD_EXT_FILENAME_FOUND) == 0) {
1300 in_file_parm = 1;
1301 }
1302 state = CD_BEFORE_VALUE;
1303 }
1304 else {
1305 /* ext-token must be characters in token, followed by "*" */
1306 if (mark_first != mark_last - 1 && *(mark_last - 1) == '*') {
1307 state = CD_BEFORE_EXT_VALUE;
1308 }
1309 else {
1310 state = CD_BEFORE_VALUE;
1311 }
1312 }
1313 if (in_file_parm) {
1314 dp = dest;
1315 dlen = destlen;
1316 }
1317 }
1318 else if (isLws(*p)) {
1319 mark_last = p;
1320 state = CD_AFTER_DISPOSITION_PARM_NAME;
1321 }
1322 else if (state == CD_AFTER_DISPOSITION_PARM_NAME ||
1323 !inRFC2616HttpToken(*p)) {
1324 return -1;
1325 }
1326 break;
1327 case CD_BEFORE_VALUE:
1328 if (*p == '"') {
1329 quoted_seen = 0;
1330 state = CD_QUOTED_STRING;
1331 if (defaultUTF8) {
1332 dfa_state = UTF8_ACCEPT;
1333 dfa_code = 0;
1334 }
1335 }
1336 else if (inRFC2616HttpToken(*p)) {
1337 if (in_file_parm) {
1338 if (dlen == 0) {
1339 return -1;
1340 }
1341 else {
1342 *dp++ = *p;
1343 --dlen;
1344 }
1345 }
1346 state = CD_TOKEN;
1347 }
1348 else if (!isLws(*p)) {
1349 return -1;
1350 }
1351 break;
1352 case CD_AFTER_VALUE:
1353 if (*p == ';') {
1354 state = CD_BEFORE_DISPOSITION_PARM_NAME;
1355 }
1356 else if (!isLws(*p)) {
1357 return -1;
1358 }
1359 break;
1360 case CD_QUOTED_STRING:
1361 if (*p == '\\' && quoted_seen == 0) {
1362 quoted_seen = 1;
1363 }
1364 else if (*p == '"' && quoted_seen == 0) {
1365 if (defaultUTF8 && dfa_state != UTF8_ACCEPT) {
1366 return -1;
1367 }
1368 if (in_file_parm) {
1369 flags |= CD_FILENAME_FOUND;
1370 }
1371 state = CD_AFTER_VALUE;
1372 }
1373 else {
1374 /* TEXT which is OCTET except CTLs, but including LWS. Accept
1375 ISO-8859-1 chars, or UTF-8 if defaultUTF8 is set */
1376 quoted_seen = 0;
1377 if (defaultUTF8) {
1378 if (utf8dfa(&dfa_state, &dfa_code, (unsigned char)*p) ==
1379 UTF8_REJECT) {
1380 return -1;
1381 }
1382 }
1383 else if (!isIso8859p1(*p)) {
1384 return -1;
1385 }
1386 if (in_file_parm) {
1387 if (dlen == 0) {
1388 return -1;
1389 }
1390 else {
1391 *dp++ = *p;
1392 --dlen;
1393 }
1394 }
1395 }
1396 break;
1397 case CD_TOKEN:
1398 if (inRFC2616HttpToken(*p)) {
1399 if (in_file_parm) {
1400 if (dlen == 0) {
1401 return -1;
1402 }
1403 else {
1404 *dp++ = *p;
1405 --dlen;
1406 }
1407 }
1408 }
1409 else if (*p == ';') {
1410 if (in_file_parm) {
1411 flags |= CD_FILENAME_FOUND;
1412 }
1413 state = CD_BEFORE_DISPOSITION_PARM_NAME;
1414 }
1415 else if (isLws(*p)) {
1416 if (in_file_parm) {
1417 flags |= CD_FILENAME_FOUND;
1418 }
1419 state = CD_AFTER_VALUE;
1420 }
1421 else {
1422 return -1;
1423 }
1424 break;
1425 case CD_BEFORE_EXT_VALUE:
1426 if (*p == '\'') {
1427 /* Empty charset is not allowed */
1428 return -1;
1429 }
1430 else if (inRFC2978MIMECharset(*p)) {
1431 mark_first = p;
1432 state = CD_CHARSET;
1433 }
1434 else if (!isLws(*p)) {
1435 return -1;
1436 }
1437 break;
1438 case CD_CHARSET:
1439 if (*p == '\'') {
1440 mark_last = p;
1441 *charsetp = mark_first;
1442 *charsetlenp = mark_last - mark_first;
1443 if (strieq(mark_first, mark_last, "utf-8")) {
1444 charset = CD_ENC_UTF8;
1445 dfa_state = UTF8_ACCEPT;
1446 dfa_code = 0;
1447 }
1448 else if (strieq(mark_first, mark_last, "iso-8859-1")) {
1449 charset = CD_ENC_ISO_8859_1;
1450 }
1451 else {
1452 charset = CD_ENC_UNKNOWN;
1453 }
1454 state = CD_LANGUAGE;
1455 }
1456 else if (!inRFC2978MIMECharset(*p)) {
1457 return -1;
1458 }
1459 break;
1460 case CD_LANGUAGE:
1461 if (*p == '\'') {
1462 if (in_file_parm) {
1463 dp = dest;
1464 dlen = destlen;
1465 }
1466 state = CD_VALUE_CHARS;
1467 }
1468 else if (*p != '-' && !isAlpha(*p) && !isDigit(*p)) {
1469 return -1;
1470 }
1471 break;
1472 case CD_VALUE_CHARS:
1473 if (inRFC5987AttrChar(*p)) {
1474 if (charset == CD_ENC_UTF8) {
1475 if (utf8dfa(&dfa_state, &dfa_code, static_cast<unsigned char>(*p)) ==
1476 UTF8_REJECT) {
1477 return -1;
1478 }
1479 }
1480 if (in_file_parm) {
1481 if (dlen == 0) {
1482 return -1;
1483 }
1484 else {
1485 *dp++ = *p;
1486 --dlen;
1487 }
1488 }
1489 }
1490 else if (*p == '%') {
1491 if (in_file_parm) {
1492 if (dlen == 0) {
1493 return -1;
1494 }
1495 }
1496 pctval = 0;
1497 state = CD_VALUE_CHARS_PCT_ENCODED1;
1498 }
1499 else if (*p == ';' || isLws(*p)) {
1500 if (charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) {
1501 return -1;
1502 }
1503 if (in_file_parm) {
1504 flags |= CD_EXT_FILENAME_FOUND;
1505 }
1506 if (*p == ';') {
1507 state = CD_BEFORE_DISPOSITION_PARM_NAME;
1508 }
1509 else {
1510 state = CD_AFTER_VALUE;
1511 }
1512 }
1513 else if (!inRFC5987AttrChar(*p)) {
1514 return -1;
1515 }
1516 break;
1517 case CD_VALUE_CHARS_PCT_ENCODED1:
1518 if (isHexDigit(*p)) {
1519 pctval |= hexCharToUInt(*p) << 4;
1520 state = CD_VALUE_CHARS_PCT_ENCODED2;
1521 }
1522 else {
1523 return -1;
1524 }
1525 break;
1526 case CD_VALUE_CHARS_PCT_ENCODED2:
1527 if (isHexDigit(*p)) {
1528 pctval |= hexCharToUInt(*p);
1529 if (charset == CD_ENC_UTF8) {
1530 if (utf8dfa(&dfa_state, &dfa_code, pctval) == UTF8_REJECT) {
1531 return -1;
1532 }
1533 }
1534 else if (charset == CD_ENC_ISO_8859_1) {
1535 if (!isIso8859p1(pctval)) {
1536 return -1;
1537 }
1538 }
1539 if (in_file_parm) {
1540 *dp++ = pctval;
1541 --dlen;
1542 }
1543 state = CD_VALUE_CHARS;
1544 }
1545 else {
1546 return -1;
1547 }
1548 break;
1549 }
1550 }
1551 switch (state) {
1552 case CD_BEFORE_DISPOSITION_TYPE:
1553 case CD_AFTER_DISPOSITION_TYPE:
1554 case CD_DISPOSITION_TYPE:
1555 case CD_AFTER_VALUE:
1556 case CD_TOKEN:
1557 return destlen - dlen;
1558 case CD_VALUE_CHARS:
1559 if (charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) {
1560 return -1;
1561 }
1562 return destlen - dlen;
1563 default:
1564 return -1;
1565 }
1566 }
1567
getContentDispositionFilename(const std::string & header,bool defaultUTF8)1568 std::string getContentDispositionFilename(const std::string& header,
1569 bool defaultUTF8)
1570 {
1571 std::array<char, 1_k> cdval;
1572 size_t cdvallen = cdval.size();
1573 const char* charset;
1574 size_t charsetlen;
1575 ssize_t rv =
1576 parse_content_disposition(cdval.data(), cdvallen, &charset, &charsetlen,
1577 header.c_str(), header.size(), defaultUTF8);
1578 if (rv == -1) {
1579 return "";
1580 }
1581
1582 std::string res;
1583 if ((charset && strieq(charset, charset + charsetlen, "iso-8859-1")) ||
1584 (!charset && !defaultUTF8)) {
1585 res = iso8859p1ToUtf8(cdval.data(), rv);
1586 }
1587 else {
1588 res.assign(cdval.data(), rv);
1589 }
1590 if (!detectDirTraversal(res) &&
1591 res.find_first_of("/\\") == std::string::npos) {
1592 return res;
1593 }
1594 return "";
1595 }
1596
toUpper(std::string src)1597 std::string toUpper(std::string src)
1598 {
1599 uppercase(src);
1600 return src;
1601 }
1602
toLower(std::string src)1603 std::string toLower(std::string src)
1604 {
1605 lowercase(src);
1606 return src;
1607 }
1608
uppercase(std::string & s)1609 void uppercase(std::string& s)
1610 {
1611 std::transform(s.begin(), s.end(), s.begin(), toUpperChar);
1612 }
1613
lowercase(std::string & s)1614 void lowercase(std::string& s)
1615 {
1616 std::transform(s.begin(), s.end(), s.begin(), toLowerChar);
1617 }
1618
// Maps 'a'..'z' to 'A'..'Z'; every other character is returned unchanged.
// The comparison is done on the raw character value, independent of locale.
char toUpperChar(char c)
{
  const bool isLower = c >= 'a' && c <= 'z';
  return isLower ? static_cast<char>(c - ('a' - 'A')) : c;
}
1626
// Maps 'A'..'Z' to 'a'..'z'; every other character is returned unchanged.
// The comparison is done on the raw character value, independent of locale.
char toLowerChar(char c)
{
  const bool isUpper = c >= 'A' && c <= 'Z';
  return isUpper ? static_cast<char>(c + ('a' - 'A')) : c;
}
1634
// Returns true if getaddrinfo() accepts |name| when AI_NUMERICHOST is
// requested (any address family), i.e. |name| is a numeric address literal.
bool isNumericHost(const std::string& name)
{
  struct addrinfo hints;
  memset(&hints, 0, sizeof(hints));
  hints.ai_flags = AI_NUMERICHOST;
  hints.ai_family = AF_UNSPEC;

  struct addrinfo* result = nullptr;
  const bool numeric =
      getaddrinfo(name.c_str(), nullptr, &hints, &result) == 0;
  if (numeric) {
    // The result list is only allocated on success.
    freeaddrinfo(result);
  }
  return numeric;
}
1648
#if _WIN32
namespace {
// Protects the two handler slots below. The console control routine is
// invoked on a different thread than the one registering handlers.
static Lock win_signal_lock;

static signal_handler_t win_int_handler = nullptr;
static signal_handler_t win_term_handler = nullptr;

// Do-nothing handler, used by setGlobalSignalHandler() in place of SIG_IGN.
static void win_ign_handler(int) {}

// Console control callback. Ctrl-C/Ctrl-Break are delivered to the SIGINT
// handler; logoff/close/shutdown are delivered to the SIGTERM handler.
// Returns TRUE when a handler was invoked, FALSE otherwise.
static BOOL WINAPI HandlerRoutine(DWORD ctrlType)
{
  const signal_handler_t* slot = nullptr;
  int sig = 0;
  switch (ctrlType) {
  case CTRL_C_EVENT:
  case CTRL_BREAK_EVENT:
    slot = &win_int_handler;
    sig = SIGINT;
    break;
  case CTRL_LOGOFF_EVENT:
  case CTRL_CLOSE_EVENT:
  case CTRL_SHUTDOWN_EVENT:
    slot = &win_term_handler;
    sig = SIGTERM;
    break;
  default:
    return FALSE;
  }

  signal_handler_t handler = nullptr;
  {
    // Handler will be called on a new/different thread, so take a
    // snapshot under the lock and invoke it after releasing the lock.
    LockGuard lg(win_signal_lock);
    handler = *slot;
  }

  if (!handler) {
    return FALSE;
  }
  handler(sig);
  return TRUE;
}
} // namespace
#endif
1693
setGlobalSignalHandler(int sig,sigset_t * mask,signal_handler_t handler,int flags)1694 void setGlobalSignalHandler(int sig, sigset_t* mask, signal_handler_t handler,
1695 int flags)
1696 {
1697 #if _WIN32
1698 if (sig == SIGINT || sig == SIGTERM) {
1699 // Handler will be called on a new/different thread.
1700 LockGuard lg(win_signal_lock);
1701
1702 if (handler == SIG_DFL) {
1703 handler = nullptr;
1704 }
1705 else if (handler == SIG_IGN) {
1706 handler = win_ign_handler;
1707 }
1708 // Not yet in use: add console handler.
1709 if (handler && !win_int_handler && !win_term_handler) {
1710 ::SetConsoleCtrlHandler(HandlerRoutine, TRUE);
1711 }
1712 if (sig == SIGINT) {
1713 win_int_handler = handler;
1714 }
1715 else {
1716 win_term_handler = handler;
1717 }
1718 // No handlers set: remove.
1719 if (!win_int_handler && !win_term_handler) {
1720 ::SetConsoleCtrlHandler(HandlerRoutine, FALSE);
1721 }
1722 return;
1723 }
1724 #endif
1725
1726 #ifdef HAVE_SIGACTION
1727 struct sigaction sigact;
1728 sigact.sa_handler = handler;
1729 sigact.sa_flags = flags;
1730 sigact.sa_mask = *mask;
1731 if (sigaction(sig, &sigact, nullptr) == -1) {
1732 auto errNum = errno;
1733 A2_LOG_ERROR(fmt("sigaction() failed for signal %d: %s", sig,
1734 safeStrerror(errNum).c_str()));
1735 }
1736 #else
1737 if (signal(sig, handler) == SIG_ERR) {
1738 auto errNum = errno;
1739 A2_LOG_ERROR(fmt("signal() failed for signal %d: %s", sig,
1740 safeStrerror(errNum).c_str()));
1741 }
1742 #endif // HAVE_SIGACTION
1743 }
1744
1745 #ifndef __MINGW32__
getHomeDir()1746 std::string getHomeDir()
1747 {
1748 const char* p = getenv("HOME");
1749 if (p) {
1750 return p;
1751 }
1752 # ifdef HAVE_PWD_H
1753 auto pw = getpwuid(geteuid());
1754 if (pw && pw->pw_dir) {
1755 return pw->pw_dir;
1756 }
1757 # endif // HAVE_PWD_H
1758 return A2STR::NIL;
1759 }
1760
1761 #else // __MINGW32__
1762
getHomeDir()1763 std::string getHomeDir()
1764 {
1765 auto p = _wgetenv(L"HOME");
1766 if (p) {
1767 return toForwardSlash(wCharToUtf8(p));
1768 }
1769 p = _wgetenv(L"USERPROFILE");
1770 if (p) {
1771 return toForwardSlash(wCharToUtf8(p));
1772 }
1773 p = _wgetenv(L"HOMEDRIVE");
1774 if (p) {
1775 std::wstring homeDir = p;
1776 p = _wgetenv(L"HOMEPATH");
1777 if (p) {
1778 homeDir += p;
1779 return toForwardSlash(wCharToUtf8(homeDir));
1780 }
1781 }
1782 return A2STR::NIL;
1783 }
1784 #endif // __MINGW32__
1785
// Returns the value of the environment variable |environmentVariable| if it
// is set and looks like an absolute path (first character '/', or on MinGW
// a non-empty value whose second character is ':'). Otherwise returns
// |fallbackDirectory|.
std::string getXDGDir(const std::string& environmentVariable,
                      const std::string& fallbackDirectory)
{
  const char* value = getenv(environmentVariable.c_str());
  if (!value) {
    return fallbackDirectory;
  }
#ifndef __MINGW32__
  const bool absolute = value[0] == '/';
#else  // __MINGW32__
  const bool absolute = value[0] && value[1] == ':';
#endif // __MINGW32__
  if (absolute) {
    return value;
  }
  return fallbackDirectory;
}
1805
getConfigFile()1806 std::string getConfigFile()
1807 {
1808 std::string filename = getHomeDir() + "/.aria2/aria2.conf";
1809 if (!File(filename).exists()) {
1810 filename = getXDGDir("XDG_CONFIG_HOME", getHomeDir() + "/.config") +
1811 "/aria2/aria2.conf";
1812 }
1813 return filename;
1814 }
1815
getDHTFile(bool ipv6)1816 std::string getDHTFile(bool ipv6)
1817 {
1818 std::string filename =
1819 getHomeDir() + (ipv6 ? "/.aria2/dht6.dat" : "/.aria2/dht.dat");
1820 if (!File(filename).exists()) {
1821 filename = getXDGDir("XDG_CACHE_HOME", getHomeDir() + "/.cache") +
1822 (ipv6 ? "/aria2/dht6.dat" : "/aria2/dht.dat");
1823 }
1824 return filename;
1825 }
1826
getRealSize(const std::string & sizeWithUnit)1827 int64_t getRealSize(const std::string& sizeWithUnit)
1828 {
1829 std::string::size_type p = sizeWithUnit.find_first_of("KMkm");
1830 std::string size;
1831 int32_t mult = 1;
1832 if (p == std::string::npos) {
1833 size = sizeWithUnit;
1834 }
1835 else {
1836 switch (sizeWithUnit[p]) {
1837 case 'K':
1838 case 'k':
1839 mult = 1_k;
1840 break;
1841 case 'M':
1842 case 'm':
1843 mult = 1_m;
1844 break;
1845 }
1846 size.assign(sizeWithUnit.begin(), sizeWithUnit.begin() + p);
1847 }
1848 int64_t v;
1849 if (!parseLLIntNoThrow(v, size) || v < 0) {
1850 throw DL_ABORT_EX(
1851 fmt("Bad or negative value detected: %s", sizeWithUnit.c_str()));
1852 }
1853 if (INT64_MAX / mult < v) {
1854 throw DL_ABORT_EX(
1855 fmt(MSG_STRING_INTEGER_CONVERSION_FAILURE, "overflow/underflow"));
1856 }
1857 return v * mult;
1858 }
1859
abbrevSize(int64_t size)1860 std::string abbrevSize(int64_t size)
1861 {
1862 static const char* UNITS[] = {"", "Ki", "Mi", "Gi"};
1863 int64_t t = size;
1864 size_t uidx = 0;
1865 int r = 0;
1866 while (t >= static_cast<int64_t>(1_k) &&
1867 uidx + 1 < sizeof(UNITS) / sizeof(UNITS[0])) {
1868 lldiv_t d = lldiv(t, 1_k);
1869 t = d.quot;
1870 r = d.rem;
1871 ++uidx;
1872 }
1873 if (uidx + 1 < sizeof(UNITS) / sizeof(UNITS[0]) && t >= 922) {
1874 ++uidx;
1875 r = t;
1876 t = 0;
1877 }
1878 std::string res;
1879 res += itos(t, true);
1880 if (t < 10 && uidx > 0) {
1881 res += ".";
1882 res += itos(r * 10 / 1_k);
1883 }
1884 res += UNITS[uidx];
1885 return res;
1886 }
1887
sleep(long seconds)1888 void sleep(long seconds)
1889 {
1890 #if defined(HAVE_WINSOCK2_H)
1891 ::Sleep(seconds * 1000);
1892 #elif HAVE_SLEEP
1893 ::sleep(seconds);
1894 #elif defined(HAVE_USLEEP)
1895 ::usleep(seconds * 1000000);
1896 #else
1897 # error no sleep function is available (nanosleep?)
1898 #endif
1899 }
1900
usleep(long microseconds)1901 void usleep(long microseconds)
1902 {
1903 #ifdef HAVE_USLEEP
1904 ::usleep(microseconds);
1905 #elif defined(HAVE_WINSOCK2_H)
1906
1907 LARGE_INTEGER current, freq, end;
1908
1909 static enum {
1910 GET_FREQUENCY,
1911 GET_MICROSECONDS,
1912 SKIP_MICROSECONDS
1913 } state = GET_FREQUENCY;
1914
1915 if (state == GET_FREQUENCY) {
1916 if (QueryPerformanceFrequency(&freq))
1917 state = GET_MICROSECONDS;
1918 else
1919 state = SKIP_MICROSECONDS;
1920 }
1921
1922 long msec = microseconds / 1000;
1923 microseconds %= 1000;
1924
1925 if (state == GET_MICROSECONDS && microseconds) {
1926 QueryPerformanceCounter(&end);
1927
1928 end.QuadPart += (freq.QuadPart * microseconds) / 1000000;
1929
1930 while (QueryPerformanceCounter(¤t) &&
1931 (current.QuadPart <= end.QuadPart))
1932 /* noop */;
1933 }
1934
1935 if (msec)
1936 Sleep(msec);
1937 #else
1938 # error no usleep function is available (nanosleep?)
1939 #endif
1940 }
1941
mkdirs(const std::string & dirpath)1942 void mkdirs(const std::string& dirpath)
1943 {
1944 File dir(dirpath);
1945 if (!dir.mkdirs()) {
1946 int errNum = errno;
1947 if (!dir.isDir()) {
1948 throw DL_ABORT_EX3(
1949 errNum,
1950 fmt(EX_MAKE_DIR, dir.getPath().c_str(), safeStrerror(errNum).c_str()),
1951 error_code::DIR_CREATE_ERROR);
1952 }
1953 }
1954 }
1955
convertBitfield(BitfieldMan * dest,const BitfieldMan * src)1956 void convertBitfield(BitfieldMan* dest, const BitfieldMan* src)
1957 {
1958 size_t numBlock = dest->countBlock();
1959 for (size_t index = 0; index < numBlock; ++index) {
1960 if (src->isBitSetOffsetRange((int64_t)index * dest->getBlockLength(),
1961 dest->getBlockLength())) {
1962 dest->setBit(index);
1963 }
1964 }
1965 }
1966
toString(const std::shared_ptr<BinaryStream> & binaryStream)1967 std::string toString(const std::shared_ptr<BinaryStream>& binaryStream)
1968 {
1969 std::stringstream strm;
1970 char data[2048];
1971 while (1) {
1972 int32_t dataLength = binaryStream->readData(
1973 reinterpret_cast<unsigned char*>(data), sizeof(data), strm.tellp());
1974 strm.write(data, dataLength);
1975 if (dataLength == 0) {
1976 break;
1977 }
1978 }
1979 return strm.str();
1980 }
1981
#ifdef HAVE_POSIX_MEMALIGN
/**
 * Allocates |size| bytes aligned to |alignment| with posix_memalign() and
 * returns the buffer. Throws FATAL_EXCEPTION if the allocation fails.
 *
 * In linux 2.6, alignment and size should be a multiple of 512.
 */
void* allocateAlignedMemory(size_t alignment, size_t size)
{
  void* buffer = nullptr;
  // posix_memalign() returns the error number directly.
  const int res = posix_memalign(&buffer, alignment, size);
  if (res == 0) {
    return buffer;
  }
  throw FATAL_EXCEPTION(
      fmt("Error in posix_memalign: %s", util::safeStrerror(res).c_str()));
}
#endif // HAVE_POSIX_MEMALIGN
1997
getNumericNameInfo(const struct sockaddr * sockaddr,socklen_t len)1998 Endpoint getNumericNameInfo(const struct sockaddr* sockaddr, socklen_t len)
1999 {
2000 char host[NI_MAXHOST];
2001 char service[NI_MAXSERV];
2002 int s = getnameinfo(sockaddr, len, host, NI_MAXHOST, service, NI_MAXSERV,
2003 NI_NUMERICHOST | NI_NUMERICSERV);
2004 if (s != 0) {
2005 throw DL_ABORT_EX(
2006 fmt("Failed to get hostname and port. cause: %s", gai_strerror(s)));
2007 }
2008 return {host, sockaddr->sa_family,
2009 static_cast<uint16_t>(strtoul(service, nullptr, 10))};
2010 }
2011
// Returns a copy of |src| in which the characters <, >, &, ' and " are
// replaced with the corresponding HTML character references, making the
// result safe to embed in HTML text and attribute values.
std::string htmlEscape(const std::string& src)
{
  std::string dest;
  dest.reserve(src.size());
  // |j| marks the start of the pending run of characters that need no
  // escaping; the run is copied in one go when a special character is hit.
  auto j = std::begin(src);
  for (auto i = std::begin(src); i != std::end(src); ++i) {
    const char* repl;
    switch (*i) {
    case '<':
      repl = "&lt;";
      break;
    case '>':
      repl = "&gt;";
      break;
    case '&':
      repl = "&amp;";
      break;
    case '\'':
      repl = "&#39;";
      break;
    case '"':
      repl = "&quot;";
      break;
    default:
      // Ordinary character: stays part of the pending run.
      continue;
    }
    dest.append(j, i);
    j = i + 1;
    dest += repl;
  }
  dest.append(j, std::end(src));
  return dest;
}
2045
parseIndexPath(const std::string & line)2046 std::pair<size_t, std::string> parseIndexPath(const std::string& line)
2047 {
2048 auto p = divide(std::begin(line), std::end(line), '=');
2049 uint32_t index;
2050 if (!parseUIntNoThrow(index, std::string(p.first.first, p.first.second))) {
2051 throw DL_ABORT_EX("Bad path index");
2052 }
2053 if (p.second.first == p.second.second) {
2054 throw DL_ABORT_EX(fmt("Path with index=%u is empty.", index));
2055 }
2056 return std::make_pair(index, std::string(p.second.first, p.second.second));
2057 }
2058
createIndexPaths(std::istream & i)2059 std::vector<std::pair<size_t, std::string>> createIndexPaths(std::istream& i)
2060 {
2061 std::vector<std::pair<size_t, std::string>> indexPaths;
2062 std::string line;
2063 while (getline(i, line)) {
2064 indexPaths.push_back(parseIndexPath(line));
2065 }
2066 return indexPaths;
2067 }
2068
generateRandomData(unsigned char * data,size_t length)2069 void generateRandomData(unsigned char* data, size_t length)
2070 {
2071 const auto& rd = SimpleRandomizer::getInstance();
2072 return rd->getRandomBytes(data, length);
2073 }
2074
saveAs(const std::string & filename,const std::string & data,bool overwrite)2075 bool saveAs(const std::string& filename, const std::string& data,
2076 bool overwrite)
2077 {
2078 if (!overwrite && File(filename).exists()) {
2079 return false;
2080 }
2081 std::string tempFilename = filename;
2082 tempFilename += "__temp";
2083 {
2084 BufferedFile fp(tempFilename.c_str(), BufferedFile::WRITE);
2085 if (!fp) {
2086 return false;
2087 }
2088 if (fp.write(data.data(), data.size()) != data.size()) {
2089 return false;
2090 }
2091 if (fp.close() == EOF) {
2092 return false;
2093 }
2094 }
2095 return File(tempFilename).renameTo(filename);
2096 }
2097
// Joins |dir| and |relPath| with '/'. An empty |dir| yields "./" + relPath,
// and a |dir| of exactly "/" does not get a second slash. On MinGW all
// backslashes in the result are turned into forward slashes.
std::string applyDir(const std::string& dir, const std::string& relPath)
{
  std::string s;
  if (dir.empty()) {
    s = "./";
  }
  else if (dir == "/") {
    s = dir;
  }
  else {
    s = dir;
    s += "/";
  }
  s += relPath;
#ifdef __MINGW32__
  std::replace(s.begin(), s.end(), '\\', '/');
#endif // __MINGW32__
  return s;
}
2124
fixTaintedBasename(const std::string & src)2125 std::string fixTaintedBasename(const std::string& src)
2126 {
2127 return escapePath(replace(src, "/", "%2F"));
2128 }
2129
generateRandomKey(unsigned char * key)2130 void generateRandomKey(unsigned char* key)
2131 {
2132 unsigned char bytes[40];
2133 generateRandomData(bytes, sizeof(bytes));
2134 message_digest::digest(key, 20, MessageDigest::sha1().get(), bytes,
2135 sizeof(bytes));
2136 }
2137
2138 // Returns true is given numeric ipv4addr is in Private Address Space.
2139 //
2140 // From Section.3 RFC1918
2141 // 10.0.0.0 - 10.255.255.255 (10/8 prefix)
2142 // 172.16.0.0 - 172.31.255.255 (172.16/12 prefix)
2143 // 192.168.0.0 - 192.168.255.255 (192.168/16 prefix)
inPrivateAddress(const std::string & ipv4addr)2144 bool inPrivateAddress(const std::string& ipv4addr)
2145 {
2146 if (util::startsWith(ipv4addr, "10.") ||
2147 util::startsWith(ipv4addr, "192.168.")) {
2148 return true;
2149 }
2150 if (util::startsWith(ipv4addr, "172.")) {
2151 for (int i = 16; i <= 31; ++i) {
2152 std::string t(fmt("%d.", i));
2153 if (util::startsWith(ipv4addr.begin() + 4, ipv4addr.end(), t.begin(),
2154 t.end())) {
2155 return true;
2156 }
2157 }
2158 }
2159 return false;
2160 }
2161
detectDirTraversal(const std::string & s)2162 bool detectDirTraversal(const std::string& s)
2163 {
2164 if (s.empty()) {
2165 return false;
2166 }
2167 for (auto c : s) {
2168 unsigned char ch = c;
2169 if (in(ch, 0x00u, 0x1fu) || ch == 0x7fu) {
2170 return true;
2171 }
2172 }
2173 return s == "." || s == ".." || s[0] == '/' || util::startsWith(s, "./") ||
2174 util::startsWith(s, "../") || s.find("/../") != std::string::npos ||
2175 s.find("/./") != std::string::npos || s[s.size() - 1] == '/' ||
2176 util::endsWith(s, "/.") || util::endsWith(s, "/..");
2177 }
2178
escapePath(const std::string & s)2179 std::string escapePath(const std::string& s)
2180 {
2181 // We don't escape '/' because we use it as a path separator.
2182 #ifdef __MINGW32__
2183 static const char WIN_INVALID_PATH_CHARS[] = {'"', '*', ':', '<',
2184 '>', '?', '\\', '|'};
2185 #endif // __MINGW32__
2186 std::string d;
2187 for (auto cc : s) {
2188 unsigned char c = cc;
2189 if (in(c, 0x00u, 0x1fu) || c == 0x7fu
2190 #ifdef __MINGW32__
2191 || std::find(std::begin(WIN_INVALID_PATH_CHARS),
2192 std::end(WIN_INVALID_PATH_CHARS),
2193 c) != std::end(WIN_INVALID_PATH_CHARS)
2194 #endif // __MINGW32__
2195 ) {
2196 d += fmt("%%%02X", c);
2197 }
2198 else {
2199 d += c;
2200 }
2201 }
2202 return d;
2203 }
2204
inSameCidrBlock(const std::string & ip1,const std::string & ip2,size_t bits)2205 bool inSameCidrBlock(const std::string& ip1, const std::string& ip2,
2206 size_t bits)
2207 {
2208 unsigned char s1[16], s2[16];
2209 size_t len1, len2;
2210 if ((len1 = net::getBinAddr(s1, ip1)) == 0 ||
2211 (len2 = net::getBinAddr(s2, ip2)) == 0 || len1 != len2) {
2212 return false;
2213 }
2214 if (bits == 0) {
2215 return true;
2216 }
2217 if (bits > 8 * len1) {
2218 bits = 8 * len1;
2219 }
2220 int last = (bits - 1) / 8;
2221 for (int i = 0; i < last; ++i) {
2222 if (s1[i] != s2[i]) {
2223 return false;
2224 }
2225 }
2226 unsigned char mask = bitfield::lastByteMask(bits);
2227 return (s1[last] & mask) == (s2[last] & mask);
2228 }
2229
2230 namespace {
2231
executeHook(const std::string & command,a2_gid_t gid,size_t numFiles,const std::string & firstFilename)2232 void executeHook(const std::string& command, a2_gid_t gid, size_t numFiles,
2233 const std::string& firstFilename)
2234 {
2235 const std::string gidStr = GroupId::toHex(gid);
2236 const std::string numFilesStr = util::uitos(numFiles);
2237 #ifndef __MINGW32__
2238 A2_LOG_INFO(fmt("Executing user command: %s %s %s %s", command.c_str(),
2239 gidStr.c_str(), numFilesStr.c_str(), firstFilename.c_str()));
2240 pid_t cpid = fork();
2241 if (cpid == 0) {
2242 // child!
2243 execlp(command.c_str(), command.c_str(), gidStr.c_str(),
2244 numFilesStr.c_str(), firstFilename.c_str(),
2245 reinterpret_cast<char*>(0));
2246 perror(("Could not execute user command: " + command).c_str());
2247 _exit(EXIT_FAILURE);
2248 return;
2249 }
2250
2251 if (cpid == -1) {
2252 A2_LOG_ERROR("fork() failed. Cannot execute user command.");
2253 }
2254 return;
2255
2256 #else // __MINGW32__
2257 PROCESS_INFORMATION pi;
2258 STARTUPINFOW si;
2259
2260 memset(&si, 0, sizeof(si));
2261 si.cb = sizeof(STARTUPINFO);
2262 memset(&pi, 0, sizeof(pi));
2263 bool batch = util::iendsWith(command, ".bat");
2264 std::string cmdline;
2265 std::string cmdexe;
2266
2267 // XXX batch handling, in particular quoting, correct?
2268 if (batch) {
2269 const char* p = getenv("windir");
2270 if (p) {
2271 cmdexe = p;
2272 cmdexe += "\\system32\\cmd.exe";
2273 }
2274 else {
2275 A2_LOG_INFO("Failed to get windir environment variable."
2276 " Executing batch file will fail.");
2277 // TODO Might be useless.
2278 cmdexe = "cmd.exe";
2279 }
2280 cmdline += "/C \"";
2281 }
2282 cmdline += "\"";
2283 cmdline += command;
2284 cmdline += "\"";
2285 cmdline += " ";
2286 cmdline += gidStr;
2287 cmdline += " ";
2288 cmdline += numFilesStr;
2289 cmdline += " \"";
2290 cmdline += firstFilename;
2291 cmdline += "\"";
2292 if (batch) {
2293 cmdline += "\"";
2294 }
2295 int cmdlineLen = utf8ToWChar(nullptr, 0, cmdline.c_str());
2296 assert(cmdlineLen > 0);
2297 auto wcharCmdline = make_unique<wchar_t[]>(cmdlineLen);
2298 cmdlineLen = utf8ToWChar(wcharCmdline.get(), cmdlineLen, cmdline.c_str());
2299 assert(cmdlineLen > 0);
2300 A2_LOG_INFO(fmt("Executing user command: %s", cmdline.c_str()));
2301 DWORD rc = CreateProcessW(batch ? utf8ToWChar(cmdexe).c_str() : nullptr,
2302 wcharCmdline.get(), nullptr, nullptr, false, 0,
2303 nullptr, 0, &si, &pi);
2304
2305 if (!rc) {
2306 A2_LOG_ERROR("CreateProcess() failed. Cannot execute user command.");
2307 }
2308 return;
2309
2310 #endif
2311 }
2312
2313 } // namespace
2314
executeHookByOptName(const std::shared_ptr<RequestGroup> & group,const Option * option,PrefPtr pref)2315 void executeHookByOptName(const std::shared_ptr<RequestGroup>& group,
2316 const Option* option, PrefPtr pref)
2317 {
2318 executeHookByOptName(group.get(), option, pref);
2319 }
2320
executeHookByOptName(const RequestGroup * group,const Option * option,PrefPtr pref)2321 void executeHookByOptName(const RequestGroup* group, const Option* option,
2322 PrefPtr pref)
2323 {
2324 const std::string& cmd = option->get(pref);
2325 if (!cmd.empty()) {
2326 const std::shared_ptr<DownloadContext> dctx = group->getDownloadContext();
2327 std::string firstFilename;
2328 size_t numFiles = 0;
2329 if (!group->inMemoryDownload()) {
2330 std::shared_ptr<FileEntry> file = dctx->getFirstRequestedFileEntry();
2331 if (file) {
2332 firstFilename = file->getPath();
2333 }
2334 numFiles = dctx->countRequestedFileEntry();
2335 }
2336 executeHook(cmd, group->getGID(), numFiles, firstFilename);
2337 }
2338 }
2339
createSafePath(const std::string & dir,const std::string & filename)2340 std::string createSafePath(const std::string& dir, const std::string& filename)
2341 {
2342 return util::applyDir(dir,
2343 util::isUtf8(filename)
2344 ? util::fixTaintedBasename(filename)
2345 : util::escapePath(util::percentEncode(filename)));
2346 }
2347
createSafePath(const std::string & filename)2348 std::string createSafePath(const std::string& filename)
2349 {
2350 return util::isUtf8(filename)
2351 ? util::fixTaintedBasename(filename)
2352 : util::escapePath(util::percentEncode(filename));
2353 }
2354
encodeNonUtf8(const std::string & s)2355 std::string encodeNonUtf8(const std::string& s)
2356 {
2357 return util::isUtf8(s) ? s : util::percentEncode(s);
2358 }
2359
makeString(const char * str)2360 std::string makeString(const char* str)
2361 {
2362 if (!str) {
2363 return A2STR::NIL;
2364 }
2365 return str;
2366 }
2367
safeStrerror(int errNum)2368 std::string safeStrerror(int errNum) { return makeString(strerror(errNum)); }
2369
noProxyDomainMatch(const std::string & hostname,const std::string & domain)2370 bool noProxyDomainMatch(const std::string& hostname, const std::string& domain)
2371 {
2372 if (!domain.empty() && domain[0] == '.' && !util::isNumericHost(hostname)) {
2373 return util::endsWith(hostname, domain);
2374 }
2375 return hostname == domain;
2376 }
2377
tlsHostnameMatch(const std::string & pattern,const std::string & hostname)2378 bool tlsHostnameMatch(const std::string& pattern, const std::string& hostname)
2379 {
2380 std::string::const_iterator ptWildcard =
2381 std::find(pattern.begin(), pattern.end(), '*');
2382 if (ptWildcard == pattern.end()) {
2383 return strieq(pattern.begin(), pattern.end(), hostname.begin(),
2384 hostname.end());
2385 }
2386 std::string::const_iterator ptLeftLabelEnd =
2387 std::find(pattern.begin(), pattern.end(), '.');
2388 bool wildcardEnabled = true;
2389 // Do case-insensitive match. At least 2 dots are required to enable
2390 // wildcard match. Also wildcard must be in the left-most label.
2391 // Don't attempt to match a presented identifier where the wildcard
2392 // character is embedded within an A-label.
2393 if (ptLeftLabelEnd == pattern.end() ||
2394 std::find(ptLeftLabelEnd + 1, pattern.end(), '.') == pattern.end() ||
2395 ptLeftLabelEnd < ptWildcard || istartsWith(pattern, "xn--")) {
2396 wildcardEnabled = false;
2397 }
2398 if (!wildcardEnabled) {
2399 return strieq(pattern.begin(), pattern.end(), hostname.begin(),
2400 hostname.end());
2401 }
2402 std::string::const_iterator hnLeftLabelEnd =
2403 std::find(hostname.begin(), hostname.end(), '.');
2404 if (!strieq(ptLeftLabelEnd, pattern.end(), hnLeftLabelEnd, hostname.end())) {
2405 return false;
2406 }
2407 // Perform wildcard match. Here '*' must match at least one
2408 // character.
2409 if (hnLeftLabelEnd - hostname.begin() < ptLeftLabelEnd - pattern.begin()) {
2410 return false;
2411 }
2412 return istartsWith(hostname.begin(), hnLeftLabelEnd, pattern.begin(),
2413 ptWildcard) &&
2414 iendsWith(hostname.begin(), hnLeftLabelEnd, ptWildcard + 1,
2415 ptLeftLabelEnd);
2416 }
2417
strieq(const std::string & a,const char * b)2418 bool strieq(const std::string& a, const char* b)
2419 {
2420 return strieq(a.begin(), a.end(), b);
2421 }
2422
strieq(const std::string & a,const std::string & b)2423 bool strieq(const std::string& a, const std::string& b)
2424 {
2425 return strieq(a.begin(), a.end(), b.begin(), b.end());
2426 }
2427
startsWith(const std::string & a,const char * b)2428 bool startsWith(const std::string& a, const char* b)
2429 {
2430 return startsWith(a.begin(), a.end(), b);
2431 }
2432
startsWith(const std::string & a,const std::string & b)2433 bool startsWith(const std::string& a, const std::string& b)
2434 {
2435 return startsWith(a.begin(), a.end(), b.begin(), b.end());
2436 }
2437
istartsWith(const std::string & a,const char * b)2438 bool istartsWith(const std::string& a, const char* b)
2439 {
2440 return istartsWith(a.begin(), a.end(), b);
2441 }
2442
istartsWith(const std::string & a,const std::string & b)2443 bool istartsWith(const std::string& a, const std::string& b)
2444 {
2445 return istartsWith(std::begin(a), std::end(a), std::begin(b), std::end(b));
2446 }
2447
endsWith(const std::string & a,const char * b)2448 bool endsWith(const std::string& a, const char* b)
2449 {
2450 return endsWith(a.begin(), a.end(), b, b + strlen(b));
2451 }
2452
endsWith(const std::string & a,const std::string & b)2453 bool endsWith(const std::string& a, const std::string& b)
2454 {
2455 return endsWith(a.begin(), a.end(), b.begin(), b.end());
2456 }
2457
iendsWith(const std::string & a,const char * b)2458 bool iendsWith(const std::string& a, const char* b)
2459 {
2460 return iendsWith(a.begin(), a.end(), b, b + strlen(b));
2461 }
2462
iendsWith(const std::string & a,const std::string & b)2463 bool iendsWith(const std::string& a, const std::string& b)
2464 {
2465 return iendsWith(a.begin(), a.end(), b.begin(), b.end());
2466 }
2467
// Strict-weak-ordering predicate for C strings: true if |a| sorts before
// |b| according to strcmp().
bool strless(const char* a, const char* b)
{
  const int order = strcmp(a, b);
  return order < 0;
}
2469
#ifdef ENABLE_SSL
// Maps the option value |ver| to a TLSVersion. A2_V_TLS11 and A2_V_TLS13
// map to their own protocol constants; A2_V_TLS12 and any unrecognized
// value map to TLS_PROTO_TLS12.
TLSVersion toTLSVersion(const std::string& ver)
{
  if (ver == A2_V_TLS13) {
    return TLS_PROTO_TLS13;
  }
  if (ver == A2_V_TLS11) {
    return TLS_PROTO_TLS11;
  }
  // A2_V_TLS12, and the default for everything else.
  return TLS_PROTO_TLS12;
}
#endif // ENABLE_SSL
2485
#ifdef __MINGW32__
// Returns the system message text for the Windows error code |errNum|,
// requested in US English. Returns an empty string if FormatMessage()
// fails.
std::string formatLastError(int errNum)
{
  std::array<char, 4_k> buf;
  const DWORD written = FormatMessage(
      FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr,
      errNum,
      // Default language
      MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US),
      static_cast<LPTSTR>(buf.data()), buf.size(), nullptr);
  if (written == 0) {
    return "";
  }
  return buf.data();
}
#endif // __MINGW32__
2502
// Sets the FD_CLOEXEC flag on |fd|, keeping its other descriptor flags.
// Errors are ignored. Does nothing on MinGW.
void make_fd_cloexec(int fd)
{
#ifndef __MINGW32__
  // TODO from linux man page, fcntl() with F_GETFD or F_SETFD does
  // not return -1 with errno == EINTR.  Historically, aria2 code base
  // checks this case.  Probably, it is not needed.
  int flags;
  do {
    flags = fcntl(fd, F_GETFD);
  } while (flags == -1 && errno == EINTR);
  if (flags == -1) {
    return;
  }

  int rc;
  do {
    rc = fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
  } while (rc == -1 && errno == EINTR);
#endif // !__MINGW32__
}
2521
2522 #ifdef __MINGW32__
gainPrivilege(LPCTSTR privName)2523 bool gainPrivilege(LPCTSTR privName)
2524 {
2525 LUID luid;
2526 TOKEN_PRIVILEGES tp;
2527
2528 if (!LookupPrivilegeValue(nullptr, privName, &luid)) {
2529 auto errNum = GetLastError();
2530 A2_LOG_WARN(fmt("Lookup for privilege name %s failed. cause: %s", privName,
2531 util::formatLastError(errNum).c_str()));
2532 return false;
2533 }
2534
2535 tp.PrivilegeCount = 1;
2536 tp.Privileges[0].Luid = luid;
2537 tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
2538
2539 HANDLE token;
2540 if (!OpenProcessToken(GetCurrentProcess(),
2541 TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token)) {
2542 auto errNum = GetLastError();
2543 A2_LOG_WARN(fmt("Getting process token failed. cause: %s",
2544 util::formatLastError(errNum).c_str()));
2545 return false;
2546 }
2547
2548 auto tokenCloser = defer(token, CloseHandle);
2549
2550 if (!AdjustTokenPrivileges(token, FALSE, &tp, 0, NULL, NULL)) {
2551 auto errNum = GetLastError();
2552 A2_LOG_WARN(fmt("Gaining privilege %s failed. cause: %s", privName,
2553 util::formatLastError(errNum).c_str()));
2554 return false;
2555 }
2556
2557 // Check privilege was really gained
2558 DWORD bufsize = 0;
2559 GetTokenInformation(token, TokenPrivileges, nullptr, 0, &bufsize);
2560 if (bufsize == 0) {
2561 A2_LOG_WARN("Checking privilege failed.");
2562 return false;
2563 }
2564
2565 auto buf = make_unique<char[]>(bufsize);
2566 if (!GetTokenInformation(token, TokenPrivileges, buf.get(), bufsize,
2567 &bufsize)) {
2568 auto errNum = GetLastError();
2569 A2_LOG_WARN(fmt("Checking privilege failed. cause: %s",
2570 util::formatLastError(errNum).c_str()));
2571 return false;
2572 }
2573
2574 auto privs = reinterpret_cast<TOKEN_PRIVILEGES*>(buf.get());
2575 for (size_t i = 0; i < privs->PrivilegeCount; ++i) {
2576 auto& priv = privs->Privileges[i];
2577 if (memcmp(&priv.Luid, &luid, sizeof(luid)) != 0) {
2578 continue;
2579 }
2580 if (priv.Attributes == SE_PRIVILEGE_ENABLED) {
2581 return true;
2582 }
2583
2584 break;
2585 }
2586
2587 A2_LOG_WARN(fmt("Gaining privilege %s failed.", privName));
2588
2589 return false;
2590 }
2591 #endif // __MINGW32__
2592
2593 } // namespace util
2594
2595 } // namespace aria2
2596