1 // SciTE - Scintilla based Text Editor
2 /** @file Cookie.cxx
3  ** Examine start of files for coding cookies and type information.
4  **/
5 // Copyright 2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <cstdlib>
9 #include <cstring>
10 
11 #include <string>
12 #include <vector>
13 #include <chrono>
14 
15 #include "GUI.h"
16 
17 #include "StringHelpers.h"
18 #include "Cookie.h"
19 
/**
 * Return the first line of @a sv including its line end.
 * A line end is "\r\n", a lone '\r' or a lone '\n'.
 * When @a sv contains no line end, all of @a sv is returned.
 **/
std::string_view ExtractLine(std::string_view sv) noexcept {
	const size_t posEnd = sv.find_first_of(std::string_view("\r\n"));
	if (posEnd == std::string_view::npos) {
		// No line end so the whole text is the line
		return sv;
	}
	// Include the line end character that was found
	size_t lengthLine = posEnd + 1;
	// A '\n' directly after a '\r' belongs to the same line end
	if ((sv[posEnd] == '\r') && (lengthLine < sv.length()) && (sv[lengthLine] == '\n')) {
		lengthLine++;
	}
	return sv.substr(0, lengthLine);
}
34 
35 namespace {
36 
// Text that introduces a coding cookie; also matches the tail of "encoding".
constexpr std::string_view codingCookie{"coding"};
// Encoding name that is compared against the value found in a cookie.
constexpr std::string_view utf8Name{"utf-8"};
39 
// Is ch allowed inside an encoding name: an ASCII letter, an ASCII digit, '_', '-' or '.'.
constexpr bool isEncodingChar(char ch) noexcept {
	const bool lowerCase = (ch >= 'a') && (ch <= 'z');
	const bool upperCase = (ch >= 'A') && (ch <= 'Z');
	const bool digit = (ch >= '0') && (ch <= '9');
	const bool punctuation = (ch == '_') || (ch == '-') || (ch == '.');
	return punctuation || lowerCase || upperCase || digit;
}
45 
// Is ch a space or a tab; line end characters are not included.
constexpr bool isSpaceChar(char ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
49 
CookieValue(std::string_view s)50 UniMode CookieValue(std::string_view s) noexcept {
51 	const size_t posCoding = s.find(codingCookie);
52 	if (posCoding != std::string_view::npos) {
53 		s.remove_prefix(posCoding + codingCookie.length());
54 		if ((s.length() > 0) && ((s[0] == ':') || (s[0] == '='))) {
55 			s.remove_prefix(1);
56 			if ((s.length() > 0) && ((s[0] == '\"') || (s[0] == '\''))) {
57 				s.remove_prefix(1);
58 			}
59 			while ((s.length() > 0) && (isSpaceChar(s[0]))) {
60 				s.remove_prefix(1);
61 			}
62 			size_t endCoding = 0;
63 			while ((endCoding < s.length()) &&
64 					(isEncodingChar(s[endCoding]))) {
65 				endCoding++;
66 			}
67 			s.remove_suffix(s.length() - endCoding);
68 			if (EqualCaseInsensitive(s, utf8Name)) {
69 				return uniCookie;
70 			}
71 		}
72 	}
73 	return uni8Bit;
74 }
75 
76 }
77 
CodingCookieValue(std::string_view sv)78 UniMode CodingCookieValue(std::string_view sv) noexcept {
79 	const std::string_view l1 = ExtractLine(sv);
80 	UniMode unicodeMode = CookieValue(l1);
81 	if (unicodeMode == uni8Bit) {
82 		sv.remove_prefix(l1.length());
83 		const std::string_view l2 = ExtractLine(sv);
84 		unicodeMode = CookieValue(l2);
85 	}
86 	return unicodeMode;
87 }
88 
89