1 /*
2 * Copyright (C) 2012-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8
9 #include "utils/POUtils.h"
10
11 #include "URL.h"
12 #include "filesystem/File.h"
13 #include "utils/log.h"
14
15 #include <stdlib.h>
16
CPODocument()17 CPODocument::CPODocument()
18 {
19 m_CursorPos = 0;
20 m_nextEntryPos = 0;
21 m_POfilelength = 0;
22 m_Entry.msgStrPlural.clear();
23 m_Entry.msgStrPlural.resize(1);
24 }
25
26 CPODocument::~CPODocument() = default;
27
LoadFile(const std::string & pofilename)28 bool CPODocument::LoadFile(const std::string &pofilename)
29 {
30 CURL poFileUrl(pofilename);
31 if (!XFILE::CFile::Exists(poFileUrl))
32 return false;
33
34 XFILE::CFile file;
35 XFILE::auto_buffer buf;
36 if (file.LoadFile(poFileUrl, buf) < 18) // at least a size of a minimalistic header
37 {
38 CLog::Log(LOGERROR, "%s: can't load file \"%s\" or file is too small", __FUNCTION__, pofilename.c_str());
39 return false;
40 }
41
42 m_strBuffer = '\n';
43 m_strBuffer.append(buf.get(), buf.size());
44 buf.clear();
45
46 ConvertLineEnds(pofilename);
47
48 // we make sure, to have an LF at the end of buffer
49 if (*m_strBuffer.rbegin() != '\n')
50 {
51 m_strBuffer += "\n";
52 }
53
54 m_POfilelength = m_strBuffer.size();
55
56 if (GetNextEntry() && m_Entry.Type == MSGID_FOUND)
57 return true;
58
59 CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: %s", pofilename.c_str());
60 return false;
61 }
62
GetNextEntry()63 bool CPODocument::GetNextEntry()
64 {
65 do
66 {
67 // if we don't find LFLF, we reached the end of the buffer and the last entry to check
68 // we indicate this with setting m_nextEntryPos to the end of the buffer
69 if ((m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos)) == std::string::npos)
70 m_nextEntryPos = m_POfilelength-1;
71
72 // now we read the actual entry into a temp string for further processing
73 m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos +1);
74 m_CursorPos = m_nextEntryPos+1; // jump cursor to the second LF character
75
76 if (FindLineStart ("\nmsgid ", m_Entry.msgID.Pos))
77 {
78 if (FindLineStart ("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID())
79 {
80 m_Entry.Type = ID_FOUND; // we found an entry with a valid numeric id
81 return true;
82 }
83
84 size_t plurPos;
85 if (FindLineStart ("\nmsgid_plural ", plurPos))
86 {
87 m_Entry.Type = MSGID_PLURAL_FOUND; // we found a pluralized entry
88 return true;
89 }
90
91 m_Entry.Type = MSGID_FOUND; // we found a normal entry, with no numeric id
92 return true;
93 }
94 }
95 while (m_nextEntryPos != m_POfilelength-1);
96 // we reached the end of buffer AND we have not found a valid entry
97
98 return false;
99 }
100
ParseEntry(bool bisSourceLang)101 void CPODocument::ParseEntry(bool bisSourceLang)
102 {
103 if (bisSourceLang)
104 {
105 if (m_Entry.Type == ID_FOUND)
106 GetString(m_Entry.msgID);
107 else
108 m_Entry.msgID.Str.clear();
109 return;
110 }
111
112 if (m_Entry.Type != ID_FOUND)
113 {
114 GetString(m_Entry.msgID);
115 if (FindLineStart ("\nmsgctxt ", m_Entry.msgCtxt.Pos))
116 GetString(m_Entry.msgCtxt);
117 else
118 m_Entry.msgCtxt.Str.clear();
119 }
120
121 if (m_Entry.Type != MSGID_PLURAL_FOUND)
122 {
123 if (FindLineStart ("\nmsgstr ", m_Entry.msgStr.Pos))
124 {
125 GetString(m_Entry.msgStr);
126 GetString(m_Entry.msgID);
127 }
128 else
129 {
130 CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: %s",
131 m_Entry.Content.c_str());
132 m_Entry.msgStr.Str.clear();
133 }
134 return;
135 }
136
137 // We found a plural form entry. We read it into a vector of CStrEntry types
138 m_Entry.msgStrPlural.clear();
139 std::string strPattern = "\nmsgstr[0] ";
140 CStrEntry strEntry;
141
142 for (int n=0; n<7 ; n++)
143 {
144 strPattern[8] = static_cast<char>(n+'0');
145 if (FindLineStart (strPattern, strEntry.Pos))
146 {
147 GetString(strEntry);
148 if (strEntry.Str.empty())
149 break;
150 m_Entry.msgStrPlural.push_back(strEntry);
151 }
152 else
153 break;
154 }
155
156 if (m_Entry.msgStrPlural.empty())
157 {
158 CLog::Log(LOGERROR, "POParser: msgstr[] plural lines have zero valid strings. "
159 "Failed entry: %s", m_Entry.Content.c_str());
160 m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector
161 }
162 }
163
GetPlurMsgstr(size_t plural) const164 const std::string& CPODocument::GetPlurMsgstr(size_t plural) const
165 {
166 if (m_Entry.msgStrPlural.size() < plural+1)
167 {
168 CLog::Log(LOGERROR, "POParser: msgstr[%i] plural field requested, but not found in PO file. "
169 "Failed entry: %s", static_cast<int>(plural), m_Entry.Content.c_str());
170 plural = m_Entry.msgStrPlural.size()-1;
171 }
172 return m_Entry.msgStrPlural[plural].Str;
173 }
174
UnescapeString(const std::string & strInput)175 std::string CPODocument::UnescapeString(const std::string &strInput)
176 {
177 std::string strOutput;
178 if (strInput.empty())
179 return strOutput;
180
181 char oescchar;
182 strOutput.reserve(strInput.size());
183 std::string::const_iterator it = strInput.begin();
184 while (it < strInput.end())
185 {
186 oescchar = *it++;
187 if (oescchar == '\\')
188 {
189 if (it == strInput.end())
190 {
191 CLog::Log(LOGERROR,
192 "POParser: warning, unhandled escape character "
193 "at line-end. Problematic entry: %s",
194 m_Entry.Content.c_str());
195 break;
196 }
197 switch (*it++)
198 {
199 case 'a': oescchar = '\a'; break;
200 case 'b': oescchar = '\b'; break;
201 case 'v': oescchar = '\v'; break;
202 case 'n': oescchar = '\n'; break;
203 case 't': oescchar = '\t'; break;
204 case 'r': oescchar = '\r'; break;
205 case '"': oescchar = '"' ; break;
206 case '0': oescchar = '\0'; break;
207 case 'f': oescchar = '\f'; break;
208 case '?': oescchar = '\?'; break;
209 case '\'': oescchar = '\''; break;
210 case '\\': oescchar = '\\'; break;
211
212 default:
213 {
214 CLog::Log(LOGERROR,
215 "POParser: warning, unhandled escape character. Problematic entry: %s",
216 m_Entry.Content.c_str());
217 continue;
218 }
219 }
220 }
221 strOutput.push_back(oescchar);
222 }
223 return strOutput;
224 }
225
FindLineStart(const std::string & strToFind,size_t & FoundPos)226 bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos)
227 {
228
229 FoundPos = m_Entry.Content.find(strToFind);
230
231 if (FoundPos == std::string::npos || FoundPos + strToFind.size() + 2 > m_Entry.Content.size())
232 return false; // if we don't find the string or if we don't have at least one char after it
233
234 FoundPos += strToFind.size(); // to set the pos marker to the exact start of the real data
235 return true;
236 }
237
ParseNumID()238 bool CPODocument::ParseNumID()
239 {
240 if (isdigit(m_Entry.Content.at(m_Entry.xIDPos))) // verify if the first char is digit
241 {
242 // we check for the numeric id for the fist 10 chars (uint32)
243 m_Entry.xID = strtol(&m_Entry.Content[m_Entry.xIDPos], NULL, 10);
244 return true;
245 }
246
247 CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, "
248 "entry was handled as normal msgid entry");
249 CLog::Log(LOGERROR, "POParser: The problematic entry: %s",
250 m_Entry.Content.c_str());
251 return false;
252 }
253
GetString(CStrEntry & strEntry)254 void CPODocument::GetString(CStrEntry &strEntry)
255 {
256 size_t nextLFPos;
257 size_t startPos = strEntry.Pos;
258 strEntry.Str.clear();
259
260 while (startPos < m_Entry.Content.size())
261 {
262 nextLFPos = m_Entry.Content.find('\n', startPos);
263 if (nextLFPos == std::string::npos)
264 nextLFPos = m_Entry.Content.size();
265
266 // check syntax, if it really is a valid quoted string line
267 if (nextLFPos-startPos < 2 || m_Entry.Content[startPos] != '\"' ||
268 m_Entry.Content[nextLFPos-1] != '\"')
269 break;
270
271 strEntry.Str.append(m_Entry.Content, startPos+1, nextLFPos-2-startPos);
272 startPos = nextLFPos+1;
273 }
274
275 strEntry.Str = UnescapeString(strEntry.Str);
276 }
277
ConvertLineEnds(const std::string & filename)278 void CPODocument::ConvertLineEnds(const std::string &filename)
279 {
280 size_t foundPos = m_strBuffer.find_first_of('\r');
281 if (foundPos == std::string::npos)
282 return; // We have only Linux style line endings in the file, nothing to do
283
284 if (foundPos+1 >= m_strBuffer.size() || m_strBuffer[foundPos+1] != '\n')
285 CLog::Log(LOGDEBUG, "POParser: PO file has Mac Style Line Endings. "
286 "Converted in memory to Linux LF for file: %s", filename.c_str());
287 else
288 CLog::Log(LOGDEBUG, "POParser: PO file has Win Style Line Endings. "
289 "Converted in memory to Linux LF for file: %s", filename.c_str());
290
291 std::string strTemp;
292 strTemp.reserve(m_strBuffer.size());
293 for (std::string::const_iterator it = m_strBuffer.begin(); it < m_strBuffer.end(); ++it)
294 {
295 if (*it == '\r')
296 {
297 if (it+1 == m_strBuffer.end() || *(it+1) != '\n')
298 strTemp.push_back('\n'); // convert Mac style line ending and continue
299 continue; // we have Win style line ending so we exclude this CR now
300 }
301 strTemp.push_back(*it);
302 }
303 m_strBuffer.swap(strTemp);
304 m_POfilelength = m_strBuffer.size();
305 }
306