1 /*
2  *  Copyright (C) 2012-2018 Team Kodi
3  *  This file is part of Kodi - https://kodi.tv
4  *
5  *  SPDX-License-Identifier: GPL-2.0-or-later
6  *  See LICENSES/README.md for more information.
7  */
8 
9 #include "utils/POUtils.h"
10 
11 #include "URL.h"
12 #include "filesystem/File.h"
13 #include "utils/log.h"
14 
15 #include <stdlib.h>
16 
CPODocument()17 CPODocument::CPODocument()
18 {
19   m_CursorPos = 0;
20   m_nextEntryPos = 0;
21   m_POfilelength = 0;
22   m_Entry.msgStrPlural.clear();
23   m_Entry.msgStrPlural.resize(1);
24 }
25 
// The compiler-generated destructor is used; no cleanup code is written here.
CPODocument::~CPODocument() = default;
27 
LoadFile(const std::string & pofilename)28 bool CPODocument::LoadFile(const std::string &pofilename)
29 {
30   CURL poFileUrl(pofilename);
31   if (!XFILE::CFile::Exists(poFileUrl))
32     return false;
33 
34   XFILE::CFile file;
35   XFILE::auto_buffer buf;
36   if (file.LoadFile(poFileUrl, buf) < 18) // at least a size of a minimalistic header
37   {
38     CLog::Log(LOGERROR, "%s: can't load file \"%s\" or file is too small", __FUNCTION__,  pofilename.c_str());
39     return false;
40   }
41 
42   m_strBuffer = '\n';
43   m_strBuffer.append(buf.get(), buf.size());
44   buf.clear();
45 
46   ConvertLineEnds(pofilename);
47 
48   // we make sure, to have an LF at the end of buffer
49   if (*m_strBuffer.rbegin() != '\n')
50   {
51     m_strBuffer += "\n";
52   }
53 
54   m_POfilelength = m_strBuffer.size();
55 
56   if (GetNextEntry() && m_Entry.Type == MSGID_FOUND)
57     return true;
58 
59   CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: %s", pofilename.c_str());
60   return false;
61 }
62 
GetNextEntry()63 bool CPODocument::GetNextEntry()
64 {
65   do
66   {
67     // if we don't find LFLF, we reached the end of the buffer and the last entry to check
68     // we indicate this with setting m_nextEntryPos to the end of the buffer
69     if ((m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos)) == std::string::npos)
70       m_nextEntryPos = m_POfilelength-1;
71 
72     // now we read the actual entry into a temp string for further processing
73     m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos +1);
74     m_CursorPos = m_nextEntryPos+1; // jump cursor to the second LF character
75 
76     if (FindLineStart ("\nmsgid ", m_Entry.msgID.Pos))
77     {
78       if (FindLineStart ("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID())
79       {
80         m_Entry.Type = ID_FOUND; // we found an entry with a valid numeric id
81         return true;
82       }
83 
84       size_t plurPos;
85       if (FindLineStart ("\nmsgid_plural ", plurPos))
86       {
87         m_Entry.Type = MSGID_PLURAL_FOUND; // we found a pluralized entry
88         return true;
89       }
90 
91       m_Entry.Type = MSGID_FOUND; // we found a normal entry, with no numeric id
92       return true;
93     }
94   }
95   while (m_nextEntryPos != m_POfilelength-1);
96   // we reached the end of buffer AND we have not found a valid entry
97 
98   return false;
99 }
100 
ParseEntry(bool bisSourceLang)101 void CPODocument::ParseEntry(bool bisSourceLang)
102 {
103   if (bisSourceLang)
104   {
105     if (m_Entry.Type == ID_FOUND)
106       GetString(m_Entry.msgID);
107     else
108       m_Entry.msgID.Str.clear();
109     return;
110   }
111 
112   if (m_Entry.Type != ID_FOUND)
113   {
114     GetString(m_Entry.msgID);
115     if (FindLineStart ("\nmsgctxt ", m_Entry.msgCtxt.Pos))
116       GetString(m_Entry.msgCtxt);
117     else
118       m_Entry.msgCtxt.Str.clear();
119   }
120 
121   if (m_Entry.Type != MSGID_PLURAL_FOUND)
122   {
123     if (FindLineStart ("\nmsgstr ", m_Entry.msgStr.Pos))
124     {
125       GetString(m_Entry.msgStr);
126       GetString(m_Entry.msgID);
127     }
128     else
129     {
130       CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: %s",
131                 m_Entry.Content.c_str());
132       m_Entry.msgStr.Str.clear();
133     }
134     return;
135   }
136 
137   // We found a plural form entry. We read it into a vector of CStrEntry types
138   m_Entry.msgStrPlural.clear();
139   std::string strPattern = "\nmsgstr[0] ";
140   CStrEntry strEntry;
141 
142   for (int n=0; n<7 ; n++)
143   {
144     strPattern[8] = static_cast<char>(n+'0');
145     if (FindLineStart (strPattern, strEntry.Pos))
146     {
147       GetString(strEntry);
148       if (strEntry.Str.empty())
149         break;
150       m_Entry.msgStrPlural.push_back(strEntry);
151     }
152     else
153       break;
154   }
155 
156   if (m_Entry.msgStrPlural.empty())
157   {
158     CLog::Log(LOGERROR, "POParser: msgstr[] plural lines have zero valid strings. "
159                         "Failed entry: %s", m_Entry.Content.c_str());
160     m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector
161   }
162 }
163 
GetPlurMsgstr(size_t plural) const164 const std::string& CPODocument::GetPlurMsgstr(size_t plural) const
165 {
166   if (m_Entry.msgStrPlural.size() < plural+1)
167   {
168     CLog::Log(LOGERROR, "POParser: msgstr[%i] plural field requested, but not found in PO file. "
169                         "Failed entry: %s", static_cast<int>(plural), m_Entry.Content.c_str());
170     plural = m_Entry.msgStrPlural.size()-1;
171   }
172   return m_Entry.msgStrPlural[plural].Str;
173 }
174 
UnescapeString(const std::string & strInput)175 std::string CPODocument::UnescapeString(const std::string &strInput)
176 {
177   std::string strOutput;
178   if (strInput.empty())
179     return strOutput;
180 
181   char oescchar;
182   strOutput.reserve(strInput.size());
183   std::string::const_iterator it = strInput.begin();
184   while (it < strInput.end())
185   {
186     oescchar = *it++;
187     if (oescchar == '\\')
188     {
189       if (it == strInput.end())
190       {
191         CLog::Log(LOGERROR,
192                   "POParser: warning, unhandled escape character "
193                   "at line-end. Problematic entry: %s",
194                   m_Entry.Content.c_str());
195         break;
196       }
197       switch (*it++)
198       {
199         case 'a':  oescchar = '\a'; break;
200         case 'b':  oescchar = '\b'; break;
201         case 'v':  oescchar = '\v'; break;
202         case 'n':  oescchar = '\n'; break;
203         case 't':  oescchar = '\t'; break;
204         case 'r':  oescchar = '\r'; break;
205         case '"':  oescchar = '"' ; break;
206         case '0':  oescchar = '\0'; break;
207         case 'f':  oescchar = '\f'; break;
208         case '?':  oescchar = '\?'; break;
209         case '\'': oescchar = '\''; break;
210         case '\\': oescchar = '\\'; break;
211 
212         default:
213         {
214           CLog::Log(LOGERROR,
215                     "POParser: warning, unhandled escape character. Problematic entry: %s",
216                     m_Entry.Content.c_str());
217           continue;
218         }
219       }
220     }
221     strOutput.push_back(oescchar);
222   }
223   return strOutput;
224 }
225 
FindLineStart(const std::string & strToFind,size_t & FoundPos)226 bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos)
227 {
228 
229   FoundPos = m_Entry.Content.find(strToFind);
230 
231   if (FoundPos == std::string::npos || FoundPos + strToFind.size() + 2 > m_Entry.Content.size())
232     return false; // if we don't find the string or if we don't have at least one char after it
233 
234   FoundPos += strToFind.size(); // to set the pos marker to the exact start of the real data
235   return true;
236 }
237 
ParseNumID()238 bool CPODocument::ParseNumID()
239 {
240   if (isdigit(m_Entry.Content.at(m_Entry.xIDPos))) // verify if the first char is digit
241   {
242     // we check for the numeric id for the fist 10 chars (uint32)
243     m_Entry.xID = strtol(&m_Entry.Content[m_Entry.xIDPos], NULL, 10);
244     return true;
245   }
246 
247   CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, "
248                       "entry was handled as normal msgid entry");
249   CLog::Log(LOGERROR, "POParser: The problematic entry: %s",
250             m_Entry.Content.c_str());
251   return false;
252 }
253 
GetString(CStrEntry & strEntry)254 void CPODocument::GetString(CStrEntry &strEntry)
255 {
256   size_t nextLFPos;
257   size_t startPos = strEntry.Pos;
258   strEntry.Str.clear();
259 
260   while (startPos < m_Entry.Content.size())
261   {
262     nextLFPos = m_Entry.Content.find('\n', startPos);
263     if (nextLFPos == std::string::npos)
264       nextLFPos = m_Entry.Content.size();
265 
266     // check syntax, if it really is a valid quoted string line
267     if (nextLFPos-startPos < 2 ||  m_Entry.Content[startPos] != '\"' ||
268         m_Entry.Content[nextLFPos-1] != '\"')
269       break;
270 
271     strEntry.Str.append(m_Entry.Content, startPos+1, nextLFPos-2-startPos);
272     startPos = nextLFPos+1;
273   }
274 
275   strEntry.Str = UnescapeString(strEntry.Str);
276 }
277 
ConvertLineEnds(const std::string & filename)278 void CPODocument::ConvertLineEnds(const std::string &filename)
279 {
280   size_t foundPos = m_strBuffer.find_first_of('\r');
281   if (foundPos == std::string::npos)
282     return; // We have only Linux style line endings in the file, nothing to do
283 
284   if (foundPos+1 >= m_strBuffer.size() || m_strBuffer[foundPos+1] != '\n')
285     CLog::Log(LOGDEBUG, "POParser: PO file has Mac Style Line Endings. "
286               "Converted in memory to Linux LF for file: %s", filename.c_str());
287   else
288     CLog::Log(LOGDEBUG, "POParser: PO file has Win Style Line Endings. "
289               "Converted in memory to Linux LF for file: %s", filename.c_str());
290 
291   std::string strTemp;
292   strTemp.reserve(m_strBuffer.size());
293   for (std::string::const_iterator it = m_strBuffer.begin(); it < m_strBuffer.end(); ++it)
294   {
295     if (*it == '\r')
296     {
297       if (it+1 == m_strBuffer.end() || *(it+1) != '\n')
298         strTemp.push_back('\n'); // convert Mac style line ending and continue
299       continue; // we have Win style line ending so we exclude this CR now
300     }
301     strTemp.push_back(*it);
302   }
303   m_strBuffer.swap(strTemp);
304   m_POfilelength = m_strBuffer.size();
305 }
306