1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2015,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
8 *
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
13 *
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 *
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
31 *
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
34 */
35 /*! \internal \file
36 * \brief
37 * Implements classes from rstparser.h.
38 *
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_onlinehelp
41 */
42 #include "gmxpre.h"
43
44 #include "rstparser.h"
45
46 #include <cctype>
47
48 #include <algorithm>
49
50 #include "gromacs/utility/stringutil.h"
51
52 namespace gmx
53 {
54
55 namespace
56 {
57
/*! \brief
 * Counts the number of leading whitespace characters in a text range.
 *
 * \param[in] text   String to inspect.
 * \param[in] start  Index of the first character of the range.
 * \param[in] end    Index one past the last character of the range.
 * \returns   Number of consecutive characters, starting at \p start, for
 *     which std::isspace() is true; `end - start` if the whole range
 *     `[start, end)` is whitespace.
 *
 * Does not throw.
 */
int countLeadingSpace(const std::string& text, size_t start, size_t end)
{
    for (size_t i = start; i < end; ++i)
    {
        // The <cctype> classifiers have undefined behavior for negative
        // arguments other than EOF, so a (possibly signed) char must be
        // converted to unsigned char first.
        if (!std::isspace(static_cast<unsigned char>(text[i])))
        {
            return static_cast<int>(i - start);
        }
    }
    return static_cast<int>(end - start);
}
74
/*! \brief
 * Returns `true` if a list item starts in \p text at \p index.
 *
 * Two forms are recognized: a bullet (`*` followed by whitespace) and a
 * number (one or more digits, a `.`, and then whitespace).
 *
 * \param[in] text   String to inspect.
 * \param[in] index  Position in \p text where the item marker would start.
 *
 * Does not throw.
 */
bool startsListItem(const std::string& text, size_t index)
{
    // Any marker needs at least one character plus the whitespace after it.
    if (text.length() <= index + 1)
    {
        return false;
    }
    // Characters are converted to unsigned char before being passed to the
    // <cctype> classifiers, which have undefined behavior for negative values.
    if (text[index] == '*' && std::isspace(static_cast<unsigned char>(text[index + 1])))
    {
        return true;
    }
    if (std::isdigit(static_cast<unsigned char>(text[index])))
    {
        // Skip over the whole number.
        while (index < text.length() && std::isdigit(static_cast<unsigned char>(text[index])))
        {
            ++index;
        }
        // The number must be followed by ". " (a period and whitespace).
        if (text.length() > index + 1 && text[index] == '.'
            && std::isspace(static_cast<unsigned char>(text[index + 1])))
        {
            return true;
        }
    }
    return false;
}
103
/*! \brief
 * Returns `true` if a table starts in \p text at \p index.
 *
 * The function only inspects the first line for something that looks like a
 * reStructuredText table, and accepts also some malformed tables.
 * Any issues should be apparent when Sphinx parses the reStructuredText
 * export, so full validation is not done here.
 *
 * A line starting with `=` is accepted if the rest of it contains only `=`
 * and whitespace; a line starting with `+` is accepted if the rest of it
 * contains only `+` and `-`.
 *
 * \param[in] text   String to inspect.
 * \param[in] index  Position in \p text where the first table line starts.
 *
 * Does not throw.
 */
bool startsTable(const std::string& text, size_t index)
{
    // Nothing can start at or past the end of the text.
    if (index >= text.length())
    {
        return false;
    }
    if (text[index] == '=')
    {
        while (index < text.length() && text[index] != '\n')
        {
            // Convert to unsigned char: std::isspace() has undefined
            // behavior for negative arguments.
            if (text[index] != '=' && !std::isspace(static_cast<unsigned char>(text[index])))
            {
                return false;
            }
            ++index;
        }
        return true;
    }
    else if (text[index] == '+')
    {
        while (index < text.length() && text[index] != '\n')
        {
            if (text[index] != '-' && text[index] != '+')
            {
                return false;
            }
            ++index;
        }
        return true;
    }
    return false;
}
142
/*! \brief
 * Returns `true` if a line in \p text starting at \p index is a title underline.
 *
 * A title underline is a line that starts with a punctuation character and
 * consists only of repetitions of that same character up to the end of the
 * line (or the end of the text).
 *
 * \param[in] text   String to inspect.
 * \param[in] index  Position in \p text where the line starts.
 *
 * Does not throw.
 */
bool isTitleUnderline(const std::string& text, size_t index)
{
    // Nothing can start at or past the end of the text.
    if (index >= text.length())
    {
        return false;
    }
    const char firstChar = text[index];
    // Convert to unsigned char: std::ispunct() has undefined behavior for
    // negative arguments.
    if (std::ispunct(static_cast<unsigned char>(firstChar)))
    {
        while (index < text.length() && text[index] != '\n')
        {
            if (text[index] != firstChar)
            {
                return false;
            }
            ++index;
        }
        return true;
    }
    return false;
}
165
166 } // namespace
167
168 /********************************************************************
169 * RstParagraphIterator
170 */
171
// Initializes the iterator so that the first call to nextParagraph() starts
// scanning \p text from its beginning.
RstParagraphIterator::RstParagraphIterator(const std::string& text) :
    text_(text),
    // The current paragraph range is empty until nextParagraph() is called.
    begin_(0),
    end_(0),
    type_(eParagraphType_Normal),
    // No line breaks are emitted before the very first paragraph.
    breakSize_(0),
    firstLineIndent_(0),
    indent_(0),
    // Scanning for the next paragraph starts at the beginning of the text.
    nextBegin_(0),
    nextBreakSize_(0),
    // A negative value means that the next paragraph is not a literal block
    // (nextParagraph() tests for literalIndent_ >= 0).
    literalIndent_(-1)
{
}
185
nextParagraph()186 bool RstParagraphIterator::nextParagraph()
187 {
188 begin_ = nextBegin_;
189 type_ = eParagraphType_Normal;
190 breakSize_ = nextBreakSize_;
191 // Skip leading newlines (includes those separating paragraphs).
192 while (begin_ < text_.length() && text_[begin_] == '\n')
193 {
194 ++begin_;
195 }
196 if (begin_ == text_.length())
197 {
198 end_ = begin_;
199 breakSize_ = 0;
200 nextBegin_ = begin_;
201 return false;
202 }
203 if (literalIndent_ >= 0)
204 {
205 type_ = eParagraphType_Literal;
206 }
207 // Loop over lines in input until the end of the current paragraph.
208 size_t i = begin_;
209 int lineCount = 0;
210 while (true)
211 {
212 const bool bFirstLine = (lineCount == 0);
213 const size_t lineStart = i;
214 const size_t lineEnd = std::min(text_.find('\n', i), text_.length());
215 const int lineIndent = countLeadingSpace(text_, lineStart, lineEnd);
216 const size_t textStart = lineStart + lineIndent;
217 const bool bListItem = startsListItem(text_, textStart);
218 // Return each list item as a separate paragraph to make the behavior
219 // the same always; the item text could even contain multiple
220 // paragraphs, that would anyways produce breaks.
221 if (bListItem && !bFirstLine)
222 {
223 // Since there was no empty line in input, do not produce one in
224 // the output, either.
225 nextBreakSize_ = 1;
226 // end_ is not updated to break the paragraph before the current line.
227 break;
228 }
229 // Now we will actually use this line as part of this paragraph.
230 end_ = lineEnd;
231 ++lineCount;
232 // Update indentation.
233 if (bFirstLine)
234 {
235 firstLineIndent_ = indent_ = lineIndent;
236 if (bListItem)
237 {
238 // Find the indentation of the actual text after the
239 // bullet/number.
240 int prefixLength = 0;
241 while (!std::isspace(text_[textStart + prefixLength]))
242 {
243 ++prefixLength;
244 }
245 while (textStart + prefixLength < text_.length()
246 && std::isspace(text_[textStart + prefixLength]))
247 {
248 ++prefixLength;
249 }
250 indent_ += prefixLength;
251 }
252 }
253 else
254 {
255 indent_ = std::min(indent_, lineIndent);
256 }
257 // We need to check for the title underline before checking for the
258 // paragraph break so that the title is correctly recognized.
259 if (lineCount == 2 && isTitleUnderline(text_, lineStart))
260 {
261 type_ = eParagraphType_Title;
262 }
263 // Check for end-of-input or an empty line, i.e., a normal paragraph
264 // break.
265 if (lineEnd + 1 >= text_.length() || text_[lineEnd + 1] == '\n')
266 {
267 nextBreakSize_ = 2;
268 break;
269 }
270 // Always return the title as a separate paragraph, as it requires
271 // different processing.
272 // TODO: This should allow nicer formatting that shares
273 // implementation with writeTitle() and honors the nesting depths etc.,
274 // but that is not implemented.
275 if (type_ == eParagraphType_Title)
276 {
277 // If we are here, there was no actual paragraph break, so do not
278 // produce one in the output either.
279 nextBreakSize_ = 1;
280 break;
281 }
282 // Next loop starts at the character after the newline.
283 i = lineEnd + 1;
284 }
285 nextBegin_ = end_;
286 // Check if the next paragraph should be treated as a literal paragraph,
287 // and deal with transformations for the :: marker.
288 if (end_ - begin_ >= 2 && text_.compare(end_ - 2, 2, "::") == 0)
289 {
290 literalIndent_ = indent_;
291 // Return the actual literal block if the paragraph was just an "::".
292 if (end_ - begin_ == 2)
293 {
294 // Avoid leading whitespace at the beginning; breakSize_ == 0
295 // only for the first paragraph.
296 if (breakSize_ == 0)
297 {
298 nextBreakSize_ = 0;
299 }
300 return nextParagraph();
301 }
302 // Remove one of the colons, or both if preceded by whitespace.
303 const bool bRemoveDoubleColon = (text_[end_ - 3] == ' ');
304 end_ -= (bRemoveDoubleColon ? 3 : 1);
305 }
306 else
307 {
308 literalIndent_ = -1;
309 }
310 // Treat a table like a literal block (preserve newlines).
311 if (startsTable(text_, begin_ + firstLineIndent_))
312 {
313 type_ = eParagraphType_Literal;
314 }
315 return true;
316 }
317
getParagraphText(std::string * result) const318 void RstParagraphIterator::getParagraphText(std::string* result) const
319 {
320 result->clear();
321 result->reserve(end_ - begin_);
322 result->append(breakSize_, '\n');
323 const bool bPreserveNewlines = (type_ != eParagraphType_Normal);
324 size_t i = begin_;
325 while (i < end_)
326 {
327 const bool bFirstLine = (i == begin_);
328 const size_t lineStart = i + (bFirstLine ? firstLineIndent_ : indent_);
329 const size_t lineEnd = std::min(text_.find('\n', i), end_);
330 if (!bFirstLine)
331 {
332 if (bPreserveNewlines)
333 {
334 result->push_back('\n');
335 }
336 else if (!std::isspace((*result)[result->length() - 1]))
337 {
338 result->push_back(' ');
339 }
340 }
341 result->append(text_, lineStart, lineEnd - lineStart);
342 i = lineEnd + 1;
343 }
344 }
345
346 } // namespace gmx
347