1 /*
2 *
3 * Copyright (c) 2007-2011, Nick Treleaven
4 *
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License version 2 or (at your option) any later version.
7 *
8 * This module contains functions for generating tags for reStructuredText (reST) files.
9 */
10
11 /*
12 * INCLUDE FILES
13 */
14 #include "general.h" /* must always come first */
15
16 #include <ctype.h>
17 #include <string.h>
18
19 #include "parse.h"
20 #include "read.h"
21 #include "vstring.h"
22 #include "nestlevel.h"
23 #include "entry.h"
24 #include "routines.h"
25 #include "field.h"
26
27 /*
28 * DATA DEFINITIONS
29 */
/*
 * Tag kinds produced by this parser.
 *
 * K_CHAPTER .. K_SUBSUBSECTION are the section levels; their numeric order
 * is the nesting order (a smaller value encloses a larger one).
 *
 * SECTION_COUNT is the number of section levels only.  It sizes the table of
 * underline characters and bounds the search in get_kind(), so it must not
 * include K_TARGET: otherwise a fifth distinct underline character would be
 * assigned index 4 and be tagged as a hyperlink target.
 *
 * K_TARGET keeps the value 4 so that it still indexes the matching row of
 * RstKinds.
 */
typedef enum {
	K_EOF = -1,		/* pseudo kind: closes every open section */
	K_CHAPTER = 0,
	K_SECTION,
	K_SUBSECTION,
	K_SUBSUBSECTION,
	SECTION_COUNT,		/* number of section levels above */
	K_TARGET = SECTION_COUNT,
} rstKind;
39
40 static kindDefinition RstKinds[] = {
41 { true, 'c', "chapter", "chapters"},
42 { true, 's', "section", "sections" },
43 { true, 'S', "subsection", "subsections" },
44 { true, 't', "subsubsection", "subsubsections" },
45 { true, 'T', "target", "targets" },
46 };
47
/* Indices into RstFields, the parser-specific fields of this parser. */
typedef enum {
	F_SECTION_MARKER = 0,	/* the "sectionMarker" field */
} rstField;
51
52 static fieldDefinition RstFields [] = {
53 {
54 .name = "sectionMarker",
55 .description = "character used for declaring section",
56 .enabled = false,
57 },
58 };
59
60 static char kindchars[SECTION_COUNT];
61
62 static NestingLevels *nestingLevels = NULL;
63
64 /*
65 * FUNCTION DEFINITIONS
66 */
67
getNestingLevel(const int kind)68 static NestingLevel *getNestingLevel(const int kind)
69 {
70 NestingLevel *nl;
71 tagEntryInfo *e;
72
73 int d = 0;
74
75 if (kind > K_EOF)
76 {
77 d++;
78 /* 1. we want the line before the '---' underline chars */
79 d++;
80 /* 2. we want the line before the next section/chapter title. */
81 }
82
83 while (1)
84 {
85 nl = nestingLevelsGetCurrent(nestingLevels);
86 e = getEntryOfNestingLevel (nl);
87 if ((nl && (e == NULL)) || (e && e->kindIndex >= kind))
88 {
89 if (e)
90 e->extensionFields.endLine = (getInputLineNumber() - d);
91 nestingLevelsPop(nestingLevels);
92 }
93 else
94 break;
95 }
96 return nl;
97 }
98
makeTargetRstTag(const vString * const name)99 static int makeTargetRstTag(const vString* const name)
100 {
101 tagEntryInfo e;
102
103 initTagEntry (&e, vStringValue (name), K_TARGET);
104
105 const NestingLevel *nl = nestingLevelsGetCurrent(nestingLevels);
106 tagEntryInfo *parent = NULL;
107 if (nl)
108 parent = getEntryOfNestingLevel (nl);
109
110 if (parent)
111 {
112 e.extensionFields.scopeKindIndex = parent->kindIndex;
113 e.extensionFields.scopeName = parent->name;
114 }
115
116 return makeTagEntry (&e);
117 }
118
makeSectionRstTag(const vString * const name,const int kind,const MIOPos filepos,char marker)119 static void makeSectionRstTag(const vString* const name, const int kind, const MIOPos filepos,
120 char marker)
121 {
122 const NestingLevel *const nl = getNestingLevel(kind);
123 tagEntryInfo *parent;
124
125 int r = CORK_NIL;
126
127 if (vStringLength (name) > 0)
128 {
129 tagEntryInfo e;
130 char m [2] = { [1] = '\0' };
131
132 initTagEntry (&e, vStringValue (name), kind);
133
134 e.lineNumber--; /* we want the line before the '---' underline chars */
135 e.filePosition = filepos;
136
137 parent = getEntryOfNestingLevel (nl);
138 if (parent && (parent->kindIndex < kind))
139 {
140 #if 1
141 e.extensionFields.scopeKindIndex = parent->kindIndex;
142 e.extensionFields.scopeName = parent->name;
143 #else
144 /* TODO
145
146 Following code makes the scope information full qualified form.
147 Do users want the full qualified form?
148 --- ./Units/rst.simple.d/expected.tags 2015-12-18 01:32:35.574255617 +0900
149 +++ /home/yamato/var/ctags-github/Units/rst.simple.d/FILTERED.tmp 2016-05-05 03:05:38.165604756 +0900
150 @@ -5,2 +5,2 @@
151 -Subsection 1.1.1 input.rst /^Subsection 1.1.1$/;" S section:Section 1.1
152 -Subsubsection 1.1.1.1 input.rst /^Subsubsection 1.1.1.1$/;" t subsection:Subsection 1.1.1
153 +Subsection 1.1.1 input.rst /^Subsection 1.1.1$/;" S section:Chapter 1.Section 1.1
154 +Subsubsection 1.1.1.1 input.rst /^Subsubsection 1.1.1.1$/;" t subsection:Chapter 1.Section 1.1.Subsection 1.1.1
155 */
156 e.extensionFields.scopeIndex = nl->corkIndex;
157 #endif
158 }
159
160 m[0] = marker;
161 attachParserField (&e, RstFields [F_SECTION_MARKER].ftype, m);
162 r = makeTagEntry (&e);
163 }
164 nestingLevelsPush(nestingLevels, r);
165 }
166
167
/*
 * Reports whether every character of `str' equals its first character.
 * An empty string and a one-character string both count as "all the same".
 */
static bool issame(const char *str)
{
	const char expected = str[0];
	const char *cursor;

	for (cursor = str; *cursor != '\0'; cursor++)
	{
		if (*cursor != expected)
			return false;
	}
	return true;
}
184
185
get_kind(char c)186 static int get_kind(char c)
187 {
188 int i;
189
190 for (i = 0; i < SECTION_COUNT; i++)
191 {
192 if (kindchars[i] == c)
193 return i;
194
195 if (kindchars[i] == 0)
196 {
197 kindchars[i] = c;
198 return i;
199 }
200 }
201 return -1;
202 }
203
204
/*
 * Computes the length, in characters, of the UTF-8 string stored in the
 * first `buf_len' bytes of `buf'.
 *
 * Returns -1 if the bytes do not look like UTF-8:
 *   - a byte that cannot start a sequence (a stray continuation byte, or
 *     0xf8 and above),
 *   - a sequence that is cut off by the end of the buffer,
 *   - a lead byte that is not followed by the announced number of
 *     continuation bytes (10xxxxxx).
 *
 * The check is structural only; overlong encodings and surrogates are not
 * rejected.  A `buf_len' of zero or less yields 0.
 */
static int utf8_strlen(const char *buf, int buf_len)
{
	/* work on unsigned bytes so the bit tests do not depend on the
	 * signedness of plain char */
	const unsigned char *cursor = (const unsigned char *) buf;
	int remaining = buf_len;
	int len = 0;

	while (remaining > 0)
	{
		const unsigned char lead = *cursor;
		int seq_len;
		int i;

		if (! (lead & 0x80))
			seq_len = 1;
		else if ((lead & 0xe0) == 0xc0)
			seq_len = 2;
		else if ((lead & 0xf0) == 0xe0)
			seq_len = 3;
		else if ((lead & 0xf8) == 0xf0)
			seq_len = 4;
		else /* not a valid leading UTF-8 byte, abort */
			return -1;

		/* check before advancing so the cursor never leaves the buffer */
		if (seq_len > remaining) /* incomplete last character */
			return -1;

		for (i = 1; i < seq_len; i++)
		{
			if ((cursor [i] & 0xc0) != 0x80) /* not a continuation byte */
				return -1;
		}

		cursor += seq_len;
		remaining -= seq_len;
		len++;
	}

	return len;
}
232
233
/*
 * Recognizes the start of an explicit hyperlink target, i.e. a line that
 * begins with the four characters ".. _".
 *
 * Returns a pointer to the character following that prefix, or NULL when
 * the line does not start with it.  The comparison stops at the first
 * mismatch, so a shorter line is never read past its terminating NUL.
 */
static const unsigned char *is_target_line (const unsigned char *line)
{
	static const unsigned char prefix [] = { '.', '.', ' ', '_' };
	size_t i;

	for (i = 0; i < sizeof prefix; i++)
	{
		if (line [i] != prefix [i])
			return NULL;
	}
	return line + sizeof prefix;
}
241
capture_target(const unsigned char * target_line)242 static int capture_target (const unsigned char *target_line)
243 {
244 vString *name = vStringNew ();
245 unsigned char terminator;
246 int r = CORK_NIL;
247
248 if (*target_line == '`')
249 terminator = '`';
250 else if (!isspace (*target_line) && *target_line != '\0')
251 {
252 /* "Simple reference names are single words consisting of
253 * alphanumerics plus isolated (no two adjacent) internal
254 * hyphens, underscores, periods, colons and plus signs; no
255 * whitespace or other characters are allowed."
256 * -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#reference-names
257 */
258 vStringPut (name, *target_line);
259 terminator = ':';
260 }
261 else
262 goto out;
263
264 target_line++;
265
266
267 bool escaped = false;
268 while (*target_line != '\0')
269 {
270 if (escaped)
271 {
272 vStringPut (name, *target_line);
273 escaped = false;
274 }
275 else
276 {
277 if (*target_line == '\\')
278 {
279 vStringPut (name, *target_line);
280 escaped = true;
281 }
282 else if (*target_line == terminator)
283 break;
284 else
285 vStringPut (name, *target_line);
286 }
287 target_line++;
288 }
289
290 if (vStringLength (name) == 0)
291 goto out;
292
293 r = makeTargetRstTag (name);
294
295 out:
296 vStringDelete (name);
297 return r;
298 }
299
300 /* TODO: parse overlining & underlining as distinct sections. */
findRstTags(void)301 static void findRstTags (void)
302 {
303 vString *name = vStringNew ();
304 MIOPos filepos;
305 const unsigned char *line;
306 const unsigned char *target_line;
307
308 memset(&filepos, 0, sizeof(filepos));
309 memset(kindchars, 0, sizeof kindchars);
310 nestingLevels = nestingLevelsNew(0);
311
312 while ((line = readLineFromInputFile ()) != NULL)
313 {
314 /* Handle .. _target:
315 * http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets
316 */
317 if ((target_line = is_target_line (line)) != NULL)
318 {
319 if (capture_target (target_line) != CORK_NIL)
320 {
321 vStringClear (name);
322 continue;
323 }
324 }
325
326 int line_len = strlen((const char*) line);
327 int name_len_bytes = vStringLength(name);
328 /* FIXME: this isn't right, actually we need the real display width,
329 * taking into account double-width characters and stuff like that.
330 * But duh. */
331 int name_len = utf8_strlen(vStringValue(name), name_len_bytes);
332
333 /* if the name doesn't look like UTF-8, assume one-byte charset */
334 if (name_len < 0)
335 name_len = name_len_bytes;
336
337 /* underlines must be the same length or more */
338 if (line_len >= name_len && name_len > 0 &&
339 ispunct(line[0]) && issame((const char*) line))
340 {
341 char c = line[0];
342 int kind = get_kind(c);
343
344 if (kind >= 0)
345 {
346 makeSectionRstTag(name, kind, filepos, c);
347 continue;
348 }
349 }
350 vStringClear (name);
351 if (!isspace(*line))
352 {
353 vStringCatS(name, (const char*)line);
354 filepos = getInputFilePosition();
355 }
356 }
357 /* Force popping all nesting levels */
358 getNestingLevel (K_EOF);
359 vStringDelete (name);
360 nestingLevelsFree(nestingLevels);
361 }
362
RstParser(void)363 extern parserDefinition* RstParser (void)
364 {
365 static const char *const extensions [] = { "rest", "reST", "rst", NULL };
366 parserDefinition* const def = parserNew ("ReStructuredText");
367
368 def->kindTable = RstKinds;
369 def->kindCount = ARRAY_SIZE (RstKinds);
370 def->extensions = extensions;
371 def->parser = findRstTags;
372
373 def->fieldTable = RstFields;
374 def->fieldCount = ARRAY_SIZE (RstFields);
375
376 def->useCork = true;
377
378 return def;
379 }
380