1 /*
2 *
3 *   Copyright (c) 2007-2011, Nick Treleaven
4 *
5 *   This source code is released for free distribution under the terms of the
6 *   GNU General Public License version 2 or (at your option) any later version.
7 *
8 *   This module contains functions for generating tags for reStructuredText (reST) files.
9 */
10 
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"	/* must always come first */
15 
16 #include <ctype.h>
17 #include <string.h>
18 
19 #include "parse.h"
20 #include "read.h"
21 #include "vstring.h"
22 #include "nestlevel.h"
23 #include "entry.h"
24 #include "routines.h"
25 #include "field.h"
26 
27 /*
28 *   DATA DEFINITIONS
29 */
/*
 * Kinds of tags emitted by this parser; the values index RstKinds.
 *
 * The section kinds come first and are ordered by nesting depth.
 * SECTION_COUNT is the number of section depths only: it sizes kindchars
 * and bounds get_kind(), so it must not include K_TARGET.  Otherwise a
 * fifth distinct underline character would be reported as a "target".
 */
typedef enum {
	K_EOF = -1,	/* pseudo kind passed to getNestingLevel() to pop every level */
	K_CHAPTER = 0,
	K_SECTION,
	K_SUBSECTION,
	K_SUBSUBSECTION,
	SECTION_COUNT,	/* number of section depths listed above */
	K_TARGET = SECTION_COUNT	/* hyperlink target (".. _name:") */
} rstKind;
39 
40 static kindDefinition RstKinds[] = {
41 	{ true, 'c', "chapter",       "chapters"},
42 	{ true, 's', "section",       "sections" },
43 	{ true, 'S', "subsection",    "subsections" },
44 	{ true, 't', "subsubsection", "subsubsections" },
45 	{ true, 'T', "target",        "targets" },
46 };
47 
/* Indices into RstFields. */
typedef enum {
	F_SECTION_MARKER = 0,	/* the "sectionMarker" field */
} rstField;
51 
52 static fieldDefinition RstFields [] = {
53 	{
54 		.name = "sectionMarker",
55 		.description = "character used for declaring section",
56 		.enabled = false,
57 	},
58 };
59 
/* Underline character registered for each section depth; 0 marks a depth
 * that has not been assigned yet.  Zeroed at the start of findRstTags() and
 * filled in by get_kind() in order of first appearance. */
static char kindchars[SECTION_COUNT];

/* Nesting levels of the sections that are currently open.  Created and
 * freed by findRstTags(); pushed by makeSectionRstTag() and popped by
 * getNestingLevel(). */
static NestingLevels *nestingLevels = NULL;
63 
64 /*
65 *   FUNCTION DEFINITIONS
66 */
67 
getNestingLevel(const int kind)68 static NestingLevel *getNestingLevel(const int kind)
69 {
70 	NestingLevel *nl;
71 	tagEntryInfo *e;
72 
73 	int d = 0;
74 
75 	if (kind > K_EOF)
76 	{
77 		d++;
78 		/* 1. we want the line before the '---' underline chars */
79 		d++;
80 		/* 2. we want the line before the next section/chapter title. */
81 	}
82 
83 	while (1)
84 	{
85 		nl = nestingLevelsGetCurrent(nestingLevels);
86 		e = getEntryOfNestingLevel (nl);
87 		if ((nl && (e == NULL)) || (e && e->kindIndex >= kind))
88 		{
89 			if (e)
90 				e->extensionFields.endLine = (getInputLineNumber() - d);
91 			nestingLevelsPop(nestingLevels);
92 		}
93 		else
94 			break;
95 	}
96 	return nl;
97 }
98 
makeTargetRstTag(const vString * const name)99 static int makeTargetRstTag(const vString* const name)
100 {
101 	tagEntryInfo e;
102 
103 	initTagEntry (&e, vStringValue (name), K_TARGET);
104 
105 	const NestingLevel *nl = nestingLevelsGetCurrent(nestingLevels);
106 	tagEntryInfo *parent = NULL;
107 	if (nl)
108 		parent = getEntryOfNestingLevel (nl);
109 
110 	if (parent)
111 	{
112 		e.extensionFields.scopeKindIndex = parent->kindIndex;
113 		e.extensionFields.scopeName = parent->name;
114 	}
115 
116 	return makeTagEntry (&e);
117 }
118 
makeSectionRstTag(const vString * const name,const int kind,const MIOPos filepos,char marker)119 static void makeSectionRstTag(const vString* const name, const int kind, const MIOPos filepos,
120 		       char marker)
121 {
122 	const NestingLevel *const nl = getNestingLevel(kind);
123 	tagEntryInfo *parent;
124 
125 	int r = CORK_NIL;
126 
127 	if (vStringLength (name) > 0)
128 	{
129 		tagEntryInfo e;
130 		char m [2] = { [1] = '\0' };
131 
132 		initTagEntry (&e, vStringValue (name), kind);
133 
134 		e.lineNumber--;	/* we want the line before the '---' underline chars */
135 		e.filePosition = filepos;
136 
137 		parent = getEntryOfNestingLevel (nl);
138 		if (parent && (parent->kindIndex < kind))
139 		{
140 #if 1
141 			e.extensionFields.scopeKindIndex = parent->kindIndex;
142 			e.extensionFields.scopeName = parent->name;
143 #else
144 			/* TODO
145 
146 			   Following code makes the scope information full qualified form.
147 			   Do users want the full qualified form?
148 			   --- ./Units/rst.simple.d/expected.tags	2015-12-18 01:32:35.574255617 +0900
149 			   +++ /home/yamato/var/ctags-github/Units/rst.simple.d/FILTERED.tmp	2016-05-05 03:05:38.165604756 +0900
150 			   @@ -5,2 +5,2 @@
151 			   -Subsection 1.1.1	input.rst	/^Subsection 1.1.1$/;"	S	section:Section 1.1
152 			   -Subsubsection 1.1.1.1	input.rst	/^Subsubsection 1.1.1.1$/;"	t	subsection:Subsection 1.1.1
153 			   +Subsection 1.1.1	input.rst	/^Subsection 1.1.1$/;"	S	section:Chapter 1.Section 1.1
154 			   +Subsubsection 1.1.1.1	input.rst	/^Subsubsection 1.1.1.1$/;"	t	subsection:Chapter 1.Section 1.1.Subsection 1.1.1
155 			*/
156 			   e.extensionFields.scopeIndex = nl->corkIndex;
157 #endif
158 		}
159 
160 		m[0] = marker;
161 		attachParserField (&e, RstFields [F_SECTION_MARKER].ftype, m);
162 		r = makeTagEntry (&e);
163 	}
164 	nestingLevelsPush(nestingLevels, r);
165 }
166 
167 
/* Returns true when every character of STR equals its first character.
 * The empty string also yields true. */
static bool issame(const char *str)
{
	const char head = str[0];
	const char *p;

	for (p = str; *p != '\0'; p++)
	{
		if (*p != head)
			return false;
	}
	return true;
}
184 
185 
get_kind(char c)186 static int get_kind(char c)
187 {
188 	int i;
189 
190 	for (i = 0; i < SECTION_COUNT; i++)
191 	{
192 		if (kindchars[i] == c)
193 			return i;
194 
195 		if (kindchars[i] == 0)
196 		{
197 			kindchars[i] = c;
198 			return i;
199 		}
200 	}
201 	return -1;
202 }
203 
204 
/*
 * Computes the length, in code points, of the UTF-8 string BUF that is
 * BUF_LEN bytes long.
 *
 * Returns -1 if the string doesn't look like UTF-8: an invalid lead byte,
 * a sequence truncated by the end of the buffer, or a lead byte that is not
 * followed by the required number of continuation bytes (10xxxxxx).  The
 * last check keeps one-byte charsets such as Latin-1 from being counted as
 * UTF-8.  Overlong forms and surrogates are not rejected.
 */
static int utf8_strlen(const char *buf, int buf_len)
{
	/* Work on unsigned bytes so that the result does not depend on the
	 * signedness of plain char. */
	const unsigned char *const bytes = (const unsigned char *) buf;
	int len = 0;
	int i = 0;

	while (i < buf_len)
	{
		const unsigned char lead = bytes[i++];
		int trailing;

		if (! (lead & 0x80))
			trailing = 0;
		else if ((lead & 0xe0) == 0xc0)
			trailing = 1;
		else if ((lead & 0xf0) == 0xe0)
			trailing = 2;
		else if ((lead & 0xf8) == 0xf0)
			trailing = 3;
		else /* not a valid leading UTF-8 byte, abort */
			return -1;

		/* Check the bound before reading, so we never index past the end. */
		if (trailing > buf_len - i) /* incomplete last sequence */
			return -1;

		for (; trailing > 0; trailing--)
		{
			if ((bytes[i++] & 0xc0) != 0x80) /* not a continuation byte */
				return -1;
		}

		len++;
	}

	return len;
}
232 
233 
/*
 * Tests whether LINE starts an explicit hyperlink target, i.e. begins with
 * the four characters ".. _".
 *
 * Returns a pointer to the character following that prefix, or NULL when
 * LINE does not start with it.
 */
static const unsigned char *is_target_line (const unsigned char *line)
{
	static const char prefix [] = ".. _";
	const size_t prefix_len = sizeof prefix - 1;

	/* strncmp() stops at the first difference or at a NUL, so a line
	 * shorter than the prefix is never read past its terminator. */
	if (strncmp ((const char *) line, prefix, prefix_len) != 0)
		return NULL;

	return line + prefix_len;
}
241 
capture_target(const unsigned char * target_line)242 static int capture_target (const unsigned char *target_line)
243 {
244 	vString *name = vStringNew ();
245 	unsigned char terminator;
246 	int r = CORK_NIL;
247 
248 	if (*target_line == '`')
249 		terminator = '`';
250 	else if (!isspace (*target_line) && *target_line != '\0')
251 	{
252 		/* "Simple reference names are single words consisting of
253 		 * alphanumerics plus isolated (no two adjacent) internal
254 		 * hyphens, underscores, periods, colons and plus signs; no
255 		 * whitespace or other characters are allowed."
256 		 * -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#reference-names
257 		 */
258 		vStringPut (name, *target_line);
259 		terminator = ':';
260 	}
261 	else
262 		goto out;
263 
264 	target_line++;
265 
266 
267 	bool escaped = false;
268 	while (*target_line != '\0')
269 	{
270 		if (escaped)
271 		{
272 			vStringPut (name, *target_line);
273 			escaped = false;
274 		}
275 		else
276 		{
277 			if (*target_line == '\\')
278 			{
279 				vStringPut (name, *target_line);
280 				escaped = true;
281 			}
282 			else if (*target_line == terminator)
283 				break;
284 			else
285 				vStringPut (name, *target_line);
286 		}
287 		target_line++;
288 	}
289 
290 	if (vStringLength (name) == 0)
291 		goto out;
292 
293 	r = makeTargetRstTag (name);
294 
295  out:
296 	vStringDelete (name);
297 	return r;
298 }
299 
300 /* TODO: parse overlining & underlining as distinct sections. */
findRstTags(void)301 static void findRstTags (void)
302 {
303 	vString *name = vStringNew ();
304 	MIOPos filepos;
305 	const unsigned char *line;
306 	const unsigned char *target_line;
307 
308 	memset(&filepos, 0, sizeof(filepos));
309 	memset(kindchars, 0, sizeof kindchars);
310 	nestingLevels = nestingLevelsNew(0);
311 
312 	while ((line = readLineFromInputFile ()) != NULL)
313 	{
314 		/* Handle .. _target:
315 		 * http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets
316 		 */
317 		if ((target_line = is_target_line (line)) != NULL)
318 		{
319 			if (capture_target (target_line) != CORK_NIL)
320 			{
321 				vStringClear (name);
322 				continue;
323 			}
324 		}
325 
326 		int line_len = strlen((const char*) line);
327 		int name_len_bytes = vStringLength(name);
328 		/* FIXME: this isn't right, actually we need the real display width,
329 		 * taking into account double-width characters and stuff like that.
330 		 * But duh. */
331 		int name_len = utf8_strlen(vStringValue(name), name_len_bytes);
332 
333 		/* if the name doesn't look like UTF-8, assume one-byte charset */
334 		if (name_len < 0)
335 			name_len = name_len_bytes;
336 
337 		/* underlines must be the same length or more */
338 		if (line_len >= name_len && name_len > 0 &&
339 			ispunct(line[0]) && issame((const char*) line))
340 		{
341 			char c = line[0];
342 			int kind = get_kind(c);
343 
344 			if (kind >= 0)
345 			{
346 				makeSectionRstTag(name, kind, filepos, c);
347 				continue;
348 			}
349 		}
350 		vStringClear (name);
351 		if (!isspace(*line))
352 		{
353 			vStringCatS(name, (const char*)line);
354 			filepos = getInputFilePosition();
355 		}
356 	}
357 	/* Force popping all nesting levels */
358 	getNestingLevel (K_EOF);
359 	vStringDelete (name);
360 	nestingLevelsFree(nestingLevels);
361 }
362 
RstParser(void)363 extern parserDefinition* RstParser (void)
364 {
365 	static const char *const extensions [] = { "rest", "reST", "rst", NULL };
366 	parserDefinition* const def = parserNew ("ReStructuredText");
367 
368 	def->kindTable = RstKinds;
369 	def->kindCount = ARRAY_SIZE (RstKinds);
370 	def->extensions = extensions;
371 	def->parser = findRstTags;
372 
373 	def->fieldTable = RstFields;
374 	def->fieldCount = ARRAY_SIZE (RstFields);
375 
376 	def->useCork = true;
377 
378 	return def;
379 }
380