1 /*-------------------------------------------------------------------------
2  *
3  * test_parser.c
4  *	  Simple example of a text search parser
5  *
6  * Copyright (c) 2007-2017, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/test/modules/test_parser/test_parser.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "fmgr.h"
16 
/*
 * Module magic block, declared once for this loadable module (see the
 * PostgreSQL documentation on C-language functions).
 */
PG_MODULE_MAGIC;
18 
19 /*
20  * types
21  */
22 
/*
 * Parser state private to this module.
 *
 * Allocated by testprs_start, advanced by testprs_getlexeme and released by
 * testprs_end.
 */
typedef struct
{
	char	   *buffer;			/* text to parse; the caller's pointer is
								 * stored as-is, the text is not copied */
	int			len;			/* length of the text in buffer */
	int			pos;			/* offset in buffer where the next token
								 * starts */
} ParserState;
30 
/*
 * Description of one token type.
 *
 * testprs_lextype returns an array of these, terminated by an entry whose
 * lexid is 0.
 *
 * NOTE(review): presumably this layout has to match what the caller of the
 * lextype function expects -- confirm before changing member order or types.
 */
typedef struct
{
	int			lexid;			/* token type number, as returned by
								 * testprs_getlexeme */
	char	   *alias;			/* short name of the token type */
	char	   *descr;			/* human-readable description */
} LexDescr;
37 
38 /*
39  * functions
40  */
/*
 * Entry points of the parser, each declared with PG_FUNCTION_INFO_V1:
 *	 testprs_start		- build the parser state for a text
 *	 testprs_getlexeme	- return the next token and its type
 *	 testprs_end		- free the parser state
 *	 testprs_lextype	- describe the token types this parser produces
 */
PG_FUNCTION_INFO_V1(testprs_start);
PG_FUNCTION_INFO_V1(testprs_getlexeme);
PG_FUNCTION_INFO_V1(testprs_end);
PG_FUNCTION_INFO_V1(testprs_lextype);
45 
46 Datum
testprs_start(PG_FUNCTION_ARGS)47 testprs_start(PG_FUNCTION_ARGS)
48 {
49 	ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));
50 
51 	pst->buffer = (char *) PG_GETARG_POINTER(0);
52 	pst->len = PG_GETARG_INT32(1);
53 	pst->pos = 0;
54 
55 	PG_RETURN_POINTER(pst);
56 }
57 
58 Datum
testprs_getlexeme(PG_FUNCTION_ARGS)59 testprs_getlexeme(PG_FUNCTION_ARGS)
60 {
61 	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
62 	char	  **t = (char **) PG_GETARG_POINTER(1);
63 	int		   *tlen = (int *) PG_GETARG_POINTER(2);
64 	int			startpos = pst->pos;
65 	int			type;
66 
67 	*t = pst->buffer + pst->pos;
68 
69 	if (pst->pos < pst->len &&
70 		(pst->buffer)[pst->pos] == ' ')
71 	{
72 		/* blank type */
73 		type = 12;
74 		/* go to the next non-space character */
75 		while (pst->pos < pst->len &&
76 			   (pst->buffer)[pst->pos] == ' ')
77 			(pst->pos)++;
78 	}
79 	else
80 	{
81 		/* word type */
82 		type = 3;
83 		/* go to the next space character */
84 		while (pst->pos < pst->len &&
85 			   (pst->buffer)[pst->pos] != ' ')
86 			(pst->pos)++;
87 	}
88 
89 	*tlen = pst->pos - startpos;
90 
91 	/* we are finished if (*tlen == 0) */
92 	if (*tlen == 0)
93 		type = 0;
94 
95 	PG_RETURN_INT32(type);
96 }
97 
98 Datum
testprs_end(PG_FUNCTION_ARGS)99 testprs_end(PG_FUNCTION_ARGS)
100 {
101 	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
102 
103 	pfree(pst);
104 	PG_RETURN_VOID();
105 }
106 
107 Datum
testprs_lextype(PG_FUNCTION_ARGS)108 testprs_lextype(PG_FUNCTION_ARGS)
109 {
110 	/*
111 	 * Remarks: - we have to return the blanks for headline reason - we use
112 	 * the same lexids like Teodor in the default word parser; in this way we
113 	 * can reuse the headline function of the default word parser.
114 	 */
115 	LexDescr   *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1));
116 
117 	/* there are only two types in this parser */
118 	descr[0].lexid = 3;
119 	descr[0].alias = pstrdup("word");
120 	descr[0].descr = pstrdup("Word");
121 	descr[1].lexid = 12;
122 	descr[1].alias = pstrdup("blank");
123 	descr[1].descr = pstrdup("Space symbols");
124 	descr[2].lexid = 0;
125 
126 	PG_RETURN_POINTER(descr);
127 }
128