1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #include "udm_config.h"
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "udm_common.h"
24 #include "udm_utils.h"
25 #include "udm_db.h"
26 #include "udm_db_int.h"
27 #include "udm_url.h"
28 #include "udm_vars.h"
29 #include "udm_coords.h"
30 #include "udm_log.h"
31 #include "udm_hash.h"
32 #include "udm_doc.h"
33 #include "udm_word.h"
34 #include "udm_indexer.h"
35 #include "udm_parsehtml.h"
36
37 #ifdef HAVE_SQL
38
39
/*
  Packing of a word coordinate into one unsigned integer, as stored
  in the "coord" column of the "dict" table:

    UDM_WRDCOORD(p,s)  - section number "s" shifted left by 24 bits,
                         plus position "p".
    UDM_WRDSEC(c)      - the bits of "c" from bit 24 upwards.
    UDM_WRDPOS(c)      - the low 21 bits of "c" (mask 0x001FFFFF);
                         bits 21..23 are not returned by either accessor.
*/
#define UDM_WRDCOORD(p,s)  ((((unsigned int) (s)) << 24) + (unsigned int) (p))
#define UDM_WRDSEC(c)      (((unsigned int) (c)) >> 24)
#define UDM_WRDPOS(c)      (((unsigned int) (c)) & 0x001FFFFF)
43
44
45 static udm_rc_t
UdmDeleteWordsFromURLSingle(UDM_AGENT * Indexer,UDM_DB * db,urlid_t url_id)46 UdmDeleteWordsFromURLSingle(UDM_AGENT *Indexer, UDM_DB *db, urlid_t url_id)
47 {
48 char qbuf[512];
49 udm_snprintf(qbuf, sizeof(qbuf), "DELETE FROM dict WHERE url_id=%d", url_id);
50 return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
51 }
52
53
54 static udm_rc_t
StoreWordsMySQL(UDM_AGENT * A,UDM_DB * db,UDM_WORDLIST * Words,urlid_t url_id)55 StoreWordsMySQL(UDM_AGENT *A, UDM_DB *db, UDM_WORDLIST *Words, urlid_t url_id)
56 {
57 udm_rc_t rc;
58 size_t nstored= 0, i;
59
60 if (!Words->nwords)
61 return UDM_OK;
62
63 for(nstored= 0; nstored < Words->nwords; )
64 {
65 char * qb,*qe;
66 size_t step=1024;
67 size_t mlen=1024;
68 size_t rstored = 0;
69
70 qb=(char*)UdmMalloc(mlen);
71 strcpy(qb,"INSERT INTO dict (word,url_id,coord) VALUES ");
72 qe=qb+strlen(qb);
73
74 for(i= nstored; i< Words->nwords;i++)
75 {
76 size_t len=qe-qb;
77 if (!Words->Word[i].coord.secno)
78 {
79 nstored++;
80 continue;
81 }
82 rstored++;
83
84 /* UDM_MAXWORDSIZE+100 should be enough */
85 if((len + A->Conf->WordParam.max_word_len + 100) >= mlen)
86 {
87 mlen+=step;
88 qb=(char*)UdmRealloc(qb,mlen);
89 qe=qb+len;
90 }
91
92 if(i>nstored)*qe++=',';
93
94 if (UdmSQLDBMode(db) == UDM_SQLDBMODE_SINGLE)
95 {
96 *qe++='(';
97 *qe++='\'';
98 strcpy(qe, Words->Word[i].word);
99 while(*qe)qe++;
100 *qe++='\'';
101 *qe++=',';
102 qe+=sprintf(qe,"%d,%d",url_id,
103 UDM_WRDCOORD(Words->Word[i].coord.pos,
104 Words->Word[i].coord.secno) /*+
105 Words->Word[i].seclen_marker*/);
106 *qe++=')';
107 *qe='\0';
108 }
109 if(qe>qb+UDM_MAX_MULTI_INSERT_QSIZE)
110 break;
111 }
112 nstored = i + 1;
113 rc = (rstored > 0) ? UdmDBSQLQuery(A, db, NULL, qb) : UDM_OK;
114 UDM_FREE(qb);
115 if(rc!=UDM_OK)
116 break;
117 }
118 return rc;
119 }
120
121
122 static udm_rc_t
StoreWordsSingleGeneric(UDM_AGENT * A,UDM_DB * db,UDM_WORDLIST * Words,urlid_t url_id)123 StoreWordsSingleGeneric(UDM_AGENT *A, UDM_DB *db,
124 UDM_WORDLIST *Words, urlid_t url_id)
125 {
126 size_t i;
127 for(i= 0; i < Words->nwords; i++)
128 {
129 udm_rc_t rc;
130 char qbuf[256];
131 if (!Words->Word[i].coord.secno)
132 continue;
133
134 if (UdmSQLDBMode(db) == UDM_SQLDBMODE_SINGLE)
135 {
136 sprintf(qbuf,"INSERT INTO dict (url_id,word,coord) VALUES(%d,'%s',%d)",
137 url_id,
138 Words->Word[i].word,
139 UDM_WRDCOORD(Words->Word[i].coord.pos,
140 Words->Word[i].coord.secno));
141 if (UDM_OK!= (rc= UdmDBSQLQuery(A, db, NULL, qbuf)))
142 return rc;
143 }
144 }
145 return UDM_OK;
146 }
147
148
149 static udm_rc_t
StoreWordsSingle(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)150 StoreWordsSingle(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
151 {
152 udm_rc_t rc= UDM_OK;
153 urlid_t url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
154 UDM_WORDLIST Words;
155
156 UdmWordListInit(&Words);
157 if (UDM_OK != (rc= UdmPrepareWords(Indexer, Doc, &Words)))
158 goto ret;
159
160 if (UDM_OK != (rc= UdmDeleteWordsFromURLSingle(Indexer, db, url_id)))
161 goto ret;
162
163 if (UdmVarListFindInt(&Indexer->Conf->Vars, "SaveSectionSize", 1))
164 {
165 if (UDM_OK!= (rc= UdmWordListSaveSectionSize(&Words)))
166 goto ret;
167 }
168
169 /*
170 Don't need to delete words here,
171 they are deleted in UdmStoreWords().
172 */
173
174 /* Insert new words */
175 if (UdmSQLDBType(db) == UDM_DB_MYSQL)
176 {
177 rc= StoreWordsMySQL(Indexer, db, &Words, url_id);
178 }
179 else
180 {
181 rc= StoreWordsSingleGeneric(Indexer, db, &Words, url_id);
182 }
183 ret:
184 UdmWordListFree(&Words);
185 return rc;
186 }
187
188
189 static udm_rc_t
UdmFindWordSingleInternal(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args,UDM_URLCRDLIST * CoordList,const char * table,int join)190 UdmFindWordSingleInternal(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
191 UDM_FINDWORD_ARGS *args,
192 UDM_URLCRDLIST *CoordList,
193 const char *table, int join)
194 {
195 char qbuf[4096];
196 UDM_SQLRES SQLRes;
197 size_t numrows, i, curnum;
198 udm_rc_t rc;
199
200 if (*args->where)
201 {
202 udm_snprintf(qbuf, sizeof(qbuf) - 1,"\
203 SELECT %s.url_id,%s.coord FROM %s, url%s \
204 WHERE %s.%s AND url.rec_id=%s.url_id AND %s",
205 table, table, table, UdmSQLDBQueryFrom(Query),
206 table, args->cmparg, table, args->where);
207 }
208 else if (!join)
209 {
210 udm_snprintf(qbuf, sizeof(qbuf) - 1,
211 "SELECT url_id,coord FROM %s WHERE %s", table, args->cmparg);
212 }
213 else
214 {
215
216 udm_snprintf(qbuf,sizeof(qbuf)-1,"\
217 SELECT url_id,coord FROM %s,url WHERE %s.%s AND url.rec_id=%s.url_id",
218 table, table, args->cmparg, table);
219 }
220
221 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, qbuf)))
222 return rc;
223
224 if ((numrows= UdmSQLNumRows(&SQLRes)))
225 {
226 CoordList->Coords= (UDM_URL_CRD*) UdmMalloc(numrows*sizeof(UDM_URL_CRD));
227 CoordList->acoords= numrows;
228 }
229
230 /* Add new found word to the list */
231 for(curnum= 0, i= 0; i < numrows; i++)
232 {
233 uint4 coord= atoi(UdmSQLValue(&SQLRes, i, 1));
234 uint4 section= UDM_WRDSEC(coord);
235 uint4 weight= args->query_param.wf[section];
236
237 if(weight && (!args->Word.Param.secno || args->Word.Param.secno == section))
238 {
239 UDM_URL_CRD *Coord= &CoordList->Coords[curnum];
240 Coord->urlid_coord.url_id= UDM_ATOI(UdmSQLValue(&SQLRes, i, 0));
241 Coord->urlid_coord.coord.pos= UDM_WRDPOS(coord);
242 Coord->urlid_coord.coord.secno= UDM_WRDSEC(coord);
243 Coord->num= args->Word.Param.order & 0xFF;
244 Coord->seclen= 0;
245 curnum++;
246 }
247 }
248 CoordList->ncoords= curnum;
249 UdmSQLFree(&SQLRes);
250 UdmURLCRDListSortByURLThenSecnoThenPos(CoordList);
251 return rc;
252 }
253
254
255 static udm_rc_t
UdmFindWordSingle(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)256 UdmFindWordSingle(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
257 UDM_FINDWORD_ARGS *args)
258 {
259 udm_rc_t rc;
260 UDM_URLCRDLIST CoordList;
261
262 bzero(&CoordList, sizeof(CoordList));
263
264 if (UDM_OK != (rc= UdmFindWordSingleInternal(A, db, Query, args,
265 &CoordList, "dict", 0)))
266 return rc;
267
268 if (args->query_param.SaveSectionSize && CoordList.ncoords)
269 {
270 UDM_URL_CRD *Crd= CoordList.Coords;
271 UDM_URL_CRD *End= CoordList.Coords + CoordList.ncoords;
272 UDM_URL_CRD *To= CoordList.Coords;
273 UDM_URL_CRD *Prev= CoordList.Coords;
274 urlid_t prev_url_id;
275 udm_secno_t prev_secno;
276
277 for (prev_url_id= Crd->urlid_coord.url_id,
278 prev_secno= Crd->urlid_coord.coord.secno ;
279 Crd < End; Crd++)
280 {
281 UDM_URL_CRD *Next= Crd + 1;
282 if (Next == End ||
283 Next->urlid_coord.url_id != prev_url_id ||
284 Next->urlid_coord.coord.secno != prev_secno)
285 {
286 for ( ; Prev < To; Prev++)
287 {
288 Prev->seclen= Crd->urlid_coord.coord.pos;
289 }
290 if (Next < End)
291 {
292 prev_url_id= Next->urlid_coord.url_id;
293 prev_secno= Next->urlid_coord.coord.secno;
294 Prev= To;
295 }
296 }
297 else
298 *To++= *Crd;
299 }
300 CoordList.ncoords= To - CoordList.Coords;
301 }
302 if (args->urls.nurls)
303 UdmApplyFastLimit(&CoordList, &args->urls);
304 if (CoordList.ncoords)
305 {
306 args->Word.Param.count= CoordList.ncoords;
307 UdmURLCRDListListAddWithSort2(&args->SearchSectionListList,
308 &Query->Res.WWList,
309 &args->Word, &CoordList);
310 }
311 else
312 {
313 UdmFree(CoordList.Coords);
314 }
315 return(UDM_OK);
316 }
317
318
319 static udm_rc_t
UdmTruncateDictSingle(UDM_AGENT * A,UDM_DB * db)320 UdmTruncateDictSingle(UDM_AGENT *A, UDM_DB *db)
321 {
322 return UdmDBSQLTableTruncateOrDelete(A, db, "dict");
323 }
324
325
326 static udm_rc_t
UdmWordStatCreateSingle(UDM_AGENT * A,UDM_DB * db)327 UdmWordStatCreateSingle(UDM_AGENT *A, UDM_DB *db)
328 {
329 char qbuf[128];
330 sprintf(qbuf, "SELECT word, count(*) FROM dict GROUP BY word");
331 return UdmWordStatQuery(A, db, qbuf);
332 }
333
334
335 static udm_rc_t
UdmDumpWordInfoOneDocSingle(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc)336 UdmDumpWordInfoOneDocSingle(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc)
337 {
338 return UDM_OK;
339 }
340
341
342 static udm_rc_t
UdmSingleInitSearch(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)343 UdmSingleInitSearch(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
344 UDM_FINDWORD_ARGS *args)
345 {
346 return UDM_OK;
347 }
348
349
350 static udm_rc_t
UdmQueryActionSingle(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,udm_querycmd_t cmd)351 UdmQueryActionSingle(UDM_AGENT *A, UDM_DB *db,
352 UDM_QUERY *Query, udm_querycmd_t cmd)
353 {
354 switch (cmd)
355 {
356 case UDM_QUERYCMD_CLEAR: return UdmTruncateDictSingle(A, db);
357 case UDM_QUERYCMD_WORDSTAT: return UdmWordStatCreateSingle(A, db);
358 case UDM_QUERYCMD_INDEX:
359 return udm_dbmode_handler_blob.QueryAction(A, db, Query, cmd);
360 default: break;
361 }
362 return UDM_NOTARGET;
363 }
364
365
366 const UDM_DBMODE_HANDLER udm_dbmode_handler_single=
367 {
368 "single",
369 StoreWordsSingle,
370 UdmQueryActionSingle,
371 UdmDeleteWordsFromURLSingle,
372 UdmFindWordSingle,
373 UdmDumpWordInfoOneDocSingle,
374 UdmSingleInitSearch,
375 };
376
377
378 #endif /* HAVE_SQL */
379