1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #include "udm_config.h"
19
20 #ifdef HAVE_SQL
21
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <errno.h>
30 #include <ctype.h>
31 #include <time.h>
32
33 #ifdef WIN32
34 #include <time.h>
35 #endif
36
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h>
39 #endif
40
41 #ifdef HAVE_SYS_TIME_H
42 #include <sys/time.h>
43 #endif
44
45 #include "udm_common.h"
46 #include "udm_utils.h"
47 #include "udm_spell.h"
48 #include "udm_robots.h"
49 #include "udm_db.h"
50 #include "udm_unicode.h"
51 #include "udm_unidata.h"
52 #include "udm_url.h"
53 #include "udm_log.h"
54 #include "udm_proto.h"
55 #include "udm_conf.h"
56 #include "udm_hash.h"
57 #include "udm_xmalloc.h"
58 #include "udm_boolean.h"
59 #include "udm_coords.h"
60 #include "udm_searchtool.h"
61 #include "udm_server.h"
62 #include "udm_stopwords.h"
63 #include "udm_doc.h"
64 #include "udm_result.h"
65 #include "udm_vars.h"
66 #include "udm_agent.h"
67 #include "udm_store.h"
68 #include "udm_hrefs.h"
69 #include "udm_word.h"
70 #include "udm_db_int.h"
71 #include "udm_match.h"
72 #include "udm_indexer.h"
73 #include "udm_textlist.h"
74 #include "udm_parsehtml.h"
75 #include "udm_wild.h"
76 #include "udm_http.h"
77 #include "udm_contentencoding.h"
78 #include "udm_indexcache.h"
79 #include "udm_query.h"
80 #include "udm_env.h"
81
82
83 static inline UDM_SQL *
UdmSQL(UDM_DB * db)84 UdmSQL(UDM_DB *db)
85 {
86 return &((UDM_SQLDB*) db->specific)->sql;
87 }
88
89 UDM_SQLDB *
UdmSQLDB(UDM_DB * db)90 UdmSQLDB(UDM_DB *db)
91 {
92 return (UDM_SQLDB*) db->specific;
93 }
94
95 udm_sqldbtype_t
UdmSQLDBType(UDM_DB * db)96 UdmSQLDBType(UDM_DB *db)
97 {
98 return UdmSQL(db)->DBType;
99 }
100
101 udm_sqldbapi_t
UdmSQLDBDriver(UDM_DB * db)102 UdmSQLDBDriver(UDM_DB *db)
103 {
104 return UdmSQL(db)->DBDriver;
105 }
106
107 udm_sqldbmode_t
UdmSQLDBMode(UDM_DB * db)108 UdmSQLDBMode(UDM_DB *db)
109 {
110 return UdmSQLDB(db)->DBMode;
111 }
112
113 int
UdmSQLDBVersion(UDM_DB * db)114 UdmSQLDBVersion(UDM_DB *db)
115 {
116 return UdmSQL(db)->version;
117 }
118
119 int
UdmSQLDBConnected(UDM_DB * db)120 UdmSQLDBConnected(UDM_DB *db)
121 {
122 return UdmSQL(db)->connected;
123 }
124
125 UDM_SQLDB_HANDLER *
UdmSQLDBHandler(UDM_DB * db)126 UdmSQLDBHandler(UDM_DB *db)
127 {
128 return &(UdmSQL(db)->handler);
129 }
130
131 static const UDM_DBMODE_HANDLER *
UdmSQLDBModeHandler(UDM_DB * db)132 UdmSQLDBModeHandler(UDM_DB *db)
133 {
134 return UdmSQLDB(db)->dbmode_handler;
135 }
136
137 static int
UdmSQLDBHaveIn(UDM_DB * db)138 UdmSQLDBHaveIn(UDM_DB *db)
139 {
140 return UdmSQL(db)->DBSQL_IN;
141 }
142
143 int
UdmSQLDBFlags(UDM_DB * db)144 UdmSQLDBFlags(UDM_DB *db)
145 {
146 return UdmSQL(db)->flags;
147 }
148
149 const char *
UdmSQLDBQueryFrom(UDM_QUERY * query)150 UdmSQLDBQueryFrom(UDM_QUERY *query)
151 {
152 return query->from;
153 }
154
155 UDM_VARLIST *
UdmSQLDBVars(UDM_DB * db)156 UdmSQLDBVars(UDM_DB *db)
157 {
158 return &UdmSQL(db)->Vars;
159 }
160
161
162 char *
UdmDBSQLError(UDM_DB * db)163 UdmDBSQLError(UDM_DB *db)
164 {
165 return UdmSQL(db)->errstr;
166 }
167
168
169 size_t
UdmDBSQLErrorSize(UDM_DB * db)170 UdmDBSQLErrorSize(UDM_DB *db)
171 {
172 return sizeof(UdmSQL(db)->errstr);
173 }
174
175
176 const char *
UdmDBSQLParamPlaceHolder(UDM_DB * db,size_t i)177 UdmDBSQLParamPlaceHolder(UDM_DB *db, size_t i)
178 {
179 return UdmSQLParamPlaceHolder(UdmSQL(db), i);
180 }
181
182
183 static inline udm_rc_t
UdmDBSQLTrace(UDM_AGENT * A,UDM_DB * db,udm_rc_t rc,udm_timer_t start,const char * func)184 UdmDBSQLTrace(UDM_AGENT *A, UDM_DB *db, udm_rc_t rc, udm_timer_t start,
185 const char *func)
186 {
187 if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
188 {
189 fprintf(stderr, "%.2f %s\n", UdmStopTimer(&start), func);
190 }
191 return rc;
192 }
193
194
195 static inline udm_rc_t
UdmDBSQLTrace1(UDM_AGENT * A,UDM_DB * db,udm_rc_t rc,udm_timer_t start,const char * func,const char * param)196 UdmDBSQLTrace1(UDM_AGENT *A, UDM_DB *db, udm_rc_t rc, udm_timer_t start,
197 const char *func, const char *param)
198 {
199 if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
200 {
201 fprintf(stderr, "%.2f %s %s\n", UdmStopTimer(&start), func, param);
202 }
203 return rc;
204 }
205
206
207 static inline udm_rc_t
UdmDBSQLTrace2(UDM_AGENT * A,UDM_DB * db,udm_rc_t rc,udm_timer_t start,const char * func,const char * from,const char * to)208 UdmDBSQLTrace2(UDM_AGENT *A, UDM_DB *db, udm_rc_t rc, udm_timer_t start,
209 const char *func, const char *from, const char *to)
210 {
211 if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
212 {
213 fprintf(stderr, "%.2f %s %s %s\n", UdmStopTimer(&start), func, from, to);
214 }
215 return rc;
216 }
217
218
219 udm_rc_t
UdmDBSQLQuery(UDM_AGENT * A,UDM_DB * db,UDM_SQLRES * Res,const char * buf)220 UdmDBSQLQuery(UDM_AGENT *A, UDM_DB *db, UDM_SQLRES *Res, const char *buf)
221 {
222 udm_timer_t ticks= UdmStartTimer();
223 udm_rc_t rc= UdmSQLQuery(UdmSQL(db), Res, buf);
224 return UdmDBSQLTrace1(A, db, rc, ticks, "SQL", buf);
225 }
226
227
228 udm_rc_t
UdmDBSQLDropTableIfExists(UDM_AGENT * A,UDM_DB * db,const char * name)229 UdmDBSQLDropTableIfExists(UDM_AGENT *A, UDM_DB *db, const char *name)
230 {
231 udm_timer_t ticks= UdmStartTimer();
232 udm_rc_t rc= UdmSQLDropTableIfExists(UdmSQL(db), name);
233 return UdmDBSQLTrace1(A, db, rc, ticks, "DropTableIfExists", name);
234 }
235
236
237 udm_rc_t
UdmDBSQLTableTruncateOrDelete(UDM_AGENT * A,UDM_DB * db,const char * name)238 UdmDBSQLTableTruncateOrDelete(UDM_AGENT *A, UDM_DB *db, const char *name)
239 {
240 udm_timer_t ticks= UdmStartTimer();
241 udm_rc_t rc= UdmSQLTableTruncateOrDelete(UdmSQL(db), name);
242 return UdmDBSQLTrace1(A, db, rc, ticks, "TableTruncateOrDelete", name);
243 }
244
245
246 udm_rc_t
UdmDBSQLCopyStructure(UDM_AGENT * A,UDM_DB * db,const char * from,const char * to)247 UdmDBSQLCopyStructure(UDM_AGENT *A, UDM_DB *db, const char *from, const char *to)
248 {
249 udm_timer_t ticks= UdmStartTimer();
250 udm_rc_t rc= UdmSQLCopyStructure(UdmSQL(db), from, to);
251 return UdmDBSQLTrace2(A, db, rc, ticks, "CopyStructure", from, to);
252 }
253
254
255 udm_rc_t
UdmDBSQLRenameTable(UDM_AGENT * A,UDM_DB * db,const char * from,const char * to)256 UdmDBSQLRenameTable(UDM_AGENT *A, UDM_DB *db, const char *from, const char *to)
257 {
258 udm_timer_t ticks= UdmStartTimer();
259 udm_rc_t rc= UdmSQLRenameTable(UdmSQL(db), from, to);
260 return UdmDBSQLTrace2(A, db, rc, ticks, "RenameTable", from, to);
261 }
262
263
264 udm_rc_t
UdmDBSQLBegin(UDM_AGENT * A,UDM_DB * db)265 UdmDBSQLBegin(UDM_AGENT *A, UDM_DB *db)
266 {
267 udm_timer_t ticks= UdmStartTimer();
268 udm_rc_t rc= UdmSQLBegin(UdmSQL(db));
269 return UdmDBSQLTrace(A, db, rc, ticks, "Begin");
270 }
271
272
273 udm_rc_t
UdmDBSQLCommit(UDM_AGENT * A,UDM_DB * db)274 UdmDBSQLCommit(UDM_AGENT *A, UDM_DB *db)
275 {
276 udm_timer_t ticks= UdmStartTimer();
277 udm_rc_t rc= UdmSQLCommit(UdmSQL(db));
278 return UdmDBSQLTrace(A, db, rc, ticks, "Commit");
279 }
280
281
282 udm_rc_t
UdmDBSQLExecDirect(UDM_AGENT * A,UDM_DB * db,UDM_SQLRES * R,const char * query)283 UdmDBSQLExecDirect(UDM_AGENT *A, UDM_DB *db, UDM_SQLRES *R, const char *query)
284 {
285 udm_timer_t ticks= UdmStartTimer();
286 udm_rc_t rc= UdmSQLExecDirect(UdmSQL(db), R, query);
287 return UdmDBSQLTrace1(A, db, rc, ticks, "ExecDirect", query);
288 }
289
290
291 udm_rc_t
UdmDBSQLPrepare(UDM_AGENT * A,UDM_DB * db,const char * query)292 UdmDBSQLPrepare(UDM_AGENT *A, UDM_DB *db, const char *query)
293 {
294 udm_timer_t ticks= UdmStartTimer();
295 udm_rc_t rc= UdmSQLPrepare(UdmSQL(db), query);
296 return UdmDBSQLTrace1(A, db, rc, ticks, "Prepare", query);
297 }
298
299
300 udm_rc_t
UdmDBSQLExecute(UDM_AGENT * A,UDM_DB * db)301 UdmDBSQLExecute(UDM_AGENT *A, UDM_DB *db)
302 {
303 udm_timer_t ticks= UdmStartTimer();
304 udm_rc_t rc= UdmSQLExecute(UdmSQL(db));
305 return UdmDBSQLTrace(A, db, rc, ticks, "Execute");
306 }
307
308
309 udm_rc_t
UdmDBSQLFetchRow(UDM_AGENT * A,UDM_DB * db,UDM_SQLRES * R,UDM_STR * p)310 UdmDBSQLFetchRow(UDM_AGENT *A, UDM_DB *db, UDM_SQLRES *R, UDM_STR *p)
311 {
312 return UdmSQLDBHandler(db)->FetchRow(UdmSQL(db), R, p);
313 }
314
315
316 udm_rc_t
UdmDBSQLStmtFree(UDM_AGENT * A,UDM_DB * db)317 UdmDBSQLStmtFree(UDM_AGENT *A, UDM_DB *db)
318 {
319 udm_timer_t ticks= UdmStartTimer();
320 udm_rc_t rc= UdmSQLStmtFree(UdmSQL(db));
321 return UdmDBSQLTrace(A, db, rc, ticks, "StmtFree");
322 }
323
324
/*
  Print "size" bytes starting at "data" to "file" as a single-quoted
  literal. Bytes in the range 0x20..0x7E are written as-is, all other
  bytes are written as \xHH (two upper-case hex digits).

  The enclosing quotes are written to "file" as well, so the whole
  literal ends up on one stream (previously the quotes always went to
  stderr, regardless of "file").

  A non-positive "size" prints just the two quotes.
*/
static void
UdmSQLPrintParameter(FILE *file, const void *data, int size)
{
  const unsigned char *s= (const unsigned char*) data;
  const unsigned char *e= s + size;
  fputc('\'', file);
  for ( ; s < e; s++)
  {
    if (*s >= 0x20 && *s <= 0x7E)
      fputc(*s, file);
    else
      fprintf(file, "\\x%02X", (unsigned int) *s);
  }
  fputc('\'', file);
}
340
341
342 udm_rc_t
UdmDBSQLBindParameter(UDM_AGENT * A,UDM_DB * db,int pos,const void * data,int size,udm_sqltype_t type)343 UdmDBSQLBindParameter(UDM_AGENT *A, UDM_DB *db,
344 int pos, const void *data, int size, udm_sqltype_t type)
345 {
346 udm_timer_t ticks= UdmStartTimer();
347 udm_rc_t rc= UdmSQLBindParameter(UdmSQL(db), pos, data, size, type);
348 UDM_ASSERT(size == 4 || type != UDM_SQLTYPE_INT32);
349 if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
350 {
351 fprintf(stderr, "%.2f BindParameter[%d] %s(%d)",
352 UdmStopTimer(&ticks), pos, UdmSQLTypeToStr(type), size);
353 UdmSQLPrintParameter(stderr, data, size);
354 fprintf(stderr, "\n");
355 }
356 return rc;
357 }
358
359
360 udm_rc_t
UdmDBSQLLockOrBegin(UDM_AGENT * A,UDM_DB * db,const char * param)361 UdmDBSQLLockOrBegin(UDM_AGENT *A, UDM_DB *db, const char *param)
362 {
363 udm_timer_t ticks= UdmStartTimer();
364 udm_rc_t rc= UdmSQLLockOrBegin(UdmSQL(db), param);
365 return UdmDBSQLTrace1(A, db, rc, ticks, "LockOrBegin", param);
366 }
367
368
369 udm_rc_t
UdmDBSQLUnlockOrCommit(UDM_AGENT * A,UDM_DB * db)370 UdmDBSQLUnlockOrCommit(UDM_AGENT *A, UDM_DB *db)
371 {
372 udm_timer_t ticks= UdmStartTimer();
373 udm_rc_t rc= UdmSQLUnlockOrCommit(UdmSQL(db));
374 return UdmDBSQLTrace(A, db, rc, ticks, "UnlockOrCommit");
375 }
376
377
378 udm_rc_t
UdmDBSQLQueryOneRowInt(UDM_AGENT * A,UDM_DB * db,int * res,const char * qbuf)379 UdmDBSQLQueryOneRowInt(UDM_AGENT *A, UDM_DB *db, int *res, const char *qbuf)
380 {
381 udm_timer_t ticks= UdmStartTimer();
382 udm_rc_t rc= UdmSQLQueryOneRowInt(UdmSQL(db), res, qbuf);
383 return UdmDBSQLTrace1(A, db, rc, ticks, "QueryOneRowInt", qbuf);
384 }
385
386
387 char *
UdmDBSQLEscStrAlloc(UDM_AGENT * A,UDM_DB * db,const char * src,size_t srclen)388 UdmDBSQLEscStrAlloc(UDM_AGENT *A, UDM_DB *db, const char *src, size_t srclen)
389 {
390 return UdmSQLEscStrAlloc(UdmSQL(db), src, srclen);
391 }
392
393
394 char *
UdmDBSQLEscStrSimple(UDM_AGENT * A,UDM_DB * db,char * to,const char * from,size_t l)395 UdmDBSQLEscStrSimple(UDM_AGENT *A, UDM_DB *db, char *to, const char *from, size_t l)
396 {
397 return UdmSQLEscStrSimple(UdmSQL(db), to, from, l);
398 }
399
400
401 size_t
UdmDBSQLBinEscStr(UDM_AGENT * A,UDM_DB * db,char * dst,size_t dstlen,const char * src,size_t srclen)402 UdmDBSQLBinEscStr(UDM_AGENT *A, UDM_DB *db, char *dst, size_t dstlen, const char *src, size_t srclen)
403 {
404 return UdmSQLBinEscStr(UdmSQL(db), dst, dstlen, src, srclen);
405 }
406
407 size_t
UdmDBSQLEscStr(UDM_AGENT * A,UDM_DB * db,char * to,const char * from,size_t l)408 UdmDBSQLEscStr(UDM_AGENT *A, UDM_DB *db, char *to, const char *from, size_t l)
409 {
410 return UdmSQLEscStr(UdmSQL(db), to, from, l);
411 }
412
413 void
UdmDBSQLTopClause(UDM_AGENT * A,UDM_DB * db,size_t top_num,UDM_SQL_TOP_CLAUSE * Top)414 UdmDBSQLTopClause(UDM_AGENT *A, UDM_DB *db, size_t top_num, UDM_SQL_TOP_CLAUSE *Top)
415 {
416 UdmSQLTopClause(UdmSQL(db), top_num, Top);
417 }
418
419 static const char*
UdmSQLDBTypeToStr(udm_sqldbtype_t dbtype)420 UdmSQLDBTypeToStr(udm_sqldbtype_t dbtype)
421 {
422 switch(dbtype)
423 {
424 case UDM_DB_MYSQL: return "mysql";
425 case UDM_DB_PGSQL: return "pgsql";
426 case UDM_DB_IBASE: return "ibase";
427 case UDM_DB_MSSQL: return "mssql";
428 case UDM_DB_ORACLE8: return "oracle";
429 case UDM_DB_SQLITE: return "sqlite";
430 case UDM_DB_SQLITE3: return "sqlite";
431 case UDM_DB_MIMER: return "mimer";
432 case UDM_DB_VIRT: return "virtuoso";
433 case UDM_DB_ACCESS: return "access";
434 case UDM_DB_DB2: return "db2";
435 case UDM_DB_CACHE: return "cache";
436 case UDM_DB_SYBASE: return "sybase";
437 case UDM_DB_MONETDB: return "monetdb";
438 case UDM_DB_SOLID: return "solid";
439 case UDM_DB_SAPDB: return "sapdb";
440 }
441 return "unknown_dbtype";
442 }
443
444
445 static const char*
UdmSQLDBModeToStr(udm_sqldbmode_t dbmode)446 UdmSQLDBModeToStr(udm_sqldbmode_t dbmode)
447 {
448 switch (dbmode)
449 {
450 case UDM_SQLDBMODE_SINGLE: return "single";
451 case UDM_SQLDBMODE_MULTI: return "multi";
452 case UDM_SQLDBMODE_BLOB: return "blob";
453 case UDM_SQLDBMODE_RAWBLOB: return "rawblob";
454 }
455 return "unknown_dbmode";
456 }
457
458
459 static udm_sqldbmode_t
UdmStr2DBMode(const char * str1,udm_bool_t * error)460 UdmStr2DBMode(const char * str1, udm_bool_t *error)
461 {
462 *error= UDM_FALSE;
463 if (!strncasecmp(str1,"single",6)) return UDM_SQLDBMODE_SINGLE;
464 if (!strncasecmp(str1,"multi",5)) return UDM_SQLDBMODE_MULTI;
465 if (!strncasecmp(str1,"blob",4)) return UDM_SQLDBMODE_BLOB;
466 if (!strncasecmp(str1,"rawblob",7))return UDM_SQLDBMODE_RAWBLOB;
467 *error= UDM_TRUE;
468 return UDM_SQLDBMODE_BLOB;
469 }
470
471
472 /************** some forward declarations ********************/
473 static udm_rc_t UdmDeleteURL(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db);
474 static udm_rc_t UdmDeleteBadHrefs(UDM_AGENT *Indexer,
475 UDM_DOCUMENT *Doc,
476 UDM_DB *db,
477 urlid_t url_id);
478 static udm_rc_t UdmDeleteWordFromURL(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db);
479
480
481 /*********************** helper functions **********************/
482
/*
  Append the condition "add" to the NUL-terminated buffer "where",
  preceded by " AND " when "where" is not empty.

  Returns the number of characters appended, or 0 when "add" is empty
  (in which case "where" is left untouched).

  The write is unbounded: the caller must make sure "where" has room
  for strlen(add) plus 5 characters for the joiner plus the terminator.
*/
static size_t
WhereConditionAddAnd(char *where, const char *add)
{
  const char *joiner;
  char *tail;

  if (!add[0])
    return 0;

  if (where[0])
    joiner= " AND ";
  else
    joiner= "";

  tail= where + strlen(where);
  return sprintf(tail, "%s%s", joiner, add);
}
493
494
495 static void
WhereConditionDSTRAddAnd(char * where,UDM_DSTR * add)496 WhereConditionDSTRAddAnd(char *where, UDM_DSTR *add)
497 {
498 if (UdmDSTRLength(add))
499 {
500 if (where[0])
501 strcat(where, " AND ");
502 strcat(where, UdmDSTRPtr(add));
503 }
504 }
505
506
507 /* Prepare to add a new condition into a class */
508 static udm_rc_t
UdmSQLWhereAddJoiner(UDM_DSTR * dstr,const char * joiner)509 UdmSQLWhereAddJoiner(UDM_DSTR *dstr, const char *joiner)
510 {
511 if (UdmDSTRLength(dstr)) /* Second (or more) condition of the same class */
512 {
513 UdmDSTRShrinkLast(dstr);
514 UdmDSTRAppendSTR(dstr, joiner);
515 }
516 else /* First condition of this class */
517 {
518 UdmDSTRAppend(dstr, "(", 1);
519 }
520 return UDM_OK;
521 }
522
523
524 static udm_rc_t
UdmSQLWhereIntParam(UDM_DB * db,UDM_DSTR * dstr,const char * sqlname,const char * val)525 UdmSQLWhereIntParam(UDM_DB *db,
526 UDM_DSTR *dstr, const char *sqlname, const char *val)
527 {
528 const char *range= strchr(val, '-');
529 UdmDSTRRealloc(dstr, UdmDSTRLength(dstr) + strlen(val) + 50);
530 if (UdmSQL(db)->DBSQL_IN && !range) /* Single value */
531 {
532 if (!UdmDSTRLength(dstr)) /* First parameter */
533 {
534 UdmDSTRAppendf(dstr, " %s IN (%d)", sqlname, atoi(val));
535 }
536 else /* Second or higher parameter */
537 {
538 UdmDSTRShrinkLast(dstr);
539 UdmDSTRAppendf(dstr, ",%d)", atoi(val));
540 }
541 }
542 else /* Range */
543 {
544 int first, second;
545 UdmSQLWhereAddJoiner(dstr, " OR ");
546 if (range && 2 == sscanf(val, "%d-%d", &first, &second))
547 {
548 UdmDSTRAppendf(dstr,
549 "%s>=%d AND %s<=%d)",
550 sqlname, first, sqlname, second);
551 }
552 else
553 {
554 UdmDSTRAppendf(dstr, "%s=%d)", sqlname, atoi(val));
555 }
556 }
557 return UDM_OK;
558 }
559
560
561 #define UDM_ADD_PARAM_NEG 1 /* If reverse condition */
562 #define UDM_ADD_PARAM_WITH_TAIL_PERCENT 2 /* If the trailing % needed */
563 #define UDM_ADD_PARAM_CHECK_URL_SCHEMA 4 /* Detect URL prefix */
564
565 #define UDM_ADD_PARAM_URL (UDM_ADD_PARAM_CHECK_URL_SCHEMA|UDM_ADD_PARAM_WITH_TAIL_PERCENT)
566 #define UDM_ADD_PARAM_URL_NEG (UDM_ADD_PARAM_NEG|UDM_ADD_PARAM_URL)
567
568
569 static udm_rc_t
UdmSQLWhereStrParam(UDM_DB * db,UDM_DSTR * dstr,const char * sqlname,const char * val,int flag)570 UdmSQLWhereStrParam(UDM_DB *db, UDM_DSTR *dstr,
571 const char *sqlname, const char *val, int flag)
572 {
573 const char *first= "";
574 const char *last= (flag & UDM_ADD_PARAM_WITH_TAIL_PERCENT) ? "%" : "";
575 const char *joiner= (flag & UDM_ADD_PARAM_NEG) ? " AND " : " OR ";
576 const char *notx= (flag & UDM_ADD_PARAM_NEG) ? "NOT " : "";
577
578 if (flag & UDM_ADD_PARAM_CHECK_URL_SCHEMA)
579 {
580 UDM_URL URL;
581 UdmURLInit(&URL);
582 UdmURLParse(&URL,val);
583 /* Check if URL prefix is not given / given */
584 first= (URL.schema == NULL) ? "%" : "";
585 UdmURLFree(&URL);
586 }
587
588 UdmSQLWhereAddJoiner(dstr, joiner);
589 UdmDSTRAppendf(dstr, "%s %sLIKE '%s%s%s')", sqlname, notx, first, val, last);
590 return UDM_OK;
591 }
592
593
/*
  Date limit parameters collected from search variables.
  See UdmCheckDateParam() for parsing and UdmSQLWhereDateParam()
  for how they are turned into a condition on url.last_mod_time.
*/
typedef struct udm_date_param_st
{
  int dt, dx, dm, dy, dd; /* dt: limit kind, one of UDM_DT_xxx             */
                          /* dx: 1 means ">=", anything else "<=" (DT_ER)  */
                          /* dm: month, 0=Jan..11=Dec; dy: year; dd: day   */
  time_t dp;              /* DT_BACK: subtracted from the current time     */
  int DB, DE;             /* DT_RANGE: lower and upper time stamps         */
  int dstmp;              /* DT_ER: if non-zero, used instead of dd/dm/dy  */
} UDM_DATE_PARAM;
606
607
608 static void
UdmDateParamInit(UDM_DATE_PARAM * d)609 UdmDateParamInit(UDM_DATE_PARAM *d)
610 {
611 d->dt= UDM_DT_UNKNOWN;
612 d->dx= 1;
613 d->dm= 0;
614 d->dy= 1970;
615 d->dd= 1;
616 d->dp= (time_t) 0;
617 d->DB= 0;
618 d->DE= time(NULL);
619 d->dstmp= 0;
620 }
621
622
623 static udm_rc_t
UdmCheckDateParam(UDM_DATE_PARAM * d,const char * var,const char * val)624 UdmCheckDateParam(UDM_DATE_PARAM *d,
625 const char *var, const char *val)
626 {
627 int intval= atoi(val);
628 int longval= atol(val);
629
630 if (!strcmp(var, "dt"))
631 {
632 if(!strcasecmp(val, "back")) d->dt= UDM_DT_BACK;
633 else if (!strcasecmp(val, "er")) d->dt= UDM_DT_ER;
634 else if (!strcasecmp(val, "range")) d->dt= UDM_DT_RANGE;
635 }
636 else if (!strcmp(var, "dx"))
637 {
638 if (intval == 1 || intval == -1) d->dx= intval;
639 else d->dx= 1;
640 }
641 else if (!strcmp(var, "dm"))
642 {
643 d->dm= intval; /* 0=Jan, 1=Feb,..., 11=Dec.*/
644 }
645 else if (!strcmp(var, "dy"))
646 {
647 d->dy= (intval) ? intval : 1970;
648 }
649 else if (!strcmp(var, "dd"))
650 {
651 d->dd= (intval) ? intval : 1;
652 }
653 else if (!strcmp(var, "dstmp"))
654 {
655 d->dstmp= longval ? longval : 0;
656 }
657 else if (!strcmp(var, "dp"))
658 {
659 d->dp= Udm_dp2time_t(val);
660 }
661 else if (!strcmp(var, "db"))
662 {
663 struct tm tm;
664 bzero((void*) &tm, sizeof(tm));
665 sscanf(val, "%d/%d/%d", &tm.tm_mday, &tm.tm_mon, &tm.tm_year);
666 tm.tm_year -= 1900; tm.tm_mon--;
667 d->DB= mktime(&tm);
668 }
669 else if (!strcmp(var, "de"))
670 {
671 struct tm tm;
672 bzero((void*) &tm, sizeof(tm));
673 sscanf(val, "%d/%d/%d", &tm.tm_mday, &tm.tm_mon, &tm.tm_year);
674 tm.tm_year -= 1900; tm.tm_mon--;
675 d->DE= mktime(&tm) + 86400; /* Including the given date */
676 }
677 else
678 return UDM_ERROR;
679 return UDM_OK;
680 }
681
682
683 static void
UdmSQLWhereDateParam(UDM_DSTR * cond,UDM_DATE_PARAM * d)684 UdmSQLWhereDateParam(UDM_DSTR *cond, UDM_DATE_PARAM *d)
685 {
686 switch(d->dt)
687 {
688 case UDM_DT_BACK:
689 if (d->dp)
690 UdmDSTRAppendf(cond, "url.last_mod_time >= %li",
691 (long int) time(NULL) - d->dp);
692 break;
693 case UDM_DT_ER:
694 {
695 struct tm tm;
696 bzero((void*) &tm, sizeof(tm));
697 tm.tm_mday= d->dd;
698 tm.tm_mon= d->dm;
699 tm.tm_year= d->dy - 1900;
700 UdmDSTRAppendf(cond, "url.last_mod_time %s %li",
701 (d->dx == 1) ? ">=" : "<=",
702 (long int) (d->dstmp ? d->dstmp : mktime(&tm)));
703 }
704 break;
705 case UDM_DT_RANGE:
706 UdmDSTRAppendf(cond,
707 "url.last_mod_time >= %li AND url.last_mod_time <= %li",
708 (long int) d->DB, (long int) d->DE);
709 break;
710 case UDM_DT_UNKNOWN:
711 default:
712 break;
713 }
714 }
715
/*
  Descriptor of a search limit parameter.
  "name" is used as a pattern for UdmWildCaseCmp(), see
  UdmFindStringParam().
*/
typedef struct udm_search_param_st
{
  const char *name; /* Parameter name, or wildcard pattern */
} UDM_SEARCH_PARAM;
720
721
722 static const UDM_SEARCH_PARAM search_params[]=
723 {
724 {"ul"},
725 {"ue"},
726 {"u"},
727 {"tag"},
728 {"t"},
729 {"lang"},
730 {"g"},
731 {"type"},
732 {"typ"},
733 {"sl.*"},
734 {NULL}
735 };
736
737
738 static const UDM_SEARCH_PARAM*
UdmFindStringParam(const char * name)739 UdmFindStringParam(const char *name)
740 {
741 const UDM_SEARCH_PARAM *param;
742 for (param= search_params; param->name; param++)
743 {
744 if (!UdmWildCaseCmp(name, param->name))
745 return param;
746 }
747 return NULL;
748 }
749
750
751 typedef struct
752 {
753 UDM_DSTR from;
754 UDM_DSTR lang;
755 UDM_DSTR seed;
756 UDM_DSTR server;
757 UDM_DSTR site;
758 UDM_DSTR status;
759 UDM_DSTR tag;
760 UDM_DSTR timecond;
761 UDM_DSTR type;
762 UDM_DSTR ue;
763 UDM_DSTR url;
764 UDM_DSTR urlinfo;
765 UDM_DATE_PARAM datep;
766 int fromserver;
767 int fromurlinfo_lang;
768 int fromurlinfo_type;
769 int fromurlinfo;
770 } UDM_SQL_CONDITION_PARAM;
771
772
773 static void
UdmSQLConditionParamInit(UDM_SQL_CONDITION_PARAM * param)774 UdmSQLConditionParamInit(UDM_SQL_CONDITION_PARAM *param)
775 {
776 UdmDSTRInit(¶m->from, 64);
777 UdmDSTRInit(¶m->lang, 64);
778 UdmDSTRInit(¶m->seed, 64);
779 UdmDSTRInit(¶m->server, 64);
780 UdmDSTRInit(¶m->site, 64);
781 UdmDSTRInit(¶m->status, 64);
782 UdmDSTRInit(¶m->tag, 64);
783 UdmDSTRInit(¶m->timecond, 64);
784 UdmDSTRInit(¶m->type, 64);
785 UdmDSTRInit(¶m->ue, 64);
786 UdmDSTRInit(¶m->url, 64);
787 UdmDSTRInit(¶m->urlinfo, 64);
788 UdmDateParamInit(¶m->datep);
789 param->fromserver= 1;
790 param->fromurlinfo_lang= 1;
791 param->fromurlinfo_type= 1;
792 param->fromurlinfo= 1;
793 }
794
795
796 static void
UdmSQLConditionParamFree(UDM_SQL_CONDITION_PARAM * param)797 UdmSQLConditionParamFree(UDM_SQL_CONDITION_PARAM *param)
798 {
799 UdmDSTRFree(¶m->from);
800 UdmDSTRFree(¶m->lang);
801 UdmDSTRFree(¶m->seed);
802 UdmDSTRFree(¶m->server);
803 UdmDSTRFree(¶m->site);
804 UdmDSTRFree(¶m->status);
805 UdmDSTRFree(¶m->tag);
806 UdmDSTRFree(¶m->timecond);
807 UdmDSTRFree(¶m->type);
808 UdmDSTRFree(¶m->ue);
809 UdmDSTRFree(¶m->url);
810 UdmDSTRFree(¶m->urlinfo);
811 }
812
813
814 static size_t
UdmSQLConditionParamTotalLength(const UDM_SQL_CONDITION_PARAM * CondParam)815 UdmSQLConditionParamTotalLength(const UDM_SQL_CONDITION_PARAM *CondParam)
816 {
817 return
818 UdmDSTRLength(&CondParam->from) +
819 UdmDSTRLength(&CondParam->lang) +
820 UdmDSTRLength(&CondParam->seed) +
821 UdmDSTRLength(&CondParam->server) +
822 UdmDSTRLength(&CondParam->site) +
823 UdmDSTRLength(&CondParam->status) +
824 UdmDSTRLength(&CondParam->tag) +
825 UdmDSTRLength(&CondParam->timecond) +
826 UdmDSTRLength(&CondParam->type) +
827 UdmDSTRLength(&CondParam->ue) +
828 UdmDSTRLength(&CondParam->url) +
829 UdmDSTRLength(&CondParam->urlinfo);
830 }
831
832
833 static udm_rc_t
UdmSQLConditionParamAdd(UDM_SQL_CONDITION_PARAM * CondParam,UDM_AGENT * A,UDM_DB * db,const char * var,size_t varlen,const char * val,size_t vallen)834 UdmSQLConditionParamAdd(UDM_SQL_CONDITION_PARAM *CondParam,
835 UDM_AGENT *A, UDM_DB *db,
836 const char *var, size_t varlen,
837 const char *val, size_t vallen)
838 {
839 char varbuf[64];
840 char valbuf[128 + 1];
841
842 if (!vallen || varlen > (int) sizeof(varbuf))
843 return UDM_OK;
844
845 if (vallen > (int) sizeof(valbuf) - 1)
846 {
847 vallen= sizeof(valbuf) - 1;
848 if (UdmFindStringParam(var))
849 {
850 udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
851 "Limit is too long: %.*s=%.*s",
852 (int) varlen, var, (int) vallen, val);
853 return UDM_ERROR;
854 }
855 }
856
857 /* Protection against SQL injection */
858 var= UdmDBSQLEscStrSimple(A, db, varbuf, var, varlen);
859 val= UdmDBSQLEscStrSimple(A, db, valbuf, val, vallen);
860
861 if (!strcmp(var, "status"))
862 UdmSQLWhereIntParam(db, &CondParam->status, "url.status", val);
863
864 if (!strcmp(var, "seed"))
865 UdmSQLWhereIntParam(db, &CondParam->seed, "url.seed", val);
866
867 if (!strcmp(var, "site") && val[0] != 0)
868 UdmSQLWhereStrParam(db, &CondParam->site, "url.url", val, UDM_ADD_PARAM_WITH_TAIL_PERCENT);
869
870 if(!strcmp(var,"ul"))
871 UdmSQLWhereStrParam(db, &CondParam->url, "url.url", val, UDM_ADD_PARAM_URL);
872
873 if(!strcmp(var,"ue"))
874 UdmSQLWhereStrParam(db, &CondParam->ue, "url.url", val, UDM_ADD_PARAM_URL_NEG);
875
876 if(!strcmp(var,"u"))
877 UdmSQLWhereStrParam(db, &CondParam->url, "url.url", val, 0);
878
879 if(!strcmp(var,"tag") || !strcmp(var,"t"))
880 {
881 UdmSQLWhereStrParam(db, &CondParam->tag, "s.tag", val, 0);
882 if (CondParam->fromserver)
883 {
884 CondParam->fromserver= 0;
885 UdmDSTRAppendSTR(&CondParam->from, ", server s");
886 UdmDSTRAppendSTR(&CondParam->server, " AND s.rec_id=url.server_id");
887 }
888 }
889
890 if(!strcmp(var,"lang") || !strcmp(var,"g"))
891 {
892 UdmSQLWhereStrParam(db, &CondParam->lang, "il.sval", val, 0);
893 if (CondParam->fromurlinfo_lang)
894 {
895 CondParam->fromurlinfo_lang= 0;
896 UdmDSTRAppendSTR(&CondParam->from, ", urlinfo il");
897 UdmDSTRAppendSTR(&CondParam->server, " AND il.url_id=url.rec_id AND il.sname='Content-Language'");
898 }
899 }
900
901 if(!strncmp(var, "sl.", 3))
902 {
903 UdmSQLWhereAddJoiner(&CondParam->urlinfo, " AND ");
904 UdmDSTRAppendf(&CondParam->urlinfo,"isl%d.sname='%s' AND isl%d.sval LIKE '%s')",
905 CondParam->fromurlinfo, var + 3, CondParam->fromurlinfo, val);
906 UdmDSTRAppendf(&CondParam->from, ", urlinfo isl%d", CondParam->fromurlinfo);
907 UdmDSTRAppendf(&CondParam->server, " AND isl%d.url_id=url.rec_id",
908 CondParam->fromurlinfo);
909 CondParam->fromurlinfo++;
910 }
911
912 if (!strcmp(var,"type") || !strcmp(var, "typ"))
913 {
914 /*
915 "type" is a reserved word in ASP,
916 so "typ" is also added as a workaround
917 */
918 UdmSQLWhereStrParam(db, &CondParam->type, "it.sval", val, 0);
919 if (CondParam->fromurlinfo_type)
920 {
921 CondParam->fromurlinfo_type= 0;
922 UdmDSTRAppendSTR(&CondParam->from, ", urlinfo it");
923 UdmDSTRAppendSTR(&CondParam->server, " AND it.url_id=url.rec_id AND it.sname='Content-Type'");
924 }
925 }
926 UdmCheckDateParam(&CondParam->datep, var, val);
927 return UDM_OK;
928 }
929
930
931 static udm_rc_t
UdmSQLConditionParamPopulate(UDM_SQL_CONDITION_PARAM * CondParam,UDM_AGENT * A,UDM_DB * db,const UDM_VARLIST * Vars)932 UdmSQLConditionParamPopulate(UDM_SQL_CONDITION_PARAM *CondParam,
933 UDM_AGENT *A, UDM_DB *db,
934 const UDM_VARLIST *Vars)
935 {
936 size_t i;
937 for (i= 0; i < Vars->nvars; i++)
938 {
939 const UDM_VAR *Var= UdmVarListFindConstByIndex(Vars, i);
940 const char *var= UdmVarName(Var) ? UdmVarName(Var) : "";
941 UDM_CONST_STR valbuf, *val= UdmVarGetConstStr(Var, &valbuf);
942 size_t varlen= strlen(var);
943 udm_rc_t rc;
944
945 if (UDM_OK != (rc= UdmSQLConditionParamAdd(CondParam, A, db,
946 var, varlen,
947 val->str, val->length)))
948 return rc;
949 }
950 UdmSQLWhereDateParam(&CondParam->timecond, &CondParam->datep);
951 return UDM_OK;
952 }
953
954
955 static udm_rc_t
UdmSQLBuildWhereCondition(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,char const ** dst)956 UdmSQLBuildWhereCondition(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
957 char const **dst)
958 {
959 UDM_ENV *Conf= A->Conf;
960 size_t nbytes;
961 UDM_SQL_CONDITION_PARAM CondParam;
962 udm_bool_t need_new= UdmVarListFindBool(&A->Conf->Vars, "delta", UDM_FALSE);
963 udm_rc_t rc= UDM_OK;
964 const char *need_new_str= need_new ? /* TODO34: remove this */
965 "url.rec_id IN (SELECT url_id FROM bdicti WHERE state=1)" : "";
966
967 UDM_LOCK_CHECK_OWNER(A, UDM_LOCK_DB);
968 if (Query->where)
969 {
970 *dst= Query->where;
971 return UDM_OK;
972 }
973
974 UdmSQLConditionParamInit(&CondParam);
975
976 if (UDM_OK != (rc= UdmSQLConditionParamPopulate(&CondParam,
977 A, db, &Conf->Vars)))
978 goto ret;
979
980 if (!(nbytes= UdmSQLConditionParamTotalLength(&CondParam) + strlen(need_new_str)))
981 {
982 Query->where= (char*) UdmStrdup("");
983 Query->from= (char*) UdmStrdup("");
984 goto ret;
985 }
986 UdmQueryClearLimits(Query);
987 Query->where= (char*) UdmMalloc(nbytes + 100);
988 Query->where[0]= '\0';
989 Query->from= (char*) UdmStrdup(UdmDSTRLength(&CondParam.from) ?
990 UdmDSTRPtr(&CondParam.from) : "");
991 if (UdmDSTRLength(&CondParam.url))
992 strcat(Query->where, UdmDSTRPtr(&CondParam.url));
993
994 WhereConditionDSTRAddAnd(Query->where, &CondParam.lang);
995 WhereConditionDSTRAddAnd(Query->where, &CondParam.seed);
996 WhereConditionDSTRAddAnd(Query->where, &CondParam.site);
997 WhereConditionDSTRAddAnd(Query->where, &CondParam.status);
998 WhereConditionDSTRAddAnd(Query->where, &CondParam.tag);
999 WhereConditionDSTRAddAnd(Query->where, &CondParam.timecond);
1000 WhereConditionDSTRAddAnd(Query->where, &CondParam.type);
1001 WhereConditionDSTRAddAnd(Query->where, &CondParam.ue);
1002 WhereConditionDSTRAddAnd(Query->where, &CondParam.urlinfo);
1003
1004 if (UdmDSTRLength(&CondParam.server))
1005 {
1006 if (!Query->where[0]) strcat(Query->where, " 1=1 ");
1007 strcat(Query->where, UdmDSTRPtr(&CondParam.server));
1008 }
1009
1010 WhereConditionAddAnd(Query->where, need_new_str);
1011
1012 /* Need this for test purposes */
1013 UdmVarListReplaceStr(&Conf->Vars, "WhereCondition", Query->where);
1014
1015 ret:
1016 *dst= Query->where;
1017 UdmSQLConditionParamFree(&CondParam);
1018 return rc;
1019 }
1020
1021
1022 static udm_rc_t
UdmVarListSQLEscape(UDM_AGENT * A,UDM_VARLIST * dst,UDM_VARLIST * src,UDM_DB * db)1023 UdmVarListSQLEscape(UDM_AGENT *A, UDM_VARLIST *dst, UDM_VARLIST *src, UDM_DB *db)
1024 {
1025 size_t i, nbytes= 0;
1026 char *tmp= NULL;
1027 for (i= 0; i < src->nvars; i++)
1028 {
1029 const UDM_VAR *V= UdmVarListFindConstByIndex(src, i);
1030 UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(V, &valuebuf);
1031 if (nbytes < value->length * 2 + 1)
1032 {
1033 nbytes= value->length * 2 + 1;
1034 tmp= (char*) UdmRealloc(tmp, nbytes);
1035 }
1036 UdmDBSQLEscStr(A, db, tmp, value->str ? value->str : "", value->length); /* doc Section */
1037 UdmVarListAddStr(dst, UdmVarName(V), tmp);
1038 }
1039 UdmFree(tmp);
1040 return UDM_OK;
1041 }
1042
1043
1044 /************* Servers ******************************************/
1045
1046 #define UDM_SERVER_TABLE_COLUMNS \
1047 "rec_id,url,tag,command,weight,ordre,parent,enabled "
1048
1049 static udm_rc_t
UdmServerInitFromRecord(UDM_SERVER * S,UDM_SQLRES * SQLRes,size_t row)1050 UdmServerInitFromRecord(UDM_SERVER *S, UDM_SQLRES *SQLRes, size_t row)
1051 {
1052 const char *val;
1053 S->site_id= UDM_ATOI(UdmSQLValue(SQLRes, row, 0));
1054 val= UdmSQLValue(SQLRes, row, 1);
1055 if (UDM_OK != UdmMatchSetPattern(&S->Filter.Match, val ? val : ""))
1056 return UDM_ERROR;
1057
1058 if ((val= UdmSQLValue(SQLRes, row, 2)) && val[0])
1059 UdmVarListReplaceStr(&S->Vars, "Tag", val);
1060
1061 S->command= *UdmSQLValue(SQLRes, row, 3);
1062 S->weight= UDM_ATOF(UdmSQLValue(SQLRes, row, 4));
1063 S->ordre= UDM_ATOI(UdmSQLValue(SQLRes, row, 5));
1064 S->parent= UDM_ATOI(UdmSQLValue(SQLRes, row, 6));
1065 S->enabled= UDM_TEST(UDM_ATOI(UdmSQLValue(SQLRes, row, 7)));
1066 return UDM_OK;
1067 }
1068
1069
1070 static int
UdmServerNeedsUpdate(UDM_SERVER * a,UDM_SERVER * b)1071 UdmServerNeedsUpdate(UDM_SERVER *a, UDM_SERVER *b)
1072 {
1073 /* Note: we don't check "srvinfo" content */
1074 if (a->site_id != b->site_id ||
1075 strcmp(UdmMatchPatternConstStr(&a->Filter.Match),
1076 UdmMatchPatternConstStr(&b->Filter.Match)) ||
1077 a->command != b->command ||
1078 strcmp(UdmVarListFindStr(&a->Vars, "Tag", ""), UdmVarListFindStr(&b->Vars, "Tag", "")) ||
1079 a->weight != b->weight ||
1080 a->ordre != b->ordre ||
1081 a->parent != b->parent ||
1082 a->enabled != b->enabled)
1083 return 1;
1084 return 0;
1085 }
1086
1087
1088 static void
UdmFilterInitFromVars(UDM_FILTER * Filter,const UDM_VARLIST * Vars)1089 UdmFilterInitFromVars(UDM_FILTER *Filter, const UDM_VARLIST *Vars)
1090 {
1091 Filter->Match.Param.match_mode= UdmVarListFindInt(Vars, "Match_type", UDM_MATCH_BEGIN);
1092 UdmMatchParamSetCaseInsensitive(&Filter->Match.Param,
1093 UDM_TEST(UdmVarListFindInt(Vars, "Case_sense",
1094 UDM_CASE_INSENSITIVE)));
1095 UdmMatchParamSetNegative(&Filter->Match.Param,
1096 UDM_TEST(UdmVarListFindInt(Vars, "Nomatch", 0)));
1097 Filter->method= UdmMethod(UdmVarListFindStr(Vars, "Method",
1098 UdmMethodStr(UDM_METHOD_DEFAULT)));
1099 }
1100
1101
1102 static void
UdmFilterToVars(const UDM_FILTER * Filter,UDM_VARLIST * Vars)1103 UdmFilterToVars(const UDM_FILTER *Filter, UDM_VARLIST *Vars)
1104 {
1105 if (Filter->method != UDM_METHOD_DEFAULT)
1106 UdmVarListReplaceStr(Vars, "Method", UdmMethodStr(Filter->method));
1107 if (Filter->Match.Param.match_mode != UDM_MATCH_BEGIN)
1108 UdmVarListReplaceInt(Vars, "Match_type", Filter->Match.Param.match_mode);
1109 if (!UdmMatchIsCaseInsensitive(&Filter->Match))
1110 UdmVarListReplaceInt(Vars, "Case_sense", UDM_CASE_SENSITIVE);
1111 if (UdmMatchIsNegative(&Filter->Match))
1112 UdmVarListReplaceInt(Vars, "Nomatch", 1);
1113 }
1114
1115 static udm_rc_t
UdmLoadServerTable(UDM_AGENT * Indexer,UDM_SERVERLIST * S,UDM_DB * db)1116 UdmLoadServerTable(UDM_AGENT * Indexer, UDM_SERVERLIST *S, UDM_DB *db)
1117 {
1118 size_t rows, i, j, jrows;
1119 UDM_SQLRES SQLRes, SRes;
1120 char qbuf[1024];
1121 const char *filename= UdmVarListFindStr(UdmSQLDBVars(db), "filename", NULL);
1122 const char *name = (filename && filename[0]) ? filename : "server";
1123 const char *infoname = UdmVarListFindStr(UdmSQLDBVars(db), "srvinfo", "srvinfo");
1124 udm_rc_t rc= UDM_OK;
1125 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
1126
1127 udm_snprintf(qbuf,sizeof(qbuf)-1,"SELECT " UDM_SERVER_TABLE_COLUMNS
1128 "FROM %s "
1129 "WHERE enabled=1 AND parent=%s0%s "
1130 "ORDER BY ordre", name, qu, qu);
1131
1132 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf)))
1133 return rc;
1134
1135 rows= UdmSQLNumRows(&SQLRes);
1136 for(i= 0; i < rows; i++)
1137 {
1138 UDM_SERVER *Server= Indexer->Conf->Cfg_Srv;
1139
1140 if (UDM_OK != (rc= UdmServerInitFromRecord(Server, &SQLRes, i)))
1141 goto ex;
1142
1143 sprintf(qbuf,"SELECT sname,sval FROM %s WHERE srv_id=%s%i%s", infoname, qu, Server->site_id, qu);
1144 if(UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SRes, qbuf)))
1145 return rc;
1146 jrows= UdmSQLNumRows(&SRes);
1147 for(j= 0; j < jrows; j++)
1148 {
1149 const char *sname = UdmSQLValue(&SRes, j, 0);
1150 const char *sval = UdmSQLValue(&SRes, j, 1);
1151 UdmVarListReplaceStr(&Server->Vars, sname, sval);
1152 }
1153 UdmSQLFree(&SRes);
1154
1155 Server->webspace= (udm_webspace_t) UdmVarListFindInt(&Server->Vars, "Follow", UDM_WEBSPACE_DEFAULT);
1156 UdmFilterInitFromVars(&Server->Filter, &Server->Vars);
1157
1158 if (Server->command == 'S')
1159 {
1160 UdmServerAdd(Indexer, Server, 0);
1161 if ((Server->Filter.Match.Param.match_mode == UDM_MATCH_BEGIN) &&
1162 (Indexer->flags & UDM_FLAG_ADD_SERVURL))
1163 {
1164 UDM_HREFPARAM HrefParam;
1165 UdmHrefParamInit(&HrefParam);
1166 HrefParam.server_id= Server->site_id;
1167 HrefParam.hops= (uint4) UdmVarListFindInt(&Server->Vars, "StartHops", 0);
1168 HrefParam.link_source= UDM_LINK_SOURCE_HTDB;
1169 UdmHrefListAddConst(&Indexer->Conf->Hrefs, &HrefParam,
1170 UdmMatchPatternConstStr(&Server->Filter.Match));
1171 }
1172 }
1173 else
1174 {
1175 char errstr[128];
1176 UDM_CONST_STR str;
1177 UdmConstStrSetStr(&str, UdmMatchPatternConstStr(&Server->Filter.Match));
1178 rc= UdmFilterListAdd(&Indexer->Conf->Filters,
1179 &Server->Filter.Match.Param,
1180 Server->Filter.method,
1181 &str,
1182 errstr, sizeof(errstr));
1183 if (rc != UDM_OK)
1184 {
1185 udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
1186 "Error while loading ServerTable '%s' at row %d: %s",
1187 name, (int) i, errstr);
1188 break;
1189 }
1190 }
1191 UdmMatchFreeAndInit(&Server->Filter.Match);
1192 }
1193 ex:
1194 UdmSQLFree(&SQLRes);
1195 return rc;
1196 }
1197
1198
1199 static udm_rc_t
UdmServerTableFlush(UDM_AGENT * A,UDM_DB * db)1200 UdmServerTableFlush(UDM_AGENT *A, UDM_DB *db)
1201 {
1202 udm_rc_t rc;
1203 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
1204 char str[128];
1205
1206 udm_snprintf(str, sizeof(str), "UPDATE server SET enabled=0 WHERE parent=%s0%s", qu, qu);
1207 rc= UdmDBSQLQuery(A, db, NULL, str);
1208 return rc;
1209 }
1210
1211
1212 static udm_rc_t
UdmServerTableUpdateSrvInfo(UDM_AGENT * A,UDM_DB * db,UDM_SERVER * S,char * arg)1213 UdmServerTableUpdateSrvInfo(UDM_AGENT *A, UDM_DB *db, UDM_SERVER *S, char *arg)
1214 {
1215 udm_rc_t rc;
1216 size_t i;
1217 UDM_DSTR d;
1218 const char *E= (UdmSQLDBType(db) == UDM_DB_PGSQL && UdmSQLDBVersion(db) >= 80101) ? "E" :"";
1219 UdmDSTRInit(&d, 64);
1220
1221 UDM_ASSERT(UdmSQLDBConnected(db)); /* make sure E is set to a correct value */
1222
1223 UdmDSTRAppendf(&d, "DELETE FROM srvinfo WHERE srv_id=%i", S->site_id);
1224 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, UdmDSTRPtr(&d))))
1225 goto ex;
1226
1227 for (i= 0; i < S->Vars.nvars; i++)
1228 {
1229 const UDM_VAR *Sec= UdmVarListFindConstByIndex(&S->Vars, i);
1230 if (UdmVarStr(Sec) && UdmVarName(Sec) && strcasecmp(UdmVarName(Sec), "Tag"))
1231 {
1232 UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(Sec, &valuebuf);
1233 UdmDBSQLEscStr(A, db, arg, value->str, value->length); /* srvinfo */
1234 UdmDSTRReset(&d);
1235 UdmDSTRAppendf(&d, "INSERT INTO srvinfo (srv_id,sname,sval) "
1236 "VALUES (%i,'%s',%s'%s')",
1237 S->site_id, UdmVarName(Sec), E, arg);
1238 if(UDM_OK != (rc = UdmDBSQLQuery(A, db, NULL, UdmDSTRPtr(&d))))
1239 goto ex;
1240 }
1241 }
1242 ex:
1243 UdmDSTRFree(&d);
1244 return rc;
1245 }
1246
1247
1248 static udm_rc_t
UdmServerTableUpdateWithLock(UDM_AGENT * A,UDM_DB * db,UDM_SERVER * S,const char * buf,char * arg)1249 UdmServerTableUpdateWithLock(UDM_AGENT *A, UDM_DB *db, UDM_SERVER *S,
1250 const char *buf, char *arg)
1251 {
1252 udm_rc_t rc;
1253 if (UDM_OK != (rc= UdmDBSQLLockOrBegin(A, db, "server WRITE, srvinfo WRITE")) ||
1254 UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, buf)) ||
1255 UDM_OK != (rc= UdmServerTableUpdateSrvInfo(A, db, S, arg)) ||
1256 UDM_OK != (rc= UdmDBSQLUnlockOrCommit(A, db)))
1257 return rc;
1258 return UDM_OK;
1259 }
1260
1261
1262 static udm_rc_t
UdmServerTableAdd(UDM_AGENT * A,UDM_SERVERLIST * S,UDM_DB * db)1263 UdmServerTableAdd(UDM_AGENT *A, UDM_SERVERLIST *S, UDM_DB *db)
1264 {
1265 udm_rc_t res= UDM_OK;
1266 int found;
1267 const char *alias=UdmVarListFindStr(&S->Server->Vars,"Alias",NULL);
1268 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
1269 size_t i, len= 0;
1270 char *buf, *arg;
1271 UDM_VARLIST *Vars= &S->Server->Vars;
1272 UDM_SERVER Old;
1273
1274 UdmServerInit(&Old);
1275
1276 S->Server->site_id= UdmStrHash32(UdmMatchPatternConstStr(&S->Server->Filter.Match));
1277
1278 for (i=0; i < Vars->nvars; i++)
1279 {
1280 const UDM_VAR *Var= UdmVarListFindConstByIndex(Vars, i);
1281 len= udm_max(len, UdmVarLength(Var));
1282 }
1283
1284 len+= UdmMatchPatternConstStr(&S->Server->Filter.Match) ?
1285 strlen(UdmMatchPatternConstStr(&S->Server->Filter.Match)) : 0;
1286 len+= alias ? strlen(alias) : 0;
1287 len+= 2048;
1288
1289 buf = (char*)UdmMalloc(len);
1290 arg = (char*)UdmMalloc(len);
1291 if (buf == NULL || arg == NULL)
1292 {
1293 UDM_FREE(buf);
1294 UDM_FREE(arg);
1295 strcpy(UdmDBSQLError(db), "Out of memory");
1296 UdmSQL(db)->errcode = 1;
1297 return UDM_ERROR;
1298 }
1299
1300 for (found= 0; ; S->Server->site_id++)
1301 {
1302 UDM_SQLRES SQLRes;
1303 udm_snprintf(buf, len, "SELECT " UDM_SERVER_TABLE_COLUMNS
1304 "FROM server WHERE rec_id=%s%d%s",
1305 qu, S->Server->site_id, qu);
1306 if (UDM_OK != (res= UdmDBSQLQuery(A, db, &SQLRes, buf)))
1307 goto ex;
1308
1309 if (!UdmSQLNumRows(&SQLRes))
1310 {
1311 UdmSQLFree(&SQLRes);
1312 break; /* Not found */
1313 }
1314
1315 UdmServerInitFromRecord(&Old, &SQLRes, 0);
1316 found= !strcasecmp(UdmMatchPatternConstStr(&S->Server->Filter.Match),
1317 UdmSQLValue(&SQLRes, 0, 1));
1318 UdmSQLFree(&SQLRes);
1319
1320 if (found)
1321 break;
1322 }
1323
1324 if (S->Server->webspace != UDM_WEBSPACE_DEFAULT)
1325 UdmVarListReplaceInt(&S->Server->Vars, "Follow", S->Server->webspace);
1326 UdmFilterToVars(&S->Server->Filter, &S->Server->Vars);
1327
1328 UdmDBSQLEscStr(A, db, arg, /* Server pattern */
1329 UDM_NULL2EMPTY(UdmMatchPatternConstStr(&S->Server->Filter.Match)),
1330 strlen(UDM_NULL2EMPTY(UdmMatchPatternConstStr(&S->Server->Filter.Match))));
1331
1332 if (!found)
1333 {
1334 udm_snprintf(buf, len,
1335 "INSERT INTO server (rec_id, enabled, tag,"
1336 " command, parent, ordre, weight, url, pop_weight) "
1337 " VALUES "
1338 "(%s%d%s, 1, '%s', '%c', %s%d%s, %d, %f, '%s', 0)",
1339 qu, S->Server->site_id, qu,
1340 UdmVarListFindStr(&S->Server->Vars, "Tag", ""),
1341 S->Server->command,
1342 qu, S->Server->parent, qu,
1343 S->Server->ordre,
1344 S->Server->weight,
1345 arg
1346 );
1347 if (UDM_OK != (res= UdmServerTableUpdateWithLock(A, db, S->Server, buf, arg)))
1348 goto ex;
1349 }
1350 else
1351 {
1352 if (UdmServerNeedsUpdate(S->Server, &Old))
1353 {
1354 udm_snprintf(buf, len,
1355 "UPDATE server SET enabled=1, tag='%s',"
1356 "command='%c', parent=%s%i%s, ordre=%d, weight=%f "
1357 "WHERE rec_id=%s%d%s",
1358 UdmVarListFindStr(&S->Server->Vars, "Tag", ""),
1359 S->Server->command,
1360 qu, S->Server->parent, qu,
1361 S->Server->ordre,
1362 S->Server->weight,
1363 qu, S->Server->site_id, qu);
1364 if (UDM_OK != (res= UdmServerTableUpdateWithLock(A, db, S->Server, buf, arg)))
1365 goto ex;
1366 }
1367 }
1368
1369 UDM_ASSERT(res == UDM_OK);
1370
1371 ex:
1372 UDM_FREE(buf);
1373 UDM_FREE(arg);
1374 UdmServerFree(&Old);
1375 return res;
1376 }
1377
1378
1379 static udm_rc_t
UdmSrvActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_SERVERLIST * S,udm_srvcmd_t cmd)1380 UdmSrvActionSQL(UDM_AGENT *A, UDM_DB *db, UDM_SERVERLIST *S, udm_srvcmd_t cmd)
1381 {
1382 switch(cmd)
1383 {
1384 case UDM_SRV_ACTION_TABLE:
1385 return UdmLoadServerTable(A,S,db);
1386 case UDM_SRV_ACTION_FLUSH:
1387 return UdmServerTableFlush(A, db);
1388 case UDM_SRV_ACTION_ADD:
1389 return UdmServerTableAdd(A, S, db);
1390 }
1391 UdmLog(A, UDM_LOG_ERROR, "Unsupported Srv Action SQL");
1392 return UDM_ERROR;
1393 }
1394
1395
1396 /********** Searching for URL_ID by various parameters ****************/
1397
1398 static urlid_t
UdmURLIdCacheFind(const UDM_URLID_CACHE * Cache,const char * url)1399 UdmURLIdCacheFind(const UDM_URLID_CACHE *Cache, const char *url)
1400 {
1401 size_t i;
1402 for (i= 0; i < UDM_FINDURL_CACHE_SIZE; i++)
1403 {
1404 if (Cache->url[i] && !strcmp(url, Cache->url[i]))
1405 return Cache->id[i];
1406 }
1407 return 0;
1408 }
1409
1410
1411 static udm_rc_t
UdmURLIdCacheAdd(UDM_URLID_CACHE * Cache,const char * url,urlid_t id)1412 UdmURLIdCacheAdd(UDM_URLID_CACHE *Cache, const char *url, urlid_t id)
1413 {
1414 UDM_FREE(Cache->url[Cache->pURLCache]);
1415 if (!(Cache->url[Cache->pURLCache]= (char*) UdmStrdup(url)))
1416 return UDM_ERROR;
1417 Cache->id[Cache->pURLCache]= id;
1418 Cache->pURLCache= (Cache->pURLCache + 1) % UDM_FINDURL_CACHE_SIZE;
1419 return UDM_OK;
1420 }
1421
1422
1423 static void
UdmURLIdCacheFree(UDM_URLID_CACHE * Cache)1424 UdmURLIdCacheFree(UDM_URLID_CACHE *Cache)
1425 {
1426 size_t i;
1427 for(i= 0; i < UDM_FINDURL_CACHE_SIZE; i++)
1428 UDM_FREE(Cache->url[i]);
1429 }
1430
1431
1432 static udm_rc_t
UdmFindURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)1433 UdmFindURL(UDM_AGENT *Indexer, UDM_DOCUMENT * Doc, UDM_DB *db)
1434 {
1435 UDM_SQLRES SQLRes;
1436 const char *url=UdmVarListFindStr(&Doc->Sections,"URL","");
1437 udmhash32_t id = 0;
1438 udm_rc_t rc= UDM_OK;
1439 udm_bool_t UseCRC32URLId= UdmVarListFindBool(&Indexer->Conf->Vars, "UseCRC32URLId", UDM_FALSE);
1440
1441 if (UseCRC32URLId)
1442 {
1443 /* Auto generation of rec_id */
1444 /* using CRC32 algorythm */
1445 id= UdmStrHash32(url);
1446 }
1447 else
1448 {
1449 const char *o;
1450 char *qbuf, *e_url;
1451 size_t i, l, url_length= strlen(url);
1452
1453 /* Escape URL string */
1454 if ((e_url = (char*)UdmMalloc(l = (8 * url_length + 1))) == NULL ||
1455 (qbuf = (char*)UdmMalloc( l + 100 )) == NULL)
1456 {
1457 UDM_FREE(e_url);
1458 UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
1459 return UDM_ERROR;
1460 }
1461 UdmDBSQLEscStr(Indexer, db, e_url, url, url_length);
1462
1463 if (!(id= UdmURLIdCacheFind(&UdmSQLDB(db)->URLIdCache, e_url)))
1464 {
1465 udm_snprintf(qbuf, l + 100, "SELECT rec_id FROM url WHERE url='%s'",e_url);
1466 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf)))
1467 {
1468 UDM_FREE(e_url);
1469 UDM_FREE(qbuf);
1470 return rc;
1471 }
1472 for(i=0;i<UdmSQLNumRows(&SQLRes);i++)
1473 {
1474 if((o=UdmSQLValue(&SQLRes,i,0)))
1475 {
1476 id=atoi(o);
1477 break;
1478 }
1479 }
1480 UdmSQLFree(&SQLRes);
1481 rc= UdmURLIdCacheAdd(&UdmSQLDB(db)->URLIdCache, e_url, id);
1482 }
1483 UDM_FREE(e_url);
1484 UDM_FREE(qbuf);
1485 }
1486 UdmVarListReplaceInt(&Doc->Sections, "ID", id);
1487 return rc;
1488 }
1489
1490
1491 static udm_rc_t
UdmFindMessage(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)1492 UdmFindMessage(UDM_AGENT *Indexer, UDM_DOCUMENT * Doc, UDM_DB *db)
1493 {
1494 size_t i, len;
1495 char *qbuf;
1496 char *eid;
1497 UDM_SQLRES SQLRes;
1498 const char *message_id=UdmVarListFindStr(&Doc->Sections,"Header.Message-ID",NULL);
1499 udm_rc_t rc;
1500
1501 if(!message_id)
1502 return UDM_OK;
1503
1504 len = strlen(message_id);
1505 eid = (char*)UdmMalloc(4 * len + 1);
1506 if (eid == NULL) return UDM_ERROR;
1507 qbuf = (char*)UdmMalloc(4 * len + 128);
1508 if (qbuf == NULL)
1509 {
1510 UDM_FREE(eid);
1511 return UDM_ERROR;
1512 }
1513
1514 /* Escape URL string */
1515 UdmDBSQLEscStr(Indexer, db, eid, message_id, len); /* Message ID */
1516
1517 udm_snprintf(qbuf, 4 * len + 128,
1518 "SELECT rec_id FROM url u, urlinfo i WHERE u.rec_id=i.url_id AND i.sname='Message-ID' AND i.sval='%s'", eid);
1519 rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf);
1520 UDM_FREE(qbuf);
1521 UDM_FREE(eid);
1522 if (UDM_OK != rc)
1523 return rc;
1524
1525 for(i=0;i<UdmSQLNumRows(&SQLRes);i++)
1526 {
1527 const char * o;
1528 if((o=UdmSQLValue(&SQLRes,i,0)))
1529 {
1530 UdmVarListReplaceInt(&Doc->Sections,"ID", UDM_ATOI(o));
1531 break;
1532 }
1533 }
1534 UdmSQLFree(&SQLRes);
1535 return(UDM_OK);
1536 }
1537
1538
1539 /********************* Limits ********************/
1540
1541
1542 udm_rc_t
UdmLoadSlowLimit(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * list,const char * q)1543 UdmLoadSlowLimit(UDM_AGENT *A, UDM_DB *db, UDM_URLID_LIST *list, const char *q)
1544 {
1545 udm_timer_t ticks= UdmStartTimer();
1546 size_t i;
1547 udm_rc_t rc;
1548 UDM_SQLRES SQLRes;
1549 int exclude= list->exclude;
1550 bzero((void*) list, sizeof(UDM_URLID_LIST));
1551 list->exclude= exclude;
1552 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, q)))
1553 goto ret;
1554
1555 if (!(list->nurls= UdmSQLNumRows(&SQLRes)))
1556 goto sqlfree;
1557
1558 if (!(list->urls= (urlid_t *) UdmMalloc(sizeof(urlid_t) * list->nurls)))
1559 {
1560 rc= UDM_ERROR;
1561 list->nurls= 0;
1562 goto ret;
1563 }
1564 for (i= 0; i < list->nurls; i++)
1565 {
1566 list->urls[i]= atoi(UdmSQLValue(&SQLRes, i, 0));
1567 }
1568
1569 sqlfree:
1570 UdmLog(A, UDM_LOG_DEBUG, "Limit query retured %d rows: %.2f",
1571 (int) list->nurls, UdmStopTimer(&ticks));
1572 UdmSQLFree(&SQLRes);
1573 ret:
1574 return rc;
1575 }
1576
1577
1578 static udm_rc_t
UdmLoadSlowLimitWithSort(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * list,const char * q)1579 UdmLoadSlowLimitWithSort(UDM_AGENT *A, UDM_DB *db,
1580 UDM_URLID_LIST *list, const char *q)
1581 {
1582 udm_rc_t rc= UdmLoadSlowLimit(A, db, list, q);
1583 if (rc == UDM_OK)
1584 UdmURLIdListSort(list);
1585 return rc;
1586 }
1587
1588
1589 static udm_rc_t
UdmSlowLimitLoadForConv(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * fl_urls,const char * fl)1590 UdmSlowLimitLoadForConv(UDM_AGENT *A,
1591 UDM_DB *db,
1592 UDM_URLID_LIST *fl_urls,
1593 const char *fl)
1594 {
1595 udm_rc_t rc= UDM_OK;
1596 udm_timer_t ticks= UdmStartTimer();
1597 char name[64];
1598 const char *q;
1599
1600 bzero((void*) fl_urls, sizeof(*fl_urls));
1601
1602 UdmLog(A, UDM_LOG_INFO, "Loading fast limit '%s'", fl);
1603 if ((fl_urls->exclude= (fl[0] == '-')))
1604 fl++;
1605
1606 udm_snprintf(name, sizeof(name), "Limit.%s", fl);
1607 if (!(q= UdmVarListFindStr(&A->Conf->Vars, name, NULL)))
1608 {
1609 UdmLog(A, UDM_LOG_ERROR, "Limit '%s' not specified", fl);
1610 return UDM_ERROR;
1611 }
1612
1613 if (UDM_OK != (rc= UdmLoadSlowLimitWithSort(A, db, fl_urls, q)))
1614 return rc;
1615 UdmLog(A, UDM_LOG_DEBUG, "Limit '%s' loaded%s, %d records, %.2f sec",
1616 fl, fl_urls->exclude ? " type=excluding" : "", (int) fl_urls->nurls,
1617 UdmStopTimer(&ticks));
1618 return rc;
1619 }
1620
1621
1622
1623 /******************** Orders ********************************/
1624
1625 /*
1626 Apply a sorted UserOrder to UDM_URLDATALIST
1627 */
1628 static udm_rc_t
UdmApplyFastOrderToURLDataList(UDM_URLDATALIST * Data,UDM_URL_INT4_LIST * Order)1629 UdmApplyFastOrderToURLDataList(UDM_URLDATALIST *Data,
1630 UDM_URL_INT4_LIST *Order)
1631 {
1632 UDM_URLDATA *d= Data->Item;
1633 UDM_URLDATA *de= Data->Item + Data->nitems;
1634
1635 if (!Order->nitems)
1636 return UDM_OK;
1637
1638 for ( ; d < de; d++)
1639 {
1640 UDM_URL_INT4 *found;
1641 if ((found= (UDM_URL_INT4*) UdmBSearch(&d->url_id,
1642 Order->Item,
1643 Order->nitems,
1644 sizeof(UDM_URL_INT4),
1645 (udm_qsort_cmp)UdmCmpURLID)))
1646 {
1647 char buf[64];
1648 sprintf(buf, "%08X", found->param);
1649 d->section= UdmStrdup(buf);
1650 }
1651 else
1652 {
1653 d->section= UdmStrdup("00000001");
1654 }
1655 }
1656 return UDM_OK;
1657 }
1658
1659
1660 static udm_rc_t
UdmFastOrderLoadAndApplyToURLDataList(UDM_AGENT * Agent,UDM_DB * db,UDM_URLDATALIST * Data,const char * name,size_t * norder)1661 UdmFastOrderLoadAndApplyToURLDataList(UDM_AGENT *Agent,
1662 UDM_DB *db,
1663 UDM_URLDATALIST *Data,
1664 const char *name,
1665 size_t *norder)
1666 {
1667 UDM_URL_INT4_LIST Order;
1668 udm_rc_t rc;
1669
1670 if ((UDM_OK != (rc= UdmBlobLoadFastOrder(Agent, db, &Order, name))) ||
1671 !Order.nitems)
1672 goto ret;
1673
1674 rc= UdmApplyFastOrderToURLDataList(Data, &Order);
1675
1676 ret:
1677 *norder= Order.nitems;
1678 UDM_FREE(Order.Item);
1679 return rc;
1680 }
1681
1682
1683
1684 /******************** URLData *******************************/
1685
1686 static udm_rc_t
UdmLoadURLDataFromURLForConv(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,const UDM_URLID_LIST * fl_urls)1687 UdmLoadURLDataFromURLForConv(UDM_AGENT *A,
1688 UDM_DB *db,
1689 UDM_QUERY *Query,
1690 const UDM_URLID_LIST *fl_urls)
1691 {
1692 udm_rc_t rc;
1693 udm_timer_t ticks= UdmStartTimer();
1694 char qbuf[4*1024];
1695 udm_bool_t need_url= UDM_TRUE; /* TODO34: remove this variable */
1696 /* TODO34: do not load the "url" column if no url.* sections defined */
1697 const char *urlfield, *urltable;
1698 size_t nbytes, i, j;
1699 UDM_SQLRES SQLres;
1700 const char *dummy_where;
1701
1702 bzero((void*) &Query->URLData, sizeof(Query->URLData));
1703 if (UDM_OK != (rc= UdmSQLBuildWhereCondition(A, db, Query, &dummy_where)))
1704 return rc;
1705
1706 UdmLog(A, UDM_LOG_INFO, "Loading URL list");
1707
1708 urlfield= need_url ? ", url.url" : "";
1709 urltable= (Query->from && Query->from[0]) ? "url." : "";
1710 udm_snprintf(qbuf, sizeof(qbuf),
1711 "SELECT %srec_id, last_mod_time%s"
1712 " FROM url%s%s%s",
1713 urltable, urlfield, Query->from,
1714 Query->where[0] ? " WHERE " : "", Query->where);
1715
1716 if (UDM_OK != (rc = UdmDBSQLQuery(A, db, &SQLres, qbuf)))
1717 goto fin;
1718
1719 Query->URLData.nitems= UdmSQLNumRows(&SQLres);
1720 nbytes= Query->URLData.nitems * sizeof(UDM_URLDATA);
1721 Query->URLData.Item= (UDM_URLDATA*) UdmMalloc(nbytes);
1722
1723 for (i= 0, j= 0; i < Query->URLData.nitems; i++)
1724 {
1725 UDM_URLDATA *Data= &Query->URLData.Item[j];
1726 urlid_t url_id= UDM_ATOI(UdmSQLValue(&SQLres, i, 0));
1727 if (fl_urls->nurls)
1728 {
1729 void *found= UdmBSearch(&url_id, fl_urls->urls, fl_urls->nurls,
1730 sizeof(urlid_t), (udm_qsort_cmp)UdmCmpURLID);
1731 if (found && fl_urls->exclude)
1732 continue;
1733 if (!found && !fl_urls->exclude)
1734 continue;
1735 }
1736 Data->url_id= url_id;
1737 Data->score= 0;
1738 Data->per_site= 0;
1739 Data->pop_rank= 0;
1740 Data->site_id= 0;
1741 Data->last_mod_time= UDM_ATOI(UdmSQLValue(&SQLres, i, 1));
1742 Data->url= need_url ? UdmStrdup(UdmSQLValue(&SQLres, i, 2)) : NULL;
1743 Data->section= NULL;
1744 j++;
1745 }
1746 Query->URLData.nitems= j;
1747
1748 UdmURLDataListSort(&Query->URLData);
1749 UdmSQLFree(&SQLres);
1750
1751 fin:
1752 UdmLog(A, UDM_LOG_INFO, "URL list loaded: %d documents, %.2f sec",
1753 (int) Query->URLData.nitems, UdmStopTimer(&ticks));
1754 return rc;
1755 }
1756
1757
1758 static udm_rc_t
UdmLoadURLDataFromURLAndSlowLimitForConv(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)1759 UdmLoadURLDataFromURLAndSlowLimitForConv(UDM_AGENT *A,
1760 UDM_DB *db,
1761 UDM_QUERY *Query)
1762 {
1763 udm_rc_t rc= UDM_OK;
1764 const char *fl= UdmVarListFindStr(&A->Conf->Vars, "fl", NULL);
1765 UDM_URLID_LIST fl_urls;
1766
1767 bzero((void*)&fl_urls, sizeof(fl_urls));
1768
1769 if (fl && (UDM_OK != (rc= UdmSlowLimitLoadForConv(A, db, &fl_urls, fl))))
1770 return rc;
1771
1772 rc= UdmLoadURLDataFromURLForConv(A, db, Query, &fl_urls);
1773
1774 UDM_FREE(fl_urls.urls);
1775
1776 return rc;
1777 }
1778
1779
1780 /*
1781 Load the section with the given name from the table "urlinfo",
1782 for sorting by section: "s=S&su=section".
1783 */
1784 static udm_rc_t
UdmLoadURLDataFromURLInfoUsingIN(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,const char * esu)1785 UdmLoadURLDataFromURLInfoUsingIN(UDM_AGENT *A,
1786 UDM_DB *db,
1787 UDM_URLDATALIST *DataList,
1788 const char *esu)
1789 {
1790 udm_rc_t rc= UDM_OK;
1791 size_t offs;
1792 char qbuf[4*1024];
1793
1794 for (offs= 0; offs < DataList->nitems; offs+= 256)
1795 {
1796 size_t nrows, s, i;
1797 int notfirst= 0;
1798 UDM_SQLRES SQLres;
1799 char *end= qbuf + sprintf(qbuf, "SELECT url_id, sval"
1800 " FROM urlinfo"
1801 " WHERE sname='%s' AND url_id IN (", esu);
1802
1803 for (i= 0; (i < 256) && (offs + i < DataList->nitems); i++)
1804 {
1805 end+= sprintf(end, "%s%i", (notfirst) ? "," : "",
1806 DataList->Item[offs + i].url_id);
1807 notfirst= 1;
1808 }
1809 end+= sprintf(end, ") ORDER BY url_id");
1810
1811 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
1812 goto fin;
1813
1814 nrows= UdmSQLNumRows(&SQLres);
1815
1816 for(i= 0, s= i + offs; i < nrows; s++)
1817 {
1818 if (s == DataList->nitems)
1819 break;
1820 if (DataList->Item[s].url_id != (urlid_t) UDM_ATOI(UdmSQLValue(&SQLres, i, 0)))
1821 {
1822 DataList->Item[s].section= UdmStrdup("");
1823 }
1824 else
1825 {
1826 DataList->Item[s].section= UdmStrdup(UdmSQLValue(&SQLres, i, 1));
1827 i++;
1828 }
1829 }
1830 UdmSQLFree(&SQLres);
1831 }
1832
1833 fin:
1834 return rc;
1835 }
1836
1837
1838 /*
1839 Load URL data from "url" for sorting by:
1840 last_mod_time
1841 url
1842 section
1843 */
1844 static udm_rc_t
UdmLoadURLDataFromURLUsingIN(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,int flag)1845 UdmLoadURLDataFromURLUsingIN(UDM_AGENT *A,
1846 UDM_DB *db,
1847 UDM_URLDATALIST *DataList,
1848 int flag)
1849 {
1850 int need_url= (flag & (UDM_URLDATA_URL | UDM_URLDATA_SITE));
1851 udm_rc_t rc= UDM_OK;
1852 char qbuf[4*1024];
1853 UDM_SQLRES SQLres;
1854 UDM_STR row[5];
1855 size_t j;
1856 const char *hi_priority= UdmSQLDBType(db) == UDM_DB_MYSQL ? "HIGH_PRIORITY " : " ";
1857
1858 for (j= 0; j < DataList->nitems; j+= 256)
1859 {
1860 size_t i;
1861 int notfirst = 0;
1862 udm_snprintf(qbuf, sizeof(qbuf),
1863 "SELECT %srec_id,last_mod_time%s"
1864 " FROM url"
1865 " WHERE rec_id IN (",
1866 hi_priority,
1867 need_url ? ",url" : "");
1868 for (i= 0; (i < 256) && (j + i < DataList->nitems); i++)
1869 {
1870 sprintf(UDM_STREND(qbuf), "%s%i", (notfirst) ? "," : "", DataList->Item[j + i].url_id);
1871 notfirst= 1;
1872 }
1873 sprintf(UDM_STREND(qbuf), ") ORDER BY rec_id");
1874 if (UDM_OK != (rc= UdmDBSQLExecDirect(A, db, &SQLres, qbuf)))
1875 goto fin;
1876 for (i= 0; UdmDBSQLFetchRow(A, db, &SQLres, row) == UDM_OK; i++)
1877 {
1878 UDM_URLDATA *D= &DataList->Item[i + j];
1879 if (D->url_id != (urlid_t) UDM_ATOI(row[0].str))
1880 {
1881 UdmLog(A, UDM_LOG_ERROR, "Dat url_id (%d) != SQL url_id (%d)",
1882 D->url_id, UDM_ATOI(row[0].str));
1883 }
1884 D->last_mod_time= UDM_ATOI(row[1].str);
1885 if (need_url)
1886 {
1887 size_t sitelen= UdmAbsoluteURLSiteLength(row[2].str);
1888 D->site_id= UdmHash32(row[2].str, sitelen);
1889 D->url= UdmStrdup(row[2].str);
1890 }
1891 else
1892 {
1893 D->url= NULL;
1894 D->site_id= 0;
1895 }
1896 D->pop_rank= 0;
1897 D->section= NULL;
1898 }
1899 UdmSQLFree(&SQLres);
1900
1901 }
1902
1903 fin:
1904 return rc;
1905 }
1906
1907
1908 static udm_rc_t
UdmLoadURLDataFromURLUsingLoop(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,int flag)1909 UdmLoadURLDataFromURLUsingLoop(UDM_AGENT *A, UDM_DB *db,
1910 UDM_URLDATALIST *DataList, int flag)
1911 {
1912 udm_rc_t rc= UDM_OK;
1913 char qbuf[256];
1914 size_t i;
1915 int need_url= (flag & UDM_URLDATA_URL);
1916 const char *hi_priority= UdmSQLDBType(db) == UDM_DB_MYSQL ? "HIGH_PRIORITY" : "";
1917
1918 for (i = 0; i < DataList->nitems; i++)
1919 {
1920 UDM_SQLRES SQLres;
1921 UDM_URLDATA *D= &DataList->Item[i];
1922 udm_snprintf(qbuf, sizeof(qbuf),
1923 "SELECT %s last_mod_time%s"
1924 " FROM url WHERE rec_id=%i",
1925 hi_priority,
1926 need_url ? ",url" : "",
1927 DataList->Item[i].url_id);
1928 if (UDM_OK != (rc = UdmDBSQLQuery(A, db, &SQLres, qbuf)))
1929 goto fin;
1930 if(UdmSQLNumRows(&SQLres))
1931 {
1932 D->url_id= DataList->Item[i].url_id;
1933 D->site_id= 0;
1934 D->last_mod_time= UDM_ATOI(UdmSQLValue(&SQLres, 0, 0));
1935 D->url= need_url ? UdmStrdup(UdmSQLValue(&SQLres, 0, 1)) : NULL;
1936 D->pop_rank= 0;
1937 D->section= NULL;
1938 }
1939 UdmSQLFree(&SQLres);
1940 }
1941
1942 fin:
1943 return rc;
1944 }
1945
1946
1947 static udm_rc_t
UdmLoadURLDataFromURL(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,int flag)1948 UdmLoadURLDataFromURL(UDM_AGENT *A, UDM_DB *db,
1949 UDM_URLDATALIST *DataList, int flag)
1950 {
1951 udm_rc_t rc= UDM_OK;
1952 udm_bool_t use_urlbasicinfo= UdmVarListFindBool(&A->Conf->Vars, "LoadURLBasicInfo", UDM_TRUE);
1953 const char *su= UdmVarListFindStr(&A->Conf->Vars, "su", NULL);
1954
1955 /* Check that DataList is not empty and is sorted by url_id */
1956 UDM_ASSERT(DataList->nitems);
1957 UDM_ASSERT(DataList->Item[0].url_id <= DataList->Item[DataList->nitems - 1].url_id);
1958
1959 if (!use_urlbasicinfo)
1960 {
1961 UdmLog(A,UDM_LOG_DEBUG,"Not using basic URL data from url");
1962 UdmURLDataListClearParams(DataList);
1963 }
1964 else if (UdmSQLDBHaveIn(db))
1965 {
1966 UdmLog(A,UDM_LOG_DEBUG,"Trying to load URL data from url");
1967 rc= UdmLoadURLDataFromURLUsingIN(A, db, DataList, flag);
1968 }
1969 else
1970 {
1971 UdmLog(A,UDM_LOG_DEBUG,"Trying to load URL data from url, not using IN");
1972 rc= UdmLoadURLDataFromURLUsingLoop(A, db, DataList, flag);
1973 }
1974
1975 if ((flag & UDM_URLDATA_SU) && su && su[0])
1976 {
1977 char *esu=su ? UdmDBSQLEscStrSimple(A, db, NULL, su, strlen(su)) : NULL; /* User sort name */
1978 rc= UdmLoadURLDataFromURLInfoUsingIN(A, db, DataList, esu);
1979 UDM_FREE(esu);
1980 }
1981
1982 return rc;
1983 }
1984
1985
1986
1987 /****************************** User score **************************/
1988
1989 udm_rc_t
UdmUserScoreListLoad(UDM_AGENT * A,UDM_DB * db,UDM_URL_INT4_LIST * List,const char * q)1990 UdmUserScoreListLoad(UDM_AGENT *A, UDM_DB *db,
1991 UDM_URL_INT4_LIST *List, const char *q)
1992 {
1993 size_t i;
1994 udm_rc_t rc;
1995 UDM_SQLRES SQLRes;
1996 udm_timer_t ticks= UdmStartTimer();
1997
1998 bzero((void*) List, sizeof(UDM_URL_INT4_LIST));
1999
2000 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, q)))
2001 goto ret;
2002
2003 if (!(List->nitems= UdmSQLNumRows(&SQLRes)))
2004 goto sqlfree;
2005
2006 if (2 != UdmSQLNumCols(&SQLRes))
2007 {
2008 udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
2009 "User Score query must return 2 columns, returned %d columns",
2010 (int) UdmSQLNumCols(&SQLRes));
2011 rc= UDM_ERROR;
2012 UdmSQL(db)->errcode= 1;
2013 goto sqlfree;
2014 }
2015
2016 if (!(List->Item= (UDM_URL_INT4*) UdmMalloc(sizeof(UDM_URL_INT4) * List->nitems)))
2017 {
2018 rc= UDM_ERROR;
2019 List->nitems= 0;
2020 goto sqlfree;
2021 }
2022 for (i= 0; i < List->nitems; i++)
2023 {
2024 List->Item[i].url_id= atoi(UdmSQLValue(&SQLRes, i, 0));
2025 List->Item[i].param= atoi(UdmSQLValue(&SQLRes, i, 1));
2026 }
2027 UdmSort(List->Item, List->nitems, sizeof(UDM_URL_INT4), (udm_qsort_cmp)UdmCmpURLID);
2028
2029 UdmLog(A, UDM_LOG_DEBUG,
2030 "UserScore query returned %d columns, %d rows: %.2f",
2031 (int) UdmSQLNumCols(&SQLRes), (int) List->nitems, UdmStopTimer(&ticks));
2032
2033
2034 sqlfree:
2035 UdmSQLFree(&SQLRes);
2036 ret:
2037 return rc;
2038 }
2039
2040
2041 static udm_rc_t
UdmUserScoreListLoadAndApplyToURLScoreList(UDM_AGENT * Agent,UDM_URLSCORELIST * List,UDM_DB * db,const UDM_QUERY_PARAM * prm)2042 UdmUserScoreListLoadAndApplyToURLScoreList(UDM_AGENT *Agent,
2043 UDM_URLSCORELIST *List,
2044 UDM_DB *db,
2045 const UDM_QUERY_PARAM *prm)
2046 {
2047 char name[128];
2048 const char *us, *query;
2049 UDM_URL_INT4_LIST UserScoreList;
2050 udm_rc_t rc;
2051 udm_timer_t ticks= UdmStartTimer();
2052
2053 if (!prm->UserScoreFactor ||
2054 !(us= UdmVarListFindStr(&Agent->Conf->Vars, "us", NULL)))
2055 return UDM_OK;
2056
2057 UdmLog(Agent, UDM_LOG_DEBUG, "Start loading UserScore '%s'", us);
2058
2059 udm_snprintf(name, sizeof(name), "Score.%s", us);
2060 query= UdmVarListFindStr(&Agent->Conf->Vars, name, NULL);
2061
2062 if (UDM_OK != (rc= query ?
2063 UdmUserScoreListLoad(Agent, db, &UserScoreList, query) :
2064 UdmBlobLoadFastScore(Agent, db, &UserScoreList, us)) ||
2065 !UserScoreList.nitems)
2066 goto ret;
2067
2068 rc= UdmUserScoreListApplyToURLScoreList(Agent, List, &UserScoreList, prm);
2069
2070 ret:
2071 UdmLog(Agent, UDM_LOG_DEBUG, "%-30s%.2f (%d docs found)",
2072 "Stop loading UserScore", UdmStopTimer(&ticks), (int) UserScoreList.nitems);
2073 UDM_FREE(UserScoreList.Item);
2074 return rc;
2075 }
2076
2077
2078 static udm_rc_t
UdmUserSiteScoreListLoadAndApplyToURLDataList(UDM_AGENT * Agent,UDM_URLDATALIST * List,UDM_DB * db,const UDM_QUERY_PARAM * prm)2079 UdmUserSiteScoreListLoadAndApplyToURLDataList(UDM_AGENT *Agent,
2080 UDM_URLDATALIST *List,
2081 UDM_DB *db,
2082 const UDM_QUERY_PARAM *prm)
2083 {
2084 char name[128];
2085 const char *us, *query;
2086 UDM_URL_INT4_LIST UserScoreList;
2087 udm_rc_t rc;
2088
2089 if (!prm->UserScoreFactor ||
2090 !(us= UdmVarListFindStr(&Agent->Conf->Vars, "ss", NULL)))
2091 return UDM_OK;
2092 udm_snprintf(name, sizeof(name), "SiteScore.%s", us);
2093 if (!(query= UdmVarListFindStr(&Agent->Conf->Vars, name, NULL)))
2094 return UDM_OK;
2095
2096 if ((UDM_OK != (rc= UdmUserScoreListLoad(Agent, db,
2097 &UserScoreList, query))) ||
2098 !UserScoreList.nitems)
2099 goto ret;
2100
2101 rc= UdmUserScoreListApplyToURLDataList(Agent, List, &UserScoreList, prm);
2102
2103 ret:
2104 UDM_FREE(UserScoreList.Item);
2105 return rc;
2106 }
2107
2108
2109
2110 /*********************** Creating fast index ******************/
2111
2112 static udm_rc_t
UdmIndexSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)2113 UdmIndexSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
2114 {
2115 udm_rc_t rc;
2116 if (UDM_OK != (rc= UdmLoadURLDataFromURLAndSlowLimitForConv(Indexer, db, Query)))
2117 return rc;
2118
2119 rc= UdmSQLDBModeHandler(db)->QueryAction(Indexer, db, Query, UDM_QUERYCMD_INDEX);
2120
2121 UdmURLDataListFree(&Query->URLData);
2122 return rc;
2123 }
2124
2125
2126 static udm_rc_t
UdmStoreWords(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2127 UdmStoreWords(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2128 {
2129 UDM_ASSERT(UdmSQLDBModeHandler(db)->StoreWords != NULL);
2130 return UdmSQLDBModeHandler(db)->StoreWords(Indexer, db, Doc);
2131 }
2132
2133
2134 /********************* Inserting/Deleting URLs and Links *******************/
2135
2136 static udm_rc_t
UdmDeleteRedirects(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2137 UdmDeleteRedirects(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2138 {
2139 char qbuf[128];
2140 urlid_t id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2141 udm_snprintf(qbuf, sizeof(qbuf),
2142 "DELETE FROM redirect WHERE url_id=%d", id);
2143 return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2144 }
2145
2146
2147 static udm_rc_t
UdmStoreRedirects(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc)2148 UdmStoreRedirects(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc)
2149 {
2150 char qbuf[128];
2151 urlid_t id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2152 udm_rc_t rc;
2153 size_t i;
2154
2155 if (!Doc->Spider.collect_links_destination)
2156 return UDM_OK;
2157 if (UDM_OK != (rc= UdmDeleteRedirects(A, db, Doc)))
2158 return rc;
2159
2160 udm_snprintf(qbuf, sizeof(qbuf),
2161 "INSERT INTO redirect (url_id,seed,url) VALUES(%s,%s,%s)",
2162 UdmDBSQLParamPlaceHolder(db, 1),
2163 UdmDBSQLParamPlaceHolder(db, 2),
2164 UdmDBSQLParamPlaceHolder(db, 3));
2165
2166 if (UDM_OK != (rc= UdmDBSQLPrepare(A, db, qbuf)))
2167 return rc;
2168
2169 /* UdmDocBaseHref() && UdmDocConvertHrefs() is already done here */
2170 for (i= 0; i < Doc->Hrefs.nhrefs; i++)
2171 {
2172 UDM_HREF *H= &Doc->Hrefs.Href[i];
2173 udmcrc32_t url_seed;
2174
2175 if (H->Param.link_source != UDM_LINK_SOURCE_REDIRECT)
2176 continue;
2177 if (!(H->Param.method_reason & Doc->Spider.collect_links_destination))
2178 continue;
2179
2180 url_seed= UdmStrHash32(H->url) & 0xFF;
2181
2182 /*
2183 TODO34: Remove duplicate links.
2184 Some links can be duplicate here,
2185 because after UdmDocConvertHrefs() different links can become the same:
2186 http://site/ -> http://site/
2187 http://site/? -> http://site/
2188 */
2189 if (UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 1, &id, (int) sizeof(id),
2190 UDM_SQLTYPE_INT32)) ||
2191 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 2, &url_seed, (int) sizeof(url_seed),
2192 UDM_SQLTYPE_INT32)) ||
2193 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 3, H->url, (int) strlen(H->url),
2194 UDM_SQLTYPE_VARCHAR)) ||
2195 UDM_OK != (rc= UdmDBSQLExecute(A, db)))
2196 {
2197 break;
2198 }
2199 }
2200 UdmDBSQLStmtFree(A, db);
2201 return rc;
2202 }
2203
2204
2205 static udm_rc_t
UdmDeleteLinks(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2206 UdmDeleteLinks(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2207 {
2208 char qbuf[128];
2209 urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2210 sprintf(qbuf,"DELETE FROM links WHERE url_id=%d", url_id);
2211 return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2212 }
2213
2214
2215 static udm_rc_t
UdmStoreLinks(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc)2216 UdmStoreLinks(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc)
2217 {
2218 char qbuf[128];
2219 udm_rc_t rc;
2220 size_t i;
2221 urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2222 udmcrc32_t url_seed= 0;
2223 UDM_CHARSET *cs= Doc->lcs;
2224
2225 if (!Doc->Spider.collect_links_destination)
2226 return UDM_OK;
2227
2228 if (UdmSQLDBType(db) == UDM_DB_MYSQL &&
2229 UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, "LOCK TABLE links WRITE")))
2230 return rc;
2231
2232 if (UDM_OK != (rc= UdmDeleteLinks(A, db, Doc)))
2233 return rc;
2234
2235 udm_snprintf(qbuf, sizeof(qbuf),
2236 "INSERT INTO links (url_id,url,linktext,src,rel,seed) "
2237 "VALUES(%s,%s,%s,%s,%s,%s)",
2238 UdmDBSQLParamPlaceHolder(db, 1),
2239 UdmDBSQLParamPlaceHolder(db, 2),
2240 UdmDBSQLParamPlaceHolder(db, 3),
2241 UdmDBSQLParamPlaceHolder(db, 4),
2242 UdmDBSQLParamPlaceHolder(db, 5),
2243 UdmDBSQLParamPlaceHolder(db, 6));
2244
2245 if (UDM_OK != (rc= UdmDBSQLPrepare(A, db, qbuf)))
2246 return rc;
2247
2248 /* UdmDocBaseHref() && UdmDocConvertHrefs() is already done here */
2249 for (i= 0; i < Doc->Hrefs.nhrefs; i++)
2250 {
2251 UDM_HREF *H= &Doc->Hrefs.Href[i];
2252 UDM_CONST_STR def= {"", 0};
2253 UDM_CONST_STR txt, rel;
2254 const char *link_source= UdmLinkSourceStr(H->Param.link_source);
2255
2256 if (H->Param.link_source == UDM_LINK_SOURCE_REDIRECT)
2257 continue;
2258 /*
2259 TODO34: Allow to specify link sources in CollectLinks,
2260 with options similar to FollowLinks.
2261 */
2262 if (H->Param.link_source == UDM_LINK_SOURCE_DIR)
2263 continue;
2264 if (!(H->Param.method_reason & Doc->Spider.collect_links_destination))
2265 continue;
2266
2267 UdmVarListFindConstStr(&txt, &H->HrefVars, "LinkText", &def);
2268 UdmVarListFindConstStr(&rel, &H->HrefVars, "Rel", &def);
2269 txt.length= cs->cset->well_formed_length(cs, txt.str, UDM_MIN(1024, txt.length), UDM_RECODE_HTML);
2270 rel.length= cs->cset->well_formed_length(cs, rel.str, UDM_MIN(32, rel.length), UDM_RECODE_HTML);
2271 url_seed= UdmStrHash32(H->url) & 0xFF;
2272
2273 /*
2274 TODO34: Remove duplicate links.
2275 Some links can be duplicate here,
2276 because after UdmDocConvertHrefs() different links can become the same:
2277 http://site/ -> http://site/
2278 http://site/? -> http://site/
2279 */
2280 if (UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 1, &url_id, (int) sizeof(url_id),
2281 UDM_SQLTYPE_INT32)) ||
2282 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 2, H->url, (int) strlen(H->url),
2283 UDM_SQLTYPE_VARCHAR)) ||
2284 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 3, txt.str, (int) txt.length,
2285 UDM_SQLTYPE_VARCHAR)) ||
2286 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 4, link_source, (int) strlen(link_source),
2287 UDM_SQLTYPE_VARCHAR)) ||
2288 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 5, rel.str, (int) rel.length,
2289 UDM_SQLTYPE_VARCHAR)) ||
2290 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 6, &url_seed, (int) sizeof(url_seed),
2291 UDM_SQLTYPE_INT32)) ||
2292 UDM_OK != (rc= UdmDBSQLExecute(A, db)))
2293 {
2294 break;
2295 }
2296 }
2297 UdmDBSQLStmtFree(A, db);
2298 if (rc== UDM_OK && UdmSQLDBType(db) == UDM_DB_MYSQL)
2299 rc= UdmDBSQLQuery(A, db, NULL, "UNLOCK TABLES");
2300 return rc;
2301 }
2302
2303
2304 static udm_rc_t
UdmExportURL(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,const char * sql_export)2305 UdmExportURL(UDM_AGENT *Indexer,
2306 UDM_DB *db,
2307 UDM_DOCUMENT *Doc,
2308 const char *sql_export)
2309 {
2310 udm_rc_t rc= UDM_OK;
2311 char *part, *lt, *sql_export_copy= UdmStrdup(sql_export);
2312 UDM_DSTR d;
2313 UDM_VARLIST Vars;
2314 UdmVarListInit(&Vars);
2315 UdmDSTRInit(&d,256);
2316
2317 UdmVarListSQLEscape(Indexer, &Vars, &Doc->Sections, db);
2318 for (part= udm_strtok_r(sql_export_copy, ";", <) ;
2319 part ;
2320 part= udm_strtok_r(NULL, ";", <))
2321 {
2322 UdmDSTRParse(&d, part, &Vars);
2323 if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&d))))
2324 break;
2325 UdmDSTRReset(&d);
2326 }
2327 UdmVarListFree(&Vars);
2328 UdmDSTRFree(&d);
2329 UdmFree(sql_export_copy);
2330 return rc;
2331 }
2332
2333
2334 static udm_rc_t
UdmAddURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2335 UdmAddURL(UDM_AGENT *Indexer,UDM_DOCUMENT * Doc,UDM_DB *db)
2336 {
2337 char *e_url, *qbuf;
2338 const char *url;
2339 int url_seed;
2340 int use_crc32_url_id;
2341 int usehtdburlid;
2342 udm_rc_t rc= UDM_OK;
2343 size_t len;
2344 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2345 urlid_t rec_id = 0;
2346
2347 url = UdmVarListFindStr(&Doc->Sections,"URL","");
2348 use_crc32_url_id = !strcasecmp(UdmVarListFindStr(&Indexer->Conf->Vars, "UseCRC32URLId", "no"), "yes");
2349 usehtdburlid = UdmVarListFindInt(&Indexer->Conf->Vars, "UseHTDBURLId", 0);
2350
2351 len = strlen(url);
2352 e_url = (char*)UdmMalloc(4 * len + 1);
2353 if (e_url == NULL) return UDM_ERROR;
2354 qbuf = (char*)UdmMalloc(4 * len + 512);
2355 if (qbuf == NULL)
2356 {
2357 UDM_FREE(e_url);
2358 return UDM_ERROR;
2359 }
2360
2361 url_seed = UdmStrHash32(url) & 0xFF;
2362
2363 /* Escape URL string */
2364 UdmDBSQLEscStr(Indexer, db, e_url, url, len);
2365
2366 if(use_crc32_url_id || usehtdburlid)
2367 {
2368 /* Auto generation of rec_id */
2369 /* using CRC32 algorythm */
2370 if (use_crc32_url_id) rec_id = UdmStrHash32(url);
2371 else rec_id = UdmVarListFindInt(&Doc->Sections, "HTDB_URL_ID", 0);
2372
2373 udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2374 "(rec_id,url,referrer,hops,crc32,next_index_time,status,seed,bad_since_time,server_id,docsize,last_mod_time,shows) "
2375 "VALUES (%s%i%s,'%s',%s%i%s,%d,0,%d,0,%d,%d,%s%i%s,%s%i%s,%li,0)",
2376 qu, rec_id, qu,
2377 e_url,
2378 qu, UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0), qu,
2379 UdmVarListFindInt(&Doc->Sections,"Hops",0),
2380 (int)time(NULL),
2381 url_seed, (int)time(NULL),
2382 qu, UdmVarListFindInt(&Doc->Sections, "Server_id", 0), qu,
2383 qu, UdmVarListFindInt(&Doc->Sections, "Content-Length", 0), qu,
2384 UdmHttpDate2Time_t(UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2385 UdmVarListFindStrNonEmpty(&Doc->Sections, "Date", "")))
2386 );
2387 }else{
2388 /* Use dabatase generated rec_id */
2389 /* It depends on used DBType */
2390 switch(UdmSQLDBType(db))
2391 {
2392 case UDM_DB_SOLID:
2393 case UDM_DB_ORACLE8:
2394 case UDM_DB_SAPDB:
2395 /* FIXME: Dirty hack for stupid too smart databases
2396 Change this for config parameter checking */
2397 /* if (strlen(e_url)>UDM_URLSIZE)e_url[UDM_URLSIZE]=0;*/
2398 /* Use sequence next_url_id.nextval */
2399 udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2400 "(url,referrer,hops,rec_id,crc32,next_index_time,status,seed,bad_since_time,server_id)"
2401 " VALUES "
2402 "('%s',%i,%d,next_url_id.nextval,0,%d,0,%d,%d,%i)",
2403 e_url,
2404 UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0),
2405 UdmVarListFindInt(&Doc->Sections,"Hops",0),
2406 (int)time(NULL),
2407 url_seed, (int)time(NULL),
2408 UdmVarListFindInt(&Doc->Sections, "Server_id", 0));
2409 break;
2410 case UDM_DB_MIMER:
2411 udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2412 "(url,referrer,hops,rec_id,crc32,next_index_time,status,seed,bad_since_time,server_id)"
2413 " VALUES "
2414 "('%s',%i,%d,NEXT_VALUE OF rec_id_GEN,0,%d,0,%d,%d,%i)",
2415 e_url,
2416 UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0),
2417 UdmVarListFindInt(&Doc->Sections,"Hops",0),
2418 (int)time(NULL),
2419 url_seed, (int)time(NULL),
2420 UdmVarListFindInt(&Doc->Sections, "Server_id", 0));
2421 break;
2422 case UDM_DB_IBASE:
2423 udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2424 "(url,referrer,hops,rec_id,crc32,next_index_time,status,seed,bad_since_time,server_id)"
2425 " VALUES "
2426 "('%s',%i,%d,GEN_ID(rec_id_GEN,1),0,%d,0,%d,%d,%i)",
2427 e_url,
2428 UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0),
2429 UdmVarListFindInt(&Doc->Sections,"Hops",0),
2430 (int)time(NULL),
2431 url_seed, (int)time(NULL),
2432 UdmVarListFindInt(&Doc->Sections, "Server_id", 0));
2433 break;
2434 case UDM_DB_PGSQL:
2435 if (UdmSQLDBVersion(db) > 90100)
2436 {
2437 /* Use 9.1 syntax: INSERT INTO t1 SELECT ... WHERE EXISTS (SELECT) */
2438 udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2439 "(url,referrer,hops,crc32,next_index_time,status,seed,bad_since_time,server_id,docsize,last_mod_time,shows)"
2440 " SELECT "
2441 "'%s',%s%i%s,%d,0,%d,0,%d,%d,%s%i%s,%s%i%s,%li,0"
2442 " WHERE NOT EXISTS "
2443 "(SELECT rec_id FROM url WHERE url='%s')",
2444 e_url,
2445 qu, UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0), qu,
2446 UdmVarListFindInt(&Doc->Sections,"Hops",0),
2447 (int)time(NULL),
2448 url_seed, (int)time(NULL),
2449 qu, UdmVarListFindInt(&Doc->Sections, "Server_id", 0), qu,
2450 qu, UdmVarListFindInt(&Doc->Sections, "Content-Length", 0), qu,
2451 UdmHttpDate2Time_t(UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2452 UdmVarListFindStr(&Doc->Sections, "Date", ""))),
2453 e_url);
2454 break;
2455 }
2456 /* else fall through */
2457 case UDM_DB_MYSQL: /* MySQL generates itself */
2458 case UDM_DB_VIRT:
2459 case UDM_DB_MSSQL:
2460 case UDM_DB_DB2:
2461 case UDM_DB_SQLITE:
2462 case UDM_DB_ACCESS:
2463 case UDM_DB_CACHE:
2464 case UDM_DB_SYBASE:
2465 case UDM_DB_SQLITE3:
2466 case UDM_DB_MONETDB:
2467 udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2468 "(url,referrer,hops,crc32,next_index_time,status,seed,bad_since_time,server_id,docsize,last_mod_time,shows)"
2469 " VALUES "
2470 "('%s',%s%i%s,%d,0,%d,0,%d,%d,%s%i%s,%s%i%s,%li,0)",
2471 e_url,
2472 qu, UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0), qu,
2473 UdmVarListFindInt(&Doc->Sections,"Hops",0),
2474 (int)time(NULL),
2475 url_seed, (int)time(NULL),
2476 qu, UdmVarListFindInt(&Doc->Sections, "Server_id", 0), qu,
2477 qu, UdmVarListFindInt(&Doc->Sections, "Content-Length", 0), qu,
2478 UdmHttpDate2Time_t(UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2479 UdmVarListFindStr(&Doc->Sections, "Date", "")))
2480 );
2481 }
2482 }
2483
2484 /* Exec INSERT now */
2485 if(UDM_OK!=(rc=UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
2486 goto ex;
2487
2488 ex:
2489
2490 UDM_FREE(qbuf);
2491 UDM_FREE(e_url);
2492 return rc;
2493 }
2494
2495
2496 /******************* Cached Copy *********************/
2497
2498 #define SQLRESTODOC_COLUMNS \
2499 "rec_id,url,last_mod_time,docsize," \
2500 "next_index_time,referrer,crc32,status"
2501
2502 static void
SQLResToDoc(UDM_ENV * Conf,UDM_DOCUMENT * D,UDM_SQLRES * sqlres,size_t i)2503 SQLResToDoc(UDM_ENV *Conf, UDM_DOCUMENT *D, UDM_SQLRES *sqlres, size_t i)
2504 {
2505 time_t last_mod_time;
2506 char dbuf[UDM_MAXTIMESTRLEN];
2507 const char *format = UdmVarListFindStr(&Conf->Vars, "DateFormat", "%a, %d %b %Y, %X %Z");
2508 double pr;
2509
2510 UdmVarListReplaceStr(&D->Sections,"URL",UdmSQLValue(sqlres,i,1));
2511 UdmVarListReplaceInt(&D->Sections, "URL_ID", UdmStrHash32(UdmSQLValue(sqlres,i,1)));
2512 last_mod_time=atol(UdmSQLValue(sqlres,i,2));
2513 UdmVarListReplaceInt(&D->Sections, "Last-Modified-Timestamp", (int) last_mod_time);
2514 if (strftime(dbuf, sizeof(dbuf), format, localtime(&last_mod_time)) == 0)
2515 {
2516 UdmTime_t2HttpStr(last_mod_time, dbuf, sizeof(dbuf));
2517 }
2518 UdmVarListReplaceStr(&D->Sections,"Last-Modified",dbuf);
2519 UdmVarListReplaceStr(&D->Sections,"Content-Length",UdmSQLValue(sqlres,i,3));
2520 pr= atof(UdmSQLValue(sqlres,i,3)) / 1024;
2521 sprintf(dbuf, "%.2f", pr);
2522 UdmVarListReplaceStr(&D->Sections,"Content-Length-K",dbuf);
2523 last_mod_time=atol(UdmSQLValue(sqlres,i,4));
2524 if (strftime(dbuf, sizeof(dbuf), format, localtime(&last_mod_time)) == 0)
2525 {
2526 UdmTime_t2HttpStr(last_mod_time, dbuf, sizeof(dbuf));
2527 }
2528 UdmVarListReplaceStr(&D->Sections,"Next-Index-Time",dbuf);
2529 UdmVarListReplaceInt(&D->Sections, "Referrer-ID", UDM_ATOI(UdmSQLValue(sqlres,i,5)));
2530 UdmVarListReplaceInt(&D->Sections,"crc32",atoi(UdmSQLValue(sqlres,i,6)));
2531
2532 #if BAR_COMMA_PERIOD_ORACLE_PROBLEM
2533 {
2534 char *num= UdmSQLValue(sqlres, i, 8);
2535 char *comma= strchr(num, ',');
2536 if (comma)
2537 *comma= '.';
2538 }
2539 #endif
2540
2541 UdmVarListReplaceStr(&D->Sections, "Status", UdmSQLValue(sqlres, i, 7));
2542 }
2543
2544
2545 static udm_rc_t
UdmGetURLInfoOneDoc(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2546 UdmGetURLInfoOneDoc(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2547 {
2548 udm_rc_t rc;
2549 char buf[64];
2550 size_t i;
2551 UDM_SQLRES SQLRes;
2552
2553 udm_snprintf(buf, sizeof(buf), "SELECT sname, sval FROM urlinfo WHERE url_id=%d", UDM_ATOI(UdmVarListFindStr(&Doc->Sections, "ID", "0")));
2554 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
2555 return rc;
2556
2557 for (i= 0; i < UdmSQLNumRows(&SQLRes); i++)
2558 {
2559 const char *sname= UdmSQLValue(&SQLRes, i, 0);
2560 const char *sval= UdmSQLValue(&SQLRes, i, 1);
2561 /*size_t l= UdmSQLLen(&SQLRes, i, 1);*/
2562
2563 if (!sname)
2564 continue;
2565
2566 UdmVarListReplaceStr(&Doc->Sections, sname, sval ? sval : "");
2567 }
2568 UdmSQLFree(&SQLRes);
2569
2570 return UDM_OK;
2571 }
2572
2573
2574 static udm_rc_t
UdmGetCachedCopyOneDoc(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2575 UdmGetCachedCopyOneDoc(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2576 {
2577 urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2578 size_t max_doc_size= UdmVarListFindInt(&Indexer->Conf->Vars, "MaxDocSize", UDM_MAXDOCSIZE);
2579 udm_rc_t rc;
2580 char buf[128];
2581 UDM_SQLRES SQLRes;
2582
2583 udm_snprintf(buf, sizeof(buf),
2584 "SELECT content FROM cachedcopy "
2585 "WHERE url_id=%d",
2586 (int) url_id);
2587
2588 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
2589 return rc;
2590
2591 if (UdmSQLNumRows(&SQLRes) == 1)
2592 {
2593 const char *sval= UdmSQLValue(&SQLRes, 0, 0);
2594 size_t len= UdmSQLLen(&SQLRes, 0, 0);
2595 udm_timer_t timer= 0;
2596 UdmDocSetFromCachedHTTPResponse(Doc, sval, len, max_doc_size, &timer);
2597 }
2598
2599 UdmSQLFree(&SQLRes);
2600
2601 /* TODO34: Get cached copy from the original location
2602 if (unpack_cached_copy && !CachedCopy_found)
2603 {
2604 const char *url= UdmVarListFindStr(&Doc->Sections, "url", NULL);
2605 UdmGetURLSimple(Indexer, Doc, url);
2606 }
2607 */
2608
2609 return UDM_OK;
2610 }
2611
2612
2613 static udm_rc_t
UdmGetCachedCopy(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2614 UdmGetCachedCopy(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc, UDM_DB *db)
2615 {
2616 UDM_SQLRES SQLRes;
2617 char buf[1024];
2618 udm_rc_t rc;
2619 int url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2620
2621 if (!url_id)
2622 UdmFindURL(Indexer, Doc, db);
2623 url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2624 udm_snprintf(buf, sizeof(buf),
2625 "SELECT " SQLRESTODOC_COLUMNS
2626 " FROM url WHERE rec_id=%d", url_id);
2627 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
2628 return rc;
2629
2630 if (!UdmSQLNumRows(&SQLRes))
2631 {
2632 UdmSQLFree(&SQLRes);
2633 return UDM_ERROR;
2634 }
2635
2636 SQLResToDoc(Indexer->Conf, Doc, &SQLRes, 0);
2637 UdmSQLFree(&SQLRes);
2638
2639 if (UDM_OK != (rc= UdmGetCachedCopyOneDoc(Indexer, db, Doc)))
2640 return rc;
2641
2642 if (UDM_OK != (rc= UdmGetURLInfoOneDoc(Indexer, db, Doc)))
2643 return rc;
2644
2645 return UDM_OK;
2646 }
2647
2648
2649 /********************** Reindexing "indexer -a" *************************/
2650
2651 static udm_rc_t
UdmMarkForReindex(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)2652 UdmMarkForReindex(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
2653 {
2654 char qbuf[1024];
2655 const char *where;
2656 UDM_SQLRES SQLRes;
2657 size_t i, j;
2658 udm_rc_t rc;
2659 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2660 UDM_DSTR buf;
2661
2662 UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
2663 if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
2664 return rc;
2665
2666 if (UdmSQLDBFlags(db) & UDM_SQL_HAVE_SUBSELECT &&
2667 UdmSQLDBType(db) != UDM_DB_MYSQL)
2668 {
2669 udm_snprintf(qbuf,sizeof(qbuf),"UPDATE url SET next_index_time=%d WHERE rec_id IN (SELECT url.rec_id FROM url%s %s %s)",
2670 (int)time(NULL), Query->from, (where[0]) ? "WHERE" : "", where);
2671 return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2672 }
2673
2674 udm_snprintf(qbuf, sizeof(qbuf), "SELECT url.rec_id FROM url%s %s %s",
2675 Query->from, (where[0]) ? "WHERE" : "", where);
2676 if(UDM_OK != (rc = UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf))) return rc;
2677
2678 UdmDSTRInit(&buf, 4096);
2679 if (UdmSQLDBHaveIn(db))
2680 {
2681 for (i = 0; i < UdmSQLNumRows(&SQLRes); i += 512)
2682 {
2683 UdmDSTRReset(&buf);
2684 UdmDSTRAppendf(&buf, "UPDATE url SET next_index_time=%d WHERE rec_id IN (", (int)time(NULL));
2685 for (j = 0; (j < 512) && (i + j < UdmSQLNumRows(&SQLRes)); j++)
2686 {
2687 UdmDSTRAppendf(&buf, "%s%s%s%s", (j) ? "," : "", qu, UdmSQLValue(&SQLRes, i + j, 0), qu);
2688 }
2689 UdmDSTRAppendf(&buf, ")");
2690 if(UDM_OK != (rc = UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&buf))))
2691 {
2692 UdmSQLFree(&SQLRes);
2693 UdmDSTRFree(&buf);
2694 return rc;
2695 }
2696 }
2697 } else {
2698 for (i = 0; i < UdmSQLNumRows(&SQLRes); i++)
2699 {
2700 UdmDSTRReset(&buf);
2701 UdmDSTRAppendf(&buf, "UPDATE url SET next_index_time=%d WHERE rec_id=%s", (int)time(NULL), UdmSQLValue(&SQLRes, i, 0));
2702 if(UDM_OK != (rc = UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&buf))))
2703 {
2704 UdmSQLFree(&SQLRes);
2705 UdmDSTRFree(&buf);
2706 return rc;
2707 }
2708 }
2709 }
2710 UdmDSTRFree(&buf);
2711 UdmSQLFree(&SQLRes);
2712 return UDM_OK;
2713 }
2714
2715
2716 /************** Child - for new extensions ****************/
2717
2718 static udm_rc_t
UdmRegisterChild(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2719 UdmRegisterChild(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
2720 {
2721 char qbuf[1024];
2722 urlid_t url_id = UdmVarListFindInt(&Doc->Sections,"ID",0);
2723 urlid_t parent_id = UdmVarListFindInt(&Doc->Sections,"Parent-ID",0);
2724 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2725
2726 udm_snprintf(qbuf,sizeof(qbuf),"insert into links (ot,k,weight) values(%s%i%s,%s%i%s,0.0)", qu, parent_id, qu, qu, url_id, qu);
2727 return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2728 }
2729
2730
2731 /*********************** Update URL ***********************/
2732
2733 static udm_rc_t
UdmUpdateUrl(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2734 UdmUpdateUrl(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc,UDM_DB *db)
2735 {
2736 char qbuf[256];
2737 urlid_t url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
2738 int status=UdmVarListFindInt(&Doc->Sections,"Status",0);
2739 int prevStatus = UdmVarListFindInt(&Doc->Sections, "PrevStatus", 0);
2740 int next_index_time=UdmHttpDate2Time_t(UdmVarListFindStr(&Doc->Sections,"Next-Index-Time",""));
2741 udm_rc_t res;
2742 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2743
2744 if (prevStatus != status && status > 300 && status != 304)
2745 sprintf(qbuf, "UPDATE url SET "
2746 "status=%d,next_index_time=%d,bad_since_time=%d,server_id=%s%i%s"
2747 " WHERE rec_id=%s%i%s",
2748 status, next_index_time, (int)time(NULL),
2749 qu, UdmVarListFindInt(&Doc->Sections, "Server_id",0),
2750 qu, qu, url_id, qu);
2751 else
2752 sprintf(qbuf,"UPDATE url SET "
2753 "status=%d,next_index_time=%d,server_id=%s%i%s"
2754 " WHERE rec_id=%s%i%s",
2755 status, next_index_time,
2756 qu, UdmVarListFindInt(&Doc->Sections, "Server_id",0), qu,
2757 qu, url_id, qu);
2758
2759 if(UDM_OK!=(res=UdmDBSQLQuery(Indexer, db, NULL, qbuf)))return res;
2760
2761 /* remove all old broken hrefs from this document to avoid broken link collecting */
2762 return UdmDeleteBadHrefs(Indexer,Doc,db,url_id);
2763 }
2764
2765
2766 static udm_rc_t
UdmDocNormalizeContentLanguage(UDM_DOCUMENT * Doc)2767 UdmDocNormalizeContentLanguage(UDM_DOCUMENT *Doc)
2768 {
2769 UDM_VAR *var;
2770 if ((var= UdmVarListFindVar(&Doc->Sections, "Content-Language")))
2771 {
2772 char language[128];
2773 const char *lang= UdmVarStr(var) ? UdmVarStr(var) :
2774 UdmVarListFindStr(&Doc->Sections, "DefaultLang", "en");
2775 size_t i, len= udm_snprintf(language, sizeof(language), "%s", lang);
2776 for(i= 0; i < len; i++)
2777 language[i]= tolower(language[i]);
2778 return UdmVarListReplaceStr(&Doc->Sections, "Content-Language", language);
2779 }
2780 return UDM_OK;
2781 }
2782
2783
2784 static udm_rc_t
UdmUpdateUrlWithLangAndCharset(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2785 UdmUpdateUrlWithLangAndCharset(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
2786 {
2787 char *qbuf;
2788 udm_rc_t rc;
2789 const char *charset;
2790 int status, prevStatus;
2791 urlid_t url_id;
2792 char qsmall[64];
2793 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2794 int IndexTime= UdmVarListFindInt(&Indexer->Conf->Vars, "IndexTime", 0);
2795
2796 status = UdmVarListFindInt(&Doc->Sections, "Status", 0);
2797 prevStatus = UdmVarListFindInt(&Doc->Sections, "PrevStatus", 0);
2798 url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
2799
2800 if (UDM_OK != (rc= UdmDocNormalizeContentLanguage(Doc)))
2801 return rc;
2802
2803 charset = UdmVarListFindStr(&Doc->Sections, "Charset",
2804 UdmVarListFindStr(&Doc->Sections, "RemoteCharset", "iso-8859-1"));
2805 charset = UdmCharsetCanonicalName(charset);
2806 UdmVarListReplaceStr(&Doc->Sections, "Charset", charset);
2807
2808 if (prevStatus != status && status > 300 && status != 304)
2809 udm_snprintf(qsmall, 64, ", bad_since_time=%d", (int)time(NULL));
2810 else qsmall[0] = '\0';
2811
2812 if (IndexTime)
2813 {
2814 if (! prevStatus) udm_snprintf(UDM_STREND(qsmall), 64, ",last_mod_time=%li", time(NULL));
2815 }
2816 else
2817 {
2818 const char *lmsrc= UdmVarListFindStrNonEmpty(&Doc->Sections, "User.Date",
2819 UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2820 UdmVarListFindStrNonEmpty(&Doc->Sections, "Date", "")));
2821 udm_snprintf(UDM_STREND(qsmall), 64, ",last_mod_time=%li", UdmHttpDate2Time_t(lmsrc));
2822 }
2823 qbuf=(char*)UdmMalloc(1024);
2824
2825
2826 udm_snprintf(qbuf, 1023, "\
2827 UPDATE url SET \
2828 status=%d,\
2829 next_index_time=%d,\
2830 docsize=%d,\
2831 crc32=%d%s, server_id=%s%i%s \
2832 WHERE rec_id=%s%i%s",
2833 status,
2834 (int) UdmHttpDate2Time_t(UdmVarListFindStr(&Doc->Sections,"Next-Index-Time","")),
2835 UdmVarListFindInt(&Doc->Sections,"Content-Length",0),
2836 UdmVarListFindInt(&Doc->Sections,"crc32",0),
2837 qsmall,
2838 qu, UdmVarListFindInt(&Doc->Sections, "Server_id",0), qu,
2839 qu, url_id, qu);
2840
2841 rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2842 UDM_FREE(qbuf);
2843 return rc;
2844 }
2845
2846
2847 static udm_bool_t
UdmCachedCopyPrintCharset(udm_content_type_t ct)2848 UdmCachedCopyPrintCharset(udm_content_type_t ct)
2849 {
2850 switch (ct)
2851 {
2852 case UDM_CONTENT_TYPE_TEXT_PLAIN:
2853 case UDM_CONTENT_TYPE_TEXT_HTML:
2854 case UDM_CONTENT_TYPE_TEXT_XML:
2855 case UDM_CONTENT_TYPE_HTDB:
2856 return UDM_TRUE;
2857 case UDM_CONTENT_TYPE_MESSAGE_RFC822:
2858 case UDM_CONTENT_TYPE_AUDIO_MPEG:
2859 case UDM_CONTENT_TYPE_DOCX:
2860 case UDM_CONTENT_TYPE_TEXT_RTF:
2861 case UDM_CONTENT_TYPE_UNKNOWN:
2862 return UDM_FALSE;
2863 }
2864 UDM_ASSERT(0);
2865 return UDM_FALSE;
2866 }
2867
2868
2869 static udm_rc_t
UdmDocInsertCachedCopy(UDM_AGENT * Agent,UDM_DB * db,UDM_DOCUMENT * Doc)2870 UdmDocInsertCachedCopy(UDM_AGENT *Agent, UDM_DB *db, UDM_DOCUMENT *Doc)
2871 {
2872 char qbuf[256];
2873 udm_rc_t rc;
2874 UDM_CONST_STR content;
2875 urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2876 UDM_HTTPBUF tmp;
2877 const char *orig_content_type= UdmVarListFindStrNonEmpty(&Doc->Sections, "Content-Type", NULL);
2878 const char *parser_content_type= UdmVarListFindStrNonEmpty(&Doc->Sections, "Cached.Content-Type", orig_content_type);
2879 udm_content_type_t orig_ct= orig_content_type ? UdmContentTypeByName(orig_content_type) : UDM_CONTENT_TYPE_UNKNOWN;
2880 udm_content_type_t parser_ct= parser_content_type ? UdmContentTypeByName(parser_content_type) : UDM_CONTENT_TYPE_UNKNOWN;
2881 UDM_CHARSET *remote_charset= UdmVarListFindCharset(&Doc->Sections, "RemoteCharset", NULL);
2882 UDM_CHARSET *orig_charset= UdmVarListFindCharset(&Doc->Sections, "Charset", remote_charset);
2883 UDM_CHARSET *parser_charset= UdmVarListFindCharset(&Doc->Sections, "Cached.Charset", orig_charset);
2884 const char *encoding= UdmVarListFindStr(&Agent->Conf->Vars, "CachedCopyEncoding", "deflate");
2885 urlid_t ts= (urlid_t) time(0);
2886
2887 if (!parser_content_type || !UdmSQLDBHandler(db)->Prepare)
2888 return UDM_ERROR;
2889
2890 /*UdmVarListPrint(&Doc->Sections, stdout);*/
2891
2892 /*
2893 Don't store cached copies if neither indexing nor archiving is allowed.
2894 TODO34: respect noarchive at search time.
2895 */
2896 if (UdmHTTPBufContentToConstStr(&Doc->Buf, &content) || !content.length ||
2897 (!Doc->Spider.robots.archive && !Doc->Spider.robots.index))
2898 return UDM_OK;
2899
2900 UdmHTTPBufInit(&tmp);
2901 if (UDM_OK != (rc= UdmHTTPBufAlloc(&tmp, content.length + 256)))
2902 return rc;
2903
2904 UdmHTTPBufPrintf(&tmp, "Content-Type: %s", parser_content_type);
2905 if (parser_charset && UdmCachedCopyPrintCharset(parser_ct))
2906 UdmHTTPBufAppendf(&tmp, "; charset=%s", parser_charset->name);
2907 UdmHTTPBufAppendf(&tmp, "\r\n");
2908
2909 /* Add the original content type if differs from the parser content type */
2910 if ((orig_content_type && orig_ct != parser_ct) ||
2911 (orig_charset != NULL && UdmCachedCopyPrintCharset(orig_ct) &&
2912 orig_charset != parser_charset))
2913
2914 {
2915 UdmHTTPBufAppendf(&tmp, "X-Orig.Content-Type: %s", orig_content_type);
2916 if (orig_charset && UdmCachedCopyPrintCharset(orig_ct))
2917 UdmHTTPBufAppendf(&tmp, "; charset=%s", orig_charset->name);
2918 UdmHTTPBufAppendf(&tmp, "\r\n");
2919 }
2920
2921 if (encoding && content.length > 128)
2922 {
2923 udm_content_encoding_t ce= UdmContentEncodingID(encoding);
2924 if (ce == UDM_CONTENT_ENCODING_DEFLATE)
2925 {
2926 UDM_HTTPBUF tmp2= tmp; /* Backup the buffer state */
2927 UdmHTTPBufAppendf(&tmp, "Content-Encoding: deflate\r\n\r\n");
2928 if (UDM_OK == UdmHTTPBufDeflateAppend(&tmp, content.str, content.length))
2929 goto ins;
2930 tmp= tmp2; /* Deflate failed, restore the buffer state */
2931 }
2932 }
2933
2934 UdmHTTPBufAppendf(&tmp, "\r\n");
2935 UdmHTTPBufAppend(&tmp, content.str, content.length);
2936
2937 ins:
2938 udm_snprintf(qbuf, sizeof(qbuf),
2939 "INSERT INTO cachedcopy (url_id,ts,content) "
2940 "VALUES(%s,%s,%s)",
2941 UdmDBSQLParamPlaceHolder(db, 1),
2942 UdmDBSQLParamPlaceHolder(db, 2),
2943 UdmDBSQLParamPlaceHolder(db, 3));
2944
2945 if (UDM_OK != (rc= UdmDBSQLPrepare(Agent, db, qbuf)))
2946 goto ex;
2947
2948 UDM_ASSERT(sizeof(url_id) == 4);
2949 if (UDM_OK != (rc= UdmDBSQLBindParameter(Agent, db, 1,
2950 &url_id, (int) sizeof(url_id),
2951 UDM_SQLTYPE_INT32)) ||
2952 UDM_OK != (rc= UdmDBSQLBindParameter(Agent, db, 2,
2953 &ts, (int) sizeof(ts),
2954 UDM_SQLTYPE_INT32)) ||
2955 UDM_OK != (rc= UdmDBSQLBindParameter(Agent, db, 3,
2956 UdmHTTPBufPtr(&tmp),
2957 (int) UdmHTTPBufSize(&tmp),
2958 UDM_SQLTYPE_LONGVARBINARY)) ||
2959 UDM_OK != (rc= UdmDBSQLExecute(Agent, db)))
2960 {
2961 UdmDBSQLStmtFree(Agent, db);
2962 goto ex;
2963 }
2964
2965 rc= UdmDBSQLStmtFree(Agent, db);
2966
2967 ex:
2968 UdmHTTPBufFree(&tmp);
2969 return rc;
2970 }
2971
2972
2973 static udm_bool_t
UdmDocSectionToBeStored(const UDM_ENV * Env,const UDM_VAR * Var)2974 UdmDocSectionToBeStored(const UDM_ENV *Env, const UDM_VAR *Var)
2975 {
2976 const UDM_SECTION *Sec;
2977 if (UdmVarValueHandlerType(Var) != UDM_VALUE_HANDLER_TYPE_STR)
2978 return UDM_FALSE;
2979 Sec= (const UDM_SECTION *) UdmVarConstDataPtr(Var);
2980 return
2981 UdmEnvSectionMaxLengthEx(Env, Sec->Param.secno, Var->header.name) &&
2982 UdmSectionPtr(Sec) && UdmSectionLength(Sec);
2983 }
2984
2985
2986 static size_t
UdmDocHaveSectionsToBeStored(const UDM_ENV * Env,const UDM_DOCUMENT * Doc)2987 UdmDocHaveSectionsToBeStored(const UDM_ENV *Env, const UDM_DOCUMENT *Doc)
2988 {
2989 size_t i;
2990 for (i= 0; i < Doc->Sections.nvars; i++)
2991 {
2992 if (UdmDocSectionToBeStored(Env, UdmVarListFindByIndex(&Doc->Sections, i)))
2993 return UDM_TRUE;
2994 }
2995 return UDM_FALSE;
2996 }
2997
2998
2999 static udm_rc_t
UdmDocInsertSectionsUsingBind(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc)3000 UdmDocInsertSectionsUsingBind(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc)
3001 {
3002 udm_rc_t rc= UDM_OK;
3003 size_t i;
3004 char qbuf[256];
3005 urlid_t url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3006
3007 UDM_ASSERT(UdmSQLDBHandler(db)->Prepare);
3008
3009 if (!UdmDocHaveSectionsToBeStored(A->Conf, Doc))
3010 return UDM_OK;
3011
3012 udm_snprintf(qbuf, sizeof(qbuf),
3013 "INSERT INTO urlinfo (url_id,sname,sval) "
3014 "VALUES(%s, %s, %s)",
3015 UdmDBSQLParamPlaceHolder(db, 1),
3016 UdmDBSQLParamPlaceHolder(db, 2),
3017 UdmDBSQLParamPlaceHolder(db, 3));
3018
3019 if (UDM_OK != (rc= UdmDBSQLPrepare(A, db, qbuf)))
3020 return rc;
3021
3022 for(i= 0; i< Doc->Sections.nvars; i++)
3023 {
3024 const UDM_VAR *Sec= UdmVarListFindConstByIndex(&Doc->Sections, i);
3025 UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(Sec, &valuebuf);
3026 if (UdmDocSectionToBeStored(A->Conf, Sec))
3027 {
3028 udm_sqltype_t bindtype= UdmSQLLongVarCharBindType(UdmSQL(db));
3029 UDM_ASSERT(sizeof(url_id) == 4);
3030 if (UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 1,
3031 &url_id, (int) sizeof(url_id),
3032 UDM_SQLTYPE_INT32)) ||
3033 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 2,
3034 UdmVarName(Sec),
3035 (int) UdmVarNameLength(Sec),
3036 UDM_SQLTYPE_VARCHAR)) ||
3037 UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 3,
3038 value->str, (int) value->length,
3039 bindtype)) ||
3040 UDM_OK != (rc= UdmDBSQLExecute(A, db)))
3041 return rc;
3042 }
3043 }
3044
3045 return UdmDBSQLStmtFree(A, db);
3046 }
3047
3048
3049 static udm_rc_t
UdmDocInsertSectionsUsingEscapeBuildQuery(UDM_AGENT * A,UDM_DB * db,const char * table,urlid_t url_id,const char * extra_column_names,const UDM_CONST_STR * values,size_t nvalues,UDM_DSTR * qbuf)3050 UdmDocInsertSectionsUsingEscapeBuildQuery(UDM_AGENT *A, UDM_DB *db,
3051 const char *table,
3052 urlid_t url_id,
3053 const char *extra_column_names,
3054 const UDM_CONST_STR *values,
3055 size_t nvalues,
3056 UDM_DSTR *qbuf)
3057 {
3058 const char *E= (UdmSQLDBDriver(db) == UDM_DBAPI_PGSQL && UdmSQLDBVersion(db) >= 80101) ? "E" : "";
3059 size_t i;
3060 UdmDSTRReset(qbuf);
3061 UdmDSTRAppendf(qbuf, "INSERT INTO %s (url_id,%s) VALUES(",
3062 table, extra_column_names);
3063 if (url_id)
3064 UdmDSTRAppendf(qbuf, "%d", url_id);
3065 else
3066 UdmDSTRAppendSTR(qbuf, "last_insert_id()");
3067
3068 for (i= 0; i < nvalues; i++)
3069 {
3070 const UDM_CONST_STR *value= &values[i];
3071 size_t esclen;
3072 UdmDSTRReserve(qbuf, 2 + value->length * (UdmSQLDBType(db) == UDM_DB_PGSQL ? 4 : 2));
3073 UdmDSTRAppendf(qbuf, ",%s'", E);
3074 esclen= UdmDBSQLEscStr(A, db, qbuf->Val.str + qbuf->Val.length,
3075 value->str, value->length);
3076 qbuf->Val.length+= esclen;
3077 UdmDSTRAppendf(qbuf, "'");
3078 }
3079 UdmDSTRAppend(qbuf, ")", 2);
3080 return UDM_OK;
3081 }
3082
3083
3084 static udm_rc_t
UdmDocInsertSectionsUsingEscape(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc,const char * table,urlid_t url_id)3085 UdmDocInsertSectionsUsingEscape(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc,
3086 const char *table,
3087 urlid_t url_id)
3088 {
3089 udm_rc_t rc= UDM_OK;
3090 size_t i, len, esc_multiply = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? 4 : 2;
3091 UDM_DSTR qbuf;
3092
3093 /* Calculate maximum arg length */
3094 for(len= 0, i= 0; i < Doc->Sections.nvars; i++)
3095 {
3096 const UDM_VAR *Var= UdmVarListFindConstByIndex(&Doc->Sections, i);
3097 size_t l= UdmVarLength(Var) + UdmVarNameLength(Var);
3098 if (len < l)
3099 len= l;
3100 }
3101 if (!len)
3102 return UDM_OK;
3103
3104 UdmDSTRInit(&qbuf, 256);
3105 UdmDSTRAlloc(&qbuf, esc_multiply * len + 128);
3106
3107 for(i= 0; i< Doc->Sections.nvars; i++)
3108 {
3109 const UDM_VAR *Sec= UdmVarListFindConstByIndex(&Doc->Sections, i);
3110 if (UdmDocSectionToBeStored(A->Conf, Sec))
3111 {
3112 UDM_CONST_STR column[2];
3113 UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(Sec, &valuebuf);
3114 const UDM_CONST_STR *c= (const UDM_CONST_STR*) &column;
3115 UDM_ASSERT(value);
3116 UdmConstStrSetStr(&column[0], UdmVarName(Sec));
3117 column[1]= value[0];
3118 UdmDocInsertSectionsUsingEscapeBuildQuery(A, db, table,
3119 url_id, "sname,sval",
3120 c, 2, &qbuf);
3121 if(UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, UdmDSTRPtr(&qbuf))))
3122 break;
3123 }
3124 }
3125 UdmDSTRFree(&qbuf);
3126 return rc;
3127 }
3128
3129
3130 static udm_bool_t
UdmLongUpdateURLUseTnx(UDM_AGENT * A,UDM_DB * db)3131 UdmLongUpdateURLUseTnx(UDM_AGENT *A, UDM_DB *db)
3132 {
3133 switch (UdmSQLDBType(db))
3134 {
3135 case UDM_DB_VIRT: /* TODO34: check */
3136 case UDM_DB_ACCESS: /* TODO34: check */
3137 case UDM_DB_DB2: /* TODO34: check */
3138 case UDM_DB_CACHE: /* TODO34: check */
3139 return UDM_FALSE;
3140 default:
3141 case UDM_DB_MYSQL:
3142 return UDM_TEST(UdmSQLDBFlags(db) & UDM_SQL_HAVE_TRANSACT);
3143 }
3144 return UDM_FALSE;
3145 }
3146
3147 static udm_rc_t
UdmLongUpdateURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3148 UdmLongUpdateURL(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc,UDM_DB *db)
3149 {
3150 udm_rc_t rc= UDM_OK;
3151 urlid_t url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3152 const char *c;
3153 udm_bool_t use_tnx= UdmLongUpdateURLUseTnx(Indexer, db);
3154 /*
3155 TNX works fine: Sybase: ASE-15.0.2 Dev Edition + UnixODBC.
3156 */
3157
3158 if (use_tnx && UDM_OK != (rc= UdmDBSQLBegin(Indexer, db)))
3159 return rc;
3160
3161 /* Now store words */
3162 if(UDM_OK != (rc= UdmStoreWords(Indexer, db, Doc)))
3163 return rc;
3164
3165 /* Store links */
3166 if (UDM_OK != (rc= UdmStoreLinks(Indexer, db, Doc)) ||
3167 UDM_OK != (rc= UdmStoreRedirects(Indexer, db, Doc)))
3168 return rc;
3169
3170 /* Copy default languages, if not given by server and not guessed */
3171 if (!(c= UdmVarListFindStr(&Doc->Sections,"Content-Language",NULL)))
3172 {
3173 if ((c= UdmVarListFindStr(&Doc->Sections,"DefaultLang",NULL)))
3174 UdmVarListReplaceStr(&Doc->Sections,"Content-Language",c);
3175 }
3176
3177
3178 if(UDM_OK != (rc= UdmUpdateUrlWithLangAndCharset(Indexer, Doc, db)))
3179 return rc;
3180
3181 /* remove all old broken hrefs from this document to avoid broken link collecting */
3182 if (UDM_OK != (rc= UdmDeleteBadHrefs(Indexer,Doc,db,url_id)))
3183 return rc;
3184
3185 /* Remove old URLInfo only if PrevStatus != 0 */
3186 if (UdmVarListFindInt(&Doc->Sections, "PrevStatus", 1))
3187 {
3188 char qsmall[128];
3189 sprintf(qsmall,"DELETE FROM urlinfo WHERE url_id=%i", url_id);
3190 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qsmall)))
3191 return rc;
3192 sprintf(qsmall,"DELETE FROM cachedcopy WHERE url_id=%i", url_id);
3193 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qsmall)))
3194 return rc;
3195 }
3196
3197 /* No need delete from links here, it has been done before */
3198
3199 if (UdmSQLDBFlags(db) & UDM_SQL_HAVE_BIND_TEXT)
3200 {
3201 rc= UdmDocInsertSectionsUsingBind(Indexer, db, Doc);
3202 }
3203 else
3204 {
3205 rc= UdmDocInsertSectionsUsingEscape(Indexer, db, Doc, "urlinfo", url_id);
3206 }
3207
3208 if (rc == UDM_OK)
3209 rc= UdmDocInsertCachedCopy(Indexer, db, Doc);
3210
3211 if(use_tnx && rc == UDM_OK)
3212 rc= UdmDBSQLCommit(Indexer, db);
3213
3214 if (rc == UDM_OK && UdmSQLDBMode(db) == UDM_SQLDBMODE_MULTI)
3215 {
3216 int WordCacheSize= UdmVarListFindInt(&Indexer->Conf->Vars, "WordCacheSize", 0);
3217 if (WordCacheSize <= 0) WordCacheSize = 0x800000;
3218 /* UdmWordCacheWrite starts its own transaction */
3219 rc= UdmWordCacheWrite(Indexer, db, WordCacheSize);
3220 }
3221 return rc;
3222 }
3223
3224
3225 static udm_rc_t
UdmUpdateClone(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3226 UdmUpdateClone(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc,UDM_DB *db)
3227 {
3228 udm_rc_t rc;
3229 if (UDM_OK != (rc= UdmDeleteWordFromURL(Indexer, Doc, db)))
3230 return rc;
3231 rc= UdmUpdateUrlWithLangAndCharset(Indexer, Doc, db);
3232 return rc;
3233 }
3234
3235
3236
3237 /************************ Clones stuff ***************************/
3238 static udm_rc_t
UdmFindOrigin(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3239 UdmFindOrigin(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
3240 {
3241 size_t i=0;
3242 char qbuf[256]="";
3243 UDM_SQLRES SQLRes;
3244 urlid_t origin_id = 0;
3245 int scrc32=UdmVarListFindInt(&Doc->Sections,"crc32",0);
3246 udm_rc_t rc;
3247
3248 if (scrc32==0)return UDM_OK;
3249
3250 if (UdmSQLDBHaveIn(db))
3251 sprintf(qbuf,"SELECT rec_id FROM url WHERE crc32=%d AND status IN (200,304,206)",scrc32);
3252 else
3253 sprintf(qbuf,"SELECT rec_id FROM url WHERE crc32=%d AND (status=200 OR status=304 OR status=206)",scrc32);
3254
3255 if(UDM_OK!=(rc=UdmDBSQLQuery(Indexer,db,&SQLRes,qbuf)))
3256 return rc;
3257
3258 for(i=0;i<UdmSQLNumRows(&SQLRes);i++)
3259 {
3260 const char *o;
3261 if((o=UdmSQLValue(&SQLRes,i,0)))
3262 if((!origin_id) || (origin_id > UDM_ATOI(o)))
3263 origin_id = UDM_ATOI(o);
3264 }
3265 UdmSQLFree(&SQLRes);
3266 UdmVarListReplaceInt(&Doc->Sections, "Origin-ID", origin_id);
3267 return(UDM_OK);
3268 }
3269
3270
3271 static udm_rc_t
UdmCloneListSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,UDM_RESULT * Res)3272 UdmCloneListSQL(UDM_AGENT * Indexer, UDM_DB *db, UDM_DOCUMENT *Doc, UDM_RESULT *Res)
3273 {
3274 size_t i, nr, nadd;
3275 char qbuf[256];
3276 UDM_SQLRES SQLres;
3277 int scrc32=UdmVarListFindInt(&Doc->Sections,"crc32",0);
3278 urlid_t origin_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3279 udm_rc_t rc;
3280 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3281 const char *format = UdmVarListFindStr(&Indexer->Conf->Vars, "DateFormat", "%a, %d %b %Y, %X %Z");
3282
3283 if (UdmResultNumRows(Res) > 4) return UDM_OK;
3284
3285 if (!scrc32)
3286 return UDM_OK;
3287
3288 sprintf(qbuf,"SELECT rec_id,url,last_mod_time,docsize FROM url WHERE crc32=%d AND (status=200 OR status=304 OR status=206) AND rec_id<>%s%i%s", scrc32, qu, origin_id, qu);
3289 if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
3290 return UDM_OK;
3291
3292 nr = UdmSQLNumRows(&SQLres);
3293 if( nr == 0)
3294 {
3295 UdmSQLFree(&SQLres);
3296 return UDM_OK;
3297 }
3298 nadd = 5 - UdmResultNumRows(Res);
3299 if(nr < nadd) nadd = nr;
3300
3301 Res->Doc= (UDM_DOCUMENT*)UdmRealloc(Res->Doc,
3302 (UdmResultNumRows(Res) + nadd) *
3303 sizeof(UDM_DOCUMENT));
3304 for(i = 0; i < nadd; i++)
3305 {
3306 time_t last_mod_time;
3307 char buf[UDM_MAXTIMESTRLEN];
3308 UDM_DOCUMENT *D = &Res->Doc[Res->num_rows + i];
3309
3310 UdmDocInit(D);
3311 UdmVarListAddInt(&D->Sections, "ID", UDM_ATOI(UdmSQLValue(&SQLres,i,0)));
3312 UdmVarListAddStr(&D->Sections,"URL",UdmSQLValue(&SQLres,i,1));
3313 UdmVarListReplaceInt(&D->Sections, "URL_ID", UdmStrHash32(UdmSQLValue(&SQLres,i,1)));
3314 last_mod_time=atol(UdmSQLValue(&SQLres,i,2));
3315 if (strftime(buf, sizeof(buf), format, localtime(&last_mod_time)) == 0)
3316 {
3317 UdmTime_t2HttpStr(last_mod_time, buf, sizeof(buf));
3318 }
3319 UdmVarListAddStr(&D->Sections,"Last-Modified",buf);
3320 UdmVarListAddInt(&D->Sections,"Content-Length",atoi(UdmSQLValue(&SQLres,i,3)));
3321 UdmVarListAddInt(&D->Sections,"crc32",scrc32);
3322 UdmVarListAddInt(&D->Sections, "Origin-ID", origin_id);
3323 }
3324 Res->num_rows += nadd;
3325 UdmSQLFree(&SQLres);
3326 return UDM_OK;
3327 }
3328
3329
3330 static udm_rc_t
UdmQueryClones(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)3331 UdmQueryClones(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
3332 {
3333 size_t i;
3334 udm_rc_t rc= UDM_OK;
3335 for (i= 0; i < UdmResultNumRows(&Query->Res) && rc == UDM_OK; i++)
3336 {
3337 UDM_RESULT Cl;
3338 UdmResultInit(&Cl);
3339 if (UDM_OK == (rc= UdmCloneListSQL(A, db, &Query->Res.Doc[i], &Cl)))
3340 {
3341 size_t c;
3342 UdmVarListReplaceInt(&Query->Res.Doc[i].Sections, "nclones",
3343 UdmResultNumRows(&Cl));
3344 for (c= 0; c < UdmResultNumRows(&Cl); c++)
3345 {
3346 char name[32];
3347 sprintf(name, "Clone%d", (int) c);
3348 UdmVarListReplaceLst(&Query->Res.Doc[i].Sections,
3349 &Cl.Doc[c].Sections, name, "*");
3350 }
3351 }
3352 UdmResultFree(&Cl);
3353 }
3354 return rc;
3355 }
3356
3357
3358 /************** Get Target to be indexed ***********************/
3359
3360
3361
3362 static void
UdmSQLTopInit(UDM_SQL_TOP_CLAUSE * Top)3363 UdmSQLTopInit(UDM_SQL_TOP_CLAUSE *Top)
3364 {
3365 Top->rownum[0]= 0;
3366 Top->limit[0]= 0;
3367 Top->top[0]= 0;
3368 }
3369
3370
3371
3372 void
UdmSQLTopClause(UDM_SQL * db,size_t top_num,UDM_SQL_TOP_CLAUSE * Top)3373 UdmSQLTopClause(UDM_SQL *db, size_t top_num, UDM_SQL_TOP_CLAUSE *Top)
3374 {
3375 UdmSQLTopInit(Top);
3376 if (db->flags & UDM_SQL_HAVE_LIMIT)
3377 {
3378 udm_snprintf(Top->limit, UDM_SQL_TOP_BUF_SIZE, " LIMIT %d", (int) top_num);
3379 }
3380 else if (db->flags & UDM_SQL_HAVE_TOP)
3381 {
3382 udm_snprintf(Top->top, UDM_SQL_TOP_BUF_SIZE, " TOP %d ", (int) top_num);
3383 }
3384 else if (db->flags & UDM_SQL_HAVE_FIRST_SKIP)
3385 {
3386 udm_snprintf(Top->top, UDM_SQL_TOP_BUF_SIZE, " FIRST %d ", (int) top_num);
3387 }
3388 else if (db->DBType == UDM_DB_ORACLE8)
3389 {
3390 #if HAVE_ORACLE8
3391 if(db->DBDriver == UDM_DBAPI_ORACLE8)
3392 {
3393 udm_snprintf(Top->rownum, UDM_SQL_TOP_BUF_SIZE,
3394 " AND ROWNUM<=%d", (int) top_num);
3395 }
3396 #endif
3397 if(!Top->rownum[0])
3398 udm_snprintf(Top->rownum, UDM_SQL_TOP_BUF_SIZE,
3399 " AND ROWNUM<=%d", (int) top_num);
3400 }
3401 }
3402
3403
/*
  Column list selected by UdmTargetsSQL() for every indexing target.
  The result is read by position:
  0=url, 1=rec_id, 2=docsize, 3=status, 4=hops, 5=crc32,
  6=last_mod_time, 7=seed.
*/
static const char select_url_str[]=
"url.url,url.rec_id,docsize,status,hops,crc32,last_mod_time,seed";

/*
  The same list followed by next_index_time, bad_since_time and
  server_id (positions 8, 9 and 10).
  NOTE(review): presumably the list behind the result that
  UdmTargetSQLResDump() reads, which uses columns 0..10 - the query
  itself is not in this part of the file, confirm.
*/
static const char select_url_str_for_dump[]=
"url.url,url.rec_id,docsize,status,hops,crc32,last_mod_time,seed,"
"next_index_time,bad_since_time,server_id"
;
3411
3412 /*
3413 The columns that are dumped:
3414 - status
3415 - docsize
3416 - last_mod_time
3417 - hops
3418 - crc32
3419 - seed
3420 - url
3421 - next_index_time
3422 - bad_since_time
3423 - site_id
3424 - server_id
3425
3426 The columns that don't need to be dumped for restore purposes:
3427 - rec_id
3428 - shows
3429 - sop_rank
3430 - referrer
3431 */
3432 static udm_rc_t
UdmTargetSQLResDump(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,UDM_SQLRES * SQLRes,size_t rownum,UDM_DSTR * eurl)3433 UdmTargetSQLResDump(UDM_AGENT *Indexer, UDM_DB *db,
3434 UDM_DOCUMENT *Doc,
3435 UDM_SQLRES *SQLRes, size_t rownum,
3436 UDM_DSTR *eurl)
3437 {
3438 int seed= UDM_ATOI(UdmSQLValue(SQLRes, rownum, 7));
3439 UdmVarListAddInt(&Doc->Sections, "ID", UDM_ATOI(UdmSQLValue(SQLRes,rownum,1)));
3440 printf("--seed=%d\n", seed);
3441 printf("INSERT INTO url ");
3442 printf("(url,docsize,status,hops,crc32,last_mod_time,seed,next_index_time,bad_since_time,server_id) VALUES (");
3443 if (UDM_OK != UdmSQLEscDSTR(UdmSQL(db), eurl,
3444 UdmSQLValue(SQLRes, rownum, 0),
3445 UdmSQLLen(SQLRes, rownum, 0)))
3446 return UDM_ERROR;
3447 printf("'%s',", UdmDSTRPtr(eurl));
3448 printf("%s,", UdmSQLValue(SQLRes, rownum, 2));
3449 printf("%s,", UdmSQLValue(SQLRes, rownum, 3));
3450 printf("%s,", UdmSQLValue(SQLRes, rownum, 4));
3451 printf("%s,", UdmSQLValue(SQLRes, rownum, 5));
3452 printf("%s,", UdmSQLValue(SQLRes, rownum, 6));
3453 printf("%s,", UdmSQLValue(SQLRes, rownum, 7));
3454 printf("%s,", UdmSQLValue(SQLRes, rownum, 8));
3455 printf("%s,", UdmSQLValue(SQLRes, rownum, 9));
3456 printf("%s", UdmSQLValue(SQLRes, rownum, 10));
3457 printf(");\n");
3458 return UDM_OK;
3459 }
3460
3461
3462 static void
UdmTargetSQLResToDoc(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_SQLRES * SQLRes,size_t i)3463 UdmTargetSQLResToDoc(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,
3464 UDM_SQLRES *SQLRes, size_t i)
3465 {
3466 char buf[UDM_MAXTIMESTRLEN]= "";
3467 time_t last_mod_time;
3468 UdmVarListAddStr(&Doc->Sections,"URL",UdmSQLValue(SQLRes,i,0));
3469 UdmVarListAddInt(&Doc->Sections, "ID", UDM_ATOI(UdmSQLValue(SQLRes,i,1)));
3470 UdmVarListAddInt(&Doc->Sections,"Content-Length",atoi(UdmSQLValue(SQLRes,i,2)));
3471 UdmVarListAddInt(&Doc->Sections,"Status",atoi(UdmSQLValue(SQLRes,i,3)));
3472 UdmVarListAddInt(&Doc->Sections,"Hops",atoi(UdmSQLValue(SQLRes,i,4)));
3473 UdmVarListAddInt(&Doc->Sections,"crc32",atoi(UdmSQLValue(SQLRes,i,5)));
3474 last_mod_time= (time_t) atol(UdmSQLValue(SQLRes,i,6));
3475 UdmTime_t2HttpStr(last_mod_time, buf, sizeof(buf));
3476 if (last_mod_time != 0 && strlen(buf) > 0)
3477 {
3478 UdmVarListReplaceStr(&Doc->Sections, "Last-Modified", buf);
3479 }
3480 }
3481
3482 /*
3483 Setting extending sections - only needed for targets
3484 */
3485 static void
UdmTargetSQLResToDoc_Extra(UDM_AGENT * A,UDM_DOCUMENT * Doc,UDM_SQLRES * SQLRes,size_t i)3486 UdmTargetSQLResToDoc_Extra(UDM_AGENT *A, UDM_DOCUMENT *Doc,
3487 UDM_SQLRES *SQLRes, size_t i)
3488 {
3489 UdmVarListReplaceInt(&Doc->Sections, "URL_ID", UdmStrHash32(UdmSQLValue(SQLRes,i,0)));
3490 UdmVarListAddInt(&Doc->Sections,"PrevStatus",atoi(UdmSQLValue(SQLRes,i,3)));
3491 }
3492
3493
3494 static udm_rc_t
UdmTargetsSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3495 UdmTargetsSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3496 {
3497 char sortstr[128]= "";
3498 char updstr[64]="";
3499 char tblhint[64]="";
3500 UDM_SQL_TOP_CLAUSE Top;
3501 size_t i = 0, j, start, nrows, qbuflen;
3502 UDM_SQLRES SQLRes;
3503 char smallbuf[128];
3504 udm_rc_t rc= UDM_OK;
3505 const char *where;
3506 char *qbuf=NULL;
3507 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3508 udm_bool_t skip_lock= UdmVarListFindBool(&Indexer->Conf->Vars, "URLSelectSkipLock", UDM_FALSE);
3509 size_t url_num= UdmVarListFindInt(&Indexer->Conf->Vars, "URLSelectCacheSize", URL_SELECT_CACHE);
3510
3511 UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_TARGETS);
3512 UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
3513
3514 if (UdmSQLDB(db)->last_notargets_time == time(0))
3515 {
3516 /*
3517 Do not query the database again if it was already queried
3518 in the same second and returned 0 targets.
3519 */
3520 return UDM_OK;
3521 }
3522
3523 if (Indexer->Conf->url_number < (int) url_num)
3524 url_num= Indexer->Conf->url_number;
3525 if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
3526 return rc;
3527 qbuflen= 1024 + 4 * strlen(where);
3528
3529 if ((qbuf = (char*)UdmMalloc(qbuflen + 2)) == NULL)
3530 {
3531 UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
3532 return UDM_ERROR;
3533 }
3534
3535 if ((Indexer->flags & (UDM_FLAG_SORT_HOPS | UDM_FLAG_SORT_EXPIRED)) ||
3536 !(Indexer->flags & UDM_FLAG_DONTSORT_SEED))
3537 {
3538 sprintf(sortstr, " ORDER BY %s%s%s",
3539 (Indexer->flags & UDM_FLAG_SORT_HOPS) ? "hops" : "",
3540 (Indexer->flags & UDM_FLAG_DONTSORT_SEED) ? "" : ((Indexer->flags & UDM_FLAG_SORT_HOPS) ? ",seed" : "seed"),
3541 (Indexer->flags & UDM_FLAG_SORT_EXPIRED) ?
3542 ( ((Indexer->flags & UDM_FLAG_SORT_HOPS) || !(Indexer->flags & UDM_FLAG_DONTSORT_SEED) ) ?
3543 ",next_index_time" : "next_index_time") : "");
3544 }
3545
3546 UdmDBSQLTopClause(Indexer, db, url_num, &Top);
3547
3548 if(1)
3549 {
3550 switch(UdmSQLDBType(db))
3551 {
3552 case UDM_DB_MYSQL:
3553 udm_snprintf(qbuf, qbuflen,
3554 "INSERT INTO udm_url_tmp "
3555 "SELECT url.rec_id FROM url%s "
3556 "WHERE next_index_time<=%d %s%s%s%s",
3557 Query->from,
3558 (int)time(NULL), where[0] ? "AND " : "", where,
3559 sortstr, Top.limit);
3560 if (UDM_OK != (rc= UdmDBSQLDropTableIfExists(Indexer, db, "udm_url_tmp")) ||
3561 UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, "CREATE TEMPORARY TABLE udm_url_tmp (rec_id int not null) ENGINE=MyISAM")) ||
3562 (!skip_lock &&
3563 UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, "LOCK TABLES udm_url_tmp WRITE, url WRITE, urlinfo AS it WRITE, urlinfo AS il WRITE, server AS s WRITE"))) ||
3564 UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3565 return rc;
3566 break;
3567 case UDM_DB_PGSQL:
3568 rc= UdmDBSQLQuery(Indexer, db, NULL,"BEGIN WORK");
3569 sprintf(updstr, " FOR UPDATE ");
3570 /* rc=UdmDBSQLQuery(Indexer, db,NULL,"LOCK url");*/
3571 break;
3572 case UDM_DB_ORACLE8:
3573 sprintf(updstr, " FOR UPDATE ");
3574 break;
3575 case UDM_DB_MSSQL:
3576 strcpy(tblhint, " (TABLOCKX)");
3577 rc= UdmDBSQLBegin(Indexer, db);
3578 break;
3579 case UDM_DB_SAPDB:
3580 sprintf(updstr, " WITH LOCK ");
3581 break;
3582 default:
3583 break;
3584 }
3585 if (rc != UDM_OK)
3586 goto ex;
3587 }
3588
3589 UdmSQL(db)->res_limit= url_num;
3590 if (UdmSQLDBType(db) == UDM_DB_MYSQL)
3591 udm_snprintf(qbuf, qbuflen, "SELECT %s FROM url, udm_url_tmp "
3592 "WHERE url.rec_id=udm_url_tmp.rec_id",
3593 select_url_str);
3594 else
3595 udm_snprintf(qbuf, qbuflen, "SELECT %s%s "
3596 "FROM url%s%s "
3597 "WHERE next_index_time<=%d %s%s%s"
3598 "%s%s%s",
3599 Top.top, select_url_str, tblhint, Query->from,
3600 (int)time(NULL), where[0] ? "AND " : "", where, Top.rownum,
3601 sortstr, updstr, Top.limit);
3602
3603 if(UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf)))
3604 goto commit;
3605
3606 if(!(nrows = UdmSQLNumRows(&SQLRes)))
3607 {
3608 UdmSQLFree(&SQLRes);
3609 UdmSQLDB(db)->last_notargets_time= time(0);
3610 goto commit;
3611 }
3612
3613 start = Indexer->Conf->Targets.num_rows;
3614 Indexer->Conf->Targets.num_rows += nrows;
3615
3616 Indexer->Conf->Targets.Doc =
3617 (UDM_DOCUMENT*)UdmRealloc(Indexer->Conf->Targets.Doc, sizeof(UDM_DOCUMENT)*(Indexer->Conf->Targets.num_rows + 1));
3618 if (Indexer->Conf->Targets.Doc == NULL)
3619 {
3620 UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory at realloc %s[%d]", __FILE__, __LINE__);
3621 rc= UDM_ERROR;
3622 goto commit;
3623 }
3624
3625 for(i = 0; i < nrows; i++)
3626 {
3627 UDM_DOCUMENT *Doc = &Indexer->Conf->Targets.Doc[start + i];
3628 UdmDocInit(Doc);
3629 UdmTargetSQLResToDoc(Indexer, Doc, &SQLRes, i);
3630 UdmTargetSQLResToDoc_Extra(Indexer, Doc, &SQLRes, i);
3631 }
3632 UdmSQLFree(&SQLRes);
3633
3634
3635 if (UdmSQLDBHaveIn(db))
3636 {
3637 char *urlin=NULL;
3638
3639 if ( (qbuf = (char*)UdmRealloc(qbuf, qbuflen = qbuflen + 35 * URL_SELECT_CACHE)) == NULL)
3640 {
3641 UDM_FREE(qbuf);
3642 UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
3643 rc= UDM_ERROR;
3644 goto commit;
3645 }
3646
3647 if ( (urlin = (char*)UdmMalloc(35 * URL_SELECT_CACHE)) == NULL)
3648 {
3649 UDM_FREE(qbuf);
3650 UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
3651 rc = UDM_ERROR;
3652 goto commit;
3653 }
3654 urlin[0]=0;
3655
3656 for(i = 0; i < nrows; i+= URL_SELECT_CACHE)
3657 {
3658
3659 urlin[0] = 0;
3660
3661 for (j = 0; (j < URL_SELECT_CACHE) && (i + j < nrows) ; j++)
3662 {
3663
3664 UDM_DOCUMENT *Doc = &Indexer->Conf->Targets.Doc[start + i + j];
3665 urlid_t url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3666
3667 if(urlin[0])strcat(urlin,",");
3668 sprintf(urlin+strlen(urlin), "%s%i%s", qu, url_id, qu);
3669 }
3670 udm_snprintf(qbuf, qbuflen, "UPDATE url SET next_index_time=%d WHERE rec_id in (%s)",
3671 (int)(time(NULL) + URL_LOCK_TIME), urlin);
3672 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3673 goto commit;
3674 }
3675 UDM_FREE(urlin);
3676 }
3677 else
3678 {
3679 for(i = 0; i < nrows; i++)
3680 {
3681 UDM_DOCUMENT *Doc = &Indexer->Conf->Targets.Doc[start + i];
3682 urlid_t url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3683
3684 udm_snprintf(smallbuf, 128, "UPDATE url SET next_index_time=%d WHERE rec_id=%i",
3685 (int)(time(NULL) + URL_LOCK_TIME), url_id);
3686 if(UDM_OK!=(rc=UdmDBSQLQuery(Indexer, db, NULL, smallbuf)))
3687 goto commit;
3688 }
3689 }
3690
3691
3692 commit:
3693
3694 if (rc != UDM_OK)
3695 {
3696 UdmLog(Indexer, UDM_LOG_ERROR, "UdmTargetsSQL: DB error: %s", UdmDBSQLError(db));
3697 }
3698 if(1)
3699 {
3700 switch(UdmSQLDBType(db))
3701 {
3702 case UDM_DB_MYSQL:
3703 if (!skip_lock)
3704 rc= UdmDBSQLQuery(Indexer, db, NULL, "UNLOCK TABLES");
3705 break;
3706 case UDM_DB_PGSQL:
3707 rc=UdmDBSQLQuery(Indexer, db, NULL, "END WORK");
3708 break;
3709 case UDM_DB_MSSQL:
3710 rc= UdmDBSQLCommit(Indexer, db);
3711 default:
3712 break;
3713 }
3714 }
3715 ex:
3716 UDM_FREE(qbuf);
3717 return rc;
3718 }
3719
3720
3721
3722 /******************* Truncate database ********************/
3723
3724 static udm_rc_t
UdmTruncateURL(UDM_AGENT * Indexer,UDM_DB * db)3725 UdmTruncateURL(UDM_AGENT *Indexer,UDM_DB *db)
3726 {
3727 udm_rc_t rc;
3728
3729 rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "url");
3730 if(rc!=UDM_OK)return rc;
3731
3732 rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "redirect");
3733 if(rc != UDM_OK) return rc;
3734
3735 rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "links");
3736 if(rc != UDM_OK) return rc;
3737
3738 rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "cachedcopy");
3739 if(rc != UDM_OK) return rc;
3740
3741 rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "urlinfo");
3742 return rc;
3743 }
3744
3745
3746 static udm_rc_t
UdmTruncateDict(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3747 UdmTruncateDict(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3748 {
3749 UDM_ASSERT(UdmSQLDBModeHandler(db)->QueryAction != NULL);
3750 return UdmSQLDBModeHandler(db)->QueryAction(Indexer, db, Query, UDM_QUERYCMD_CLEAR);
3751 }
3752
3753
3754 static udm_rc_t
UdmTruncateDB(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3755 UdmTruncateDB(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3756 {
3757 udm_rc_t rc;
3758 if((UDM_OK != (rc= UdmTruncateDict(Indexer, db, Query))) ||
3759 (UDM_OK != (rc= UdmTruncateURL(Indexer, db))))
3760 return rc;
3761 return UDM_OK;
3762 }
3763
3764
3765 /******************* Clear database with condition ********/
3766
3767 static udm_rc_t
UdmDeleteWordsAndLinks(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3768 UdmDeleteWordsAndLinks(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc, UDM_DB *db)
3769 {
3770 udm_rc_t rc;
3771 if (UDM_OK != (rc= UdmDeleteWordFromURL(Indexer,Doc,db)))
3772 return rc;
3773
3774 if (Doc->Spider.collect_links_destination)
3775 {
3776 int status= UdmVarListFindInt(&Doc->Sections, "Status", 0);
3777 /* In case of redirect, we store links from the "Location" header */
3778 if (UDM_OK != (rc= UdmDeleteLinks(Indexer, db, Doc)))
3779 return rc;
3780
3781 if (UDM_OK != (rc= (status >= 300 && status <= 303) ?
3782 UdmStoreRedirects(Indexer, db, Doc) :
3783 UdmDeleteRedirects(Indexer, db, Doc)))
3784 return rc;
3785 }
3786
3787 /* Set status, bad_since_time, etc */
3788 if (UDM_OK != (rc= UdmUpdateUrl(Indexer, Doc, db)))
3789 return rc;
3790
3791 return rc;
3792 }
3793
3794
3795 static udm_rc_t
UdmDeleteWordFromURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3796 UdmDeleteWordFromURL(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc, UDM_DB *db)
3797 {
3798 urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
3799
3800 if (!UdmVarListFindInt(&Doc->Sections, "PrevStatus", 0))
3801 return UDM_OK;
3802
3803 UDM_ASSERT(UdmSQLDBModeHandler(db)->DeleteWordsFromURL != NULL);
3804 return UdmSQLDBModeHandler(db)->DeleteWordsFromURL(Indexer, db, url_id);
3805 }
3806
3807
3808 static udm_rc_t
UdmDeleteBadHrefs(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db,urlid_t url_id)3809 UdmDeleteBadHrefs(UDM_AGENT *Indexer,
3810 UDM_DOCUMENT *Doc,
3811 UDM_DB *db,
3812 urlid_t url_id)
3813 {
3814 UDM_DOCUMENT rDoc;
3815 UDM_SQLRES SQLRes;
3816 char q[256];
3817 size_t i;
3818 size_t nrows;
3819 udm_rc_t rc= UDM_OK;
3820 int hold_period= UdmVarListFindInt(&Doc->Sections,"HoldBadHrefs",0);
3821 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3822
3823 if (hold_period <= 0)
3824 return UDM_OK;
3825
3826 udm_snprintf(q, sizeof(q), "SELECT rec_id FROM url WHERE status > 300 AND status<>304 AND referrer=%s%i%s AND bad_since_time<%d",
3827 qu, url_id, qu, (int)time(NULL) - hold_period);
3828 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, q)))
3829 return rc;
3830
3831 nrows = UdmSQLNumRows(&SQLRes);
3832
3833 UdmDocInit(&rDoc);
3834 for(i = 0; i < nrows ; i++)
3835 {
3836 UdmVarListReplaceStr(&rDoc.Sections,"ID", UdmSQLValue(&SQLRes,i,0));
3837 if(UDM_OK!=(rc=UdmDeleteURL(Indexer, &rDoc, db)))
3838 break;
3839 }
3840 UdmDocFree(&rDoc);
3841 UdmSQLFree(&SQLRes);
3842 return rc;
3843 }
3844
3845
3846 static udm_rc_t
UdmDeleteURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3847 UdmDeleteURL(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
3848 {
3849 char qbuf[128];
3850 udm_rc_t rc;
3851 urlid_t url_id =UdmVarListFindInt(&Doc->Sections, "ID", 0);
3852 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3853
3854 if(UDM_OK!=(rc=UdmDeleteWordFromURL(Indexer,Doc,db)))return(rc);
3855
3856 sprintf(qbuf,"DELETE FROM url WHERE rec_id=%s%i%s", qu, url_id, qu);
3857 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3858 return rc;
3859
3860 sprintf(qbuf,"DELETE FROM urlinfo WHERE url_id=%s%i%s", qu, url_id, qu);
3861 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3862 return rc;
3863
3864 sprintf(qbuf,"DELETE FROM cachedcopy WHERE url_id=%s%i%s", qu, url_id, qu);
3865 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3866 return rc;
3867
3868 sprintf(qbuf,"DELETE FROM redirect WHERE url_id=%s%i%s", qu, url_id, qu);
3869 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3870 return rc;
3871
3872 sprintf(qbuf,"DELETE FROM links WHERE url_id=%s%i%s", qu, url_id, qu);
3873 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3874 return rc;
3875
3876 /* remove all old broken hrefs from this document to avoid broken link collecting */
3877 if (UDM_OK != (rc= UdmDeleteBadHrefs(Indexer, Doc, db, url_id)))
3878 return rc;
3879
3880 sprintf(qbuf,"UPDATE url SET referrer=%s0%s WHERE referrer=%s%i%s", qu, qu, qu, url_id, qu);
3881 return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
3882 }
3883
3884
3885 static udm_rc_t
UdmClearDBUsingIN(UDM_AGENT * Indexer,UDM_DB * db,UDM_URLID_LIST * list)3886 UdmClearDBUsingIN(UDM_AGENT *Indexer, UDM_DB *db, UDM_URLID_LIST *list)
3887 {
3888 UDM_DSTR qbuf, urlin;
3889 udm_rc_t rc= UDM_OK; /* if list if empty */
3890 size_t part;
3891 size_t url_num = UdmVarListFindInt(&Indexer->Conf->Vars, "URLSelectCacheSize", URL_DELETE_CACHE);
3892
3893 UdmDSTRInit(&qbuf, 4096);
3894 UdmDSTRInit(&urlin, 4096);
3895
3896 for (part= 0; part < list->nurls; part+= url_num)
3897 {
3898 size_t offs;
3899 urlid_t *item= &list->urls[part];
3900 UdmDSTRReset(&urlin);
3901 for(offs= 0; (offs < url_num) && ((part + offs) < list->nurls); offs++)
3902 {
3903 if(offs) UdmDSTRAppend(&urlin,",", 1);
3904 UdmDSTRAppendf(&urlin, "%d", item[offs]);
3905 }
3906
3907 if (UDM_OK != (rc= UdmDBSQLBegin(Indexer, db)))
3908 goto ret;
3909
3910 switch (UdmSQLDBMode(db))
3911 {
3912 case UDM_SQLDBMODE_BLOB:
3913 break;
3914
3915 case UDM_SQLDBMODE_MULTI:
3916 {
3917 int dictnum;
3918 for (dictnum= 0; dictnum <= MULTI_DICTS; dictnum++)
3919 {
3920 UdmDSTRReset(&qbuf);
3921 UdmDSTRAppendf(&qbuf,"DELETE FROM dict%02X WHERE url_id in (%s)",
3922 dictnum, UdmDSTRPtr(&urlin));
3923 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3924 goto ret;
3925 }
3926 }
3927 break;
3928
3929 case UDM_SQLDBMODE_SINGLE:
3930 UdmDSTRReset(&qbuf);
3931 UdmDSTRAppendf(&qbuf, "DELETE FROM dict WHERE url_id in (%s)",
3932 UdmDSTRPtr(&urlin));
3933 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3934 goto ret;
3935 break;
3936 case UDM_SQLDBMODE_RAWBLOB:
3937 udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db), "ClearDB is not supported by this DBMode");
3938 rc= UDM_ERROR;
3939 }
3940
3941 UdmDSTRReset(&qbuf);
3942 UdmDSTRAppendf(&qbuf, "DELETE FROM url WHERE rec_id in (%s)",UdmDSTRPtr(&urlin));
3943 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3944 goto ret;
3945
3946 UdmDSTRReset(&qbuf);
3947 UdmDSTRAppendf(&qbuf,"DELETE FROM urlinfo WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3948 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3949 goto ret;
3950
3951 UdmDSTRReset(&qbuf);
3952 UdmDSTRAppendf(&qbuf,"DELETE FROM cachedcopy WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3953 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3954 goto ret;
3955
3956 UdmDSTRReset(&qbuf);
3957 UdmDSTRAppendf(&qbuf,"DELETE FROM links WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3958 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3959 goto ret;
3960
3961 UdmDSTRReset(&qbuf);
3962 UdmDSTRAppendf(&qbuf,"DELETE FROM redirect WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3963 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3964 goto ret;
3965
3966 if (UDM_OK != (rc= UdmDBSQLCommit(Indexer, db)))
3967 goto ret;
3968 }
3969
3970 ret:
3971 UdmDSTRFree(&qbuf);
3972 UdmDSTRFree(&urlin);
3973 return rc;
3974 }
3975
3976
3977 static udm_rc_t
UdmClearDBUsingLoop(UDM_AGENT * Indexer,UDM_DB * db,UDM_URLID_LIST * list)3978 UdmClearDBUsingLoop(UDM_AGENT *Indexer, UDM_DB *db, UDM_URLID_LIST *list)
3979 {
3980 udm_rc_t rc= UDM_OK;
3981 size_t i;
3982 UDM_DOCUMENT Doc;
3983 bzero((void*)&Doc, sizeof(Doc));
3984
3985 for(i=0; i < list->nurls; i++)
3986 {
3987 UdmVarListReplaceInt(&Doc.Sections, "ID", list->urls[i]);
3988 if(UDM_OK != (rc= UdmDeleteURL(Indexer, &Doc, db)))
3989 break;
3990 }
3991 UdmDocFree(&Doc);
3992 return rc;
3993 }
3994
3995
3996 static udm_rc_t
UdmClearDBSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3997 UdmClearDBSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3998 {
3999 udm_rc_t rc;
4000 const char *where, *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
4001 char ClearDBHook[128];
4002
4003 rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where);
4004 udm_snprintf(ClearDBHook, sizeof(ClearDBHook), "%s",
4005 UdmVarListFindStr(&Indexer->Conf->Vars, "SQLClearDBHook", ""));
4006
4007 if (rc != UDM_OK ||
4008 (ClearDBHook[0] && (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, ClearDBHook)))))
4009 return rc;
4010
4011 if(!where[0])
4012 {
4013 return UdmTruncateDB(Indexer, db, Query);
4014 }
4015 else
4016 {
4017 UDM_URLID_LIST urllist;
4018 UDM_DSTR qbuf;
4019 UdmDSTRInit(&qbuf, 4096);
4020
4021 bzero((void*) &urllist, sizeof(urllist));
4022 UdmDSTRAppendf(&qbuf,"SELECT url.rec_id, url.url FROM url%s WHERE url.rec_id<>%s0%s AND %s",
4023 Query->from, qu, qu, where);
4024
4025 if (UDM_OK != (rc= UdmLoadSlowLimitWithSort(Indexer, db, &urllist, UdmDSTRPtr(&qbuf))))
4026 goto fin;
4027
4028 rc= UdmSQLDBHaveIn(db) ? UdmClearDBUsingIN(Indexer, db, &urllist) :
4029 UdmClearDBUsingLoop(Indexer, db, &urllist);
4030
4031 fin:
4032 UdmFree(urllist.urls);
4033 UdmDSTRFree(&qbuf);
4034 }
4035 return rc;
4036 }
4037
4038
4039 /******************** Hrefs ****************************/
4040
4041 static udm_bool_t
UdmStoreHrefsUseTnx(UDM_AGENT * A)4042 UdmStoreHrefsUseTnx(UDM_AGENT *A)
4043 {
4044 if (A->Conf->DBList.nitems > 1)
4045 return UDM_FALSE;
4046 if (UdmSQLDBType(&A->Conf->DBList.Item[0]) == UDM_DB_PGSQL &&
4047 UdmSQLDBVersion(&A->Conf->DBList.Item[0]) > 90100)
4048 return UDM_TRUE;
4049 return (UdmSQLDBFlags(&A->Conf->DBList.Item[0]) & UDM_SQL_HAVE_GOOD_COMMIT) ?
4050 UDM_TRUE : UDM_FALSE;
4051 }
4052
4053
4054 static udm_rc_t
UdmStoreHrefsLock(UDM_AGENT * A,UDM_DB * db)4055 UdmStoreHrefsLock(UDM_AGENT *A, UDM_DB *db)
4056 {
4057 if (UdmSQLDBType(db) == UDM_DB_MYSQL /*&&
4058 !UdmVarListFind(&Doc->Sections, "SQLExportHref")*/)
4059 return UdmDBSQLQuery(A, db, NULL, "LOCK TABLE url WRITE");
4060 return UDM_OK;
4061 }
4062
4063
4064 static udm_rc_t
UdmStoreHrefsUnlock(UDM_AGENT * A,UDM_DB * db)4065 UdmStoreHrefsUnlock(UDM_AGENT *A, UDM_DB *db)
4066 {
4067 if (UdmSQLDBType(db) == UDM_DB_MYSQL)
4068 return UdmDBSQLQuery(A, db, NULL, "UNLOCK TABLES");
4069 return UDM_OK;
4070 }
4071
4072
4073 static udm_rc_t
UdmHrefStartBulk(UDM_AGENT * A,UDM_DB * db)4074 UdmHrefStartBulk(UDM_AGENT *A, UDM_DB *db)
4075 {
4076 return UdmStoreHrefsUseTnx(A) ? UdmDBSQLBegin(A, db) :
4077 UdmStoreHrefsLock(A, db);
4078 }
4079
4080
4081 static udm_rc_t
UdmHrefStopBulk(UDM_AGENT * A,UDM_DB * db)4082 UdmHrefStopBulk(UDM_AGENT *A, UDM_DB *db)
4083 {
4084 return UdmStoreHrefsUseTnx(A) ? UdmDBSQLCommit(A, db) :
4085 UdmStoreHrefsUnlock(A, db);
4086 }
4087
4088
4089 static void
UdmHrefToVarList(UDM_VARLIST * Vars,const UDM_HREF * H)4090 UdmHrefToVarList(UDM_VARLIST *Vars, const UDM_HREF *H)
4091 {
4092 UdmVarListAddLst(Vars, &H->HrefVars, NULL, "*");
4093 UdmVarListReplaceInt(Vars, "Referrer-ID", H->Param.referrer);
4094 UdmVarListReplaceUnsigned(Vars,"Hops", H->Param.hops);
4095 UdmVarListReplaceStr(Vars,"URL",H->url?H->url:"");
4096 UdmVarListReplaceInt(Vars, "URL_ID", UdmStrHash32(H->url ? H->url : ""));
4097 UdmVarListReplaceInt(Vars,"Server_id", H->Param.server_id);
4098 UdmVarListReplaceInt(Vars, "HTDB_URL_ID", H->Param.rec_id);
4099 }
4100
4101
4102 static udm_rc_t
UdmHrefAdd(UDM_AGENT * A,UDM_DB * db,UDM_HREF * H)4103 UdmHrefAdd(UDM_AGENT *A, UDM_DB *db, UDM_HREF *H)
4104 {
4105 udm_rc_t rc;
4106 UDM_DOCUMENT Doc;
4107 UdmDocInit(&Doc);
4108 UdmHrefToVarList(&Doc.Sections, H);
4109 rc= UdmAddURL(A, &Doc, db);
4110 UdmDocFree(&Doc);
4111 UdmSQLDB(db)->last_notargets_time= 0;
4112 return rc;
4113 }
4114
4115
4116 static udm_rc_t
UdmHrefExport(UDM_AGENT * A,UDM_DB * db,UDM_HREF * H)4117 UdmHrefExport(UDM_AGENT *A, UDM_DB *db, UDM_HREF *H)
4118 {
4119 udm_rc_t rc;
4120 UDM_DOCUMENT Doc;
4121 const char *sql_export= UdmVarListFindStr(&H->HrefVars, "SQLExportHref", NULL);
4122 if (!sql_export)
4123 return UDM_OK;
4124 UdmDocInit(&Doc);
4125 UdmHrefToVarList(&Doc.Sections, H);
4126 rc= UdmExportURL(A, db, &Doc, sql_export);
4127 UdmDocFree(&Doc);
4128 UdmSQLDB(db)->last_notargets_time= 0;
4129 return rc;
4130 }
4131
4132
4133 static udm_rc_t
UdmHrefActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_HREF * Href,udm_hrefcmd_t cmd)4134 UdmHrefActionSQL(UDM_AGENT *A, UDM_DB *db, UDM_HREF *Href, udm_hrefcmd_t cmd)
4135 {
4136 UDM_LOCK_CHECK_OWNER(A, UDM_LOCK_DB);
4137 switch (cmd)
4138 {
4139 case UDM_HREFCMD_START_BULK: return UdmHrefStartBulk(A, db);
4140 case UDM_HREFCMD_STOP_BULK: return UdmHrefStopBulk(A, db);
4141 case UDM_HREFCMD_ADD: return UdmHrefAdd(A, db, Href);
4142 case UDM_HREFCMD_EXPORT: return UdmHrefExport(A, db, Href);
4143 }
4144 return UDM_ERROR;
4145 }
4146
4147
4148 /******************* WWList *************************/
4149 static udm_rc_t
UdmWWListExtend(UDM_AGENT * A,UDM_DB * db,UDM_WIDEWORDLIST * result,const UDM_WIDEWORD * uw,const char * sql)4150 UdmWWListExtend(UDM_AGENT *A, UDM_DB *db,
4151 UDM_WIDEWORDLIST *result,
4152 const UDM_WIDEWORD *uw,
4153 const char *sql)
4154 {
4155 char *word= uw->Word.str;
4156 char buf[1024];
4157 size_t i, nrows;
4158 UDM_SQLRES SQLRes;
4159 UDM_WIDEWORD_PARAM Param;
4160 UdmBuildParamStr(buf, sizeof(buf), sql, &word, 1);
4161 if (UDM_OK != UdmDBSQLQuery(A, db, &SQLRes, buf))
4162 return UDM_ERROR;
4163 nrows= UdmSQLNumRows(&SQLRes);
4164 UdmWideWordParamInit(&Param);
4165 UdmWideWordParamCopySynonym(&Param, &uw->Param);
4166 for (i= 0; i < nrows; i++)
4167 {
4168 UDM_CONST_STR cstr;
4169 cstr.length= UdmSQLLen(&SQLRes, i, 0);
4170 cstr.str= UdmSQLValue(&SQLRes, i, 0);
4171 UdmWideWordListAddLikeConstStr(result, &Param, cstr);
4172 }
4173 UdmSQLFree(&SQLRes);
4174 return UDM_OK;
4175 }
4176
4177
4178 static udm_rc_t
UdmQueryWordForms(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)4179 UdmQueryWordForms(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
4180 {
4181 UDM_WIDEWORDLIST Tmp;
4182 udm_rc_t rc= UDM_OK;
4183 size_t i;
4184 const char *sql= UdmVarListFindStr(&A->Conf->Vars, "SQLWordForms", NULL);
4185 if (!sql)
4186 return UDM_OK;
4187 UdmWideWordListInit(&Tmp);
4188 for (i= 0; i < Query->Res.WWList.nwords; i++)
4189 {
4190 if (UDM_OK != (rc= UdmWWListExtend(A, db, &Tmp, &Query->Res.WWList.Word[i], sql)))
4191 break;
4192 }
4193 for (i= 0; i < Tmp.nwords; i++)
4194 {
4195 /* TODO34: change UdmWideWordListAddXXX() to return udm_rc_t */
4196 UdmWideWordListAdd(&Query->Res.WWList, &Tmp.Word[i]);
4197 }
4198 UdmWideWordListFree(&Tmp);
4199 return rc;
4200 }
4201
4202
4203 /******************* Search *************************/
4204 static int
cmp_score_urlid(UDM_URL_SCORE * s1,UDM_URL_SCORE * s2)4205 cmp_score_urlid(UDM_URL_SCORE *s1, UDM_URL_SCORE *s2)
4206 {
4207 if (s1->url_id > s2->url_id) return(1);
4208 if (s1->url_id < s2->url_id) return(-1);
4209 return 0;
4210 }
4211
4212
4213 static void
UdmScoreListToURLData(UDM_URLDATA * D,UDM_URL_SCORE * C,size_t num)4214 UdmScoreListToURLData(UDM_URLDATA *D, UDM_URL_SCORE *C, size_t num)
4215 {
4216 for ( ; num > 0; num--, D++, C++)
4217 {
4218 D->url_id= C->url_id;
4219 D->score= C->score;
4220 }
4221 }
4222
4223
#ifdef HAVE_DEBUG
/*
  Debug helper: dump the list to stderr, one "url_id:score" per line.
*/
static void
UdmURLScoreListPrint(UDM_URLSCORELIST *List)
{
  size_t n;
  for (n= 0; n != List->nitems; n++)
    fprintf(stderr, "%d:%d\n", List->Item[n].url_id, List->Item[n].score);
}
#endif
4236
4237
4238 static void
UdmDebugScoreAppendScoreAndRank(UDM_VARLIST * Vars,const UDM_QUERY_PARAM * query_param,const UDM_URLDATALIST * DataList)4239 UdmDebugScoreAppendScoreAndRank(UDM_VARLIST *Vars,
4240 const UDM_QUERY_PARAM *query_param,
4241 const UDM_URLDATALIST *DataList)
4242 {
4243 UDM_VAR *var;
4244 if (query_param->DebugURLId &&
4245 (var= UdmVarListFindVar(Vars, "DebugScore")))
4246 {
4247 size_t i;
4248 for (i= 0; i < DataList->nitems; i++)
4249 {
4250 if (DataList->Item[i].url_id == query_param->DebugURLId)
4251 {
4252 char tmp[256];
4253 size_t length= udm_snprintf(tmp, sizeof(tmp), " rank=%d", (int) i + 1);
4254 UdmVarAppendStrn(var, tmp, length);
4255 break;
4256 }
4257 }
4258 }
4259 }
4260
4261
4262 static udm_rc_t
UdmSortAndGroupByURL(UDM_AGENT * A,UDM_QUERY * Query,UDM_SEARCHSECTIONLIST * SectionList,UDM_DB * db)4263 UdmSortAndGroupByURL(UDM_AGENT *A,
4264 UDM_QUERY *Query,
4265 UDM_SEARCHSECTIONLIST *SectionList,
4266 UDM_DB *db)
4267 {
4268 UDM_QUERY_PARAM query_param;
4269 UDM_URLSCORELIST ScoreList;
4270 UDM_URLDATALIST DataList;
4271 udm_timer_t ticks=UdmStartTimer();
4272 const char *pattern= UdmVarListFindStr(&A->Conf->Vars, "s", "R");
4273 size_t nbytes;
4274 int flags= 0, flags2= 0;
4275 udm_rc_t rc= UDM_OK;
4276 const char *p;
4277 const char *su= UdmVarListFindStr(&A->Conf->Vars, "su", NULL);
4278 int group_by_site= UdmVarListFindBool(&A->Conf->Vars, "GroupBySite", UDM_FALSE)
4279 && UdmVarListFindStr(&A->Conf->Vars, "site", "")[0] == '\0' ?
4280 UDM_URLDATA_SITE : 0;
4281 int group_by_site_rank= !strcmp(UdmVarListFindStr(&A->Conf->Vars, "GroupBySite", "no"), "rank");
4282 size_t BdictThreshold= (size_t) UdmVarListFindInt(&A->Conf->Vars,
4283 "URLDataThreshold", 0);
4284 size_t MaxResults= (size_t) UdmVarListFindInt(UdmSQLDBVars(db), "MaxResults", 0);
4285 udm_bool_t use_qcache= UdmVarListFindBool(UdmSQLDBVars(db), "qcache", UDM_FALSE);
4286 size_t num_best_rows= Query->num_best_rows;
4287
4288 UdmQueryParamInit(&query_param, A->Conf, UdmSQLDBVars(db));
4289
4290 flags|= group_by_site ? UDM_URLDATA_SITE : 0;
4291 flags|= group_by_site_rank ? UDM_URLDATA_SITE_RANK : 0;
4292 flags|= query_param.DateFactor ? UDM_URLDATA_LM : 0;
4293 flags2|= query_param.PopularityFactor > 0 ? UDM_URLDATA_POP : 0;
4294
4295 for (p = pattern; *p; p++)
4296 {
4297 if (*p == 'U' || *p == 'u') flags|= UDM_URLDATA_URL;
4298 if (*p == 'D' || *p == 'd') flags|= UDM_URLDATA_LM;
4299 if (*p == 'S' || *p == 's') flags|= (su && su[0]) ? UDM_URLDATA_SU : 0;
4300 if (*p == 'P' || *p == 'p') flags2|= UDM_URLDATA_POP;
4301 }
4302
4303 ticks=UdmStartTimer();
4304 bzero((void*) &ScoreList, sizeof(ScoreList));
4305 UdmURLDataListInit(&DataList);
4306
4307 UdmLog(A,UDM_LOG_DEBUG, "Start GroupByURL %d sections", (int) SectionList->nsections);
4308 UdmGroupByURL2(A, db, Query, &query_param, SectionList, &ScoreList);
4309
4310 UdmLog(A, UDM_LOG_DEBUG, "%-30s%.2f (%d docs found)",
4311 "Stop GroupByURL", UdmStopTimer(&ticks), (int) ScoreList.nitems);
4312
4313 #ifdef HAVE_DEBUG
4314 if (UdmVarListFindBool(&A->Conf->Vars, "DebugGroupByURL", UDM_FALSE))
4315 {
4316 UdmURLScoreListPrint(&ScoreList);
4317 }
4318 #endif
4319
4320 UdmApplyCachedQueryLimit(A, &ScoreList, db);
4321 if (ScoreList.nitems == 0)
4322 goto ex;
4323
4324 if (UDM_OK != (rc= UdmUserScoreListLoadAndApplyToURLScoreList(A, &ScoreList,
4325 db,
4326 &query_param)))
4327 goto ex;
4328
4329 UdmLog(A,UDM_LOG_DEBUG,"Start load url data %d docs (%d best needed)",
4330 (int) ScoreList.nitems, (int) Query->num_best_rows);
4331 ticks=UdmStartTimer();
4332
4333 nbytes= UdmHashSize(ScoreList.nitems) * sizeof(UDM_URLDATA);
4334 DataList.Item = (UDM_URLDATA*)UdmMalloc(nbytes);
4335 bzero((void*) DataList.Item, nbytes);
4336 DataList.nitems= ScoreList.nitems;
4337
4338 /* Use full sort in case if DebugURLId is specified */
4339 if (query_param.DebugURLId)
4340 num_best_rows= ScoreList.nitems;
4341
4342 if (num_best_rows > ScoreList.nitems)
4343 num_best_rows= ScoreList.nitems;
4344
4345 /* Try fast sorting if sorting is on score */
4346 if (num_best_rows < 256 && !flags && !flags2 && !use_qcache)
4347 {
4348 udm_timer_t ticks1;
4349
4350 Query->stats.total_found= ScoreList.nitems;
4351 UdmLog(A, UDM_LOG_DEBUG, "Start SortByScore %d docs", (int) ScoreList.nitems);
4352 ticks1=UdmStartTimer();
4353 if (ScoreList.nitems > 1000)
4354 {
4355 UdmURLScoreListSortByScoreThenURLTop(&ScoreList, 1000);
4356 }
4357 else
4358 {
4359 UdmURLScoreListSortByScoreThenURL(&ScoreList);
4360 }
4361 UdmSort((void*) ScoreList.Item, num_best_rows,
4362 sizeof(UDM_URL_SCORE), (udm_qsort_cmp) cmp_score_urlid);
4363 UdmScoreListToURLData(DataList.Item, ScoreList.Item, num_best_rows);
4364 UdmLog(A,UDM_LOG_DEBUG,"%-30s%.2f", "Stop SortByScore:", UdmStopTimer(&ticks1));
4365 DataList.nitems= num_best_rows; /* Put only num_best_rows into DataList */
4366 goto date_factor;
4367 }
4368
4369 UdmScoreListToURLData(DataList.Item, ScoreList.Item, DataList.nitems);
4370
4371 /* Sort by a user defined section, if given */
4372 if (flags & UDM_URLDATA_SU)
4373 {
4374 size_t norder;
4375 udm_timer_t ticks1= UdmStartTimer();
4376 UdmLog(A, UDM_LOG_DEBUG, "Trying to load fast section order '%s'", su);
4377 rc= UdmFastOrderLoadAndApplyToURLDataList(A, db, &DataList, su, &norder);
4378 UdmLog(A, UDM_LOG_DEBUG, "Loading fast order '%s' done, %d docs found, %.2f sec",
4379 su, (int) norder, UdmStopTimer(&ticks1));
4380 if (norder)
4381 flags^= UDM_URLDATA_SU;
4382 }
4383
4384 if (flags)
4385 {
4386 if (!(UdmSQLDBMode(db) == UDM_SQLDBMODE_BLOB &&
4387 !(flags & UDM_URLDATA_URL) &&
4388 !(flags & UDM_URLDATA_SU) &&
4389 BdictThreshold < ScoreList.nitems) ||
4390 (UDM_NOTARGET == UdmLoadURLDataFromBdict(A, db, &DataList, flags)))
4391 rc= UdmLoadURLDataFromURL(A, db, &DataList, flags);
4392 }
4393
4394 /* Do this before GroupBySite, while DataList is sorted by url_id */
4395 UdmLoadURLDataFromBdict(A, db, &DataList, flags2);
4396
4397 if (group_by_site)
4398 UdmURLDataListGroupBySiteUsingSort(A, &DataList, db);
4399
4400 if (UDM_OK != UdmUserSiteScoreListLoadAndApplyToURLDataList(A, &DataList, db,
4401 &query_param))
4402 {
4403 rc= UDM_ERROR;
4404 goto ex;
4405 }
4406
4407 if (flags & UDM_URLDATA_SITE_RANK)
4408 {
4409 udm_timer_t ticks1= UdmStartTimer();
4410 UdmLog(A, UDM_LOG_DEBUG, "Start applying in-site-rank");
4411 UdmURLDataSortBySite(&DataList);
4412 UdmURLDataApplySiteRank(A, &DataList, 0);
4413 UdmLog(A, UDM_LOG_DEBUG, "Stop applying in-site-rank: %.2f sec", UdmStopTimer(&ticks1));
4414 }
4415
4416 Query->stats.total_found= DataList.nitems;
4417
4418 date_factor:
4419
4420 if (rc != UDM_OK)
4421 goto ex;
4422
4423 /* TODO: check whether limit by site works fine */
4424 if (!query_param.RelevancyFactor || query_param.DateFactor)
4425 UdmURLDataListApplyRelevancyFactors(A, &DataList, &query_param);
4426
4427 if (query_param.PopularityFactor > 0)
4428 UdmURLDataListApplyPopularity(A, &DataList, &query_param);
4429
4430 UdmLog(A,UDM_LOG_DEBUG,"%-30s%.2f", "Stop load url data:", UdmStopTimer(&ticks));
4431
4432 UdmLog(A, UDM_LOG_DEBUG, "Start SortByPattern %d docs", (int) DataList.nitems);
4433 ticks=UdmStartTimer();
4434 if (DataList.nitems)
4435 UdmURLDataSortByPattern(&DataList, pattern);
4436 UdmLog(A,UDM_LOG_DEBUG,"%-30s%.2f", "Stop SortByPattern:", UdmStopTimer(&ticks));
4437
4438 Query->URLData= DataList;
4439 bzero((void *) &DataList, sizeof(DataList));
4440 UdmDebugScoreAppendScoreAndRank(&A->Conf->Vars, &query_param, &Query->URLData);
4441
4442 if (MaxResults && MaxResults < Query->stats.total_found)
4443 {
4444 UdmLog(A, UDM_LOG_DEBUG, "Applying MaxResults=%d, total_found=%d\n",
4445 (int) MaxResults, (int) Query->stats.total_found);
4446 Query->stats.total_found= MaxResults;
4447 if (Query->URLData.nitems > MaxResults)
4448 {
4449 /* Free the part of URLData that will not be unused */
4450 UdmURLDataListFreeItems(&Query->URLData, MaxResults, Query->URLData.nitems);
4451 Query->URLData.nitems= MaxResults;
4452 }
4453 }
4454
4455 ex:
4456 UdmURLDataListFree(&DataList);
4457 UdmFree(ScoreList.Item);
4458 return rc;
4459 }
4460
4461
4462 static udm_rc_t /* WHERE limit */
LoadURL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,const char * where,UDM_URLID_LIST * buf)4463 LoadURL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4464 const char *where, UDM_URLID_LIST *buf)
4465 {
4466 udm_rc_t rc;
4467 UDM_SQLRES SQLRes;
4468 char qbuf[1024 * 4];
4469 size_t nrows;
4470 urlid_t *tmp;
4471 size_t i;
4472
4473 if (!*where)
4474 return UDM_OK;
4475
4476 /* TODO: reuse LoadSlowLimitWithSort() here */
4477 udm_snprintf(qbuf, sizeof(qbuf),
4478 "SELECT url.rec_id FROM url%s WHERE %s",
4479 Query->from, where);
4480 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, qbuf)))
4481 return rc;
4482
4483 if (!(nrows= UdmSQLNumRows(&SQLRes)))
4484 {
4485 buf->empty= 1;
4486 UdmSQLFree(&SQLRes);
4487 return(UDM_OK);
4488 }
4489
4490 tmp= (urlid_t*) UdmMalloc(sizeof(urlid_t) * nrows);
4491 buf->urls= (urlid_t*) UdmMalloc(sizeof(urlid_t) * nrows);
4492 if (!tmp || !buf->urls)
4493 {
4494 UDM_FREE(buf->urls);
4495 UDM_FREE(tmp);
4496 goto ex;
4497 }
4498
4499 for (i= 0; i < nrows; i++)
4500 {
4501 tmp[i]= (urlid_t) UDM_ATOI(UdmSQLValue(&SQLRes, i, 0));
4502 }
4503 UdmSort(tmp, nrows, sizeof(urlid_t), (udm_qsort_cmp)UdmCmpURLID);
4504
4505 /* Remove duplicates */
4506 for (i= 0; i < nrows; )
4507 {
4508 while (++i < nrows && tmp[i] == tmp[i - 1]);
4509 buf->urls[buf->nurls++] = tmp[i - 1];
4510 }
4511 UDM_FREE(tmp);
4512 if ((tmp= (urlid_t*) UdmRealloc(buf->urls, sizeof(urlid_t) * buf->nurls)))
4513 buf->urls = tmp;
4514
4515 ex:
4516 UdmSQLFree(&SQLRes);
4517 return UDM_OK;
4518 }
4519
4520
/*
  - MySQL: no cast needed
  - SQLite: CAST(word AS INTEGER)
  - PostgreSQL: CASE WHEN a~'^[0-9]*$' THEN a::integer ELSE 0 END
  - MSSQL: CAST(word AS INTEGER)
    There is a function ISNUMERIC(). However, it returns true
    for things like "0x0123d", and CAST does not work for such values.

  - Sybase: returns an error when the input is non-numeric
  - Oracle: returns an error ...
  - IBM DB2: returns an error ...
  - Firebird: returns an error ...
  - Mimer: returns an error ...
*/
4535 static void
UdmBuildNumericOperatorCondition(UDM_DB * db,char * cmparg,size_t maxlen,const char * op,int number)4536 UdmBuildNumericOperatorCondition(UDM_DB *db, char *cmparg, size_t maxlen,
4537 const char *op, int number)
4538 {
4539 switch (UdmSQLDBType(db))
4540 {
4541 case UDM_DB_MYSQL:
4542 udm_snprintf(cmparg, maxlen, "word%s%d", op, number);
4543 break;
4544 case UDM_DB_PGSQL:
4545 udm_snprintf(cmparg, maxlen, "(word~'^[0-9]*$' AND word::integer%s%d)", op, number);
4546 break;
4547 default:
4548 udm_snprintf(cmparg, maxlen, "(word>='0' AND word <='99999999999' AND CAST(word AS INTEGER)%s%d)", op, number);
4549 }
4550 }
4551
4552
4553 static udm_rc_t
UdmBuildCmpArgSQL(UDM_AGENT * A,UDM_DB * db,udm_match_mode_t match_mode,const char * word,char * cmparg,size_t maxlen)4554 UdmBuildCmpArgSQL(UDM_AGENT *A, UDM_DB *db, udm_match_mode_t match_mode,
4555 const char *word, char *cmparg, size_t maxlen)
4556 {
4557 const char *left= "", *right= "";
4558 size_t length= strlen(word);
4559 char escwrd[1000];
4560
4561 if (match_mode == UDM_MATCH_RANGE)
4562 {
4563 UDM_ASSERT(length > 6);
4564 if (*word == '[')
4565 left= ">=";
4566 else if (*word == '{')
4567 left= ">";
4568 if (word[length - 1] == ']')
4569 right= "<=";
4570 else if (word[length - 1] == '}')
4571 right= "<";
4572 word++;
4573 length-= 2;
4574 }
4575
4576 UdmDBSQLEscStr(A, db, escwrd, word, length); /* Search word */
4577 switch (match_mode)
4578 {
4579 case UDM_MATCH_BEGIN:
4580 udm_snprintf(cmparg, maxlen, "word LIKE '%s%%'", escwrd);
4581 break;
4582 case UDM_MATCH_END:
4583 udm_snprintf(cmparg, maxlen, "word LIKE '%%%s'", escwrd);
4584 break;
4585 case UDM_MATCH_SUBSTR:
4586 udm_snprintf(cmparg, maxlen, "word LIKE '%%%s%%'", escwrd);
4587 break;
4588 case UDM_MATCH_NUMERIC_LT:
4589 UdmBuildNumericOperatorCondition(db, cmparg, maxlen, "<", atoi(escwrd));
4590 break;
4591 case UDM_MATCH_NUMERIC_GT:
4592 UdmBuildNumericOperatorCondition(db, cmparg, maxlen, ">", atoi(escwrd));
4593 break;
4594 case UDM_MATCH_RANGE:
4595 {
4596 char *first= escwrd;
4597 char *second= strstr(first, " TO ");
4598 if (!second)
4599 {
4600 udm_snprintf(cmparg, maxlen, "word='<ERROR>'");
4601 return UDM_ERROR;
4602 }
4603 *second= '\0';
4604 second+= 4;
4605 udm_snprintf(cmparg, maxlen, "word%s'%s' AND word%s'%s'",
4606 left, first, right, second);
4607 }
4608 break;
4609 case UDM_MATCH_FULL:
4610 default:
4611 udm_snprintf(cmparg, maxlen, "word='%s'", escwrd);
4612 break;
4613 }
4614 return(UDM_OK);
4615 }
4616
4617
4618 static udm_rc_t
UdmFindOneWordSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)4619 UdmFindOneWordSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4620 UDM_FINDWORD_ARGS *args)
4621 {
4622 char cmparg[256];
4623 UdmBuildCmpArgSQL(A, db,
4624 args->Word.Param.match_mode, args->Word.Word.str,
4625 cmparg, sizeof(cmparg));
4626 args->cmparg= cmparg;
4627
4628 UDM_ASSERT(UdmSQLDBModeHandler(db)->FindWord != NULL);
4629 return UdmSQLDBModeHandler(db)->FindWord(A, db, Query, args);
4630 }
4631
4632
4633 static udm_rc_t
UdmFindMultiWordSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)4634 UdmFindMultiWordSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4635 UDM_FINDWORD_ARGS *args)
4636 {
4637 char *lt, *tmp_word, *tok;
4638 udm_rc_t rc= UDM_OK;
4639 UDM_SEARCHSECTIONLISTLIST OriginalSearchSectionListList;
4640 size_t orig_wordnum;
4641 size_t nparts= 0;
4642 const char *w;
4643 char *orig_word= args->Word.Word.str;
4644 char delim[]= " \r\t_-./";
4645
4646 /* Check if the word really multi-part */
4647 for (w= args->Word.Word.str; ; w++)
4648 {
4649 if (!*w)
4650 return UdmFindOneWordSQL(A, db, Query, args); /* No delimiters found */
4651
4652 if (strchr(delim, *w)) /* Delimiter found */
4653 break;
4654 }
4655
4656 if (!(tmp_word= UdmStrdup(args->Word.Word.str)))
4657 return(UDM_ERROR);
4658
4659 UdmLog(A, UDM_LOG_DEBUG,
4660 "Start searching for multiword '%s'", args->Word.Word.str);
4661 OriginalSearchSectionListList= args->SearchSectionListList;
4662 UdmSearchSectionListListInit(&args->SearchSectionListList);
4663 orig_wordnum= args->Word.Param.order;
4664 args->need_coords= 1; /* Force immediate coord unpacking */
4665
4666 for (tok= udm_strtok_r(tmp_word, delim, <) ; tok ;
4667 tok= udm_strtok_r(NULL, delim, <))
4668 {
4669 udm_timer_t ticks1= UdmStartTimer();
4670 args->Word.Word.str= tok;
4671 UdmLog(A, UDM_LOG_DEBUG,
4672 "Searching for subword '%s'", args->Word.Word.str);
4673 rc= UdmFindOneWordSQL(A, db, Query, args);
4674 UdmLog(A, UDM_LOG_DEBUG,
4675 "Stop searching for subword '%s' %d coords found: %.2f",
4676 args->Word.Word.str, (int) args->Word.Param.count, UdmStopTimer(&ticks1));
4677 /* If the next word wasn't found - no need to search for others. */
4678 if (rc != UDM_OK || !args->Word.Param.count)
4679 goto ret;
4680 nparts++;
4681 args->Word.Param.order++;
4682 }
4683
4684 /* All parts returned results. Check phrase */
4685 UdmMultiWordAdd(&args->SearchSectionListList, &Query->Res.WWList, &args->Word,
4686 &args->urls, &OriginalSearchSectionListList,
4687 orig_wordnum, nparts);
4688
4689
4690 ret:
4691 UdmFree(tmp_word);
4692 UdmSearchSectionListListFree(&args->SearchSectionListList);
4693 args->SearchSectionListList= OriginalSearchSectionListList;
4694 args->Word.Word.str= orig_word;
4695 args->need_coords= 0;
4696 UdmLog(A, UDM_LOG_DEBUG,
4697 "Stop searching for multiword '%s'", args->Word.Word.str);
4698 return rc;
4699 }
4700
4701
4702 static udm_rc_t
UdmFindAlwaysFoundWordSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)4703 UdmFindAlwaysFoundWordSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4704 UDM_FINDWORD_ARGS *args)
4705 {
4706 udm_rc_t rc= UDM_OK;
4707 UDM_SQLRES SQLRes;
4708 char qbuf[1024 * 4];
4709 size_t nrows;
4710 size_t i;
4711 UDM_URLCRDLIST CoordList;
4712 UDM_URL_CRD Coord;
4713
4714 bzero((void*) &CoordList, sizeof(CoordList));
4715 bzero((void*) &Coord, sizeof(Coord));
4716 Coord.urlid_coord.coord.pos= 0x00010100; /* TODO34: why? */
4717 Coord.num= args->Word.Param.order;
4718
4719 if (*args->where)
4720 udm_snprintf(qbuf, sizeof(qbuf), "SELECT url.rec_id FROM url%s WHERE %s",
4721 Query->from, args->where);
4722 else
4723 {
4724 if (args->urls.nurls)
4725 {
4726 /*
4727 A fast limit is loaded.
4728 No needs to do "SELECT FROM url".
4729 Populate CoordList from the fast limit instead.
4730 */
4731 for (i= 0; i < args->urls.nurls; i++)
4732 {
4733 Coord.urlid_coord.url_id= args->urls.urls[i];
4734 if (UDM_OK != (rc= UdmAddOneCoord(&CoordList, &Coord)))
4735 return UDM_ERROR;
4736 }
4737 UdmURLCRDListListAddWithSort2(&args->SearchSectionListList,
4738 &Query->Res.WWList, &args->Word, &CoordList);
4739 return UDM_OK;
4740 }
4741 udm_snprintf(qbuf, sizeof(qbuf), "SELECT url.rec_id FROM url");
4742 }
4743
4744 if ((rc= UdmDBSQLQuery(A, db, &SQLRes, qbuf)) != UDM_OK)
4745 return(rc);
4746 /* Note that rc is implicitly set to UDM_OK at this point. */
4747 if (! (nrows= UdmSQLNumRows(&SQLRes)))
4748 goto err;
4749
4750 for (i = 0; i < nrows; i++)
4751 {
4752 Coord.urlid_coord.url_id= (urlid_t) UDM_ATOI(UdmSQLValue(&SQLRes, i, 0));
4753 if (UDM_OK != (rc= UdmAddOneCoord(&CoordList, &Coord)))
4754 break;
4755 }
4756
4757 if (args->urls.nurls)
4758 UdmApplyFastLimit(&CoordList, &args->urls);
4759 if (CoordList.ncoords)
4760 UdmURLCRDListListAddWithSort2(&args->SearchSectionListList,
4761 &Query->Res.WWList, &args->Word, &CoordList);
4762
4763 err:
4764 UdmSQLFree(&SQLRes);
4765 return(rc);
4766 }
4767
4768
4769 static udm_rc_t
UdmCheckIndex(UDM_AGENT * A,UDM_DB * db)4770 UdmCheckIndex(UDM_AGENT *A, UDM_DB *db)
4771 {
4772 int tm;
4773 udm_rc_t rc;
4774 if (UDM_OK != (rc= UdmBlobReadTimestamp(A, db, &tm, 0)))
4775 return rc;
4776 if (tm)
4777 return UDM_OK;
4778 #ifdef WIN32
4779 sprintf(A->Conf->errstr, "Inverted word index not found. Probably you forgot to run 'Create fast index'.");
4780 #else
4781 sprintf(A->Conf->errstr, "Inverted word index not found. Probably you forgot to run 'indexer --index'.");
4782 #endif
4783 return UDM_ERROR;
4784 }
4785
4786
4787 static udm_rc_t
UdmMergeWords(UDM_AGENT * A,UDM_DB * db,UDM_FINDWORD_ARGS * args,UDM_SEARCHSECTIONLIST * SectionList)4788 UdmMergeWords(UDM_AGENT *A, UDM_DB *db,
4789 UDM_FINDWORD_ARGS *args, UDM_SEARCHSECTIONLIST *SectionList)
4790 {
4791 udm_timer_t ticks= UdmStartTimer();
4792
4793 UdmLog(A, UDM_LOG_DEBUG,
4794 "Start merging %d lists", (int) args->SearchSectionListList.nitems);
4795 UdmSearchSectionListListMergeSorted(&args->SearchSectionListList, SectionList, 1);
4796 UdmLog(A, UDM_LOG_DEBUG, "%-30s%.2f (%d sections)",
4797 "Stop merging:", UdmStopTimer(&ticks), (int) SectionList->nsections);
4798
4799 if (!SectionList->nsections &&
4800 UdmSQLDBMode(db) == UDM_SQLDBMODE_BLOB &&
4801 !args->live_updates)
4802 return UdmCheckIndex(A, db);
4803 return UDM_OK;
4804 }
4805
4806
4807 static udm_rc_t
UdmSearchParamInit(UDM_FINDWORD_ARGS * args,UDM_AGENT * A,UDM_QUERY * Query,UDM_DB * db)4808 UdmSearchParamInit(UDM_FINDWORD_ARGS *args,
4809 UDM_AGENT *A,
4810 UDM_QUERY *Query,
4811 UDM_DB *db)
4812 {
4813 bzero((void*) args, sizeof(*args));
4814 UdmWideWordListInit(&args->CollationMatches);
4815 UdmQueryParamInit(&args->query_param, A->Conf, UdmSQLDBVars(db));
4816 args->Word.Param.match_mode= UdmMatchMode(UdmVarListFindStr(&A->Conf->Vars, "wm", "wrd"));
4817 args->live_updates= UdmVarListFindBool(UdmSQLDBVars(db), "LiveUpdates", UDM_FALSE);
4818 return UdmSQLBuildWhereCondition(A, db, Query, &args->where);
4819 }
4820
4821
4822 static void
UdmSearchParamFree(UDM_FINDWORD_ARGS * args)4823 UdmSearchParamFree(UDM_FINDWORD_ARGS *args)
4824 {
4825 UDM_FREE(args->urls.urls);
4826 UDM_FREE(args->live_update_active_urls.urls);
4827 UDM_FREE(args->live_update_deleted_urls.urls);
4828 UdmWideWordListFree(&args->CollationMatches);
4829 UdmSearchSectionListListFree(&args->SearchSectionListList);
4830 UdmSQLResListFree(&args->SQLResults);
4831 UdmInvertedIndexCacheFree(&args->IndexCache);
4832 }
4833
4834
4835 static udm_rc_t
UdmFindLoadSlowOrFastLimit(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * list,const char * fl,int count)4836 UdmFindLoadSlowOrFastLimit(UDM_AGENT *A,
4837 UDM_DB *db,
4838 UDM_URLID_LIST *list,
4839 const char *fl,
4840 int count)
4841 {
4842 udm_rc_t rc;
4843 char name[64];
4844 const char *q;
4845 UDM_URLID_LIST fl_urls;
4846 bzero((void*) &fl_urls, sizeof(fl_urls));
4847 if ((fl_urls.exclude= (fl[0] == '-')))
4848 fl++;
4849 udm_snprintf(name, sizeof(name), "Limit.%s", fl);
4850 if (UDM_OK != (rc= ((q= UdmVarListFindStr(&A->Conf->Vars, name, NULL)) ?
4851 UdmLoadSlowLimitWithSort(A, db, &fl_urls, q) :
4852 UdmBlobLoadFastURLLimit(A, db, fl, &fl_urls))))
4853 goto ret;
4854 UdmLog(A,UDM_LOG_DEBUG, "Limit '%s' loaded%s%s %d URLs",
4855 fl, fl_urls.exclude ? " type=excluding" : "",
4856 q ? " source=slow":"", (int) fl_urls.nurls);
4857 if (!count)
4858 {
4859 list->exclude= fl_urls.exclude;
4860 UdmURLIdListUnion(list, &fl_urls);
4861 }
4862 else
4863 {
4864 if (list->exclude == fl_urls.exclude)
4865 UdmURLIdListUnion(list, &fl_urls);
4866 else
4867 UdmURLIdListJoin(list, &fl_urls);
4868 }
4869 UDM_FREE(fl_urls.urls);
4870 ret:
4871 return rc;
4872 }
4873
4874
4875 /*
4876 Load WHERE and fl limits from the database at search time.
4877 */
4878 static udm_rc_t
UdmFindLoadLimits(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args,const char * fl)4879 UdmFindLoadLimits(UDM_AGENT *A,
4880 UDM_DB *db,
4881 UDM_QUERY *Query,
4882 UDM_FINDWORD_ARGS *args,
4883 const char *fl)
4884 {
4885 udm_rc_t rc= UDM_OK;
4886 udm_timer_t ticks= UdmStartTimer();
4887
4888 UdmLog(A, UDM_LOG_DEBUG, "Start loading limits");
4889 ticks= UdmStartTimer();
4890 if (*args->where)
4891 {
4892 LoadURL(A, db, Query, args->where, &args->urls);
4893 UdmLog(A, UDM_LOG_DEBUG,
4894 "WHERE limit loaded. %d URLs found", (int) args->urls.nurls);
4895 }
4896 if (!args->urls.empty && fl[0])
4897 {
4898 UDM_URLID_LIST lim;
4899 char delim[]= " ,", names[128], *tok, *lt;
4900 int nlimits= 0;
4901 bzero(&lim, sizeof(lim));
4902 udm_snprintf(names, sizeof(names), "%s", fl);
4903 for (tok= udm_strtok_r(names, delim, <) ; tok ;
4904 tok= udm_strtok_r(NULL, delim, <), nlimits++)
4905 {
4906 if (UDM_OK != (rc= UdmFindLoadSlowOrFastLimit(A, db, &lim,
4907 tok, nlimits)))
4908 goto ret;
4909 }
4910 if (nlimits)
4911 UdmURLIdListMerge(&args->urls, &lim);
4912 UDM_FREE(lim.urls);
4913 }
4914 UdmLog(A, UDM_LOG_DEBUG, "%-30s%.2f (%d URLs found)",
4915 "Stop loading limits", UdmStopTimer(&ticks), (int) args->urls.nurls);
4916 ret:
4917 return rc;
4918 }
4919
4920
4921 /*
4922 Load word information from the database
4923 */
4924 static udm_rc_t
UdmFindWordsFetch(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args,const char * always_found_word)4925 UdmFindWordsFetch(UDM_AGENT *A,
4926 UDM_DB *db,
4927 UDM_QUERY *Query,
4928 UDM_FINDWORD_ARGS *args,
4929 const char *always_found_word)
4930 {
4931 size_t wordnum;
4932 udm_rc_t rc= UDM_OK;
4933 udm_timer_t ticks0= UdmStartTimer();
4934
4935 UdmLog(A, UDM_LOG_DEBUG, "Start fetching words");
4936
4937 /* Now find each word */
4938 for(wordnum=0; wordnum < Query->Res.WWList.nwords; wordnum++)
4939 {
4940 udm_timer_t ticks= UdmStartTimer();
4941 UDM_WIDEWORD *W= &Query->Res.WWList.Word[wordnum];
4942 char quoted_word[64];
4943 udm_snprintf(quoted_word, sizeof(quoted_word), "'%s'", W->Word.str);
4944
4945 if (W->Param.origin == UDM_WORD_ORIGIN_STOP) continue;
4946
4947 UdmLog(A, UDM_LOG_DEBUG, "Start search for %s", quoted_word);
4948
4949 args->Word.Param.order= wordnum;
4950 args->Word.Param.count= 0;
4951 args->Word.Word.str= W->Word.str;
4952 args->Word.Param.match_mode= W->Param.match_mode;
4953 args->Word.Param.secno= W->Param.secno;
4954
4955 /*
4956 For now SYNONYMs only are treated as a possible multi-word
4957 origin. Probably it will be changed in future, so we will
4958 use this feature for phrase search.
4959 */
4960 if (always_found_word && !strcmp(W->Word.str, always_found_word))
4961 rc= UdmFindAlwaysFoundWordSQL(A, db, Query, args);
4962 else if (W->Param.origin == UDM_WORD_ORIGIN_SYNONYM ||
4963 W->Param.phrwidth > 0)
4964 rc= UdmFindMultiWordSQL(A, db, Query, args);
4965 else
4966 rc= UdmFindOneWordSQL(A, db, Query, args);
4967
4968 if (rc != UDM_OK)
4969 goto ret;
4970
4971 /*
4972 If CollationMatches is not empty, then we should skip
4973 updating word statistics here - it will be updated in
4974 the loop after UdmSortAndGroupByURL().
4975 */
4976 if (!args->CollationMatches.nwords)
4977 Query->Res.WWList.Word[wordnum].Param.count+= args->Word.Param.count;
4978
4979 UdmLog(A, UDM_LOG_DEBUG,
4980 "Stop search for %-13s%.2f (%u coords found)",
4981 quoted_word, UdmStopTimer(&ticks), (int) args->Word.Param.count);
4982 }
4983 UdmLog(A, UDM_LOG_DEBUG,
4984 "%-30s%.2f", "Stop fetching words:", UdmStopTimer(&ticks0));
4985 ret:
4986 return rc;
4987 }
4988
4989
4990 static udm_rc_t
UdmFindWordsSQLNoCached(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)4991 UdmFindWordsSQLNoCached(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
4992 {
4993 const char *always_found_word, *fl;
4994 udm_rc_t rc= UDM_OK;
4995 UDM_FINDWORD_ARGS args;
4996
4997 /* Query->SectionList must be clean */
4998 UDM_ASSERT(!Query->SectionList.mcoords);
4999 UDM_ASSERT(!Query->SectionList.ncoords);
5000 UDM_ASSERT(!Query->SectionList.Coord);
5001 UDM_ASSERT(!Query->SectionList.msections);
5002 UDM_ASSERT(!Query->SectionList.nsections);
5003 UDM_ASSERT(!Query->SectionList.Section);
5004
5005 UDM_GETLOCK(A, UDM_LOCK_DB);
5006 {
5007 if (UDM_OK != (rc= UdmSearchParamInit(&args, A, Query, db)) &&
5008 UdmDBSQLError(db)[0])
5009 {
5010 UdmEnvCopyErrMsgFromDB(A->Conf, db);
5011 }
5012
5013
5014 always_found_word= UdmVarListFindStr(&A->Conf->Vars, "AlwaysFoundWord", NULL);
5015 fl= UdmVarListFindStr(&A->Conf->Vars, "fl", UdmVarListFindStr(UdmSQLDBVars(db), "fl", ""));
5016 }
5017 UDM_RELEASELOCK(A, UDM_LOCK_DB);
5018 if (rc != UDM_OK)
5019 goto ret;
5020
5021 if ((UdmSQLDBMode(db) == UDM_SQLDBMODE_BLOB && args.where) || fl[0])
5022 {
5023 if (UDM_OK != UdmFindLoadLimits(A, db, Query, &args, fl))
5024 goto ret;
5025
5026 if (args.urls.empty)
5027 goto ret;
5028 }
5029
5030
5031 if (UDM_OK != (rc= UdmSQLDBModeHandler(db)->InitSearch(A, db, Query, &args)))
5032 goto ret;
5033
5034 if (UDM_OK != (rc= UdmFindWordsFetch(A, db, Query, &args, always_found_word)))
5035 goto ret;
5036
5037 if (UDM_OK != (rc= UdmMergeWords(A, db, &args, &Query->SectionList)))
5038 goto ret;
5039
5040 if (UDM_OK != (rc= UdmSortAndGroupByURL(A, Query, &Query->SectionList, db)))
5041 goto ret;
5042
5043 /*
5044 We cannot add collation matches before
5045 UdmSortAndGroupByURL - to use optimized groupping
5046 functions when WWList->nwords==1
5047 */
5048 if (args.CollationMatches.nwords)
5049 {
5050 size_t i;
5051 UdmWideWordListSort(&args.CollationMatches);
5052 for (i= 0; i < args.CollationMatches.nwords; i++)
5053 {
5054 UdmWideWordListAdd(&Query->Res.WWList, &args.CollationMatches.Word[i]);
5055 }
5056 }
5057
5058 ret:
5059 UdmSearchParamFree(&args);
5060 return rc;
5061 }
5062
5063
5064 static udm_rc_t
UdmFindWordsSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5065 UdmFindWordsSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5066 {
5067 udm_rc_t rc;
5068 if (UDM_OK != (rc= UdmQueryCacheGetSQL(A, db, Query)))
5069 return rc;
5070 if (!Query->URLData.nitems)
5071 {
5072 rc= UdmFindWordsSQLNoCached(A, db, Query);
5073 if (rc == UDM_OK && Query->URLData.nitems)
5074 rc= UdmQueryCachePutSQL(A, db, Query);
5075 }
5076 return rc;
5077 }
5078
5079 /****************** Track ***********************************/
5080
5081 static udm_rc_t
UdmTrackSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5082 UdmTrackSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5083 {
5084 UDM_VARLIST Vars;
5085 char *qbuf;
5086 char *text_escaped;
5087 udm_bool_t trackquery= UdmVarListFindBool(UdmSQLDBVars(db), "trackquery", UDM_FALSE);
5088 const char *words= UdmVarListFindStr(&A->Conf->Vars,"q",""); /* "q-lc" was here */
5089 const char *IP = UdmVarListFindStr(&A->Conf->Vars, "IP", "");
5090 size_t i, escaped_len, qbuf_len;
5091 int qtime, rec_id;
5092 udm_rc_t rc;
5093 const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
5094
5095 if (!trackquery)
5096 return UDM_OK;
5097
5098 if (*words == '\0') return UDM_OK; /* do not store empty queries */
5099
5100 escaped_len = 4 * strlen(words);
5101 qbuf_len = escaped_len + 4096;
5102
5103 if ((qbuf = (char*)UdmMalloc(qbuf_len)) == NULL) return UDM_ERROR;
5104 if ((text_escaped = (char*)UdmMalloc(escaped_len)) == NULL)
5105 {
5106 UDM_FREE(qbuf);
5107 return UDM_ERROR;
5108 }
5109
5110 UdmVarListInit(&Vars);
5111 UdmVarListSQLEscape(A, &Vars, &A->Conf->Vars, db);
5112
5113 /* Escape text to track it */
5114 UdmDBSQLEscStr(A, db, text_escaped, words, strlen(words)); /* query for tracking */
5115
5116 if (UdmSQLDBType(db) == UDM_DB_IBASE ||
5117 UdmSQLDBType(db) == UDM_DB_MIMER ||
5118 UdmSQLDBType(db) == UDM_DB_ORACLE8)
5119 {
5120 const char *next;
5121 switch (UdmSQLDBType(db))
5122 {
5123 case UDM_DB_IBASE: next= "SELECT GEN_ID(qtrack_GEN,1) FROM rdb$database"; break;
5124 case UDM_DB_MIMER: next= "SELECT NEXT_VALUE OF qtrack_GEN FROM system.onerow"; break;
5125 case UDM_DB_ORACLE8: next= "SELECT qtrack_seq.nextval FROM dual"; break;
5126 default: next= NULL; /* Make compiler happy */
5127 }
5128 if (UDM_OK != (rc= UdmDBSQLQueryOneRowInt(A, db, &rec_id, next)))
5129 goto UdmTrack_exit;
5130 udm_snprintf(qbuf, qbuf_len - 1,
5131 "INSERT INTO qtrack (rec_id,ip,qwords,qtime,wtime,nfound) "
5132 "VALUES "
5133 "(%d,'%s','%s',%d,%d,%d)",
5134 rec_id, IP, text_escaped, qtime= (int)time(NULL),
5135 UdmVarListFindInt(&Query->Res.Vars, "SearchTime", 0),
5136 (int) UdmResultTotalFound(&Query->Res));
5137 if (UDM_OK != (rc = UdmDBSQLQuery(A, db, NULL, qbuf)))
5138 goto UdmTrack_exit;
5139 }
5140 else
5141 {
5142 udm_snprintf(qbuf, qbuf_len - 1,
5143 "INSERT INTO qtrack (ip,qwords,qtime,wtime,nfound) "
5144 "VALUES "
5145 "('%s','%s',%d,%d,%d)",
5146 IP, text_escaped, qtime= (int)time(NULL),
5147 UdmVarListFindInt(&Query->Res.Vars, "SearchTime", 0),
5148 (int) UdmResultTotalFound(&Query->Res));
5149
5150 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, qbuf)))
5151 goto UdmTrack_exit;
5152
5153 if (UdmSQLDBType(db) == UDM_DB_MYSQL)
5154 udm_snprintf(qbuf, qbuf_len - 1, "SELECT last_insert_id()");
5155 else
5156 udm_snprintf(qbuf, qbuf_len - 1, "SELECT rec_id FROM qtrack WHERE ip='%s' AND qtime=%d", IP, qtime);
5157 if (UDM_OK != (rc= UdmDBSQLQueryOneRowInt(A, db, &rec_id, qbuf)))
5158 goto UdmTrack_exit;
5159 }
5160
5161 for (i = 0; i < Vars.nvars; i++)
5162 {
5163 const UDM_VAR *Var= UdmVarListFindConstByIndex(&Vars, i);
5164 const char *name= UdmVarName(Var);
5165 if (!strncasecmp(name, "query.",6) &&
5166 strcasecmp(name, "query.q") &&
5167 strcasecmp(name, "query.BrowserCharset") &&
5168 strcasecmp(name, "query.IP") &&
5169 UdmVarStr(Var) != NULL && UdmVarStr(Var)[0] != '\0')
5170 {
5171 udm_snprintf(qbuf, qbuf_len,
5172 "INSERT INTO qinfo (q_id,sname,sval) "
5173 "VALUES "
5174 "(%s%i%s,'%s','%s')",
5175 qu, rec_id, qu, name + 6, UdmVarStr(Var));
5176 rc= UdmDBSQLQuery(A, db, NULL, qbuf);
5177 if (rc != UDM_OK) goto UdmTrack_exit;
5178 }
5179 }
5180 UdmTrack_exit:
5181 UdmVarListFree(&Vars);
5182 UDM_FREE(text_escaped);
5183 UDM_FREE(qbuf);
5184 return rc;
5185 }
5186
5187
5188 /********************* Adding URLInfo to Res *********************/
5189
UpdateShows(UDM_AGENT * A,UDM_DB * db,urlid_t url_id)5190 static udm_rc_t UpdateShows(UDM_AGENT *A, UDM_DB *db, urlid_t url_id)
5191 {
5192 char qbuf[64];
5193 udm_snprintf(qbuf, sizeof(qbuf), "UPDATE url SET shows = shows + 1 WHERE rec_id = %s%i%s",
5194 (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "",
5195 url_id,
5196 (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "");
5197 return UdmDBSQLQuery(A, db, NULL, qbuf);
5198 }
5199
/*
  Add one result row to a variable list: column 1 is used as the
  variable name and column 2 as its string value.
*/
static void SQLResToSection(UDM_SQLRES *R, UDM_VARLIST *S, size_t row)
{
  const char *name= UdmSQLValue(R, row, 1);
  const char *value= UdmSQLValue(R, row, 2);

  UdmVarListAddStr(S, name, value);
}
5206
5207
5208
5209 static void
SQLResToCachedCopy(UDM_SQLRES * R,UDM_DOCUMENT * Doc,size_t max_doc_size,size_t row)5210 SQLResToCachedCopy(UDM_SQLRES *R, UDM_DOCUMENT *Doc,
5211 size_t max_doc_size, size_t row)
5212 {
5213 const char *sval= UdmSQLValue(R, row, 1);
5214 size_t len= UdmSQLLen(R, row, 1);
5215 udm_timer_t timer= 0;
5216 UdmDocSetFromCachedHTTPResponse(Doc, sval, len, max_doc_size, &timer);
5217 }
5218
5219
5220 static size_t
UdmDBNum(UDM_QUERY * Query,size_t n)5221 UdmDBNum(UDM_QUERY *Query, size_t n)
5222 {
5223 UDM_URLDATA *Data= &Query->URLData.Item[n + Query->stats.first];
5224 return UDM_COORD2DBNUM(Data->score);
5225 }
5226
5227
5228 static udm_rc_t
UdmQueryAddURLInfoUsingIN(UDM_AGENT * Agent,UDM_DB * db,UDM_QUERY * Query,const char * qbuf,int urlinfob)5229 UdmQueryAddURLInfoUsingIN(UDM_AGENT *Agent, UDM_DB *db, UDM_QUERY *Query,
5230 const char *qbuf, int urlinfob)
5231 {
5232 udm_rc_t rc;
5233 UDM_SQLRES SQLres;
5234 size_t j, sqlrows;
5235 size_t max_doc_size= UdmVarListFindInt(&Agent->Conf->Vars, "MaxDocSize", UDM_MAXDOCSIZE);
5236
5237 if (UDM_OK!= (rc= UdmDBSQLQuery(Agent, db, &SQLres, qbuf)))
5238 return rc;
5239
5240 for (sqlrows= UdmSQLNumRows(&SQLres), j=0;
5241 j< UdmResultNumRows(&Query->Res);
5242 j++)
5243 {
5244 if (&Agent->Conf->DBList.Item[UdmDBNum(Query, j)] == db)
5245 {
5246 size_t i;
5247 UDM_DOCUMENT *D= &Query->Res.Doc[j];
5248 urlid_t url_id = UdmVarListFindInt(&D->Sections, "ID", 0);
5249 for(i = 0; i < sqlrows; i++)
5250 {
5251 if(url_id == UDM_ATOI(UdmSQLValue(&SQLres,i,0)))
5252 {
5253 if (urlinfob)
5254 SQLResToCachedCopy(&SQLres, D, max_doc_size, i);
5255 else
5256 SQLResToSection(&SQLres, &D->Sections, i);
5257 }
5258 }
5259 }
5260 }
5261 UdmSQLFree(&SQLres);
5262 return UDM_OK;
5263 }
5264
5265
5266 static udm_rc_t
UdmDocAddURLInfo(UDM_AGENT * A,UDM_DOCUMENT * D,UDM_DB * db,const char * qbuf)5267 UdmDocAddURLInfo(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db, const char *qbuf)
5268 {
5269 UDM_SQLRES SQLres;
5270 udm_rc_t rc;
5271 size_t row;
5272
5273 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
5274 return rc;
5275 for(row= 0; row < UdmSQLNumRows(&SQLres); row++)
5276 SQLResToSection(&SQLres, &D->Sections, row);
5277 UdmSQLFree(&SQLres);
5278 return rc;
5279 }
5280
5281
5282 static void
UdmDocSetPopularity(UDM_DOCUMENT * D,double pop)5283 UdmDocSetPopularity(UDM_DOCUMENT *D, double pop)
5284 {
5285 char tmp[16];
5286 udm_snprintf(tmp, sizeof(tmp), "%.5f", pop);
5287 UdmVarListReplaceStr(&D->Sections, "Pop_Rank", tmp);
5288 }
5289
5290
/*
  Options controlling which per-document information is loaded when
  document info is added to a search result. Each field is read by
  UdmResInfoParamInit() from the variable of the same name.
*/
typedef struct
{
  udm_bool_t LoadURLBasicInfo;   /* Load the basic columns from the "url" table (default UDM_TRUE) */
  udm_bool_t LoadURLInfo;        /* Load sname/sval pairs from "urlinfo" (default UDM_TRUE) */
  udm_bool_t LoadURLInfoBin;     /* Load content from "cachedcopy" (default UDM_TRUE) */
  udm_bool_t LoadTagInfo;        /* Load the server "tag" as a section (default UDM_FALSE) */
  udm_bool_t PopRankUseShowCnt;  /* Increment url.shows for documents whose Score reaches the ratio (default UDM_FALSE) */
  double PopRankShowCntRatio;    /* Minimum Score for the shows counter to be incremented (default 25.0) */
} UDM_RESINFOPARAM;
5300
5301
5302 static void
UdmResInfoParamInit(UDM_RESINFOPARAM * P,UDM_VARLIST * Vars)5303 UdmResInfoParamInit(UDM_RESINFOPARAM *P, UDM_VARLIST *Vars)
5304 {
5305 P->PopRankUseShowCnt= UdmVarListFindBool(Vars, "PopRankUseShowCnt", UDM_FALSE);
5306 P->LoadTagInfo= UdmVarListFindBool(Vars, "LoadTagInfo", UDM_FALSE);
5307 P->LoadURLInfo= UdmVarListFindBool(Vars, "LoadURLInfo", UDM_TRUE);
5308 P->PopRankShowCntRatio= UdmVarListFindDouble(Vars, "PopRankShowCntRatio", 25.0);
5309 P->LoadURLInfoBin= UdmVarListFindBool(Vars, "LoadURLInfoBin", UDM_TRUE);
5310 P->LoadURLBasicInfo= UdmVarListFindBool(Vars, "LoadURLBasicInfo", UDM_TRUE);
5311 }
5312
5313
5314 static udm_rc_t
UdmQueryAddDocInfoUsingLoop(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5315 UdmQueryAddDocInfoUsingLoop(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5316 {
5317 udm_rc_t rc;
5318 size_t i;
5319 UDM_SQLRES SQLres;
5320 UDM_RESINFOPARAM param;
5321 size_t max_doc_size= UdmVarListFindInt(&A->Conf->Vars, "MaxDocSize", UDM_MAXDOCSIZE);
5322
5323 UdmResInfoParamInit(¶m, &A->Conf->Vars);
5324 for(i= 0; i < UdmResultNumRows(&Query->Res); i++)
5325 {
5326 UDM_DOCUMENT *D= &Query->Res.Doc[i];
5327 urlid_t url_id= UdmVarListFindInt(&D->Sections, "ID", 0);
5328 char qbuf[128];
5329
5330 if (&A->Conf->DBList.Item[UdmDBNum(Query, i)] != db)
5331 continue;
5332
5333 udm_snprintf(qbuf, sizeof(qbuf),
5334 "SELECT " SQLRESTODOC_COLUMNS
5335 " FROM url WHERE rec_id=%d", url_id);
5336 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
5337 return rc;
5338
5339 if (UdmSQLNumRows(&SQLres))
5340 {
5341 double pr;
5342 SQLResToDoc(A->Conf, D, &SQLres, 0);
5343 if (param.PopRankUseShowCnt &&
5344 (pr= atof(UdmVarListFindStr(&D->Sections, "Score", "0.0"))) >=
5345 param.PopRankShowCntRatio)
5346 UpdateShows(A, db, url_id);
5347 }
5348 UdmSQLFree(&SQLres);
5349
5350 if (param.LoadTagInfo)
5351 {
5352 udm_snprintf(qbuf, sizeof(qbuf),
5353 "SELECT u.rec_id, 'tag', tag FROM url u, server s "
5354 "WHERE u.rec_id=%d AND u.server_id=s.rec_id", url_id);
5355 if(UDM_OK != (rc= UdmDocAddURLInfo(A, D, db, qbuf)))
5356 return rc;
5357 }
5358
5359 if (param.LoadURLInfo)
5360 {
5361 sprintf(qbuf,"SELECT url_id,sname,sval FROM urlinfo WHERE url_id=%i", url_id);
5362 if(UDM_OK != (rc= UdmDocAddURLInfo(A, D, db, qbuf)))
5363 return rc;
5364 }
5365
5366 if (param.LoadURLInfoBin)
5367 {
5368 sprintf(qbuf,"SELECT url_id,content FROM cachedcopy WHERE url_id=%i", url_id);
5369 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
5370 return rc;
5371 if (UdmSQLNumRows(&SQLres))
5372 SQLResToCachedCopy(&SQLres, D, max_doc_size, 0);
5373 }
5374 }
5375 return UDM_OK;
5376 }
5377
5378
5379 static udm_rc_t
UdmQueryAddDocInfoUsingIN(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5380 UdmQueryAddDocInfoUsingIN(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5381 {
5382 udm_rc_t rc= UDM_OK;
5383 size_t i, j, sqlrows;
5384 UDM_SQLRES SQLres;
5385 UDM_RESINFOPARAM param;
5386 UDM_DSTR in_list, qq;
5387 const char *hi_priority= UdmSQLDBType(db) == UDM_DB_MYSQL ? "HIGH_PRIORITY" : "";
5388
5389 UdmResInfoParamInit(¶m, &A->Conf->Vars);
5390
5391 UdmDSTRInit(&in_list, 1024);
5392 UdmDSTRInit(&qq, 1024);
5393
5394 /* Compose IN string and set to zero url_id field */
5395 for(i=0; i < UdmResultNumRows(&Query->Res); i++)
5396 {
5397 if (&A->Conf->DBList.Item[UdmDBNum(Query, i)] == db)
5398 {
5399 const char *comma= UdmDSTRLength(&in_list) ? "," : "";
5400 const char *squot= UdmSQLDBType(db) == UDM_DB_PGSQL ? "'" : "";
5401 UdmDSTRAppendf(&in_list, "%s%s%i%s", comma, squot,
5402 UdmVarListFindInt(&Query->Res.Doc[i].Sections, "ID", 0), squot);
5403 }
5404 }
5405
5406 if (!UdmDSTRLength(&in_list))
5407 goto ret_in;
5408
5409 if (param.LoadURLBasicInfo)
5410 {
5411 UdmDSTRReset(&qq);
5412 UdmDSTRAppendf(&qq,
5413 "SELECT %s " SQLRESTODOC_COLUMNS
5414 " FROM url WHERE rec_id IN (%s)",
5415 hi_priority, UdmDSTRPtr(&in_list));
5416 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, UdmDSTRPtr(&qq))))
5417 goto ret_in;
5418
5419 for (sqlrows= UdmSQLNumRows(&SQLres), j=0;
5420 j < UdmResultNumRows(&Query->Res);
5421 j++)
5422 {
5423 if (&A->Conf->DBList.Item[UdmDBNum(Query, j)] == db)
5424 {
5425 UDM_DOCUMENT *D= &Query->Res.Doc[j];
5426 urlid_t url_id= UdmVarListFindInt(&D->Sections, "ID", 0);
5427 for(i= 0; i < sqlrows; i++)
5428 {
5429 if (url_id == UDM_ATOI(UdmSQLValue(&SQLres,i,0)))
5430 {
5431 double pr;
5432 SQLResToDoc(A->Conf, D, &SQLres, i);
5433 if (param.PopRankUseShowCnt &&
5434 (pr= atof(UdmVarListFindStr(&D->Sections, "Score", "0.0"))) >=
5435 param.PopRankShowCntRatio)
5436 UpdateShows(A, db, url_id);
5437 break;
5438 }
5439 }
5440 }
5441 }
5442 UdmSQLFree(&SQLres);
5443 }
5444
5445 if (param.LoadTagInfo)
5446 {
5447 UdmDSTRReset(&qq);
5448 UdmDSTRAppendf(&qq,
5449 "SELECT u.rec_id, 'tag', tag FROM url u, server s "
5450 "WHERE u.rec_id in (%s) AND u.server_id=s.rec_id",
5451 UdmDSTRPtr(&in_list));
5452 if (UDM_OK != (rc= UdmQueryAddURLInfoUsingIN(A, db, Query,
5453 UdmDSTRPtr(&qq), 0)))
5454 return rc;
5455 }
5456
5457 if (param.LoadURLInfo)
5458 {
5459 UdmDSTRReset(&qq);
5460 UdmDSTRAppendf(&qq,
5461 "SELECT url_id,sname,sval "
5462 "FROM urlinfo WHERE url_id IN (%s)",
5463 UdmDSTRPtr(&in_list));
5464 if (UDM_OK != (rc= UdmQueryAddURLInfoUsingIN(A, db, Query,
5465 UdmDSTRPtr(&qq), 0)))
5466 return rc;
5467 }
5468
5469 if (param.LoadURLInfoBin)
5470 {
5471 UdmDSTRReset(&qq);
5472 UdmDSTRAppendf(&qq,
5473 "SELECT url_id,content "
5474 "FROM cachedcopy WHERE url_id IN (%s)",
5475 UdmDSTRPtr(&in_list));
5476 if (UDM_OK != (rc= UdmQueryAddURLInfoUsingIN(A, db, Query,
5477 UdmDSTRPtr(&qq), 1)))
5478 return rc;
5479 }
5480
5481 ret_in:
5482 UdmDSTRFree(&in_list);
5483 UdmDSTRFree(&qq);
5484 return rc;
5485 }
5486
5487
5488 static udm_rc_t
UdmQueryAddDocInfoSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5489 UdmQueryAddDocInfoSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5490 {
5491 size_t i;
5492 udm_rc_t rc;
5493 UDM_RESINFOPARAM param;
5494
5495 if (!UdmResultNumRows(&Query->Res))
5496 return UDM_OK;
5497
5498 UdmResInfoParamInit(¶m, &A->Conf->Vars);
5499
5500 if (param.PopRankUseShowCnt)
5501 UdmLog(A, UDM_LOG_DEBUG, "use_showcnt: %d ratio: %f",
5502 param.PopRankUseShowCnt, param.PopRankShowCntRatio);
5503
5504 for (i= 0; i < UdmResultNumRows(&Query->Res); i++)
5505 {
5506 UDM_URLDATA *Data= &Query->URLData.Item[i + Query->stats.first];
5507 UdmVarListReplaceInt(&Query->Res.Doc[i].Sections, "id", Data->url_id);
5508 UdmDocSetPopularity(&Query->Res.Doc[i], Data->pop_rank);
5509 }
5510
5511 rc= UdmSQLDBHaveIn(db) ?
5512 UdmQueryAddDocInfoUsingIN(A, db, Query) :
5513 UdmQueryAddDocInfoUsingLoop(A, db, Query);
5514
5515 return rc;
5516 }
5517
5518
5519 /************************* Misc *******************************************/
5520
5521
5522 static udm_rc_t
UdmExportSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)5523 UdmExportSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
5524 {
5525 UDM_SQLRES SQLRes;
5526 udm_rc_t rc;
5527 UDM_STR row[24];
5528
5529 /* TODO34: add well-formed output */
5530 printf("<database>\n");
5531 printf("<urlList>\n");
5532 rc= UdmDBSQLExecDirect(Indexer, db, &SQLRes,
5533 "SELECT rec_id,status,docsize,next_index_time,"
5534 "last_mod_time,referrer,hops,crc32,seed,"
5535 "bad_since_time,server_id,shows,"
5536 "url"
5537 " FROM url");
5538 if (rc != UDM_OK) return(rc);
5539 while (UdmDBSQLFetchRow(Indexer, db, &SQLRes, row) == UDM_OK)
5540 {
5541 printf(
5542 "<url "
5543 "rec_id=\"%s\" "
5544 "status=\"%s\" "
5545 "docsize=\"%s\" "
5546 "next_index_time=\"%s\" "
5547 "last_mod_time=\"%s\" "
5548 "referrer=\"%s\" "
5549 "hops=\"%s\" "
5550 "crc32=\"%s\" "
5551 "seed=\"%s\" "
5552 "bad_since_time=\"%s\" "
5553 "server_id=\"%s\" "
5554 "shows=\"%s\" "
5555 "url=\"%s\" "
5556 "/>\n",
5557 row[0].str, row[1].str, row[2].str, row[3].str,
5558 row[4].str, row[5].str, row[6].str, row[7].str,
5559 row[8].str, row[9].str, row[10].str, row[11].str,
5560 row[12].str);
5561 }
5562 UdmSQLFree(&SQLRes);
5563 printf("</urlList>\n");
5564
5565 printf("<linkList>\n");
5566 if (UDM_OK != (rc= UdmDBSQLExecDirect(Indexer, db, &SQLRes,
5567 "SELECT url_id,seed,url,src,rel,linktext FROM links")))
5568 return rc;
5569 while (UdmDBSQLFetchRow(Indexer, db, &SQLRes, row) == UDM_OK)
5570 {
5571 printf(
5572 "<link "
5573 "url_id=\"%s\" "
5574 "seed=\"%s\" "
5575 "url=\"%s\" "
5576 "src=\"%s\" "
5577 "rel=\"%s\" "
5578 "linktext=\"%s\" "
5579 "/>\n",
5580 row[0].str, row[1].str, row[2].str,
5581 row[3].str, row[4].str, row[5].str);
5582 }
5583 UdmSQLFree(&SQLRes);
5584 printf("</linkList>\n");
5585
5586 printf("</database>\n");
5587 return(0);
5588 }
5589
5590
5591 static udm_rc_t
UdmDocPerSite(UDM_AGENT * A,UDM_DOCUMENT * D,UDM_DB * db)5592 UdmDocPerSite(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db)
5593 {
5594 char qbuf[1024];
5595 const char *s, *hostinfo= UdmVarListFindStr(&D->Sections, "Hostinfo", NULL);
5596 udm_rc_t rc;
5597 int num, prevnum= UdmVarListFindInt(&D->Sections, "DocPerSite", 0);
5598 UDM_SQLRES SQLRes;
5599
5600 if (!hostinfo)
5601 return UDM_OK;
5602
5603 for (s= hostinfo; s[0]; s++)
5604 {
5605 /*
5606 Host name good characters: digits, letters, hyphen (-).
5607 Just check the worst characters.
5608 */
5609 if (*s == '\'' || *s == '\"')
5610 {
5611 num= 1000000;
5612 goto ret;
5613 }
5614 }
5615 udm_snprintf(qbuf, sizeof(qbuf),
5616 "SELECT COUNT(*) FROM url WHERE url LIKE '%s%%'", hostinfo);
5617
5618 if (UDM_OK!= (rc= UdmDBSQLQuery(A, db, &SQLRes, qbuf)))
5619 return rc;
5620 num= prevnum + atoi(UdmSQLValue(&SQLRes, 0, 0));
5621 UdmSQLFree(&SQLRes);
5622 ret:
5623 UdmVarListReplaceInt(&D->Sections, "DocPerSite", num);
5624 return UDM_OK;
5625 }
5626
5627
5628 static udm_rc_t
UdmImportSection(UDM_AGENT * A,UDM_DOCUMENT * D,UDM_DB * db)5629 UdmImportSection(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db)
5630 {
5631 UDM_CONST_TEXTITEM ConstItem;
5632 UDM_TEXT_PARAM Param;
5633 UDM_VARLIST Vars;
5634 UDM_SQLRES SQLRes;
5635 UDM_DSTR d;
5636 udm_rc_t rc;
5637 size_t row, rows, cols;
5638 const char *fmt= UdmVarListFindStr(&D->Sections, "SQLImportSection", NULL);
5639
5640 if (!fmt)
5641 return UDM_OK;
5642
5643 UdmDSTRInit(&d, 1024);
5644 UdmVarListInit(&Vars);
5645 UdmVarListSQLEscape(A, &Vars, &D->Sections, db);
5646 UdmDSTRParse(&d, fmt, &Vars);
5647 UdmVarListFree(&Vars);
5648 if(UDM_OK!= (rc= UdmDBSQLQuery(A, db, &SQLRes, UdmDSTRPtr(&d))))
5649 return rc;
5650
5651 cols= UdmSQLNumCols(&SQLRes);
5652 UdmConstTextItemInit(&ConstItem);
5653 UdmTextParamInit(&Param, UDM_TEXTLIST_FLAG_HTML, 0); /* TODO34: indexer.conf: format */
5654 for (row=0, rows= UdmSQLNumRows(&SQLRes); row < rows; row++)
5655 {
5656 size_t col;
5657 for (col= 0; col + 1 < cols; col+= 2)
5658 {
5659 const UDM_VAR *Sec;
5660 ConstItem.section_name.str= UdmSQLValue(&SQLRes, row, col);
5661 ConstItem.section_name.length= UdmSQLLen(&SQLRes, row, col);
5662 if ((Sec= UdmVarListFind(&D->Sections, ConstItem.section_name.str)))
5663 {
5664 ConstItem.text.str= UdmSQLValue(&SQLRes, row, col + 1);
5665 ConstItem.text.length= UdmSQLLen(&SQLRes, row, col + 1);
5666 Param.secno= UdmVarSecno(Sec);
5667 UdmTextListAddConst(&D->TextList, &ConstItem, &Param);
5668 }
5669 }
5670 }
5671
5672 UdmDSTRFree(&d);
5673 UdmSQLFree(&SQLRes);
5674 return rc;
5675 }
5676
5677
5678 static udm_rc_t
UdmGetReferers(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)5679 UdmGetReferers(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
5680 {
5681 size_t i,j;
5682 char qbuf[2048];
5683 UDM_SQLRES SQLres;
5684 const char *where;
5685 udm_rc_t rc;
5686
5687 UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
5688 if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
5689 return rc;
5690
5691 udm_snprintf(qbuf, sizeof(qbuf),
5692 "SELECT url.status,url2.url,url.url "
5693 "FROM url,url url2%s "
5694 "WHERE url.referrer=url2.rec_id %s %s",
5695 Query->from, where[0] ? "AND" : "", where);
5696
5697 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5698 return rc;
5699
5700 j=UdmSQLNumRows(&SQLres);
5701 for(i=0;i<j;i++)
5702 {
5703 if(Indexer->Conf->RefInfo)Indexer->Conf->RefInfo(
5704 atoi(UdmSQLValue(&SQLres,i,0)),
5705 UdmSQLValue(&SQLres,i,2),
5706 UdmSQLValue(&SQLres,i,1)
5707 );
5708 }
5709 UdmSQLFree(&SQLres);
5710 return rc;
5711 }
5712
5713
5714 static udm_rc_t
UdmGetDocCount(UDM_AGENT * Indexer,UDM_DB * db)5715 UdmGetDocCount(UDM_AGENT * Indexer, UDM_DB *db)
5716 {
5717 char qbuf[200]="";
5718 UDM_SQLRES SQLres;
5719 udm_rc_t rc;
5720
5721 sprintf(qbuf,NDOCS_QUERY);
5722 if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5723 return rc;
5724
5725 if(UdmSQLNumRows(&SQLres))
5726 {
5727 const char * s;
5728 s=UdmSQLValue(&SQLres,0,0);
5729 if(s)Indexer->doccount += atoi(s);
5730 }
5731 UdmSQLFree(&SQLres);
5732 return(UDM_OK);
5733 }
5734
5735
5736 static udm_rc_t
UdmStatActionSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)5737 UdmStatActionSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
5738 {
5739 size_t i,j,n;
5740 char qbuf[2048];
5741 UDM_SQLRES SQLres;
5742 int have_group= (UdmSQLDBFlags(db) & UDM_SQL_HAVE_GROUPBY);
5743 const char *where;
5744 udm_rc_t rc= UDM_OK;
5745
5746 if(UdmSQLDBType(db)==UDM_DB_IBASE)
5747 have_group=0;
5748
5749 UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
5750 if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
5751 return rc;
5752
5753 if(have_group)
5754 {
5755 char func[128];
5756 int time= (int) Query->StatList.time; /* To use %d on 64bit OSs */
5757
5758 switch(UdmSQLDBType(db))
5759 {
5760 case UDM_DB_MYSQL:
5761 udm_snprintf(func, sizeof(func), "next_index_time<=%d", time);
5762 break;
5763
5764 case UDM_DB_PGSQL:
5765 case UDM_DB_MSSQL:
5766 case UDM_DB_SYBASE:
5767 case UDM_DB_DB2:
5768 case UDM_DB_SQLITE:
5769 case UDM_DB_SQLITE3:
5770 default:
5771 udm_snprintf(func, sizeof(func),
5772 "case when next_index_time<=%d then 1 else 0 end", time);
5773 break;
5774
5775 case UDM_DB_ACCESS:
5776 udm_snprintf(func, sizeof(func),
5777 "IIF(next_index_time<=%d, 1, 0)", time);
5778 break;
5779
5780 case UDM_DB_ORACLE8:
5781 case UDM_DB_SAPDB:
5782 udm_snprintf(func, sizeof(func),
5783 "DECODE(SIGN(%d-next_index_time),-1,0,1,1)", time);
5784 break;
5785 }
5786
5787 udm_snprintf(qbuf, sizeof(qbuf) - 1,
5788 "SELECT status, SUM(%s), count(*) FROM url%s %s%s GROUP BY status ORDER BY status",
5789 func, Query->from, where[0] ? "WHERE " : "", where);
5790
5791 if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5792 return rc;
5793
5794 if ((n= UdmSQLNumRows(&SQLres)))
5795 {
5796 for (i = 0; i < n; i++)
5797 {
5798 for (j= 0; j < Query->StatList.nstats; j++)
5799 {
5800 UDM_STAT *Stat= &Query->StatList.Stat[j];
5801 if (Stat->status== atoi(UdmSQLValue(&SQLres,i,0)))
5802 {
5803 Stat->expired += atoi(UdmSQLValue(&SQLres,i,1));
5804 Stat->total += atoi(UdmSQLValue(&SQLres,i,2));
5805 break;
5806 }
5807 }
5808 if (j == Query->StatList.nstats)
5809 {
5810 UDM_STAT *S;
5811 size_t nbytes= (Query->StatList.nstats + 1) * sizeof(Query->StatList.Stat[0]);
5812 Query->StatList.Stat= (UDM_STAT*) UdmRealloc(Query->StatList.Stat, nbytes);
5813 S= &Query->StatList.Stat[Query->StatList.nstats];
5814 S->status= atoi(UdmSQLValue(&SQLres,i,0));
5815 S->expired= atoi(UdmSQLValue(&SQLres,i,1));
5816 S->total= atoi(UdmSQLValue(&SQLres,i,2));
5817 Query->StatList.nstats++;
5818 }
5819 }
5820 }
5821 UdmSQLFree(&SQLres);
5822 }
5823 else
5824 {
5825 /*
5826 FIXME: learn how to get it from SOLID and IBASE
5827 (HAVE_IBASE || HAVE_SOLID || HAVE_VIRT )
5828 */
5829
5830 udm_snprintf(qbuf, sizeof(qbuf) - 1,
5831 "SELECT status,next_index_time FROM url%s %s%s ORDER BY status",
5832 Query->from, where[0] ? "WHERE " : "", where);
5833
5834 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5835 return rc;
5836
5837 for(i=0;i<UdmSQLNumRows(&SQLres);i++)
5838 {
5839 for (j= 0; j< Query->StatList.nstats; j++)
5840 {
5841 UDM_STAT *Stat= &Query->StatList.Stat[j];
5842 if (Stat->status == atoi(UdmSQLValue(&SQLres,i,0)))
5843 {
5844 if ((time_t) UDM_ATOU(UdmSQLValue(&SQLres, i, 1)) <=
5845 Query->StatList.time)
5846 Stat->expired++;
5847 Stat->total++;
5848 break;
5849 }
5850 }
5851 if (j == Query->StatList.nstats)
5852 {
5853 size_t nbytes= sizeof(UDM_STAT) * (Query->StatList.nstats + 1);
5854 Query->StatList.Stat= (UDM_STAT *) UdmRealloc(Query->StatList.Stat, nbytes);
5855 Query->StatList.Stat[j].status= UDM_ATOI(UdmSQLValue(&SQLres,i,0));
5856 Query->StatList.Stat[j].expired= 0;
5857 if ((time_t) UDM_ATOU(UdmSQLValue(&SQLres, i, 1)) <=
5858 Query->StatList.time)
5859 Query->StatList.Stat[j].expired++;
5860 Query->StatList.Stat[j].total=1;
5861 Query->StatList.nstats++;
5862 }
5863 }
5864 UdmSQLFree(&SQLres);
5865 }
5866 return rc;
5867 }
5868
5869
5870 static udm_rc_t
UdmURLInfoDumpDoc(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,const char * table,const char * column_names)5871 UdmURLInfoDumpDoc(UDM_AGENT *Indexer,
5872 UDM_DB *db,
5873 UDM_DOCUMENT *Doc,
5874 const char *table,
5875 const char *column_names)
5876 {
5877 udm_rc_t rc;
5878 char buf[64];
5879 size_t i;
5880 UDM_SQLRES SQLRes;
5881 UDM_DSTR dbuf;
5882 urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
5883
5884 udm_snprintf(buf, sizeof(buf),
5885 "SELECT %s FROM %s WHERE url_id=%d", column_names, table, url_id);
5886 if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
5887 return rc;
5888
5889 UdmDSTRInit(&dbuf, 256);
5890
5891 for (i= 0; i < UdmSQLNumRows(&SQLRes); i++)
5892 {
5893 size_t j;
5894 UDM_CONST_STR col[2];
5895 const UDM_CONST_STR *c= (const UDM_CONST_STR*) &col;
5896 for (j= 0; j < UdmSQLNumCols(&SQLRes); j++)
5897 {
5898 UDM_ASSERT(j < 2);
5899 UdmConstStrSet(&col[j], UdmSQLValue(&SQLRes, i, j), UdmSQLLen(&SQLRes, i, j));
5900 }
5901 UdmDocInsertSectionsUsingEscapeBuildQuery(Indexer, db, table,
5902 0, column_names,
5903 c, UdmSQLNumCols(&SQLRes),
5904 &dbuf);
5905 printf("%s;\n", UdmDSTRPtr(&dbuf));
5906 }
5907 UdmSQLFree(&SQLRes);
5908 UdmDSTRFree(&dbuf);
5909 return UDM_OK;
5910 }
5911
5912
5913 static udm_rc_t
UdmDumpData(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5914 UdmDumpData(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5915 {
5916 char buf[256];
5917 UDM_SQLRES SQLRes;
5918 size_t i, nrows;
5919 udm_rc_t rc;
5920 const char *where;
5921 UDM_DSTR eurl;
5922
5923 UDM_ASSERT(UdmSQLDBModeHandler(db)->DumpWordInfo != NULL);
5924 if (UDM_OK != (rc= UdmSQLBuildWhereCondition(A, db, Query, &where)))
5925 return rc;
5926 UdmDSTRInit(&eurl, 256);
5927 udm_snprintf(buf, sizeof(buf),
5928 "SELECT %s FROM url%s%s", select_url_str_for_dump,
5929 where[0] ? " WHERE " : "", where);
5930 if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, buf)))
5931 goto ret;
5932
5933 nrows= UdmSQLNumRows(&SQLRes);
5934
5935 for(i= 0; i < nrows; i++)
5936 {
5937 UDM_DOCUMENT Doc;
5938 UdmDocInit(&Doc);
5939 if (UDM_OK != UdmTargetSQLResDump(A, db, &Doc, &SQLRes, i, &eurl))
5940 goto ret;
5941 if (UDM_OK != (rc= UdmURLInfoDumpDoc(A, db, &Doc, "urlinfo", "sname,sval")))
5942 goto ret;
5943 if (UDM_OK != (rc= UdmURLInfoDumpDoc(A, db, &Doc, "cachedcopy", "sval")))
5944 goto ret;
5945 if (UDM_OK != (rc= UdmSQLDBModeHandler(db)->DumpWordInfo(A, db, &Doc)))
5946 goto ret;
5947 UdmDocFree(&Doc);
5948 }
5949
5950 ret:
5951 UdmDSTRFree(&eurl);
5952 return rc;
5953 }
5954
5955
5956 static udm_rc_t
UdmRestoreData(UDM_AGENT * A,UDM_DOCUMENT * Doc,UDM_DB * db)5957 UdmRestoreData(UDM_AGENT *A, UDM_DOCUMENT *Doc, UDM_DB *db)
5958 {
5959 size_t i;
5960 udm_rc_t rc;
5961
5962 for (i= 0; i < Doc->Sections.nvars; i++)
5963 {
5964 UDM_VAR *S= UdmVarListFindByIndex(&Doc->Sections, i);
5965 if (UdmVarValueHandlerType(S) == UDM_VALUE_HANDLER_TYPE_STR)
5966 {
5967 UDM_SECTION *Sec= (UDM_SECTION *) UdmVarDataPtr(S);
5968 printf("%s[%d]=%s\n", UdmVarName(S),
5969 (int) UdmSectionLength(Sec), UdmSectionPtr(Sec));
5970 }
5971 }
5972
5973 if (UDM_OK != (rc= UdmAddURL(A, Doc, db)))
5974 goto ex;
5975 if (UDM_OK != (rc= UdmFindURL(A, Doc, db)))
5976 goto ex;
5977 if (UDM_OK != (rc= UdmLongUpdateURL(A, Doc, db)))
5978 goto ex;
5979
5980 printf("\n");
5981
5982 ex:
5983 return rc;
5984 }
5985
5986
5987 /******* "indexer -Ewordstat" - word statistics for suggestions *************/
5988
5989 udm_rc_t
UdmWordStatQuery(UDM_AGENT * A,UDM_DB * db,const char * src)5990 UdmWordStatQuery(UDM_AGENT *A, UDM_DB *db, const char *src)
5991 {
5992 udm_rc_t rc;
5993 UDM_SQLRES SQLRes;
5994 size_t row, rows;
5995
5996 if (UDM_OK!= (rc= UdmDBSQLQuery(A, db, &SQLRes, src)))
5997 return rc;
5998
5999 if (UDM_OK != (rc= UdmDBSQLLockOrBegin(A, db, "wrdstat WRITE")))
6000 return rc;
6001
6002 rows=UdmSQLNumRows(&SQLRes);
6003 for(row=0 ; row < rows ; row++)
6004 {
6005 const char *word;
6006 int count;
6007 size_t wordlen;
6008 char snd[UDM_MAXWORDSIZE];
6009 char insert[64 + 2 * UDM_MAXWORDSIZE];
6010 /*
6011 Skip words that are longer than UDM_MAXWORDSIZE.
6012 */
6013 if ((wordlen= UdmSQLLen(&SQLRes, row, 0)) > sizeof(snd))
6014 continue;
6015 word= UdmSQLValue(&SQLRes, row, 0);
6016 count= UDM_ATOI(UdmSQLValue(&SQLRes, row, 1));
6017 UdmSoundex(A->Conf->lcs, snd, sizeof(snd), word, wordlen);
6018 if (snd[0])
6019 {
6020 udm_snprintf(insert, sizeof(insert),
6021 "INSERT INTO wrdstat (word, snd, cnt) VALUES ('%s','%s',%d)",
6022 word, snd, count);
6023 if (UDM_OK!= (rc= UdmDBSQLQuery(A, db, NULL, insert)))
6024 return rc;
6025 }
6026 if (((row % 1000) == 999) && row + 100 < rows)
6027 {
6028 if (UDM_OK != (rc= UdmDBSQLUnlockOrCommit(A, db)))
6029 return rc;
6030 if (UDM_OK != (rc= UdmDBSQLLockOrBegin(A, db, "wrdstat WRITE")))
6031 return rc;
6032 }
6033 }
6034 UdmSQLFree(&SQLRes);
6035 if (UDM_OK != (rc= UdmDBSQLUnlockOrCommit(A, db)))
6036 return rc;
6037 return UDM_OK;
6038 }
6039
6040
6041 static udm_rc_t
UdmWordStatCreate(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)6042 UdmWordStatCreate(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
6043 {
6044 udm_rc_t rc;
6045 if (UDM_OK != (rc= UdmDBSQLTableTruncateOrDelete(A, db, "wrdstat")))
6046 return rc;
6047 UDM_ASSERT(UdmSQLDB(db)->dbmode_handler->QueryAction != NULL);
6048 rc= UdmSQLDB(db)->dbmode_handler->QueryAction(A, db, Query, UDM_QUERYCMD_WORDSTAT);
6049 return rc;
6050 }
6051
6052 /******************* create and drop ***********************/
6053 typedef struct
6054 {
6055 UDM_AGENT *Agent;
6056 FILE *infile;
6057 } UDM_CREATE_DROP_PARAM;
6058
6059
6060 static udm_rc_t
sqlmonprompt_create_or_drop(UDM_IOHANDLER * iohandler,udm_msg_t msgtype,const char * msg)6061 sqlmonprompt_create_or_drop(UDM_IOHANDLER *iohandler,
6062 udm_msg_t msgtype, const char *msg)
6063 {
6064 UDM_SQLMON_PARAM *prm= (UDM_SQLMON_PARAM*) iohandler->user_data;
6065 UDM_CREATE_DROP_PARAM *prm2= (UDM_CREATE_DROP_PARAM *) prm->user_data;
6066 int level= msgtype == UDM_MSG_ERROR ? UDM_LOG_ERROR : UDM_LOG_EXTRA;
6067 if (msgtype == UDM_MSG_ERROR)
6068 UdmLog(prm2->Agent, level, "ERROR at line %d: %s",
6069 (int) prm->lineno + 1, msg);
6070 else
6071 UdmLog(prm2->Agent, level, "%s", msg);
6072 return UDM_OK;
6073 }
6074
6075
6076 static char *
sqlmongets_create_or_drop(UDM_IOHANDLER * iohandler,char * str,size_t size)6077 sqlmongets_create_or_drop(UDM_IOHANDLER *iohandler, char *str, size_t size)
6078 {
6079 UDM_SQLMON_PARAM *prm= (UDM_SQLMON_PARAM*) iohandler->user_data;
6080 UDM_CREATE_DROP_PARAM *prm2= (UDM_CREATE_DROP_PARAM *) prm->user_data;
6081 if (!fgets(str, size, prm2->infile))
6082 return 0;
6083 return str;
6084 }
6085
6086
6087 static const char*
UdmCreateOrDropCmdStr(udm_dbcmd_t cmd)6088 UdmCreateOrDropCmdStr(udm_dbcmd_t cmd)
6089 {
6090 switch(cmd)
6091 {
6092 case UDM_DBCMD_CREATE: return "create";
6093 case UDM_DBCMD_DROP: return "drop";
6094 default: return "";
6095 }
6096 return "unknown_cmd";
6097 }
6098
6099
6100 static udm_rc_t
UdmCreateOrDropSQL(UDM_AGENT * A,UDM_DB * db,udm_dbcmd_t cmd)6101 UdmCreateOrDropSQL(UDM_AGENT *A, UDM_DB *db, udm_dbcmd_t cmd)
6102 {
6103 char fname[1024];
6104 const char *sdir= UdmVarListFindStr(&A->Conf->Vars, "ShareDir", UDM_SHARE_DIR);
6105 #ifdef HAVE_FHS_LAYOUT
6106 const char *sdir2= "create" UDMSLASHSTR;
6107 #else
6108 const char *sdir2= "";
6109 #endif
6110 UDM_SQLMON_PARAM prm;
6111 UDM_CREATE_DROP_PARAM prm2;
6112
6113 udm_snprintf(fname,sizeof(fname),"%s%s%s%s%s%s.%s.sql",
6114 sdir, UDMSLASHSTR, sdir2,
6115 UdmSQLDBTypeToStr(UdmSQLDBType(db)), UDMSLASHSTR,
6116 UdmCreateOrDropCmdStr(cmd),
6117 UdmSQLDBModeToStr(UdmSQLDBMode(db)));
6118 UdmLog(A, UDM_LOG_ERROR, "Running '%s'", fname);
6119 prm2.Agent= A;
6120 if (!(prm2.infile= fopen(fname,"r")))
6121 {
6122 sprintf(A->Conf->errstr, "Can't open file '%s'", fname);
6123 return UDM_ERROR;
6124 }
6125 bzero((void*)&prm,sizeof(prm));
6126 prm.currdbnum= db - A->Conf->DBList.Item;
6127 prm.flags= UDM_SQLMON_DISPLAY_FIELDS;
6128 prm.iohandler.gets= sqlmongets_create_or_drop;
6129 prm.iohandler.prompt= sqlmonprompt_create_or_drop;
6130 prm.iohandler.user_data= &prm;
6131 prm.user_data= &prm2;
6132 UdmSQLMonitor(A, A->Conf,&prm);
6133 UdmLog(A, UDM_LOG_ERROR, "%d queries sent, %d succeeded, %d failed",
6134 (int) prm.nqueries, (int) prm.ngood, (int) prm.nbad);
6135 fclose(prm2.infile);
6136 return UDM_OK;
6137 }
6138
6139
6140 /******************* URL handlers **************************/
6141
6142 static udm_rc_t
UdmDocActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc,udm_doccmd_t cmd)6143 UdmDocActionSQL(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc, udm_doccmd_t cmd)
6144 {
6145 switch (cmd)
6146 {
6147 case UDM_DOCCMD_DELETE: return UdmDeleteURL(A, Doc, db);
6148 case UDM_DOCCMD_ADD: return UdmAddURL(A, Doc, db);
6149 case UDM_DOCCMD_SUPDATE: return UdmUpdateUrl(A, Doc, db);
6150 case UDM_DOCCMD_LUPDATE: return UdmLongUpdateURL(A, Doc, db);
6151 case UDM_DOCCMD_DUPDATE: return UdmDeleteWordsAndLinks(A, Doc, db);
6152 case UDM_DOCCMD_UPDCLONE: return UdmUpdateClone(A, Doc, db);
6153 case UDM_DOCCMD_REGCHILD: return UdmRegisterChild(A, Doc, db);
6154 case UDM_DOCCMD_FINDBYURL: return UdmFindURL(A, Doc, db);
6155 case UDM_DOCCMD_FINDBYMSG: return UdmFindMessage(A, Doc, db);
6156 case UDM_DOCCMD_FINDORIG: return UdmFindOrigin(A, Doc, db);
6157 case UDM_DOCCMD_GET_CACHED_COPY: return UdmGetCachedCopy(A, Doc, db);
6158 case UDM_DOCCMD_DOCPERSITE: return UdmDocPerSite(A, Doc, db);
6159 case UDM_DOCCMD_SQLIMPORTSEC: return UdmImportSection(A, Doc, db);
6160 case UDM_DOCCMD_RESTOREDATA: return UdmRestoreData(A, Doc, db);
6161 }
6162 return UDM_ERROR;
6163 }
6164
6165
6166 typedef struct udm_sqldb_driver_st
6167 {
6168 const char *name;
6169 udm_sqldbtype_t DBType;
6170 udm_sqldbapi_t DBDriver;
6171 int DBSQL_IN;
6172 int flags;
6173 const UDM_SQLDB_HANDLER *handler;
6174 } UDM_SQLDB_DRIVER;
6175
6176
6177 static const UDM_SQLDB_DRIVER SQLDriver[]=
6178 {
6179 #if (HAVE_ORACLE8)
6180 {
6181 "oracle8", UDM_DB_ORACLE8, UDM_DBAPI_ORACLE8, 1,
6182 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
6183 UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND |
6184 UDM_SQL_HAVE_ROWNUM | UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT |
6185 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6186 &udm_sqldb_oracle_handler
6187 },
6188 {
6189 "oracle", UDM_DB_ORACLE8, UDM_DBAPI_ORACLE8, 1,
6190 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
6191 UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND |
6192 UDM_SQL_HAVE_ROWNUM | UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT |
6193 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6194 &udm_sqldb_oracle_handler
6195 },
6196 #endif
6197 #if (HAVE_CTLIB)
6198 {
6199 "mssql", UDM_DB_MSSQL, UDM_DBAPI_CTLIB, 1,
6200 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
6201 UDM_SQL_HAVE_TOP | UDM_SQL_HAVE_0xHEX | UDM_SQL_HAVE_TRANSACT |
6202 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6203 &udm_sqldb_ctlib_handler
6204 },
6205 {
6206 "sybase", UDM_DB_SYBASE, UDM_DBAPI_CTLIB, 1,
6207 UDM_SQL_HAVE_GROUPBY | /*UDM_SQL_HAVE_TRUNCATE |*/
6208 /*
6209 Don't use TRUNCATE with Sybase.
6210 It gives error:
6211 'TRUNCATE TABLE command not allowed within multi-statement
6212 transaction.
6213 TODO: modify the code to use TRUNCATE outside a transaction
6214 */
6215 UDM_SQL_HAVE_TOP | UDM_SQL_HAVE_0xHEX |
6216 UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT
6217 /*
6218 Something goes wrong with sp_rename!
6219 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE
6220 */
6221 ,
6222 &udm_sqldb_ctlib_handler
6223 },
6224 #endif
6225 #if (HAVE_MYSQL)
6226 {
6227 "mysql", UDM_DB_MYSQL, UDM_DBAPI_MYSQL, 1,
6228 UDM_SQL_HAVE_BIND |
6229 UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_0xHEX |
6230 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE |
6231 UDM_SQL_HAVE_DROP_IF_EXISTS,
6232 &udm_sqldb_mysql_handler
6233 },
6234 #endif
6235 #if (HAVE_PGSQL)
6236 {
6237 "pgsql", UDM_DB_PGSQL, UDM_DBAPI_PGSQL, 1,
6238 UDM_SQL_HAVE_BIND |
6239 UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY |
6240 UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_TRANSACT |
6241 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6242 /* UDM_SQL_HAVE_DROP_IF_EXISTS depends on server version */
6243 &udm_sqldb_pgsql_handler,
6244 },
6245 #endif
6246 #if (HAVE_IBASE)
6247 {
6248 "ibase", UDM_DB_IBASE, UDM_DBAPI_IBASE, 0,
6249 /*
6250 while indexing large sites and using the SQL in statement
6251 interbase will fail when the items in the in IN statements
6252 are more then 1500. We'd better have to fix code to avoid
6253 big INs instead of hidding DBSQL_IN.
6254 */
6255 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND |
6256 UDM_SQL_HAVE_FIRST_SKIP | UDM_SQL_HAVE_TRANSACT | UDM_SQL_HAVE_GOOD_COMMIT,
6257 &udm_sqldb_ibase_handler,
6258 },
6259 #endif
6260 #if (HAVE_SQLITE)
6261 {
6262 "sqlite",
6263 UDM_DB_SQLITE, UDM_DBAPI_SQLITE, 1,
6264 UDM_SQL_HAVE_BIND |
6265 UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRANSACT,
6266 &udm_sqldb_sqlite_handler,
6267 },
6268 #endif
6269 #if (HAVE_SQLITE3)
6270 {
6271 "sqlite3",
6272 UDM_DB_SQLITE3, UDM_DBAPI_SQLITE3, 1,
6273 UDM_SQL_HAVE_BIND |
6274 UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY |
6275 UDM_SQL_HAVE_STDHEX | UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT |
6276 UDM_SQL_HAVE_DROP_IF_EXISTS | UDM_SQL_HAVE_RENAME |
6277 UDM_SQL_HAVE_CREATE_LIKE,
6278 &udm_sqldb_sqlite3_handler,
6279 },
6280 #endif
6281 #if (HAVE_ODBC)
6282 {
6283 "odbc-solid", UDM_DB_SOLID, UDM_DBAPI_ODBC, 1,
6284 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_TRANSACT,
6285 &udm_sqldb_odbc_handler,
6286 },
6287 {
6288 "odbc-sapdb", UDM_DB_SAPDB, UDM_DBAPI_ODBC, 1,
6289 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_TRANSACT,
6290 &udm_sqldb_odbc_handler,
6291 },
6292 {
6293 "odbc-db2", UDM_DB_DB2, UDM_DBAPI_ODBC, 1,
6294 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND |
6295 UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_STDHEX | UDM_SQL_HAVE_TRANSACT |
6296 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6297 &udm_sqldb_odbc_handler,
6298 },
6299 {
6300 "odbc-access", UDM_DB_ACCESS, UDM_DBAPI_ODBC, 1,
6301 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
6302 UDM_SQL_HAVE_0xHEX | UDM_SQL_HAVE_TRANSACT,
6303 &udm_sqldb_odbc_handler,
6304 },
6305 {
6306 "odbc-mimer", UDM_DB_MIMER, UDM_DBAPI_ODBC, 1,
6307 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
6308 UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_STDHEX | UDM_SQL_HAVE_TRANSACT,
6309 &udm_sqldb_odbc_handler,
6310 },
6311 {
6312 "odbc-cache", UDM_DB_CACHE, UDM_DBAPI_ODBC, 1,
6313 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
6314 UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_TRANSACT,
6315 &udm_sqldb_odbc_handler,
6316 },
6317 {
6318 "odbc-virtuoso", UDM_DB_VIRT, UDM_DBAPI_ODBC, 1,
6319 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
6320 UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_TRANSACT | UDM_SQL_HAVE_TOP,
6321 &udm_sqldb_odbc_handler,
6322 },
6323 {
6324 "odbc-oracle", UDM_DB_ORACLE8, UDM_DBAPI_ODBC, 1,
6325 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
6326 UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND |
6327 UDM_SQL_HAVE_ROWNUM | UDM_SQL_HAVE_GOOD_COMMIT |
6328 UDM_SQL_HAVE_TRANSACT |
6329 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6330 &udm_sqldb_odbc_handler,
6331 },
6332 {
6333 "odbc-oracle8", UDM_DB_ORACLE8, UDM_DBAPI_ODBC, 1,
6334 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
6335 UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_ROWNUM |
6336 UDM_SQL_HAVE_TRANSACT |
6337 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6338 &udm_sqldb_odbc_handler,
6339 },
6340 {
6341 "odbc-mssql", UDM_DB_MSSQL, UDM_DBAPI_ODBC, 1,
6342 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
6343 UDM_SQL_HAVE_TOP | UDM_SQL_HAVE_0xHEX | UDM_SQL_HAVE_TRANSACT |
6344 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
6345 &udm_sqldb_odbc_handler,
6346 },
6347 {
6348 "odbc-sybase", UDM_DB_SYBASE, UDM_DBAPI_ODBC, 1,
6349 UDM_SQL_HAVE_GROUPBY | /*UDM_SQL_HAVE_TRUNCATE |*/
6350 UDM_SQL_HAVE_TOP | UDM_SQL_HAVE_0xHEX |
6351 UDM_SQL_HAVE_TRANSACT| UDM_SQL_HAVE_GOOD_COMMIT,
6352 &udm_sqldb_odbc_handler,
6353 },
6354 {
6355 "odbc-mysql", UDM_DB_MYSQL, UDM_DBAPI_ODBC, 1,
6356 UDM_SQL_HAVE_BIND |
6357 UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_0xHEX |
6358 UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE |
6359 UDM_SQL_HAVE_DROP_IF_EXISTS,
6360 &udm_sqldb_odbc_handler,
6361 },
6362 {
6363 /* Bind does not seem to work with BYTEA in Windows */
6364 "odbc-pgsql", UDM_DB_PGSQL, UDM_DBAPI_ODBC, 1,
6365 UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY |
6366 UDM_SQL_HAVE_SUBSELECT /*| UDM_SQL_HAVE_BIND*/|
6367 UDM_SQL_HAVE_TRANSACT,
6368 &udm_sqldb_odbc_handler,
6369 },
6370 {
6371 "odbc-ibase", UDM_DB_IBASE, UDM_DBAPI_ODBC, 0,
6372 /*
6373 while indexing large sites and using the SQL in statement
6374 interbase will fail when the items in the in IN statements
6375 are more then 1500. We'd better have to fix code to avoid
6376 big INs instead of hidding DBSQL_IN.
6377 */
6378 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND |
6379 UDM_SQL_HAVE_FIRST_SKIP | UDM_SQL_HAVE_TRANSACT | UDM_SQL_HAVE_GOOD_COMMIT,
6380 &udm_sqldb_odbc_handler,
6381 },
6382 {
6383 "odbc-monetdb", UDM_DB_MONETDB, UDM_DBAPI_ODBC, 1,
6384 UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_LIMIT |
6385 UDM_SQL_HAVE_BIND |
6386 /*UDM_SQL_HAVE_BIND_TEXT | */UDM_SQL_HAVE_BLOB_AS_HEX | UDM_SQL_HAVE_TRANSACT,
6387 &udm_sqldb_odbc_handler,
6388 },
6389 #endif
6390 {
6391 NULL, UDM_DB_MYSQL, UDM_DBAPI_MYSQL, 0, 0, NULL
6392 }
6393 };
6394
6395
UdmSQLDriverByName(const char * name)6396 static const UDM_SQLDB_DRIVER *UdmSQLDriverByName(const char *name)
6397 {
6398 const UDM_SQLDB_DRIVER *drv;
6399 for (drv= SQLDriver; drv->name; drv++)
6400 {
6401 if (!strcasecmp(name, drv->name))
6402 return drv;
6403 if (!strncasecmp(drv->name, "odbc-", 5) &&
6404 !strcasecmp(drv->name + 5, name))
6405 return drv;
6406 }
6407 return NULL;
6408 }
6409
6410
6411 static const UDM_DBMODE_HANDLER *
UdmSQLDBModeHandlerByID(int DBMode)6412 UdmSQLDBModeHandlerByID(int DBMode)
6413 {
6414 switch (DBMode)
6415 {
6416 case UDM_SQLDBMODE_BLOB:
6417 return &udm_dbmode_handler_blob;
6418 case UDM_SQLDBMODE_SINGLE:
6419 return &udm_dbmode_handler_single;
6420 case UDM_SQLDBMODE_MULTI:
6421 return &udm_dbmode_handler_multi;
6422 case UDM_SQLDBMODE_RAWBLOB:
6423 return &udm_dbmode_handler_rawblob;
6424 }
6425 UDM_ASSERT(0);
6426 return NULL;
6427 }
6428
6429
6430 static udm_rc_t
UdmDBSetParam(UDM_DB * db,char * param)6431 UdmDBSetParam(UDM_DB *db, char *param)
6432 {
6433 char *tok, *lt;
6434
6435 for(tok = udm_strtok_r(param, "&",<) ; tok ;
6436 tok = udm_strtok_r(NULL,"&",<))
6437 {
6438 char * val;
6439 if((val=strchr(tok,'=')))
6440 {
6441 *val++='\0';
6442 UdmVarListReplaceStr(UdmSQLDBVars(db), tok, val);
6443 }
6444 else
6445 {
6446 UdmVarListReplaceStr(UdmSQLDBVars(db), tok, "");
6447 }
6448 }
6449 return UDM_OK;
6450 }
6451
6452
6453
6454 static udm_rc_t
UdmDBSetAddrCommon(UDM_DB * db,UDM_URL * addr)6455 UdmDBSetAddrCommon(UDM_DB *db, UDM_URL *addr)
6456 {
6457 char *s;
6458 if (addr->auth)
6459 {
6460 /*
6461 Unescape user and password to allow URL specific
6462 characters like '"<>@#? to be used as user or password part.
6463
6464 It's safe to spoil addr->auth here, as we don't
6465 need it anymore after setting DBUser and DBPass
6466 */
6467
6468 if ((s= strchr(addr->auth,':')))
6469 {
6470 *s++= 0;
6471 UdmUnescapeCGIQuery(s, s);
6472 UdmVarListReplaceStr(UdmSQLDBVars(db), "DBPass", s);
6473 }
6474 UdmUnescapeCGIQuery(addr->auth, addr->auth);
6475 UdmVarListReplaceStr(UdmSQLDBVars(db), "DBUser", addr->auth);
6476 }
6477
6478 UdmVarListReplaceStr(UdmSQLDBVars(db), "DBHost", addr->hostname);
6479 if (addr->port)
6480 UdmVarListReplaceInt(UdmSQLDBVars(db), "DBPort", addr->port);
6481
6482 if((s = strchr(UDM_NULL2EMPTY(addr->filename), '?')))
6483 {
6484 *s++='\0';
6485 if (UDM_OK != UdmDBSetParam(db, s))
6486 return UDM_ERROR;
6487 UdmVarListReplaceStr(UdmSQLDBVars(db), "filename", addr->filename);
6488 }
6489 else
6490 {
6491 UdmVarListReplaceStr(UdmSQLDBVars(db), "filename", addr->filename);
6492 }
6493
6494 return UDM_OK;
6495 }
6496
6497
6498 static udm_rc_t
UdmDBSetAddrSQLFromURL(UDM_DB * db,UDM_URL * addr,const char * dbaddr)6499 UdmDBSetAddrSQLFromURL(UDM_DB *db, UDM_URL *addr, const char *dbaddr)
6500 {
6501 const char *v;
6502 const UDM_SQLDB_DRIVER *drv= UdmSQLDriverByName(addr->schema);
6503 if (!drv)
6504 return UDM_NOTARGET;
6505
6506 if (!(db->specific= UdmMalloc(sizeof(UDM_SQLDB))))
6507 return UDM_ERROR;
6508 bzero(db->specific, sizeof(UDM_SQLDB));
6509
6510 UdmVarListReplaceStr(UdmSQLDBVars(db), "DBAddr", dbaddr);
6511
6512 if (UdmDBSetAddrCommon(db, addr))
6513 return UDM_ERROR;
6514
6515 UdmSQLDB(db)->DBMode= UDM_SQLDBMODE_BLOB;
6516 UdmSQL(db)->DBType= drv->DBType;
6517 UdmSQL(db)->DBDriver= drv->DBDriver;
6518 UdmSQL(db)->DBSQL_IN= drv->DBSQL_IN;
6519 UdmSQL(db)->flags= drv->flags;
6520 UdmSQL(db)->handler= drv->handler[0];
6521 db->dbhandler= &udm_dbhandler_sql;
6522
6523 if ((v= UdmVarListFindStr(UdmSQLDBVars(db),"dbmode",NULL)))
6524 {
6525 udm_bool_t error;
6526 UdmSQLDB(db)->DBMode= UdmStr2DBMode(v, &error);
6527 if (error)
6528 {
6529 udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db), "Unknown DBMode");
6530 UDM_FREE(db->specific);
6531 return UDM_ERROR;
6532 }
6533 }
6534
6535 UdmSQLDB(db)->dbmode_handler= UdmSQLDBModeHandlerByID(UdmSQLDBMode(db));
6536
6537 if ((v= UdmVarListFindStr(UdmSQLDBVars(db),"dbmodesearch",NULL)))
6538 {
6539 udm_bool_t error;
6540 udm_sqldbmode_t DBMode= UdmStr2DBMode(v, &error);
6541 if (error)
6542 {
6543 udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db), "Unknown DBModeSearch");
6544 UDM_FREE(db->specific);
6545 return UDM_ERROR;
6546 }
6547 if (DBMode == UDM_SQLDBMODE_BLOB &&
6548 UdmSQLDBType(db) != UDM_DB_MYSQL &&
6549 UdmSQLDBType(db) != UDM_DB_SYBASE &&
6550 UdmSQLDBType(db) != UDM_DB_MSSQL &&
6551 UdmSQLDBType(db) != UDM_DB_MIMER &&
6552 UdmSQLDBType(db) != UDM_DB_ORACLE8 &&
6553 UdmSQLDBType(db) != UDM_DB_DB2 &&
6554 UdmSQLDBType(db) != UDM_DB_PGSQL &&
6555 UdmSQLDBType(db) != UDM_DB_IBASE &&
6556 UdmSQLDBType(db) != UDM_DB_VIRT &&
6557 UdmSQLDBType(db) != UDM_DB_SQLITE3 &&
6558 UdmSQLDBType(db) != UDM_DB_MONETDB)
6559 {
6560 udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
6561 "This DBMode=blob is not supported with this database");
6562 UDM_FREE(db->specific);
6563 return UDM_ERROR;
6564 }
6565 }
6566
6567 if((v= UdmVarListFindStr(UdmSQLDBVars(db), "debugsql", "no")))
6568 {
6569 if (!strcasecmp(v, "yes"))
6570 UdmSQL(db)->flags|= UDM_SQL_DEBUG_QUERY;
6571 }
6572
6573 if(UdmSQLDBDriver(db) == UDM_DBAPI_IBASE ||
6574 UdmSQLDBDriver(db) == UDM_DBAPI_SQLITE ||
6575 UdmSQLDBDriver(db) == UDM_DBAPI_SQLITE3)
6576 {
6577 /*
6578 Ibase is a special case:
6579 It's database name consists of full path and file name
6580 */
6581 UdmVarListReplaceStr(UdmSQLDBVars(db), "DBName", UDM_NULL2EMPTY(addr->path));
6582 }
6583 else
6584 {
6585 /*
6586 ODBC Data Source Names may contain space and
6587 other tricky characters, let's unescape them.
6588 */
6589 size_t len= strlen(UDM_NULL2EMPTY(addr->path));
6590 char *DBName= (char*) UdmMalloc(len + 1);
6591 char *src= (char*) UdmMalloc(len + 1);
6592 src[0]= '\0';
6593 sscanf(UDM_NULL2EMPTY(addr->path), "/%[^/]s", src);
6594 UdmUnescapeCGIQuery(DBName, src);
6595 UdmVarListReplaceStr(UdmSQLDBVars(db), "DBName", DBName);
6596 UdmFree(src);
6597 UdmFree(DBName);
6598 }
6599
6600 if (UdmVarListFindInt(UdmSQLDBVars(db), "ps", 0) == 123)
6601 {
6602 UdmSQL(db)->handler.Prepare= UdmSQLPrepareGeneric;
6603 UdmSQL(db)->handler.Bind= UdmSQLBindGeneric;
6604 UdmSQL(db)->handler.Exec= UdmSQLExecGeneric;
6605 UdmSQL(db)->handler.StmtFree= UdmSQLStmtFreeGeneric;
6606 UdmSQL(db)->flags|= UDM_SQL_HAVE_BIND;
6607 }
6608 else if ((UdmSQLDBType(db) == UDM_DB_MSSQL ||
6609 UdmSQLDBType(db) == UDM_DB_SYBASE ||
6610 UdmSQLDBType(db) == UDM_DB_MYSQL ||
6611 UdmSQLDBType(db) == UDM_DB_PGSQL ||
6612 UdmSQLDBType(db) == UDM_DB_SQLITE ||
6613 UdmSQLDBType(db) == UDM_DB_SQLITE3)&&
6614 !strcasecmp(UdmVarListFindStr(UdmSQLDBVars(db), "ps", ""), "none"))
6615 {
6616 UdmSQL(db)->flags&= (0x7FFFFFFF ^ UDM_SQL_HAVE_BIND);
6617 }
6618 else if ((UdmSQLDBDriver(db) == UDM_DBAPI_MYSQL ||
6619 UdmSQLDBDriver(db) == UDM_DBAPI_PGSQL ||
6620 UdmSQLDBDriver(db) == UDM_DBAPI_ODBC) &&
6621 UdmSQL(db)->handler.Exec &&
6622 UdmVarListFindBool(UdmSQLDBVars(db), "ps", UDM_FALSE))
6623 {
6624 UdmSQL(db)->flags|= UDM_SQL_HAVE_BIND;
6625 }
6626
6627 return UDM_OK;
6628 }
6629
6630
6631 static udm_rc_t
UdmDBSetAddrSQL(UDM_DB * db,const char * dbaddr)6632 UdmDBSetAddrSQL(UDM_DB *db, const char *dbaddr)
6633 {
6634 udm_rc_t rc= UDM_NOTARGET;
6635 UDM_URL addr;
6636 UdmURLInit(&addr);
6637 if (!UdmURLParse(&addr, dbaddr) && addr.schema)
6638 rc= UdmDBSetAddrSQLFromURL(db, &addr, dbaddr);
6639 UdmURLFree(&addr);
6640 return rc;
6641 }
6642
6643
6644 static udm_rc_t
UdmDBCloseSQL(UDM_DB * db)6645 UdmDBCloseSQL(UDM_DB *db)
6646 {
6647 if (db->specific)
6648 {
6649 if (UdmSQLDBConnected(db))
6650 UdmSQLClose(UdmSQL(db));
6651 UdmWordCacheInit(&UdmSQLDB(db)->WordCache);
6652 UdmVarListFree(UdmSQLDBVars(db));
6653 UdmURLIdCacheFree(&UdmSQLDB(db)->URLIdCache);
6654 UDM_FREE(db->specific);
6655 }
6656 return UDM_OK;
6657 }
6658
6659
6660 static udm_rc_t
UdmDBActionSQL(UDM_AGENT * A,UDM_DB * db,udm_dbcmd_t cmd)6661 UdmDBActionSQL(UDM_AGENT *A, UDM_DB *db, udm_dbcmd_t cmd)
6662 {
6663 switch (cmd)
6664 {
6665 case UDM_DBCMD_CREATE: return UdmCreateOrDropSQL(A, db, cmd);
6666 case UDM_DBCMD_DROP: return UdmCreateOrDropSQL(A, db, cmd);
6667 case UDM_DBCMD_DOCCOUNT: return UdmGetDocCount(A, db);
6668 case UDM_DBCMD_CRAWLER_FINALIZE: return UdmWordCacheWrite(A, db, 0);
6669 }
6670 return UDM_ERROR;
6671 }
6672
6673
6674 static udm_rc_t
UdmDBInfoSQL(UDM_DB * db,void * dst,size_t dstlen,size_t * bytes_written,udm_dbinfo_t info)6675 UdmDBInfoSQL(UDM_DB *db, void *dst, size_t dstlen, size_t *bytes_written,
6676 udm_dbinfo_t info)
6677 {
6678 switch (info)
6679 {
6680 case UDM_DBINFO_IS_THREAD_SAFE:
6681 if (!dstlen)
6682 return UDM_ERROR;
6683 /* TODO: link against libmysqlclient_r */
6684 ((udm_bool_t *) dst)[0]= UDM_FALSE;
6685 *bytes_written= 1;
6686 return UDM_OK;
6687 case UDM_DBINFO_ERRCODE:
6688 if (dstlen < sizeof(int))
6689 return UDM_ERROR;
6690 ((int *)dst)[0]= UdmSQL(db)->errcode;
6691 *bytes_written= sizeof(int);
6692 return UDM_OK;
6693 case UDM_DBINFO_ADDR:
6694 {
6695 const char *dbaddr= UdmVarListFindStr(UdmSQLDBVars(db), "DBAddr", "<noaddr>");
6696 if (dstlen < 1)
6697 return UDM_ERROR;
6698 *bytes_written= udm_snprintf((char*) dst, dstlen, "%s", dbaddr);
6699 return UDM_OK;
6700 }
6701 case UDM_DBINFO_ERRSTR:
6702 {
6703 if (dstlen < 1)
6704 return UDM_ERROR;
6705 *bytes_written= udm_snprintf((char*) dst, dstlen, "%s", UdmDBSQLError(db));
6706 return UDM_OK;
6707 }
6708 }
6709 return UDM_ERROR;
6710 }
6711
6712
6713 static udm_rc_t
UdmRewriteURLData(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)6714 UdmRewriteURLData(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
6715 {
6716 udm_rc_t rc;
6717 UDM_URLID_LIST limit;
6718 bzero((void*) &limit, sizeof(limit));
6719 if (UDM_OK != (rc= UdmLoadURLDataFromURLForConv(A, db, Query, &limit)))
6720 return rc;
6721 return udm_dbmode_handler_blob.QueryAction(A, db, Query, UDM_QUERYCMD_REWRITE_URLDATA);
6722 }
6723
6724
6725 static udm_rc_t
UdmRewritePopularity(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)6726 UdmRewritePopularity(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
6727 {
6728 udm_rc_t rc;
6729 UDM_URLID_LIST limit;
6730 bzero((void*) &limit, sizeof(limit));
6731 if (UDM_OK != (rc= UdmLoadURLDataFromURLForConv(A, db, Query, &limit)))
6732 return rc;
6733 return udm_dbmode_handler_blob.QueryAction(A, db, Query, UDM_QUERYCMD_REWRITE_POPULARITY);
6734 }
6735
6736
6737 static udm_rc_t
UdmQueryActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,udm_querycmd_t cmd)6738 UdmQueryActionSQL(UDM_AGENT *A, UDM_DB *db,
6739 UDM_QUERY *Query, udm_querycmd_t cmd)
6740 {
6741 switch (cmd)
6742 {
6743 case UDM_QUERYCMD_FINDWORDS: return UdmFindWordsSQL(A, db, Query);
6744 case UDM_QUERYCMD_SUGGEST: return UdmQuerySuggest(A, db, Query);
6745 case UDM_QUERYCMD_DOCINFO: return UdmQueryAddDocInfoSQL(A, db, Query);
6746 case UDM_QUERYCMD_TRACK: return UdmTrackSQL(A, db, Query);
6747 case UDM_QUERYCMD_CLONES: return UdmQueryClones(A, db, Query);
6748 case UDM_QUERYCMD_WORDFORMS: return UdmQueryWordForms(A, db, Query);
6749 case UDM_QUERYCMD_TARGETS: return UdmTargetsSQL(A, db, Query);
6750 case UDM_QUERYCMD_CLEAR: return UdmClearDBSQL(A, db, Query);
6751 case UDM_QUERYCMD_INDEX: return UdmIndexSQL(A, db, Query);
6752 case UDM_QUERYCMD_EXPORT: return UdmExportSQL(A, db, Query);
6753 case UDM_QUERYCMD_EXPIRE: return UdmMarkForReindex(A, db, Query);
6754 case UDM_QUERYCMD_REFERERS: return UdmGetReferers(A, db, Query);
6755 case UDM_QUERYCMD_WORDSTAT: return UdmWordStatCreate(A, db, Query);
6756 case UDM_QUERYCMD_DUMPDATA: return UdmDumpData(A, db, Query);
6757 case UDM_QUERYCMD_STATISTICS:return UdmStatActionSQL(A, db, Query);
6758 case UDM_QUERYCMD_REWRITE_URLDATA: return UdmRewriteURLData(A, db, Query);
6759 case UDM_QUERYCMD_REWRITE_POPULARITY: return UdmRewritePopularity(A, db, Query);
6760 case UDM_QUERYCMD_REWRITE_LIMITS:
6761 return udm_dbmode_handler_blob.QueryAction(A, db, Query, cmd);
6762 }
6763 return UDM_ERROR;
6764 }
6765
6766
6767 const UDM_DBHANDLER udm_dbhandler_sql=
6768 {
6769 UdmDBSetAddrSQL, /*udm_rc_t (*Init) (UDM_DB*, const char *addr);*/
6770 UdmDBCloseSQL, /*udm_rc_t (*Close) (UDM_DB*);*/
6771 UdmDBInfoSQL, /*udm_rc_t (*Info) (UDM_DB *db, void *dst, size_t dstlen, size_t *bytes_written, udm_dbhandler_info_t info);*/
6772 UdmQueryActionSQL, /*udm_rc_t (*QueryAction) (UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query, udm_querycmd_t cmd);*/
6773 UdmDBActionSQL, /*udm_rc_t (*DBAction) (UDM_AGENT *A, UDM_DB *db, udm_dbcmd_t cmd);*/
6774 UdmDocActionSQL, /*udm_rc_t (*DocumentAction)(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *D, udm_doccmt_t cmd);*/
6775 UdmHrefActionSQL, /*udm_rc_t (*HrefAction) (UDM_AGENT *A, UDM_DB *db, UDM_HREF *H, udm_hrefcmd_t cmd)*/
6776 UdmSrvActionSQL, /*udm_rc_t (*ServerAction) (UDM_AGENT *A, UDM_DB *db, UDM_SERVERLIST *Srv, udm_srvcmd_t cmd);*/
6777 };
6778
6779
6780 #endif /* HAVE_SQL */
6781