1 /*-------------------------------------------------------------------------
2  *
3  * tsrank.c
4  *		rank tsvector by tsquery
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/tsrank.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 #include <math.h>
18 
19 #include "tsearch/ts_utils.h"
20 #include "utils/array.h"
getNextKey(int key, int len)21 #include "utils/builtins.h"
22 #include "miscadmin.h"
23 
24 
25 static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
26 
27 #define wpos(wep)	( w[ WEP_GETWEIGHT(wep) ] )
28 
29 #define RANK_NO_NORM			0x00
30 #define RANK_NORM_LOGLENGTH		0x01
31 #define RANK_NORM_LENGTH		0x02
32 #define RANK_NORM_EXTDIST		0x04
33 #define RANK_NORM_UNIQ			0x08
replicateValue(int[] table, int offset, int step, int end, int item)34 #define RANK_NORM_LOGUNIQ		0x10
35 #define RANK_NORM_RDIVRPLUS1	0x20
36 #define DEF_NORM_METHOD			RANK_NO_NORM
37 
38 static float calc_rank_or(const float *w, TSVector t, TSQuery q);
39 static float calc_rank_and(const float *w, TSVector t, TSQuery q);
40 
41 /*
42  * Returns a weight of a word collocation
43  */
44 static float4
45 word_distance(int32 w)
nextTableBitSize(int[] count, int len, int rootBits)46 {
47 	if (w > 100)
48 		return 1e-30f;
49 
50 	return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
51 }
52 
53 static int
54 cnt_length(TSVector t)
55 {
56 	WordEntry  *ptr = ARRPTR(t),
57 			   *end = (WordEntry *) STRPTR(t);
58 	int			len = 0;
59 
60 	while (ptr < end)
61 	{
62 		int			clen = POSDATALEN(t, ptr);
63 
buildHuffmanTable(int[] tableGroup, int tableIdx, int rootBits, int[] codeLengths, int codeLengthsSize)64 		if (clen == 0)
65 			len += 1;
66 		else
67 			len += clen;
68 
69 		ptr++;
70 	}
71 
72 	return len;
73 }
74 
75 
76 #define WordECompareQueryItem(e,q,p,i,m) \
77 	tsCompareString((q) + (i)->distance, (i)->length,	\
78 					(e) + (p)->pos, (p)->len, (m))
79 
80 
81 /*
82  * Returns a pointer to a WordEntry's array corresponding to 'item' from
83  * tsvector 't'. 'q' is the TSQuery containing 'item'.
84  * Returns NULL if not found.
85  */
86 static WordEntry *
87 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
88 {
89 	WordEntry  *StopLow = ARRPTR(t);
90 	WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
91 	WordEntry  *StopMiddle = StopHigh;
92 	int			difference;
93 
94 	*nitem = 0;
95 
96 	/* Loop invariant: StopLow <= item < StopHigh */
97 	while (StopLow < StopHigh)
98 	{
99 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
100 		difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
101 		if (difference == 0)
102 		{
103 			StopHigh = StopMiddle;
104 			*nitem = 1;
105 			break;
106 		}
107 		else if (difference > 0)
108 			StopLow = StopMiddle + 1;
109 		else
110 			StopHigh = StopMiddle;
111 	}
112 
113 	if (item->prefix)
114 	{
115 		if (StopLow >= StopHigh)
116 			StopMiddle = StopHigh;
117 
118 		*nitem = 0;
119 
120 		while (StopMiddle < (WordEntry *) STRPTR(t) &&
121 			   WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
122 		{
123 			(*nitem)++;
124 			StopMiddle++;
125 		}
126 	}
127 
128 	return (*nitem > 0) ? StopHigh : NULL;
129 }
130 
131 
132 /*
133  * sort QueryOperands by (length, word)
134  */
135 static int
136 compareQueryOperand(const void *a, const void *b, void *arg)
137 {
138 	char	   *operand = (char *) arg;
139 	QueryOperand *qa = (*(QueryOperand *const *) a);
140 	QueryOperand *qb = (*(QueryOperand *const *) b);
141 
142 	return tsCompareString(operand + qa->distance, qa->length,
143 						   operand + qb->distance, qb->length,
144 						   false);
145 }
146 
147 /*
148  * Returns a sorted, de-duplicated array of QueryOperands in a query.
149  * The returned QueryOperands are pointers to the original QueryOperands
150  * in the query.
151  *
152  * Length of the returned array is stored in *size
153  */
154 static QueryOperand **
155 SortAndUniqItems(TSQuery q, int *size)
156 {
157 	char	   *operand = GETOPERAND(q);
158 	QueryItem  *item = GETQUERY(q);
159 	QueryOperand **res,
160 			  **ptr,
161 			  **prevptr;
162 
163 	ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
164 
165 	/* Collect all operands from the tree to res */
166 	while ((*size)--)
167 	{
168 		if (item->type == QI_VAL)
169 		{
170 			*ptr = (QueryOperand *) item;
171 			ptr++;
172 		}
173 		item++;
174 	}
175 
176 	*size = ptr - res;
177 	if (*size < 2)
178 		return res;
179 
180 	qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, (void *) operand);
181 
182 	ptr = res + 1;
183 	prevptr = res;
184 
185 	/* remove duplicates */
186 	while (ptr - res < *size)
187 	{
188 		if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
189 		{
190 			prevptr++;
191 			*prevptr = *ptr;
192 		}
193 		ptr++;
194 	}
195 
196 	*size = prevptr + 1 - res;
197 	return res;
198 }
199 
200 static float
201 calc_rank_and(const float *w, TSVector t, TSQuery q)
202 {
203 	WordEntryPosVector **pos;
204 	WordEntryPosVector1 posnull;
205 	WordEntryPosVector *POSNULL;
206 	int			i,
207 				k,
208 				l,
209 				p;
210 	WordEntry  *entry,
211 			   *firstentry;
212 	WordEntryPos *post,
213 			   *ct;
214 	int32		dimt,
215 				lenct,
216 				dist,
217 				nitem;
218 	float		res = -1.0;
219 	QueryOperand **item;
220 	int			size = q->size;
221 
222 	item = SortAndUniqItems(q, &size);
223 	if (size < 2)
224 	{
225 		pfree(item);
226 		return calc_rank_or(w, t, q);
227 	}
228 	pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
229 
230 	/* A dummy WordEntryPos array to use when haspos is false */
231 	posnull.npos = 1;
232 	posnull.pos[0] = 0;
233 	WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
234 	POSNULL = (WordEntryPosVector *) &posnull;
235 
236 	for (i = 0; i < size; i++)
237 	{
238 		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
239 		if (!entry)
240 			continue;
241 
242 		while (entry - firstentry < nitem)
243 		{
244 			if (entry->haspos)
245 				pos[i] = _POSVECPTR(t, entry);
246 			else
247 				pos[i] = POSNULL;
248 
249 			dimt = pos[i]->npos;
250 			post = pos[i]->pos;
251 			for (k = 0; k < i; k++)
252 			{
253 				if (!pos[k])
254 					continue;
255 				lenct = pos[k]->npos;
256 				ct = pos[k]->pos;
257 				for (l = 0; l < dimt; l++)
258 				{
259 					for (p = 0; p < lenct; p++)
260 					{
261 						dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
262 						if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
263 						{
264 							float		curw;
265 
266 							if (!dist)
267 								dist = MAXENTRYPOS;
268 							curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
269 							res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
270 						}
271 					}
272 				}
273 			}
274 
275 			entry++;
276 		}
277 	}
278 	pfree(pos);
279 	pfree(item);
280 	return res;
281 }
282 
283 static float
284 calc_rank_or(const float *w, TSVector t, TSQuery q)
285 {
286 	WordEntry  *entry,
287 			   *firstentry;
288 	WordEntryPosVector1 posnull;
289 	WordEntryPos *post;
290 	int32		dimt,
291 				j,
292 				i,
293 				nitem;
294 	float		res = 0.0;
295 	QueryOperand **item;
296 	int			size = q->size;
297 
298 	/* A dummy WordEntryPos array to use when haspos is false */
299 	posnull.npos = 1;
300 	posnull.pos[0] = 0;
301 
302 	item = SortAndUniqItems(q, &size);
303 
304 	for (i = 0; i < size; i++)
305 	{
306 		float		resj,
307 					wjm;
308 		int32		jm;
309 
310 		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
311 		if (!entry)
312 			continue;
313 
314 		while (entry - firstentry < nitem)
315 		{
316 			if (entry->haspos)
317 			{
318 				dimt = POSDATALEN(t, entry);
319 				post = POSDATAPTR(t, entry);
320 			}
321 			else
322 			{
323 				dimt = posnull.npos;
324 				post = posnull.pos;
325 			}
326 
327 			resj = 0.0;
328 			wjm = -1.0;
329 			jm = 0;
330 			for (j = 0; j < dimt; j++)
331 			{
332 				resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
333 				if (wpos(post[j]) > wjm)
334 				{
335 					wjm = wpos(post[j]);
336 					jm = j;
337 				}
338 			}
339 /*
340 			limit (sum(i/i^2),i->inf) = pi^2/6
341 			resj = sum(wi/i^2),i=1,noccurence,
342 			wi - should be sorted desc,
343 			don't sort for now, just choose maximum weight. This should be corrected
344 			Oleg Bartunov
345 */
346 			res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
347 
348 			entry++;
349 		}
350 	}
351 	if (size > 0)
352 		res = res / size;
353 	pfree(item);
354 	return res;
355 }
356 
357 static float
358 calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
359 {
360 	QueryItem  *item = GETQUERY(q);
361 	float		res = 0.0;
362 	int			len;
363 
364 	if (!t->size || !q->size)
365 		return 0.0;
366 
367 	/* XXX: What about NOT? */
368 	res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
369 									item->qoperator.oper == OP_PHRASE)) ?
370 		calc_rank_and(w, t, q) :
371 		calc_rank_or(w, t, q);
372 
373 	if (res < 0)
374 		res = 1e-20f;
375 
376 	if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
377 		res /= log((double) (cnt_length(t) + 1)) / log(2.0);
378 
379 	if (method & RANK_NORM_LENGTH)
380 	{
381 		len = cnt_length(t);
382 		if (len > 0)
383 			res /= (float) len;
384 	}
385 
386 	/* RANK_NORM_EXTDIST not applicable */
387 
388 	if ((method & RANK_NORM_UNIQ) && t->size > 0)
389 		res /= (float) (t->size);
390 
391 	if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
392 		res /= log((double) (t->size + 1)) / log(2.0);
393 
394 	if (method & RANK_NORM_RDIVRPLUS1)
395 		res /= (res + 1);
396 
397 	return res;
398 }
399 
400 static const float *
401 getWeights(ArrayType *win)
402 {
403 	static float ws[lengthof(weights)];
404 	int			i;
405 	float4	   *arrdata;
406 
407 	if (win == NULL)
408 		return weights;
409 
410 	if (ARR_NDIM(win) != 1)
411 		ereport(ERROR,
412 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
413 				 errmsg("array of weight must be one-dimensional")));
414 
415 	if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
416 		ereport(ERROR,
417 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
418 				 errmsg("array of weight is too short")));
419 
420 	if (array_contains_nulls(win))
421 		ereport(ERROR,
422 				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
423 				 errmsg("array of weight must not contain nulls")));
424 
425 	arrdata = (float4 *) ARR_DATA_PTR(win);
426 	for (i = 0; i < lengthof(weights); i++)
427 	{
428 		ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
429 		if (ws[i] > 1.0)
430 			ereport(ERROR,
431 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
432 					 errmsg("weight out of range")));
433 	}
434 
435 	return ws;
436 }
437 
438 Datum
439 ts_rank_wttf(PG_FUNCTION_ARGS)
440 {
441 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
442 	TSVector	txt = PG_GETARG_TSVECTOR(1);
443 	TSQuery		query = PG_GETARG_TSQUERY(2);
444 	int			method = PG_GETARG_INT32(3);
445 	float		res;
446 
447 	res = calc_rank(getWeights(win), txt, query, method);
448 
449 	PG_FREE_IF_COPY(win, 0);
450 	PG_FREE_IF_COPY(txt, 1);
451 	PG_FREE_IF_COPY(query, 2);
452 	PG_RETURN_FLOAT4(res);
453 }
454 
455 Datum
456 ts_rank_wtt(PG_FUNCTION_ARGS)
457 {
458 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
459 	TSVector	txt = PG_GETARG_TSVECTOR(1);
460 	TSQuery		query = PG_GETARG_TSQUERY(2);
461 	float		res;
462 
463 	res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
464 
465 	PG_FREE_IF_COPY(win, 0);
466 	PG_FREE_IF_COPY(txt, 1);
467 	PG_FREE_IF_COPY(query, 2);
468 	PG_RETURN_FLOAT4(res);
469 }
470 
471 Datum
472 ts_rank_ttf(PG_FUNCTION_ARGS)
473 {
474 	TSVector	txt = PG_GETARG_TSVECTOR(0);
475 	TSQuery		query = PG_GETARG_TSQUERY(1);
476 	int			method = PG_GETARG_INT32(2);
477 	float		res;
478 
479 	res = calc_rank(getWeights(NULL), txt, query, method);
480 
481 	PG_FREE_IF_COPY(txt, 0);
482 	PG_FREE_IF_COPY(query, 1);
483 	PG_RETURN_FLOAT4(res);
484 }
485 
486 Datum
487 ts_rank_tt(PG_FUNCTION_ARGS)
488 {
489 	TSVector	txt = PG_GETARG_TSVECTOR(0);
490 	TSQuery		query = PG_GETARG_TSQUERY(1);
491 	float		res;
492 
493 	res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
494 
495 	PG_FREE_IF_COPY(txt, 0);
496 	PG_FREE_IF_COPY(query, 1);
497 	PG_RETURN_FLOAT4(res);
498 }
499 
500 typedef struct
501 {
502 	union
503 	{
504 		struct
505 		{						/* compiled doc representation */
506 			QueryItem **items;
507 			int16		nitem;
508 		}			query;
509 		struct
510 		{						/* struct is used for preparing doc
511 								 * representation */
512 			QueryItem  *item;
513 			WordEntry  *entry;
514 		}			map;
515 	}			data;
516 	WordEntryPos pos;
517 } DocRepresentation;
518 
519 static int
520 compareDocR(const void *va, const void *vb)
521 {
522 	const DocRepresentation *a = (const DocRepresentation *) va;
523 	const DocRepresentation *b = (const DocRepresentation *) vb;
524 
525 	if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
526 	{
527 		if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
528 		{
529 			if (a->data.map.entry == b->data.map.entry)
530 				return 0;
531 
532 			return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
533 		}
534 
535 		return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
536 	}
537 
538 	return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
539 }
540 
541 #define MAXQROPOS	MAXENTRYPOS
542 typedef struct
543 {
544 	bool		operandexists;
545 	bool		reverseinsert;	/* indicates insert order, true means
546 								 * descending order */
547 	uint32		npos;
548 	WordEntryPos pos[MAXQROPOS];
549 } QueryRepresentationOperand;
550 
551 typedef struct
552 {
553 	TSQuery		query;
554 	QueryRepresentationOperand *operandData;
555 } QueryRepresentation;
556 
557 #define QR_GET_OPERAND_DATA(q, v) \
558 	( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
559 
560 static bool
561 checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
562 {
563 	QueryRepresentation *qr = (QueryRepresentation *) checkval;
564 	QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
565 
566 	if (!opData->operandexists)
567 		return false;
568 
569 	if (data)
570 	{
571 		data->npos = opData->npos;
572 		data->pos = opData->pos;
573 		if (opData->reverseinsert)
574 			data->pos += MAXQROPOS - opData->npos;
575 	}
576 
577 	return true;
578 }
579 
580 typedef struct
581 {
582 	int			pos;
583 	int			p;
584 	int			q;
585 	DocRepresentation *begin;
586 	DocRepresentation *end;
587 } CoverExt;
588 
589 static void
590 resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
591 {
592 	int			i;
593 
594 	for (i = 0; i < qr->query->size; i++)
595 	{
596 		qr->operandData[i].operandexists = false;
597 		qr->operandData[i].reverseinsert = reverseinsert;
598 		qr->operandData[i].npos = 0;
599 	}
600 }
601 
602 static void
603 fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
604 {
605 	int			i;
606 	int			lastPos;
607 	QueryRepresentationOperand *opData;
608 
609 	for (i = 0; i < entry->data.query.nitem; i++)
610 	{
611 		if (entry->data.query.items[i]->type != QI_VAL)
612 			continue;
613 
614 		opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
615 
616 		opData->operandexists = true;
617 
618 		if (opData->npos == 0)
619 		{
620 			lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
621 			opData->pos[lastPos] = entry->pos;
622 			opData->npos++;
623 			continue;
624 		}
625 
626 		lastPos = opData->reverseinsert ?
627 			(MAXQROPOS - opData->npos) :
628 			(opData->npos - 1);
629 
630 		if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
631 		{
632 			lastPos = opData->reverseinsert ?
633 				(MAXQROPOS - 1 - opData->npos) :
634 				(opData->npos);
635 
636 			opData->pos[lastPos] = entry->pos;
637 			opData->npos++;
638 		}
639 	}
640 }
641 
642 static bool
643 Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
644 {
645 	DocRepresentation *ptr;
646 	int			lastpos = ext->pos;
647 	bool		found = false;
648 
649 	/*
650 	 * since this function recurses, it could be driven to stack overflow.
651 	 * (though any decent compiler will optimize away the tail-recursion.
652 	 */
653 	check_stack_depth();
654 
655 	resetQueryRepresentation(qr, false);
656 
657 	ext->p = INT_MAX;
658 	ext->q = 0;
659 	ptr = doc + ext->pos;
660 
661 	/* find upper bound of cover from current position, move up */
662 	while (ptr - doc < len)
663 	{
664 		fillQueryRepresentationData(qr, ptr);
665 
666 		if (TS_execute(GETQUERY(qr->query), (void *) qr,
667 					   TS_EXEC_EMPTY, checkcondition_QueryOperand))
668 		{
669 			if (WEP_GETPOS(ptr->pos) > ext->q)
670 			{
671 				ext->q = WEP_GETPOS(ptr->pos);
672 				ext->end = ptr;
673 				lastpos = ptr - doc;
674 				found = true;
675 			}
676 			break;
677 		}
678 		ptr++;
679 	}
680 
681 	if (!found)
682 		return false;
683 
684 	resetQueryRepresentation(qr, true);
685 
686 	ptr = doc + lastpos;
687 
688 	/* find lower bound of cover from found upper bound, move down */
689 	while (ptr >= doc + ext->pos)
690 	{
691 		/*
692 		 * we scan doc from right to left, so pos info in reverse order!
693 		 */
694 		fillQueryRepresentationData(qr, ptr);
695 
696 		if (TS_execute(GETQUERY(qr->query), (void *) qr,
697 					   TS_EXEC_CALC_NOT, checkcondition_QueryOperand))
698 		{
699 			if (WEP_GETPOS(ptr->pos) < ext->p)
700 			{
701 				ext->begin = ptr;
702 				ext->p = WEP_GETPOS(ptr->pos);
703 			}
704 			break;
705 		}
706 		ptr--;
707 	}
708 
709 	if (ext->p <= ext->q)
710 	{
711 		/*
712 		 * set position for next try to next lexeme after beginning of found
713 		 * cover
714 		 */
715 		ext->pos = (ptr - doc) + 1;
716 		return true;
717 	}
718 
719 	ext->pos++;
720 	return Cover(doc, len, qr, ext);
721 }
722 
723 static DocRepresentation *
724 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
725 {
726 	QueryItem  *item = GETQUERY(qr->query);
727 	WordEntry  *entry,
728 			   *firstentry;
729 	WordEntryPos *post;
730 	int32		dimt,			/* number of 'post' items */
731 				j,
732 				i,
733 				nitem;
734 	int			len = qr->query->size * 4,
735 				cur = 0;
736 	DocRepresentation *doc;
737 
738 	doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
739 
740 	/*
741 	 * Iterate through query to make DocRepresentaion for words and it's
742 	 * entries satisfied by query
743 	 */
744 	for (i = 0; i < qr->query->size; i++)
745 	{
746 		QueryOperand *curoperand;
747 
748 		if (item[i].type != QI_VAL)
749 			continue;
750 
751 		curoperand = &item[i].qoperand;
752 
753 		firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
754 		if (!entry)
755 			continue;
756 
757 		/* iterations over entries in tsvector */
758 		while (entry - firstentry < nitem)
759 		{
760 			if (entry->haspos)
761 			{
762 				dimt = POSDATALEN(txt, entry);
763 				post = POSDATAPTR(txt, entry);
764 			}
765 			else
766 			{
767 				/* ignore words without positions */
768 				entry++;
769 				continue;
770 			}
771 
772 			while (cur + dimt >= len)
773 			{
774 				len *= 2;
775 				doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
776 			}
777 
778 			/* iterations over entry's positions */
779 			for (j = 0; j < dimt; j++)
780 			{
781 				if (curoperand->weight == 0 ||
782 					curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
783 				{
784 					doc[cur].pos = post[j];
785 					doc[cur].data.map.entry = entry;
786 					doc[cur].data.map.item = (QueryItem *) curoperand;
787 					cur++;
788 				}
789 			}
790 
791 			entry++;
792 		}
793 	}
794 
795 	if (cur > 0)
796 	{
797 		DocRepresentation *rptr = doc + 1,
798 				   *wptr = doc,
799 					storage;
800 
801 		/*
802 		 * Sort representation in ascending order by pos and entry
803 		 */
804 		qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
805 
806 		/*
807 		 * Join QueryItem per WordEntry and it's position
808 		 */
809 		storage.pos = doc->pos;
810 		storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
811 		storage.data.query.items[0] = doc->data.map.item;
812 		storage.data.query.nitem = 1;
813 
814 		while (rptr - doc < cur)
815 		{
816 			if (rptr->pos == (rptr - 1)->pos &&
817 				rptr->data.map.entry == (rptr - 1)->data.map.entry)
818 			{
819 				storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
820 				storage.data.query.nitem++;
821 			}
822 			else
823 			{
824 				*wptr = storage;
825 				wptr++;
826 				storage.pos = rptr->pos;
827 				storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
828 				storage.data.query.items[0] = rptr->data.map.item;
829 				storage.data.query.nitem = 1;
830 			}
831 
832 			rptr++;
833 		}
834 
835 		*wptr = storage;
836 		wptr++;
837 
838 		*doclen = wptr - doc;
839 		return doc;
840 	}
841 
842 	pfree(doc);
843 	return NULL;
844 }
845 
846 static float4
847 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
848 {
849 	DocRepresentation *doc;
850 	int			len,
851 				i,
852 				doclen = 0;
853 	CoverExt	ext;
854 	double		Wdoc = 0.0;
855 	double		invws[lengthof(weights)];
856 	double		SumDist = 0.0,
857 				PrevExtPos = 0.0,
858 				CurExtPos = 0.0;
859 	int			NExtent = 0;
860 	QueryRepresentation qr;
861 
862 
863 	for (i = 0; i < lengthof(weights); i++)
864 	{
865 		invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
866 		if (invws[i] > 1.0)
867 			ereport(ERROR,
868 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
869 					 errmsg("weight out of range")));
870 		invws[i] = 1.0 / invws[i];
871 	}
872 
873 	qr.query = query;
874 	qr.operandData = (QueryRepresentationOperand *)
875 		palloc0(sizeof(QueryRepresentationOperand) * query->size);
876 
877 	doc = get_docrep(txt, &qr, &doclen);
878 	if (!doc)
879 	{
880 		pfree(qr.operandData);
881 		return 0.0;
882 	}
883 
884 	MemSet(&ext, 0, sizeof(CoverExt));
885 	while (Cover(doc, doclen, &qr, &ext))
886 	{
887 		double		Cpos = 0.0;
888 		double		InvSum = 0.0;
889 		int			nNoise;
890 		DocRepresentation *ptr = ext.begin;
891 
892 		while (ptr <= ext.end)
893 		{
894 			InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
895 			ptr++;
896 		}
897 
898 		Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
899 
900 		/*
901 		 * if doc are big enough then ext.q may be equal to ext.p due to limit
902 		 * of positional information. In this case we approximate number of
903 		 * noise word as half cover's length
904 		 */
905 		nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
906 		if (nNoise < 0)
907 			nNoise = (ext.end - ext.begin) / 2;
908 		Wdoc += Cpos / ((double) (1 + nNoise));
909 
910 		CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
911 		if (NExtent > 0 && CurExtPos > PrevExtPos	/* prevent division by
912 													 * zero in a case of
913 			  * multiple lexize */ )
914 			SumDist += 1.0 / (CurExtPos - PrevExtPos);
915 
916 		PrevExtPos = CurExtPos;
917 		NExtent++;
918 	}
919 
920 	if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
921 		Wdoc /= log((double) (cnt_length(txt) + 1));
922 
923 	if (method & RANK_NORM_LENGTH)
924 	{
925 		len = cnt_length(txt);
926 		if (len > 0)
927 			Wdoc /= (double) len;
928 	}
929 
930 	if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
931 		Wdoc /= ((double) NExtent) / SumDist;
932 
933 	if ((method & RANK_NORM_UNIQ) && txt->size > 0)
934 		Wdoc /= (double) (txt->size);
935 
936 	if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
937 		Wdoc /= log((double) (txt->size + 1)) / log(2.0);
938 
939 	if (method & RANK_NORM_RDIVRPLUS1)
940 		Wdoc /= (Wdoc + 1);
941 
942 	pfree(doc);
943 
944 	pfree(qr.operandData);
945 
946 	return (float4) Wdoc;
947 }
948 
949 Datum
950 ts_rankcd_wttf(PG_FUNCTION_ARGS)
951 {
952 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
953 	TSVector	txt = PG_GETARG_TSVECTOR(1);
954 	TSQuery		query = PG_GETARG_TSQUERY(2);
955 	int			method = PG_GETARG_INT32(3);
956 	float		res;
957 
958 	res = calc_rank_cd(getWeights(win), txt, query, method);
959 
960 	PG_FREE_IF_COPY(win, 0);
961 	PG_FREE_IF_COPY(txt, 1);
962 	PG_FREE_IF_COPY(query, 2);
963 	PG_RETURN_FLOAT4(res);
964 }
965 
966 Datum
967 ts_rankcd_wtt(PG_FUNCTION_ARGS)
968 {
969 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
970 	TSVector	txt = PG_GETARG_TSVECTOR(1);
971 	TSQuery		query = PG_GETARG_TSQUERY(2);
972 	float		res;
973 
974 	res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
975 
976 	PG_FREE_IF_COPY(win, 0);
977 	PG_FREE_IF_COPY(txt, 1);
978 	PG_FREE_IF_COPY(query, 2);
979 	PG_RETURN_FLOAT4(res);
980 }
981 
982 Datum
983 ts_rankcd_ttf(PG_FUNCTION_ARGS)
984 {
985 	TSVector	txt = PG_GETARG_TSVECTOR(0);
986 	TSQuery		query = PG_GETARG_TSQUERY(1);
987 	int			method = PG_GETARG_INT32(2);
988 	float		res;
989 
990 	res = calc_rank_cd(getWeights(NULL), txt, query, method);
991 
992 	PG_FREE_IF_COPY(txt, 0);
993 	PG_FREE_IF_COPY(query, 1);
994 	PG_RETURN_FLOAT4(res);
995 }
996 
997 Datum
998 ts_rankcd_tt(PG_FUNCTION_ARGS)
999 {
1000 	TSVector	txt = PG_GETARG_TSVECTOR(0);
1001 	TSQuery		query = PG_GETARG_TSQUERY(1);
1002 	float		res;
1003 
1004 	res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
1005 
1006 	PG_FREE_IF_COPY(txt, 0);
1007 	PG_FREE_IF_COPY(query, 1);
1008 	PG_RETURN_FLOAT4(res);
1009 }
1010