1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #include "udm_config.h"
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <math.h>
23
24 #include "udm_common.h"
25 #include "udm_utils.h"
26 #include "udm_log.h"
27 #include "udm_searchtool.h"
28 #include "udm_coords.h"
29 #include "udm_db.h"
30
31
32 typedef struct
33 {
34 urlid_t id;
35 int popularity;
36 } UDM_URLIPOP;
37
38
39 typedef struct
40 {
41 size_t nitems;
42 size_t mitems;
43 UDM_URLIPOP *Item;
44 } UDM_URLIPOPLIST;
45
46
47 static void
UdmURLIPopListInit(UDM_URLIPOPLIST * List)48 UdmURLIPopListInit(UDM_URLIPOPLIST *List)
49 {
50 bzero((void*) List, sizeof(*List));
51 }
52
53
54 static void
UdmURLIPopListFree(UDM_URLIPOPLIST * List)55 UdmURLIPopListFree(UDM_URLIPOPLIST *List)
56 {
57 UdmFree(List->Item);
58 }
59
60
61 static udm_rc_t
UdmURLIPopListAlloc(UDM_URLIPOPLIST * List,size_t mitems)62 UdmURLIPopListAlloc(UDM_URLIPOPLIST *List, size_t mitems)
63 {
64 if (!(List->Item= (UDM_URLIPOP *) UdmMalloc(mitems * sizeof(UDM_URLIPOP))))
65 return UDM_ERROR;
66 List->mitems= mitems;
67 return UDM_OK;
68 }
69
70
71 static void
UdmURLIPopListAdd(UDM_URLIPOPLIST * List,const UDM_URLIPOP * Item)72 UdmURLIPopListAdd(UDM_URLIPOPLIST *List, const UDM_URLIPOP *Item)
73 {
74 List->Item[List->nitems++]= *Item;
75 }
76
77
78 static int
cmp_urlipop(const UDM_URLIPOP * a,const UDM_URLIPOP * b)79 cmp_urlipop(const UDM_URLIPOP *a, const UDM_URLIPOP *b)
80 {
81 if (a->popularity < b->popularity)
82 return 1;
83 if (a->popularity > b->popularity)
84 return -1;
85 if (a->id < b->id)
86 return -1;
87 if (a->id > b->id)
88 return 1;
89 return 0;
90 }
91
92 static void
UdmURLIPopListSort(UDM_URLIPOPLIST * List)93 UdmURLIPopListSort(UDM_URLIPOPLIST *List)
94 {
95 UdmSort(List->Item, List->nitems, sizeof(UDM_URLIPOP), (udm_qsort_cmp) cmp_urlipop);
96 }
97
98
99 static size_t
UdmURLIPopListCountSame(UDM_URLIPOPLIST * List,size_t offs)100 UdmURLIPopListCountSame(UDM_URLIPOPLIST *List, size_t offs)
101 {
102 size_t offs0= offs++;
103 for ( ; offs < List->nitems; offs++)
104 {
105 if (List->Item[offs0].popularity != List->Item[offs].popularity)
106 break;
107 }
108 return offs - offs0;
109 }
110
111
112 static udm_rc_t
UdmURLIPopListEncode(UDM_AGENT * A,UDM_URLIPOPLIST * List,UDM_DSTR * dstr)113 UdmURLIPopListEncode(UDM_AGENT *A, UDM_URLIPOPLIST *List, UDM_DSTR *dstr)
114 {
115 size_t i;
116 for (i= 0; i < List->nitems; )
117 {
118 size_t n= UdmURLIPopListCountSame(List, i);
119 urlid_t id;
120 if (!UdmDSTRAppendINT2BE(dstr, List->Item[i].popularity))
121 return UDM_ERROR;
122 if (UDM_OK != UdmDSTRAppendCoord(dstr, n))
123 {
124 UdmLog(A, UDM_LOG_ERROR,
125 "URLIPopListEncode: DSTRAppendCoord failed: count=%d",
126 (int) n);
127 return UDM_ERROR;
128 }
129 for (id= 0; n ; n--)
130 {
131 UDM_URLIPOP *Item= &List->Item[i++];
132 if (UDM_OK != UdmDSTRAppendCoord(dstr, Item->id - id))
133 {
134 UdmLog(A, UDM_LOG_ERROR,
135 "URLIPopListEncode: DSTRAppendCoord failed: delta=%d",
136 Item->id - id);
137 return UDM_ERROR;
138 }
139 id= Item->id;
140 }
141 }
142 return UDM_OK;
143 }
144
145
146 #define ERROR_BYTES_TO_SHOW 4
147
148 udm_rc_t
UdmURLDataListUnpackPopularity(UDM_AGENT * A,UDM_URLDATALIST * URLDataList,UDM_CONST_STR * cstr)149 UdmURLDataListUnpackPopularity(UDM_AGENT *A, UDM_URLDATALIST *URLDataList,
150 UDM_CONST_STR *cstr)
151 {
152 const char *str= cstr->str;
153 const char *end= cstr->str + cstr->length;
154 size_t nfound;
155 char hex[ERROR_BYTES_TO_SHOW + 1];
156 for (nfound= 0; str + 3 < end ; )
157 {
158 urlid_t id;
159 size_t ndocs, nbytes, i;
160 uint4 pop= (((uint4) (unsigned char) str[0]) << 8) + (unsigned char) str[1];
161 str+= 2;
162 str+= (nbytes= udm_coord_get(&ndocs, (const unsigned char *) str,
163 (const unsigned char *) end));
164 if (!nbytes)
165 goto err;
166 /*fprintf(stderr, "pop=%d ndocs=%d\n", (int) pop, (int) ndocs);*/
167 for (id= 0, i= 0; i < ndocs; i++)
168 {
169 UDM_URLDATA *data;
170 size_t delta;
171 str+= (nbytes= udm_coord_get(&delta, (const unsigned char *) str,
172 (const unsigned char *) end));
173 if (!nbytes)
174 goto err;
175 id+= delta;
176 if ((data= UdmURLDataListSearch(URLDataList, id)))
177 {
178 data->pop_rank= (double) pop / (double) 0xFFFF;
179 nfound++;
180 }
181 /*fprintf(stderr, " delta=%d id=%d %p\n", (int) delta, (int) id, (void*)data);*/
182 }
183 }
184 UdmLog(A, UDM_LOG_DEBUG, "Found %d documents in '##pop' record", (int) nfound);
185 return UDM_OK;
186 err:
187 UdmHexEncode(hex, str, UDM_MIN(end - str, ERROR_BYTES_TO_SHOW));
188 UdmLog(A, UDM_LOG_ERROR, "Bad data format in '##pop' record (%s)", hex);
189 return UDM_ERROR;
190 }
191
192
193 static udm_rc_t
UdmURLIPopListInitFromURLDataList(UDM_URLIPOPLIST * IPopList,UDM_URLDATALIST * List)194 UdmURLIPopListInitFromURLDataList(UDM_URLIPOPLIST *IPopList,
195 UDM_URLDATALIST *List)
196 {
197 size_t i;
198 UdmURLIPopListInit(IPopList);
199 if (UDM_OK != UdmURLIPopListAlloc(IPopList, List->nitems))
200 return UDM_ERROR;
201 for (i= 0; i < List->nitems; i++)
202 {
203 UDM_URLDATA *Item= &List->Item[i];
204 UDM_URLIPOP tmp;
205 if ((tmp.popularity= (int) (Item->pop_rank * 0xFFFF)))
206 {
207 if (tmp.popularity > 0xFFFF)
208 tmp.popularity= 0xFFFF;
209 tmp.id= Item->url_id;
210 UdmURLIPopListAdd(IPopList, &tmp);
211 }
212 }
213 return UDM_OK;
214 }
215
216
217 udm_rc_t
UdmURLDataListPackPopularity(UDM_AGENT * A,UDM_URLDATALIST * List,UDM_DSTR * pop)218 UdmURLDataListPackPopularity(UDM_AGENT *A, UDM_URLDATALIST *List, UDM_DSTR *pop)
219 {
220 UDM_URLIPOPLIST IPopList;
221
222 if (UDM_OK != UdmURLIPopListInitFromURLDataList(&IPopList, List))
223 return UDM_ERROR;
224
225 if (IPopList.nitems)
226 {
227 UdmURLIPopListSort(&IPopList);
228 if (UDM_OK != UdmURLIPopListEncode(A, &IPopList, pop))
229 return UDM_ERROR;
230 /*
231 for (i= 0; i < IPopList.nitems; i++)
232 {
233 UDM_URLIPOP *Item= &IPopList.Item[i];
234 fprintf(stderr, "[%d]=%d %04X\n", Item->id, Item->popularity, Item->popularity);
235 }
236 */
237 }
238 UdmURLIPopListFree(&IPopList);
239 return UDM_OK;
240 }
241