1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #include "udm_config.h"
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/types.h>
23
24 #include "udm_common.h"
25 #include "udm_spell.h"
26 #include "udm_hrefs.h"
27 #include "udm_utils.h"
28 #include "udm_xmalloc.h"
29 #include "udm_sgml.h"
30 #include "udm_url.h"
31 #include "udm_vars.h"
32
33 /* Max URLs in cache: 4K URLs will use about 200K of RAM */
34 /* This should be a configurable parameter but we'll use 4K now */
35
36 #define HSIZE 256 /* Number of UDM_HREF slots added to the Href array each time it is grown (tunable) */
37 #define RESORT_HREFS 256 /* The list is re-sorted once its unsorted tail holds more than this many items (tunable) */
38
39
40 void
UdmHrefParamInit(UDM_HREFPARAM * H)41 UdmHrefParamInit(UDM_HREFPARAM *H)
42 {
43 bzero((void*)H, sizeof(*H));
44 }
45
46
47 void
UdmHrefInit(UDM_HREF * H)48 UdmHrefInit(UDM_HREF * H)
49 {
50 bzero((void*)H, sizeof(*H));
51 }
52
53
54 void
UdmHrefFree(UDM_HREF * H)55 UdmHrefFree(UDM_HREF * H)
56 {
57 if (H->url)
58 UdmFree(H->url);
59 UdmVarListFree(&H->HrefVars);
60 }
61
62
63 /* Function to sort URLs in alphabetic order */
64 static int
cmphrefs(const void * v1,const void * v2)65 cmphrefs(const void * v1, const void * v2)
66 {
67 return(strcmp(((const UDM_HREF*)v1)->url,((const UDM_HREF*)v2)->url));
68 }
69
70
71 static void
UdmHrefCopyParam(UDM_HREFPARAM * Dst,const UDM_HREFPARAM * Src)72 UdmHrefCopyParam(UDM_HREFPARAM *Dst, const UDM_HREFPARAM *Src)
73 {
74 *Dst= *Src;
75 }
76
77
78 /*
79 Find URL in the sorted part of HrefList
80 */
81 static UDM_HREF *
UdmHrefListFindSorted(UDM_HREFLIST * HrefList,const char * ehref)82 UdmHrefListFindSorted(UDM_HREFLIST *HrefList, const char *ehref)
83 {
84 int l, r, c;
85 for (l=0, r= HrefList->shrefs - 1; l <= r; )
86 {
87 int res;
88 c= (l + r) / 2;
89 if (!(res= strcmp(HrefList->Href[c].url, ehref)))
90 return &HrefList->Href[c];
91 if (res < 0)
92 l= c + 1;
93 else
94 r= c - 1;
95 }
96 return NULL;
97 }
98
99
100 /*
101 Find URL in the unsorted part of HrefList
102 */
103 static UDM_HREF *
UdmHrefListFindUnsorted(UDM_HREFLIST * HrefList,const char * ehref)104 UdmHrefListFindUnsorted(UDM_HREFLIST *HrefList, const char *ehref)
105 {
106 int c;
107 for(c= HrefList->shrefs; c < HrefList->nhrefs; c++)
108 {
109 if (!strcmp(HrefList->Href[c].url, ehref))
110 return &HrefList->Href[c];
111 }
112 return NULL;
113 }
114
115
116 static udm_rc_t
UdmHrefListAddInternal(UDM_HREFLIST * HrefList,const UDM_HREFPARAM * HrefParam,const UDM_VARLIST * HrefVars,const UDM_CONST_STR * url)117 UdmHrefListAddInternal(UDM_HREFLIST *HrefList,
118 const UDM_HREFPARAM *HrefParam,
119 const UDM_VARLIST *HrefVars,
120 const UDM_CONST_STR *url)
121 {
122 char *ehref;
123 UDM_HREF *dst;
124
125 /* Don't add an empty link */
126 if (!url->str || !(ehref= (char*) UdmMalloc(3 * url->length + 1)))
127 return UDM_ERROR;
128
129 UdmURLCanonize(url->str, ehref, 3 * url->length + 1);
130 UdmSGMLUnescape(ehref);
131
132 /* Find current URL in the sorted part, then in the unsorted part */
133 if (!(HrefList->flags & UDM_HREFLIST_FLAG_NONUNIQ) &&
134 ((dst= UdmHrefListFindSorted(HrefList, ehref)) ||
135 (dst= UdmHrefListFindUnsorted(HrefList, ehref))))
136 {
137 UdmHrefCopyParam(&dst->Param, HrefParam);
138 UdmVarListFree(&dst->HrefVars);
139 UdmVarListInit(&dst->HrefVars);
140 if (HrefVars)
141 UdmVarListReplaceLst(&dst->HrefVars, HrefVars, NULL, "*");
142 UDM_FREE(ehref);
143 return UDM_OK;
144 }
145
146 if(HrefList->nhrefs >= HrefList->mhrefs)
147 {
148 HrefList->mhrefs+= HSIZE;
149 HrefList->Href= (UDM_HREF *) UdmRealloc(HrefList->Href,
150 HrefList->mhrefs * sizeof(UDM_HREF));
151 if (!HrefList->Href) /* Fatal */
152 {
153 UdmHrefListInit(HrefList);
154 return UDM_ERROR;
155 }
156 }
157
158 dst= &HrefList->Href[HrefList->nhrefs];
159 dst->url= (char*) UdmStrdup(ehref);
160 UdmHrefCopyParam(&dst->Param, HrefParam);
161 UdmVarListInit(&dst->HrefVars);
162 if (HrefVars)
163 UdmVarListReplaceLst(&dst->HrefVars, HrefVars, NULL, "*");
164 HrefList->nhrefs++;
165
166 /* Sort unsorted part */
167 if (!(HrefList->flags & UDM_HREFLIST_FLAG_NONUNIQ) &&
168 HrefList->nhrefs-HrefList->shrefs > RESORT_HREFS)
169 {
170 UdmSort(HrefList->Href, HrefList->nhrefs, sizeof(UDM_HREF), cmphrefs);
171 /* Remember count of sorted URLs */
172 HrefList->shrefs= HrefList->nhrefs;
173 }
174 UDM_FREE(ehref);
175 return UDM_OK;
176 }
177
178
179 udm_rc_t
UdmHrefListAddConstStr(UDM_HREFLIST * HrefList,const UDM_HREFPARAM * H,const UDM_CONST_STR * href)180 UdmHrefListAddConstStr(UDM_HREFLIST *HrefList,
181 const UDM_HREFPARAM *H,
182 const UDM_CONST_STR *href)
183 {
184 /*
185 udm_rc_t rc;
186 UDM_CONST_STR str;
187 char *tmp= UdmConstStrDup(href);
188 if (tmp)
189 return UDM_ERROR;
190 str.str= tmp;
191 str.length= href->length;
192 rc= UdmHrefListAddInternal(HrefList, H, NULL, &str);
193 UdmFree(tmp);
194 return rc;
195 */
196 return UdmHrefListAddInternal(HrefList, H, NULL, href);
197 }
198
199
200 udm_rc_t
UdmHrefListAddConstStrSGMLUnescape(UDM_HREFLIST * HrefList,const UDM_HREFPARAM * Template,const UDM_VARLIST * Vars,const UDM_CONST_STR * href)201 UdmHrefListAddConstStrSGMLUnescape(UDM_HREFLIST *HrefList,
202 const UDM_HREFPARAM *Template,
203 const UDM_VARLIST *Vars,
204 const UDM_CONST_STR *href)
205 {
206 udm_rc_t rc;
207 UDM_CONST_STR Str;
208 char *tmp;
209 if (!(tmp= UdmConstStrDup(href)))
210 return UDM_ERROR;
211 UdmSGMLUnescape(tmp);
212 Str.str= tmp;
213 Str.length= strlen(tmp);
214 rc= UdmHrefListAddInternal(HrefList, Template, Vars, &Str);
215 UdmFree(tmp);
216 return rc;
217 }
218
219
220 UDM_API(udm_rc_t)
UdmHrefListAdd(UDM_HREFLIST * HrefList,const UDM_HREF * H)221 UdmHrefListAdd(UDM_HREFLIST * HrefList, const UDM_HREF *H)
222 {
223 UDM_CONST_STR str;
224 UdmConstStrSetStr(&str, H->url);
225 return UdmHrefListAddInternal(HrefList, &H->Param, &H->HrefVars, &str);
226 }
227
228
229 udm_rc_t
UdmHrefListAddConst(UDM_HREFLIST * HrefList,const UDM_HREFPARAM * H,const char * href)230 UdmHrefListAddConst(UDM_HREFLIST * HrefList,
231 const UDM_HREFPARAM *H,
232 const char *href)
233 {
234 UDM_CONST_STR str;
235 UdmConstStrSetStr(&str, href);
236 return UdmHrefListAddInternal(HrefList, H, NULL, &str);
237 }
238
239
240 UDM_API(void)
UdmHrefListFree(UDM_HREFLIST * HrefList)241 UdmHrefListFree(UDM_HREFLIST * HrefList)
242 {
243 size_t i;
244 for(i=0;i<HrefList->nhrefs;i++)
245 UdmHrefFree(&HrefList->Href[i]);
246 UDM_FREE(HrefList->Href);
247 bzero((void*)HrefList, sizeof(*HrefList));
248 }
249
250
251 UDM_API(void)
UdmHrefListInit(UDM_HREFLIST * Hrefs)252 UdmHrefListInit(UDM_HREFLIST * Hrefs)
253 {
254 bzero((void*)Hrefs, sizeof(*Hrefs));
255 }
256
257
258 const char *
UdmLinkSourceStr(udm_link_source_t link_source)259 UdmLinkSourceStr(udm_link_source_t link_source)
260 {
261 switch (link_source)
262 {
263 case UDM_LINK_SOURCE_UNKNOWN: return "unk";
264 case UDM_LINK_SOURCE_CMDLINE: return "cline";
265 case UDM_LINK_SOURCE_CONF: return "conf";
266 case UDM_LINK_SOURCE_A_HREF: return "a";
267 case UDM_LINK_SOURCE_FRAME_SRC: return "frame";
268 case UDM_LINK_SOURCE_IMG_SRC: return "img";
269 case UDM_LINK_SOURCE_REDIRECT: return "redir";
270 case UDM_LINK_SOURCE_META_REFRESH: return "meta";
271 case UDM_LINK_SOURCE_LINK_HREF: return "link";
272 case UDM_LINK_SOURCE_AREA_HREF: return "area";
273 case UDM_LINK_SOURCE_IFRAME_SRC: return "iframe";
274 case UDM_LINK_SOURCE_SCRIPT_SRC: return "script";
275 case UDM_LINK_SOURCE_HTDB: return "htdb";
276 case UDM_LINK_SOURCE_URLFILE: return "ufile";
277 case UDM_LINK_SOURCE_ROBOTS_SITEMAP: return "robots";
278 case UDM_LINK_SOURCE_XML: return "xml";
279 case UDM_LINK_SOURCE_DIR: return "dir";
280 }
281 return "unknown";
282 }
283
284
285 udm_link_source_t
UdmLinkSourceByName(const char * str)286 UdmLinkSourceByName(const char *str)
287 {
288 if (!strcasecmp(str, "cline"))
289 return UDM_LINK_SOURCE_CMDLINE;
290 if (!strcasecmp(str, "conf"))
291 return UDM_LINK_SOURCE_CONF;
292 if (!strcasecmp(str, "a"))
293 return UDM_LINK_SOURCE_A_HREF;
294 if (!strcasecmp(str, "frame"))
295 return UDM_LINK_SOURCE_FRAME_SRC;
296 if (!strcasecmp(str, "img"))
297 return UDM_LINK_SOURCE_IMG_SRC;
298 if (!strcasecmp(str, "redir"))
299 return UDM_LINK_SOURCE_REDIRECT;
300 if (!strcasecmp(str, "meta"))
301 return UDM_LINK_SOURCE_META_REFRESH;
302 if (!strcasecmp(str, "link"))
303 return UDM_LINK_SOURCE_LINK_HREF;
304 if (!strcasecmp(str, "area"))
305 return UDM_LINK_SOURCE_AREA_HREF;
306 if (!strcasecmp(str, "iframe"))
307 return UDM_LINK_SOURCE_IFRAME_SRC;
308 if (!strcasecmp(str, "script"))
309 return UDM_LINK_SOURCE_SCRIPT_SRC;
310 if (!strcasecmp(str, "htdb"))
311 return UDM_LINK_SOURCE_HTDB;
312 if (!strcasecmp(str, "ufile"))
313 return UDM_LINK_SOURCE_URLFILE;
314 if (!strcasecmp(str, "robots"))
315 return UDM_LINK_SOURCE_ROBOTS_SITEMAP;
316 if (!strcasecmp(str, "xml"))
317 return UDM_LINK_SOURCE_XML;
318 return UDM_LINK_SOURCE_UNKNOWN;
319 }
320