1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved. 2 3 This program is free software; you can redistribute it and/or modify 4 it under the terms of the GNU General Public License as published by 5 the Free Software Foundation; either version 2 of the License, or 6 (at your option) any later version. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 #ifndef _UDM_SEARCH_TOOL_H 19 #define _UDM_SEARCH_TOOL_H 20 21 #define UDM_FAST_PRESORT_DOCS 300 22 #define UDM_DEFAULT_USER_WORD_WEIGHT 256 23 24 typedef unsigned long long udm_found_word_bitmask_t; 25 26 27 typedef struct 28 { 29 udm_found_word_bitmask_t mask; 30 uint4 quality; 31 } UDM_SNIPPETCHUNK_STAT; 32 33 void UdmSnippetChunkStatInit(UDM_SNIPPETCHUNK_STAT *); 34 35 typedef struct 36 { 37 UDM_CONV src_uni; 38 UDM_CONV uni_dst; 39 UDM_CONV uni_wcs; 40 int hlstop; 41 int segmenter; 42 } UDM_HIGHLIGHT_CONV; 43 44 45 typedef struct udm_search_query_param_st 46 { 47 double NumWordFactor; 48 unsigned int NumSections; 49 unsigned int MinCoordFactor; 50 unsigned int MaxCoordFactor; 51 unsigned int Phrase2CountFactor; 52 unsigned int Phrase3CountFactor; 53 unsigned int WordFormFactor; 54 unsigned int WordDensityFactor; 55 unsigned int SkipWordDistanceThreshold; 56 unsigned int WordDistanceWeight; 57 unsigned int IDFFactor; 58 urlid_t DebugURLId; 59 unsigned int SingleWordDistance; 60 unsigned int NumDistinctWordFactor; 61 int UserScoreFactor; 62 int PopularityFactor; 63 int RelevancyFactor; 64 int DateFactor; 65 udm_bool_t SaveSectionSize; 66 udm_bool_t NewVersion; 67 char wf[256]; 68 char nwf[256]; 69 unsigned int nwf_num; 70 } UDM_QUERY_PARAM; 71 72 void UdmQueryParamInit(UDM_QUERY_PARAM *prm, UDM_ENV *Env, UDM_VARLIST *dbvars); 73 74 75 /* Functions form urldata.c */ 76 void UdmURLDataFree(UDM_URLDATA *D); 77 void UdmURLDataListInit(UDM_URLDATALIST *L); 78 void UdmURLDataSortBySite(UDM_URLDATALIST *L); 79 void UdmURLDataSortByPattern(UDM_URLDATALIST *D, const char *pattern); 80 void UdmURLDataListSort(UDM_URLDATALIST *L); 81 UDM_URLDATA *UdmURLDataListSearch(UDM_URLDATALIST *List, urlid_t id); 82 void UdmURLDataListFreeItems(UDM_URLDATALIST *List, size_t first, size_t last); 83 void UdmURLDataListFree(UDM_URLDATALIST *List); 84 void UdmURLDataListClearParams(UDM_URLDATALIST *List); 85 size_t UdmURLDataCompact(UDM_URLDATA *dst, UDM_URLDATA *src, size_t n); 86 87 /* Functions from groupby.c */ 88 void UdmURLDataGroupBySite(UDM_URLDATALIST *L); 89 udm_rc_t UdmURLDataListGroupBySiteUsingSort(UDM_AGENT *A, 90 UDM_URLDATALIST *R, 91 UDM_DB *db); 92 void UdmURLDataApplySiteRank(UDM_AGENT *A, UDM_URLDATALIST *DataList, 93 int is_aggregation_point); 94 void UdmURLDataListApplyPopularity(UDM_AGENT *A, 95 UDM_URLDATALIST *URLData, 96 const UDM_QUERY_PARAM *prm); 97 98 udm_rc_t UdmURLDataListPackSite(UDM_URLDATALIST *List, UDM_DSTR *dstr); 99 udm_rc_t UdmURLDataListUnpackSite(UDM_AGENT *A, UDM_URLDATALIST *List, 100 const UDM_CONST_STR *str_arg); 101 102 /* Functions from distance.c */ 103 typedef struct udm_distance_stat_st 104 { 105 uint4 sum; 106 uint4 num; 107 uint4 phrase2_count; 108 uint4 phrase3_count; 109 } UDM_WORD_DISTANCE_STAT; 110 111 void UdmWordDistanceStatInit(UDM_WORD_DISTANCE_STAT *d); 112 113 void CalcAverageWordDistance(size_t wf2_secno, 114 UDM_COORD2 *phr, size_t num, size_t nuniq, 115 UDM_WORD_DISTANCE_STAT *dist); 116 117 /* Functions from score.c */ 118 udm_rc_t UdmUserScoreListApplyToURLScoreList(UDM_AGENT *A, 119 UDM_URLSCORELIST *URLScoreList, 120 UDM_URL_INT4_LIST *UserScoreList, 121 const UDM_QUERY_PARAM *prm); 122 udm_rc_t UdmUserScoreListApplyToURLDataList(UDM_AGENT *A, 123 UDM_URLDATALIST *URLDataList, 124 UDM_URL_INT4_LIST *UserScoreList, 125 const UDM_QUERY_PARAM *prm); 126 127 udm_rc_t 128 UdmURLDataListApplyRelevancyFactors(UDM_AGENT *Agent, 129 UDM_URLDATALIST *DataList, 130 const UDM_QUERY_PARAM *prm); 131 132 void UdmURLScoreListSortByScore(UDM_URLSCORELIST *ScoreList); 133 void UdmURLScoreListSortByScoreThenURLTop(UDM_URLSCORELIST *ScoreList, 134 size_t topcount); 135 void UdmURLScoreListSortByScoreThenURL(UDM_URLSCORELIST *ScoreList); 136 137 void UdmGroupByURL2(UDM_AGENT *Agent, UDM_DB *db, UDM_QUERY *Query, 138 const UDM_QUERY_PARAM *query_param, 139 UDM_SEARCHSECTIONLIST *CoordList, 140 UDM_URLSCORELIST *ScoreList); 141 142 udm_rc_t UdmQueryPrepare(UDM_AGENT *query, UDM_QUERY *Query); 143 udm_rc_t UdmApplyFastLimit(UDM_URLCRDLIST *Coord, UDM_URLID_LIST *urls); 144 145 /* Functions from wordinfo.c */ 146 udm_rc_t UdmQueryWordInfo(UDM_QUERY *Query); 147 148 149 /* Functions from fuzzy.c */ 150 udm_rc_t UdmAllForms(UDM_AGENT *Indexer, UDM_WIDEWORDLIST *result, 151 UDM_WIDEWORD *uword); 152 153 udm_rc_t UdmComplexSynonyms(UDM_AGENT *Indexer, UDM_WIDEWORDLIST *result); 154 155 /* Functions from highlight.c */ 156 void UdmExcerptConvInit(UDM_HIGHLIGHT_CONV *cnv, 157 UDM_CHARSET *wcs, 158 UDM_CHARSET *src, 159 UDM_CHARSET *dst, 160 int hlstop, int segmenter); 161 void UdmExcerptConvInitFromEnv(UDM_HIGHLIGHT_CONV *cnv, 162 UDM_CHARSET *wcs, 163 UDM_CHARSET *src, 164 UDM_CHARSET *dst, 165 UDM_ENV *Env); 166 size_t 167 UdmHlConvertExtWithConv(UDM_AGENT *A, 168 UDM_DSTR *dstr, 169 UDM_SNIPPETCHUNK_STAT *stat, 170 UDM_WIDEWORDLIST *List, 171 const char *src, size_t srclen, 172 UDM_HIGHLIGHT_CONV *ec); 173 udm_rc_t 174 UdmVarListHlConvert(UDM_AGENT *A, 175 UDM_WIDEWORDLIST *WWList, 176 UDM_VARLIST *Vars, 177 UDM_HIGHLIGHT_CONV *ec); 178 179 udm_rc_t UdmQueryConvert(UDM_AGENT *A, UDM_QUERY *Query, 180 UDM_CHARSET *lcs, UDM_CHARSET *bcs); 181 size_t UdmRemoveHiLight(char *dst, size_t dstlength, 182 const char *src, size_t srclength); 183 size_t UdmRemoveHl(UDM_CHARSET *cs, char *str, size_t from, size_t to); 184 char* UdmRemoveHiLightDup(const char *s); 185 186 /* Functions from segment.c */ 187 int *UdmUniSegment(UDM_AGENT *Indexer, int *s, const char *lang, const char *seg); 188 int *UdmUniSegmentByType(UDM_AGENT *Indexer, int *s, int type, int ch); 189 int UdmUniSegmenterFind(UDM_ENV *Env, const char *lang, const char *seg); 190 udm_rc_t UdmTextListSegment(UDM_AGENT *Indexer, UDM_TEXTLIST *tlist, 191 UDM_CHARSET *cs, int type); 192 193 /* Functions from section.c */ 194 UDM_SEARCHSECTION *UdmSearchSectionListFind(UDM_SEARCHSECTIONLIST *SectionList, 195 urlid_t url_id); 196 void UdmSearchSectionListPrint(UDM_SEARCHSECTIONLIST *SectionList); 197 udm_rc_t UdmSearchSectionListAlloc(UDM_SEARCHSECTIONLIST *List, size_t ncoords, size_t nsections); 198 void UdmSearchSectionListFree(UDM_SEARCHSECTIONLIST *List); 199 udm_rc_t UdmSearchSectionListListAdd(UDM_SEARCHSECTIONLISTLIST *List, 200 UDM_SEARCHSECTIONLIST *Item); 201 void UdmSearchSectionListListInit(UDM_SEARCHSECTIONLISTLIST *List); 202 void UdmSearchSectionListListFree(UDM_SEARCHSECTIONLISTLIST *List); 203 udm_rc_t UdmSearchSectionListListMergeSorted(UDM_SEARCHSECTIONLISTLIST *SrcList, 204 UDM_SEARCHSECTIONLIST *Dst, int opt); 205 206 /* Functions from urlidlist.c */ 207 int UdmCmpURLID(urlid_t *s1, urlid_t *s2); /* for qsort */ 208 udm_rc_t UdmURLIdListJoin(UDM_URLID_LIST *urls, UDM_URLID_LIST *fl_urls); 209 udm_rc_t UdmURLIdListUnion(UDM_URLID_LIST *a, UDM_URLID_LIST *b); 210 udm_rc_t UdmURLIdListCopy(UDM_URLID_LIST *a, UDM_URLID_LIST *b); 211 udm_rc_t UdmURLIdListMerge(UDM_URLID_LIST *a, UDM_URLID_LIST *b); 212 void UdmURLIdListSort(UDM_URLID_LIST *a); 213 214 /* Functions from popularity.c */ 215 udm_rc_t UdmURLDataListUnpackPopularity(UDM_AGENT *A, UDM_URLDATALIST *URLDataList, 216 UDM_CONST_STR *cstr); 217 udm_rc_t UdmURLDataListPackPopularity(UDM_AGENT *A, UDM_URLDATALIST *List, 218 UDM_DSTR *pop); 219 220 #endif 221