#ifndef __RS_DOCUMENT_H__
#define __RS_DOCUMENT_H__
#include <pthread.h>
#include "redismodule.h"
#include "search_ctx.h"
#include "redisearch.h"
#include "tokenize.h"
#include "concurrent_ctx.h"
#include "byte_offsets.h"
#include "rmutil/args.h"
#include "query_error.h"
#include "json.h"

#ifdef __cplusplus
extern "C" {
#endif

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/// General Architecture                                                     ///
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

/**
 * To index a document, call Document_PrepareForAdd on the document itself.
 * This initializes the Document structure for indexing purposes. Once the
 * document has been prepared, acquire a new RSAddDocumentCtx by calling
 * NewAddDocumentCtx().
 *
 * Once the new context has been received, call Document_AddToIndexes(). This
 * will start tokenizing the documents, and should be called in a separate
 * thread. This function will tokenize the document and send a reply back to
 * the client. You may free the RSAddDocumentCtx structure by calling
 * AddDocumentCtx_Free().
 *
 * NOTE(review): Document_PrepareForAdd is not declared in this header; confirm
 * that the name used above is still current.
 *
 * See document.c for the internals.
 */

/**
 * Tag stored in DocumentField.unionType. Each value is a distinct bit.
 * Presumably each tag selects the active member of the DocumentField value
 * union (RMS -> text, CSTR -> strval/strlen, NUM -> numval, GEO -> lon/lat,
 * ARRAY -> multiVal/arrayLen) -- TODO confirm against document.c.
 */
typedef enum {
  // Newline
  FLD_VAR_T_RMS = 0x01,
  FLD_VAR_T_CSTR = 0x02,
  FLD_VAR_T_NUM = 0x04,
  FLD_VAR_T_GEO = 0x08,
  FLD_VAR_T_ARRAY = 0x10
} FieldVarType;

/**
 * One field of a document: its name and path, plus a value stored in one of
 * the representations of the anonymous union below.
 */
typedef struct DocumentField{
  const char *name;  // Can either be char or RMString
  const char *path;
  union {
    // TODO: consider removing RMS altogether
    RedisModuleString *text;
    // C string value together with its length
    struct {
      char *strval;
      size_t strlen;
    };
    // Numeric value
    double numval;
    // Geo value: longitude and latitude
    struct {
      double lon, lat;
    };
    // Array of C strings
    struct {
      char **multiVal;
      size_t arrayLen; // for multiVal TODO: use arr.h
    };
  };
  FieldVarType unionType;  // FLD_VAR_T_xxx tag for the union above
  FieldType indexAs;
} DocumentField;

/**
 * A document handed to the indexer: its key, its fields and per-document
 * metadata (language, score, payload).
 */
typedef struct Document {
  RedisModuleString *docKey;
  DocumentField *fields;   // Array of numFields entries
  uint32_t numFields;
  RSLanguage language;
  float score;
  t_docId docId;
  const char *payload;     // Payload bytes; length is payloadSize
  size_t payloadSize;
  uint32_t flags;          // presumably a mask of DOCUMENT_F_xxx -- TODO confirm
  DocumentType type;
} Document;

/**
 * Document should decrement the reference count to the contained strings. Used
 * when the user does not want to retain their own reference to them. It
 * effectively "steals" a reference.
 *
 * This only applies to _values_; not keys. Used internally by the C API
 */
#define DOCUMENT_F_OWNREFS 0x01

/**
 * Indicates that the document owns a reference to the field contents,
 * the language string, and the payload.
 *
 * The document always owns the field array, though.
 */
#define DOCUMENT_F_OWNSTRINGS 0x02

// Names beginning with two underscores; presumably the reserved field names
// for a document's key, score, payload and language -- TODO confirm usage.
#define UNDERSCORE_KEY "__key"
#define UNDERSCORE_SCORE "__score"
#define UNDERSCORE_PAYLOAD "__payload"
#define UNDERSCORE_LANGUAGE "__language"

struct RSAddDocumentCtx;

/**
 * Completion callback type for a document add operation (see the donecb
 * members of AddDocumentOptions and RSAddDocumentCtx). The trailing void *
 * is presumably the donecbData stored in RSAddDocumentCtx -- TODO confirm.
 */
typedef void (*DocumentAddCompleted)(struct RSAddDocumentCtx *, RedisModuleCtx *, void *);

/** Options describing a single document add request. */
typedef struct {
  uint32_t options;            // DOCUMENT_ADD_XXX
  RSLanguage language;         // Language document should be indexed as
  RedisModuleString *payload;  // Arbitrary payload provided on return with WITHPAYLOADS
  arrayof(RedisModuleString *) fieldsArray;  // Field, Value, Field Value
  size_t numFieldElems;        // Number of elements
  double score;                // Score of the document
  const char *evalExpr;        // Only add the document if this expression evaluates to true.
  DocumentAddCompleted donecb; // Callback to invoke when operation is done

  RedisModuleString *keyStr;       // key name for HSET
  RedisModuleString *scoreStr;     // score string for HSET
  RedisModuleString *languageStr;  // Language string for HSET
} AddDocumentOptions;

/**
 * Add a field whose value is a RedisModuleString.
 * typemask is presumably the mask of field types to index the value as
 * (cf. DocumentField.indexAs) -- TODO confirm.
 */
void Document_AddField(Document *d, const char *fieldname, RedisModuleString *fieldval,
                       uint32_t typemask);

/**
 * Add a simple char buffer value. This creates an RMString internally, so this
 * must be used with F_OWNSTRINGS (DOCUMENT_F_OWNSTRINGS)
 */
void Document_AddFieldC(Document *d, const char *fieldname, const char *val, size_t vallen,
                        uint32_t typemask);

/**
 * Load Document Field with a numeric value.
 */
void Document_AddNumericField(Document *d, const char *fieldname,
                              double val, uint32_t typemask);

/**
 * Load Document Field with longitude and latitude values.
 */
void Document_AddGeoField(Document *d, const char *fieldname,
                          double lon, double lat, uint32_t typemask);

/**
 * Initialize document structure with the relevant fields.
 * numFields will allocate
 * the fields array, but you must still actually copy the data along.
 *
 * Note that this function assumes that the pointers passed in will remain valid
 * throughout the lifetime of the document. If you need to make independent copies
 * of the data within the document, call Document_Detach on the document (after
 * calling this function).
 *
 * NOTE(review): the signature below takes no numFields argument, and
 * Document_Detach is not declared in this header; both remarks may be stale --
 * confirm against document.c.
 */
void Document_Init(Document *doc, RedisModuleString *docKey, double score, RSLanguage lang, DocumentType type);

/**
 * Set the document's payload from a buffer of n bytes.
 * NOTE(review): whether the bytes are copied or only referenced is not visible
 * from this header -- confirm before freeing the source buffer.
 */
void Document_SetPayload(Document *doc, const void *payload, size_t n);

/**
 * Make the document the owner of the strings it contains
 */
void Document_MakeStringsOwner(Document *doc);

/**
 * Make the document object steal references to the document's strings.
 */
void Document_MakeRefOwner(Document *doc);

/**
 * Clear the document of its fields. This does not free the document
 * or clear its name
 */
void Document_Clear(Document *doc);

/**
 * Load all fields specified in the schema to the document. Note that
 * the document must then be freed using Document_Free().
 *
 * The document must already have the docKey set.
 * There is one variant per source representation: Hash and JSON.
 */
int Document_LoadSchemaFieldHash(Document *doc, RedisSearchCtx *sctx);
int Document_LoadSchemaFieldJson(Document *doc, RedisSearchCtx *sctx);

/**
 * Load all the fields into the document.
 */
int Document_LoadAllFields(Document *doc, RedisModuleCtx *ctx);

/**
 * Load fields from an argument array of nargs elements.
 * Presumably args alternates field name and value -- TODO confirm.
 */
void Document_LoadPairwiseArgs(Document *doc, RedisModuleString **args, size_t nargs);

/**
 * Load fields from add options.
 * Presumably reads opts->fieldsArray (the HSET-style field/value list) --
 * TODO confirm.
 */
void Document_LoadHSetParams(Document *d, const AddDocumentOptions *opts);

/**
 * Print contents of document to screen
 */
void Document_Dump(const Document *doc);  // LCOV_EXCL_LINE debug
/**
 * Free any copied data within the document. anyCtx is any non-NULL
 * RedisModuleCtx.
 * The reason for requiring a context is more related to the
 * Redis Module API requiring a context for AutoMemory purposes, though in
 * this case, the pointers are already removed from AutoMemory management
 * anyway.
 *
 * This function also calls Document_Free
 */
void Document_FreeDetached(Document *doc, RedisModuleCtx *anyCtx);

/**
 * Free the document's internals (like the field array).
 */
void Document_Free(Document *doc);

// Option bits for adding a document (the DOCUMENT_ADD_XXX values referred to
// by AddDocumentOptions.options and RSAddDocumentCtx.options).
// NOTE(review): only NOCREATE is described in this header; the meaning of
// REPLACE, PARTIAL and NOSAVE is inferred from their names -- confirm.
#define DOCUMENT_ADD_REPLACE 0x01
#define DOCUMENT_ADD_PARTIAL 0x02
#define DOCUMENT_ADD_NOSAVE 0x04
#define DOCUMENT_ADD_NOCREATE 0x08  // Don't create document if not exist (replace ONLY)

struct ForwardIndex;
struct FieldIndexerData;

// State bits for RSAddDocumentCtx.stateFlags (ACTX_F_xxx).

// The context has had its forward entries merged in the merge table. We can
// skip merging its tokens
#define ACTX_F_TEXTINDEXED 0x01

// The context has had an error and should not be processed further
#define ACTX_F_ERRORED 0x02

// Non-text fields have been indexed.
#define ACTX_F_OTHERINDEXED 0x04

// The content has indexable fields
#define ACTX_F_INDEXABLES 0x08

// The content has sortable fields
#define ACTX_F_SORTABLES 0x10

// Don't block/unblock the client when indexing.
// This is the case when the
// operation is being done from within the context of AOF
#define ACTX_F_NOBLOCK 0x20

// Document is entirely empty (no sortables, indexables)
#define ACTX_F_EMPTY 0x40

// NOTE(review): undocumented in the original; presumably tells the context
// not to free its document when the context is released -- confirm.
#define ACTX_F_NOFREEDOC 0x80

struct DocumentIndexer;

/** Context used when indexing documents */
typedef struct RSAddDocumentCtx {
  struct RSAddDocumentCtx *next;  // Next context in the queue
  Document *doc;                  // Document which is being indexed
  union {
    RedisModuleBlockedClient *bc;  // Client
    RedisSearchCtx *sctx;
  } client;

  IndexSpec *spec;
  char *specName;
  size_t specNameLen;
  uint64_t specId;

  // Forward index. This contains all the terms found in the document
  struct ForwardIndex *fwIdx;

  struct DocumentIndexer *indexer;

  // Sorting vector for the document. If the document has sortable fields, they
  // are added to here as well
  RSSortingVector *sv;

  // Byte offsets for highlighting. If term offsets are stored, this contains
  // the field byte offset for each term.
  RSByteOffsets *byteOffsets;
  ByteOffsetWriter offsetsWriter;

  // Information about each field in the document. This is read from the spec
  // and cached, so that we can look it up without holding the GIL
  FieldSpec *fspecs;
  RSTokenizer *tokenizer;

  // Old document data. Contains sortables
  RSDocumentMetadata *oldMd;

  // New flags to assign to the document
  RSDocumentFlags docFlags;

  // Scratch space used by per-type field preprocessors (see the source)
  struct FieldIndexerData *fdatas;
  QueryError status;     // Error message is placed here if there is an error during processing
  uint32_t totalTokens;  // Number of tokens, used for offset vector
  uint32_t specFlags;    // Cached index flags
  // NOTE(review): this member is 8 bits wide, while AddDocumentOptions.options
  // and the options argument of AddDocumentCtx_Submit are uint32_t. All
  // DOCUMENT_ADD_xxx values currently defined (up to 0x08) fit.
  uint8_t options;       // Indexing options - i.e. DOCUMENT_ADD_xxx
  uint8_t stateFlags;    // Indexing state, ACTX_F_xxx
  DocumentAddCompleted donecb;
  void *donecbData;
} RSAddDocumentCtx;

// Non-zero unless ACTX_F_NOBLOCK is set in the context's stateFlags.
#define AddDocumentCtx_IsBlockable(aCtx) (!((aCtx)->stateFlags & ACTX_F_NOBLOCK))

/**
 * Creates a new context used for adding documents. Once created, call
 * Document_AddToIndexes on it.
 *
 * - client is a blocked client which will be used as the context for this
 *    operation.
 *    NOTE(review): the signature below takes no client argument, so this
 *    bullet looks stale -- confirm.
 * - sp is the index that this document will be added to
 * - base is the document to be indexed. The context will take ownership of the
 *    document's contents (but not the structure itself). Thus, you should not
 *    call Document_Free on the document after a successful return of this
 *    function.
 * - status is presumably filled with error details on failure -- TODO confirm
 *
 * When done, call AddDocumentCtx_Free
 */
RSAddDocumentCtx *NewAddDocumentCtx(IndexSpec *sp, Document *base, QueryError *status);

/**
 * At this point the context will take over from the caller, and handle sending
 * the replies and so on.
 */
void AddDocumentCtx_Submit(RSAddDocumentCtx *aCtx, RedisSearchCtx *sctx, uint32_t options);

/**
 * Indicate that processing is finished on the current document
 */
void AddDocumentCtx_Finish(RSAddDocumentCtx *aCtx);
/**
 * This function will tokenize the document and add the resultant tokens to
 * the relevant inverted indexes. This function should be called from a
 * worker thread (see ConcurrentSearch functions).
 *
 *
 * When this function completes, it will send the reply to the client and
 * unblock the client passed when the context was first created.
 */
int Document_AddToIndexes(RSAddDocumentCtx *ctx);

/**
 * Free the AddDocumentCtx. Should be done once AddToIndexes() completes; or
 * when the client is unblocked.
 */
void AddDocumentCtx_Free(RSAddDocumentCtx *aCtx);

/* Evaluate an IF expression (e.g.
IF "@foo == 'bar'") against a document, by getting the 343 * properties from the sorting table or from the hash representation of the document. 344 * 345 * NOTE: This is disconnected from the document indexing flow, and loads the document and discards 346 * of it internally 347 * 348 * Returns REDISMODULE_ERR on failure, OK otherwise*/ 349 int Document_EvalExpression(RedisSearchCtx *sctx, RedisModuleString *key, const char *expr, 350 int *result, QueryError *err); 351 352 // Don't create document if it does not exist. Replace only 353 #define REDIS_SAVEDOC_NOCREATE 0x01 354 /** 355 * Save a document in the index. Used for returning contents in search results. 356 */ 357 int Redis_SaveDocument(RedisSearchCtx *ctx, const AddDocumentOptions *opts, QueryError *status); 358 359 /* Serialzie the document's fields to a redis client */ 360 int Document_ReplyAllFields(RedisModuleCtx *ctx, IndexSpec *spec, RedisModuleString *id); 361 362 DocumentField *Document_GetField(Document *d, const char *fieldName); 363 364 /* return value as c string */ 365 const char *DocumentField_GetValueCStr(const DocumentField *df, size_t *len); 366 367 // Document add functions: 368 int RSAddDocumentCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); 369 int RSSafeAddDocumentCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); 370 int RSAddHashCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); 371 int RSSafeAddHashCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); 372 373 int RS_AddDocument(RedisSearchCtx *sctx, RedisModuleString *name, const AddDocumentOptions *opts, 374 QueryError *status); 375 376 void freeGlobalAddStrings(); 377 378 #ifdef __cplusplus 379 } 380 #endif 381 #endif 382