/* Public declarations for the stemmer API: a stemmer handle type, the
 * symbol type used for word buffers, and the functions that list, create,
 * run and destroy stemmers.
 */

/* Include guard: without it, including this header twice in one translation
 * unit repeats the sb_symbol typedef, which compilers targeting C standards
 * older than C11 reject.
 */
#ifndef SB_STEMMER_H_INCLUDED
#define SB_STEMMER_H_INCLUDED 1

/* Make header file work when included from C++ */
#ifdef __cplusplus
extern "C" {
#endif

/** Stemmer handle.  Only declared (not defined) in this header, so callers
 *  can hold and pass pointers to it but cannot access its members.
 */
struct sb_stemmer;

/** Element type of the word buffers passed to and returned from
 *  sb_stemmer_stem().
 */
typedef unsigned char sb_symbol;

/* FIXME - should be able to get a version number for each stemming
 * algorithm (which will be incremented each time the output changes). */

/** Returns an array of the names of the available stemming algorithms.
 *
 *  Note that these are the canonical names - aliases (i.e., other names for
 *  the same algorithm) will not be included in the list.
 *  The list is terminated with a null pointer.
 *
 *  The list must not be modified in any way.
 */
const char ** sb_stemmer_list(void);

/** Create a new stemmer object, using the specified algorithm, for the
 *  specified character encoding.
 *
 *  All algorithms will usually be available in UTF-8, but may also be
 *  available in other character encodings.
 *
 *  @param algorithm The algorithm name.  This is either the English
 *  name of the algorithm, or the 2 or 3 letter ISO 639 code for the
 *  language.  Note that case is significant in this parameter - the
 *  value should be supplied in lower case.
 *
 *  @param charenc The character encoding.  NULL may be passed as
 *  this value, in which case UTF-8 encoding will be assumed.  Otherwise,
 *  the argument may be one of "UTF_8", "ISO_8859_1" (i.e., Latin 1),
 *  "CP850" (i.e., MS-DOS Latin 1) or "KOI8_R" (Russian).  Note that
 *  case is significant in this parameter.
 *
 *  @return NULL if the specified algorithm is not recognised, or the
 *  algorithm is not available for the requested encoding.  Otherwise,
 *  returns a pointer to a newly created stemmer for the requested algorithm.
 *  The returned pointer must be deleted by calling sb_stemmer_delete().
 *
 *  @note NULL will also be returned if an out of memory error occurs.
 */
struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);

/** Delete a stemmer object.
 *
 *  This frees all resources allocated for the stemmer.  After calling
 *  this function, the supplied stemmer may no longer be used in any way.
 *
 *  It is safe to pass a null pointer to this function - this will have
 *  no effect.
 *
 *  @param stemmer The stemmer to delete, or NULL.
 */
void sb_stemmer_delete(struct sb_stemmer * stemmer);

/** Stem a word.
 *
 *  The return value is owned by the stemmer - it must not be freed or
 *  modified, and it will become invalid when the stemmer is called again,
 *  or if the stemmer is freed.
 *
 *  The length of the return value can be obtained using sb_stemmer_length().
 *
 *  If an out-of-memory error occurs, this will return NULL.
 *
 *  @param stemmer The stemmer to use.
 *  @param word    The word to stem.
 *  @param size    The length of @p word.
 *                 NOTE(review): presumably counted in sb_symbol elements -
 *                 confirm against the implementation.
 *
 *  @return The stemmed word (owned by the stemmer), or NULL on
 *  out-of-memory.
 */
const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer,
                                  const sb_symbol * word, int size);

/** Get the length of the result of the last stemmed word.
 *
 *  This should not be called before sb_stemmer_stem() has been called.
 *
 *  @param stemmer The stemmer that produced the result.
 */
int sb_stemmer_length(struct sb_stemmer * stemmer);

#ifdef __cplusplus
}
#endif

#endif /* SB_STEMMER_H_INCLUDED */