1 /*------------------------------------------------------------------------------
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 ------------------------------------------------------------------------------*/
7 #ifndef _lucene_index_Term_
8 #define _lucene_index_Term_
9 
CL_NS_DEF(index)10 CL_NS_DEF(index)
11 
12 /**
13 A Term represents a word from text.  This is the unit of search.  It is
14 composed of two elements, the text of the word, as a string, and the name of
15 the field that the text occured in, an interned string.
16 
17 Note that terms may represent more than words from text fields, but also
18 things like dates, email addresses, urls, etc.
19 
20 IMPORTANT NOTE:
21 Term inherits from the template class LUCENE_REFBASE which tries to do
22 some garbage collection by counting the references an instance has. As a result
23 of this construction you MUST use _CLDECDELETE(obj) when you want to delete an
24 of Term!
25 
26 ABOUT intrn
27 
28 intrn indicates if field and text are interned or not. Interning of Strings is the process of
29 converting duplicated strings to shared ones.
30 
31 */
32 class CLUCENE_EXPORT Term:LUCENE_REFBASE {
33 private:
34   size_t cachedHashCode;
35 	const TCHAR* _field;
36 	//CLStringIntern::iterator fielditr;
37 #ifdef LUCENE_TERM_TEXT_LENGTH
38 	TCHAR _text[LUCENE_TERM_TEXT_LENGTH+1];
39 #else
40 	TCHAR* _text;
41 	size_t textLenBuf; //a cache of text len, this allows for a preliminary comparison of text lengths
42 	//bool    dupT;    //Indicates if Term Text is duplicated (and therefore must be deleted).
43 #endif
44 	size_t textLen; //a cache of text len, this allows for a preliminary comparison of text lengths
45 	bool    internF; //Indicates if Term Field is interned(and therefore must be uninternd).
46 public:
47 
48 	//uses the specified fieldTerm's field. this saves on intern'ing time.
49   /** Constructs a Term with the given field and text.
50    * <p>Note that a null field or null text value results in undefined
51    * behavior for most Lucene APIs that accept a Term parameter.
52   */
53 	Term(const Term* fieldTerm, const TCHAR* txt);
54 
55 	/** Constructs a blank term */
56 	Term();
57 
58   /** Constructs a Term with the given field and text.
59    * <p>Note that a null field or null text value results in undefined
60    * behavior for most Lucene APIs that accept a Term parameter.
61   */
62 	Term(const TCHAR* fld, const TCHAR* txt, bool internField);
63 
64 	/**
65 	* Constructor. Constructs a Term with the given field and text. Field and text are not copied
66 	* Field and text are deleted in destructor only if intern is false.
67 	* <p>Note that a null field or null text value results in undefined
68 	* behavior for most Lucene APIs that accept a Term parameter.
69 	*/
70 	Term(const TCHAR* fld, const TCHAR* txt);
71 
72 	///Destructor.
73 	~Term();
74 
75 	///Returns the field of this term, an interned string. The field indicates
76 	///the part of a document which this term came from.
77 	const TCHAR* field() const; ///<returns reference
78 
79 	///Returns the text of this term.  In the case of words, this is simply the
80 	///text of the word.  In the case of dates and other types, this is an
81 	///encoding of the object as a string.
82 	const TCHAR* text() const; ///<returns reference
83 
84 	///Resets the field and text of a Term.
85 	inline void set(const TCHAR* fld, const TCHAR* txt){
86 		set(fld,txt,true);
87 	}
88 
89 	/**
90 	* Optimized set of Term by reusing same field as this Term
91 	* - avoids field.intern() overhead
92 	* @param text The text of the new term (field is implicitly same as this Term instance)
93 	*/
94 	void set(const Term* term, const TCHAR* txt);
95 
96 	void set(const TCHAR* fld, const TCHAR* txt, const bool internField);
97 
98 	/** Compares two terms, returning a negative integer if this
99 	term belongs before the argument, zero if this term is equal to the
100 	argument, and a positive integer if this term belongs after the argument.
101 
102 	The ordering of terms is first by field, then by text.*/
103 	int32_t compareTo(const Term* other) const;
104 
105 	/** Compares two terms, returning a negative integer if this
106 	term belongs before the argument, zero if this term is equal to the
107 	argument, and a positive integer if this term belongs after the argument.
108 
109 	The ordering of terms is purely on the hashCode, so is not a logical ordering, but is repeatable.
110 	Note: can't be const because call the to hashCode is not const
111 	*/
112 	int32_t hashedCompareTo(Term* other);
113 
114 	bool equals(const Term* other) const;
115 
116 	size_t textLength() const;
117 
118 	///Forms the contents of Field and term in some kind of tuple notation
119 	///<field:text>
120 	TCHAR* toString() const;
121 
122 	size_t hashCode();
123 };
124 
125 class Term_UnorderedCompare:LUCENE_BASE, public CL_NS(util)::Compare::_base //<Term*>
126 {
127 public:
operator()128 	bool operator()( Term* t1, Term* t2 ) const{
129 		return ( t1->hashedCompareTo(t2) < 0 );
130 	}
operator()131 	size_t operator()( Term* t ) const{
132 		return t->hashCode();
133 	}
134 };
135 
136 CL_NS_END
137 #endif
138