1 /*------------------------------------------------------------------------------
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 ------------------------------------------------------------------------------*/
7 #ifndef _lucene_index_IndexFileDeleter_
8 #define _lucene_index_IndexFileDeleter_
9 
10 #include "CLucene/util/Equators.h"
11 #include "IndexDeletionPolicy.h"
12 
13 CL_CLASS_DEF(store,Directory)
14 CL_NS_DEF(index)
15 class SegmentInfos;
16 class DocumentsWriter;
17 class IndexDeletionPolicy;
18 
19 /*
20  * This class keeps track of each SegmentInfos instance that
21  * is still "live", either because it corresponds to a
22  * segments_N file in the Directory (a "commit", i.e. a
23  * committed SegmentInfos) or because it's the in-memory SegmentInfos
24  * that a writer is actively updating but has not yet committed
25  * (currently this only applies when autoCommit=false in IndexWriter).
26  * This class uses simple reference counting to map the live
27  * SegmentInfos instances to individual files in the Directory.
28  *
 * The same directory file may be referenced by more than
 * one IndexCommitPoint, i.e. by more than one SegmentInfos.
31  * Therefore we count how many commits reference each file.
32  * When all the commits referencing a certain file have been
33  * deleted, the refcount for that file becomes zero, and the
34  * file is deleted.
35  *
36  * A separate deletion policy interface
37  * (IndexDeletionPolicy) is consulted on creation (onInit)
38  * and once per commit (onCommit), to decide when a commit
39  * should be removed.
40  *
41  * It is the business of the IndexDeletionPolicy to choose
42  * when to delete commit points.  The actual mechanics of
43  * file deletion, retrying, etc, derived from the deletion
44  * of commit points is the business of the IndexFileDeleter.
45  *
46  * The current default deletion policy is {@link
47  * KeepOnlyLastCommitDeletionPolicy}, which removes all
48  * prior commits when a new commit has completed.  This
49  * matches the behavior before 2.2.
50  *
51  * Note that you must hold the write.lock before
52  * instantiating this class.  It opens segments_N file(s)
53  * directly with no retry logic.
54  */
55 class IndexFileDeleter {
56 private:
57   /**
58   * Tracks the reference count for a single index file:
59   */
60   class RefCount {
61   public:
62 	  int count;
IncRef()63 	  int IncRef() {
64 		  return ++count;
65 	  }
DecRef()66 	  int DecRef() {
67 		  return --count;
68 	  }
69   };
70 
71   /**
72    * Holds details for each commit point.  This class is
73    * also passed to the deletion policy.  Note: this class
74    * has a natural ordering that is inconsistent with
75    * equals.
76    */
77   class CommitPoint: public IndexCommitPoint, public CL_NS(util)::Comparable {
78     int64_t gen;
79     std::string segmentsFileName;
80     IndexFileDeleter* _this;
81   public:
82 		std::vector<std::string> files;
83     bool deleted;
84 
85     CommitPoint(IndexFileDeleter* _this, SegmentInfos* segmentInfos);
86     virtual ~CommitPoint();
87 
88     /**
89      * Get the segments_N file for this commit point.
90      */
91     std::string getSegmentsFileName();
92 
93     const std::vector<std::string>& getFileNames();
94 
95     /**
96      * Called only be the deletion policy, to remove this
97      * commit point from the index.
98      */
99     void deleteCommitPoint();
100 
101     int32_t compareTo(NamedObject* obj);
102 
103 		static const char* getClassName();
104 		const char* getObjectName() const;
105     static bool sort(IndexCommitPoint* elem1, IndexCommitPoint* elem2);
106   };
107 
108 private:
109   /* Files that we tried to delete but failed (likely
110    * because they are open and we are running on Windows),
111    * so we will retry them again later: */
112   std::vector<std::string> deletable;
113 
114   typedef CL_NS(util)::CLHashMap<char*, RefCount*,
115 	  CL_NS(util)::Compare::Char,
116 	  CL_NS(util)::Equals::Char,
117 	  CL_NS(util)::Deletor::acArray,
118 	  CL_NS(util)::Deletor::Object<RefCount> > RefCountsType;
119   /* Reference count for all files in the index.
120    * Counts how many existing commits reference a file.
121    * Maps String to RefCount (class below) instances: */
122   RefCountsType refCounts;
123 
124   typedef CL_NS(util)::CLVector<IndexCommitPoint*, CL_NS(util)::Deletor::Object<IndexCommitPoint> > CommitsType;
125   /* Holds all commits (segments_N) currently in the index.
126    * This will have just 1 commit if you are using the
127    * default delete policy (KeepOnlyLastCommitDeletionPolicy).
128    * Other policies may leave commit points live for longer
129    * in which case this list would be longer than 1: */
130    CommitsType commits;
131 
132   /* Holds files we had incref'd from the previous
133    * non-commit checkpoint: */
134   std::vector<std::string> lastFiles;
135 
136   /* Commits that the IndexDeletionPolicy have decided to delete: */
137   CL_NS(util)::CLArrayList<CommitPoint*> commitsToDelete;
138 
139   std::ostream* infoStream;
140   CL_NS(store)::Directory* directory;
141   IndexDeletionPolicy* policy;
142   DocumentsWriter* docWriter;
143 
144 
145 public:
146   void deletePendingFiles();
147 
148   void setInfoStream(std::ostream* infoStream);
149   void message(std::string message);
150   void decRef(const std::string& fileName);
151   RefCount* getRefCount(const char* fileName);
152 
153   /**
154    * Remove the CommitPoints in the commitsToDelete List by
155    * DecRef'ing all files from each SegmentInfos.
156    */
157   void deleteCommits();
158 
159   /** Change to true to see details of reference counts when
160    *  infoStream != null */
161   static bool VERBOSE_REF_COUNTS;
162 
163   /**
164    * Initialize the deleter: find all previous commits in
165    * the Directory, incref the files they reference, call
166    * the policy to let it delete commits.  The incoming
167    * segmentInfos must have been loaded from a commit point
168    * and not yet modified.  This will remove any files not
169    * referenced by any of the commits.
170    * @throws CorruptIndexException if the index is corrupt
171    * @throws IOException if there is a low-level IO error
172    */
173   IndexFileDeleter(CL_NS(store)::Directory* directory, IndexDeletionPolicy* policy, SegmentInfos* segmentInfos, std::ostream* infoStream, DocumentsWriter* docWriter);
174   ~IndexFileDeleter();
175 
176   /**
177    * Writer calls this when it has hit an error and had to
178    * roll back, to tell us that there may now be
179    * unreferenced files in the filesystem.  So we re-list
180    * the filesystem and delete such files.  If segmentName
181    * is non-null, we will only delete files corresponding to
182    * that segment.
183    */
184   void refresh(const char* segmentName);
185   void refresh();
186   void close();
187 
188   /**
189    * For definition of "check point" see IndexWriter comments:
190    * "Clarification: Check Points (and commits)".
191    *
192    * Writer calls this when it has made a "consistent
193    * change" to the index, meaning new files are written to
194    * the index and the in-memory SegmentInfos have been
195    * modified to point to those files.
196    *
197    * This may or may not be a commit (segments_N may or may
198    * not have been written).
199    *
200    * We simply incref the files referenced by the new
201    * SegmentInfos and decref the files we had previously
202    * seen (if any).
203    *
204    * If this is a commit, we also call the policy to give it
205    * a chance to remove other commits.  If any commits are
206    * removed, we decref their files as well.
207    */
208   void checkpoint(SegmentInfos* segmentInfos, bool isCommit);
209 
210 
211   void CLUCENE_LOCAL_DECL incRef(SegmentInfos* segmentInfos, bool isCommit);
212   void CLUCENE_LOCAL_DECL incRef(const std::vector<std::string>& files);
213   void CLUCENE_LOCAL_DECL decRef(const std::vector<std::string>& files) ;
214   void CLUCENE_LOCAL_DECL decRef(SegmentInfos* segmentInfos);
215   void CLUCENE_LOCAL_DECL deleteFiles(std::vector<std::string>& files);
216 
217   /** Delets the specified files, but only if they are new
218    *  (have not yet been incref'd). */
219   void CLUCENE_LOCAL_DECL deleteNewFiles(const std::vector<std::string>& files);
220   void CLUCENE_LOCAL_DECL deleteFile(const char* fileName);
221 };
222 
223 CL_NS_END
224 #endif
225