1 /*------------------------------------------------------------------------------ 2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 3 * 4 * Distributable under the terms of either the Apache License (Version 2.0) or 5 * the GNU Lesser General Public License, as specified in the COPYING file. 6 ------------------------------------------------------------------------------*/ 7 #ifndef _lucene_index_IndexFileDeleter_ 8 #define _lucene_index_IndexFileDeleter_ 9 10 #include "CLucene/util/Equators.h" 11 #include "IndexDeletionPolicy.h" 12 13 CL_CLASS_DEF(store,Directory) 14 CL_NS_DEF(index) 15 class SegmentInfos; 16 class DocumentsWriter; 17 class IndexDeletionPolicy; 18 19 /* 20 * This class keeps track of each SegmentInfos instance that 21 * is still "live", either because it corresponds to a 22 * segments_N file in the Directory (a "commit", i.e. a 23 * committed SegmentInfos) or because it's the in-memory SegmentInfos 24 * that a writer is actively updating but has not yet committed 25 * (currently this only applies when autoCommit=false in IndexWriter). 26 * This class uses simple reference counting to map the live 27 * SegmentInfos instances to individual files in the Directory. 28 * 29 * The same directory file may be referenced by more than 30 * one IndexCommitPoints, i.e. more than one SegmentInfos. 31 * Therefore we count how many commits reference each file. 32 * When all the commits referencing a certain file have been 33 * deleted, the refcount for that file becomes zero, and the 34 * file is deleted. 35 * 36 * A separate deletion policy interface 37 * (IndexDeletionPolicy) is consulted on creation (onInit) 38 * and once per commit (onCommit), to decide when a commit 39 * should be removed. 40 * 41 * It is the business of the IndexDeletionPolicy to choose 42 * when to delete commit points. The actual mechanics of 43 * file deletion, retrying, etc, derived from the deletion 44 * of commit points is the business of the IndexFileDeleter. 45 * 46 * The current default deletion policy is {@link 47 * KeepOnlyLastCommitDeletionPolicy}, which removes all 48 * prior commits when a new commit has completed. This 49 * matches the behavior before 2.2. 50 * 51 * Note that you must hold the write.lock before 52 * instantiating this class. It opens segments_N file(s) 53 * directly with no retry logic. 54 */ 55 class IndexFileDeleter { 56 private: 57 /** 58 * Tracks the reference count for a single index file: 59 */ 60 class RefCount { 61 public: 62 int count; IncRef()63 int IncRef() { 64 return ++count; 65 } DecRef()66 int DecRef() { 67 return --count; 68 } 69 }; 70 71 /** 72 * Holds details for each commit point. This class is 73 * also passed to the deletion policy. Note: this class 74 * has a natural ordering that is inconsistent with 75 * equals. 76 */ 77 class CommitPoint: public IndexCommitPoint, public CL_NS(util)::Comparable { 78 int64_t gen; 79 std::string segmentsFileName; 80 IndexFileDeleter* _this; 81 public: 82 std::vector<std::string> files; 83 bool deleted; 84 85 CommitPoint(IndexFileDeleter* _this, SegmentInfos* segmentInfos); 86 virtual ~CommitPoint(); 87 88 /** 89 * Get the segments_N file for this commit point. 90 */ 91 std::string getSegmentsFileName(); 92 93 const std::vector<std::string>& getFileNames(); 94 95 /** 96 * Called only be the deletion policy, to remove this 97 * commit point from the index. 98 */ 99 void deleteCommitPoint(); 100 101 int32_t compareTo(NamedObject* obj); 102 103 static const char* getClassName(); 104 const char* getObjectName() const; 105 static bool sort(IndexCommitPoint* elem1, IndexCommitPoint* elem2); 106 }; 107 108 private: 109 /* Files that we tried to delete but failed (likely 110 * because they are open and we are running on Windows), 111 * so we will retry them again later: */ 112 std::vector<std::string> deletable; 113 114 typedef CL_NS(util)::CLHashMap<char*, RefCount*, 115 CL_NS(util)::Compare::Char, 116 CL_NS(util)::Equals::Char, 117 CL_NS(util)::Deletor::acArray, 118 CL_NS(util)::Deletor::Object<RefCount> > RefCountsType; 119 /* Reference count for all files in the index. 120 * Counts how many existing commits reference a file. 121 * Maps String to RefCount (class below) instances: */ 122 RefCountsType refCounts; 123 124 typedef CL_NS(util)::CLVector<IndexCommitPoint*, CL_NS(util)::Deletor::Object<IndexCommitPoint> > CommitsType; 125 /* Holds all commits (segments_N) currently in the index. 126 * This will have just 1 commit if you are using the 127 * default delete policy (KeepOnlyLastCommitDeletionPolicy). 128 * Other policies may leave commit points live for longer 129 * in which case this list would be longer than 1: */ 130 CommitsType commits; 131 132 /* Holds files we had incref'd from the previous 133 * non-commit checkpoint: */ 134 std::vector<std::string> lastFiles; 135 136 /* Commits that the IndexDeletionPolicy have decided to delete: */ 137 CL_NS(util)::CLArrayList<CommitPoint*> commitsToDelete; 138 139 std::ostream* infoStream; 140 CL_NS(store)::Directory* directory; 141 IndexDeletionPolicy* policy; 142 DocumentsWriter* docWriter; 143 144 145 public: 146 void deletePendingFiles(); 147 148 void setInfoStream(std::ostream* infoStream); 149 void message(std::string message); 150 void decRef(const std::string& fileName); 151 RefCount* getRefCount(const char* fileName); 152 153 /** 154 * Remove the CommitPoints in the commitsToDelete List by 155 * DecRef'ing all files from each SegmentInfos. 156 */ 157 void deleteCommits(); 158 159 /** Change to true to see details of reference counts when 160 * infoStream != null */ 161 static bool VERBOSE_REF_COUNTS; 162 163 /** 164 * Initialize the deleter: find all previous commits in 165 * the Directory, incref the files they reference, call 166 * the policy to let it delete commits. The incoming 167 * segmentInfos must have been loaded from a commit point 168 * and not yet modified. This will remove any files not 169 * referenced by any of the commits. 170 * @throws CorruptIndexException if the index is corrupt 171 * @throws IOException if there is a low-level IO error 172 */ 173 IndexFileDeleter(CL_NS(store)::Directory* directory, IndexDeletionPolicy* policy, SegmentInfos* segmentInfos, std::ostream* infoStream, DocumentsWriter* docWriter); 174 ~IndexFileDeleter(); 175 176 /** 177 * Writer calls this when it has hit an error and had to 178 * roll back, to tell us that there may now be 179 * unreferenced files in the filesystem. So we re-list 180 * the filesystem and delete such files. If segmentName 181 * is non-null, we will only delete files corresponding to 182 * that segment. 183 */ 184 void refresh(const char* segmentName); 185 void refresh(); 186 void close(); 187 188 /** 189 * For definition of "check point" see IndexWriter comments: 190 * "Clarification: Check Points (and commits)". 191 * 192 * Writer calls this when it has made a "consistent 193 * change" to the index, meaning new files are written to 194 * the index and the in-memory SegmentInfos have been 195 * modified to point to those files. 196 * 197 * This may or may not be a commit (segments_N may or may 198 * not have been written). 199 * 200 * We simply incref the files referenced by the new 201 * SegmentInfos and decref the files we had previously 202 * seen (if any). 203 * 204 * If this is a commit, we also call the policy to give it 205 * a chance to remove other commits. If any commits are 206 * removed, we decref their files as well. 207 */ 208 void checkpoint(SegmentInfos* segmentInfos, bool isCommit); 209 210 211 void CLUCENE_LOCAL_DECL incRef(SegmentInfos* segmentInfos, bool isCommit); 212 void CLUCENE_LOCAL_DECL incRef(const std::vector<std::string>& files); 213 void CLUCENE_LOCAL_DECL decRef(const std::vector<std::string>& files) ; 214 void CLUCENE_LOCAL_DECL decRef(SegmentInfos* segmentInfos); 215 void CLUCENE_LOCAL_DECL deleteFiles(std::vector<std::string>& files); 216 217 /** Delets the specified files, but only if they are new 218 * (have not yet been incref'd). */ 219 void CLUCENE_LOCAL_DECL deleteNewFiles(const std::vector<std::string>& files); 220 void CLUCENE_LOCAL_DECL deleteFile(const char* fileName); 221 }; 222 223 CL_NS_END 224 #endif 225