1 /* $Id: esort.h 1649 2009-10-19 14:35:01Z terpstra $ 2 * 3 * esort.h - Public interface to libesort 4 * 5 * Copyright (C) 2002 - Wesley W. Terpstra 6 * 7 * License: GPL 8 * 9 * Authors: 'Wesley W. Terpstra' <wesley@terpstra.ca> 10 * 11 * This program is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License as published by 13 * the Free Software Foundation; version 2.1. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 */ 24 25 #ifndef ESORT_H 26 #define ESORT_H 27 28 #include <string> 29 #include <memory> 30 31 /* What is ESort? -- An Online External Sort 32 * 33 * ESort provides a very primitive database API: An insert-only set of keys. 34 * 35 * There are very different trade-offs when compared with a standard database. 36 * 37 * N = number of keys in the set, M = number of operations 38 * The seeks needed to for each repeated operation in one transaction: 39 * 40 * ESort BTree Hash 41 * Insertion O(1) O(M*log(N)) O(M) 42 * Read in sequence O(1) O(M) impossible 43 * Seek to key O(M*log^2(N)) O(M*log(N)) O(M) 44 * Delete impossible O(M*log(N)) O(M) 45 * 46 * From this table, one can conclude that ESort allows blindly fast insertion 47 * and sequential read, but pays in the cost to seek. This is what makes it 48 * highly appropriate for keyword indexes. 49 * 50 * An additional restriction is that ESort only supports a single-writer. 51 * An additional advantage is that ESort readers get snap-shots. 52 */ 53 54 namespace ESort 55 { 56 57 using std::string; 58 using std::auto_ptr; 59 60 /** These parameters specify minimum values required for a database. 61 * If an existing database has too small a value, an error is returned. 62 * The unique flag toggles whether the database is a set or multiset. 63 */ 64 class Parameters 65 { 66 protected: 67 unsigned int version_; 68 unsigned long blockSize_; 69 unsigned long keySize_; 70 bool unique_; 71 bool synced_; 72 unsigned int keyWidth_; 73 74 public: 75 // keySize & blockSize are the number of bytes maximum 76 Parameters( 77 bool synced = true, 78 bool unique = true, 79 unsigned int version = 1, 80 unsigned long blockSize = 8192, 81 unsigned long keySize = 255); 82 83 bool isWider(const Parameters& o); 84 version()85 unsigned int version () const { return version_; } blockSize()86 unsigned long blockSize() const { return blockSize_; } keySize()87 unsigned long keySize () const { return keySize_; } unique()88 bool unique () const { return unique_; } synced()89 bool synced () const { return synced_; } keyWidth()90 unsigned int keyWidth () const { return keyWidth_; } 91 minimize(const Parameters & x)92 static Parameters minimize(const Parameters& x) 93 { 94 return Parameters(x.synced(), x.unique(), 1, 8, 1); 95 } 96 }; 97 98 /** The direction in which the Walker walks the database. 99 * 100 * A Forward Query will find the first key >= the request 101 * A Backward Query will find the last key < the request 102 * 103 * The Forward Query moves the iterator so the key increases 104 * The Backward Query moves the iterator so the key decreases 105 * 106 * In this manner Forward + Backward patition the database's keys 107 */ 108 enum Direction 109 { 110 Forward, 111 Backward 112 }; 113 114 /** The Walker class walks the database in sorted order. 115 * As from the chart above, each invokation of advance() pays 0 seeks. 116 */ 117 class Walker 118 { 119 public: 120 /** The current key which the walker points to. 121 * This is not valid until advance is called once. 122 */ 123 string key; 124 125 /** Advance to the next key in the database. 126 * Returned is the number of bytes in common with the last key. 127 * -1 is returned on error, errno = 0 -> reached EOF. 128 */ 129 virtual int advance() = 0; 130 131 virtual ~Walker(); 132 }; 133 134 /** A Reader object allows you to obtain Walkers. 135 * 136 * A Reader is atomic. When you obtain a Reader, the contents of the set 137 * remain fixed for as long as you keep a hold of the handle. Furthermore, 138 * a Reader will take snapshots of the database only between Writer commits. 139 */ 140 class Reader 141 { 142 public: 143 virtual ~Reader(); 144 145 /** Obtain a Walker as described in Direction. 146 * This costs O(log^2(N)) seeks, so try not to call it too often. 147 * 148 * This operation never fails; you must call advance() before use. 149 */ 150 virtual auto_ptr<Walker> seek(const string& k, Direction dir) = 0; 151 152 /** Obtain a record Walker over only those records which are 153 * prefixed by 'prefix'. When the last such is passed, eof is 154 * returned. 155 * 156 * This operation never failes; you must call advance() before use. 157 */ 158 virtual auto_ptr<Walker> seek(const string& prefix, const string& k, Direction dir) = 0; 159 160 /** Open an existing database for reading. 161 * 0 is returned and errno set on error. 162 */ 163 static auto_ptr<Reader> opendb( 164 const string& db, 165 const Parameters& p = Parameters::minimize(Parameters())); 166 }; 167 168 /** The Writer object allows you to insert keys. 169 * 170 * The Writer is atomic. When a key is inserted, it immediately becomes 171 * available in all Walkers & Readers obtained from the Writer. However, 172 * all open Readers never see the inserted keys, and new Readers do not 173 * see them until the Writer calls commit. 174 */ 175 class Writer : public Reader 176 { 177 public: 178 /** Insert a key into the database. 179 * If an error occures, -1 is returned and errno set, else 0. 180 * Be sure the key is smaller than keySize on pain of EINVAL. 181 */ 182 virtual int insert(const string& k) = 0; 183 184 /** Commit the changes to the database. 185 * If an error occures, -1 is returned and errno set, else 0. 186 */ 187 virtual int commit() = 0; 188 189 /** Open a database for writing. 190 * It is created if it did not exist. 191 * 192 * The mode is identical to open(2). 193 * 194 * 0 is returned and errno set on error. 195 */ 196 static auto_ptr<Writer> opendb( 197 const string& db, const Parameters& p = Parameters(), 198 int mode = 0666); 199 }; 200 201 } 202 203 #endif 204