# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains implementations of database retrieving objects"""
from gitdb.util import (
    join,
    LazyMixin,
    hex_to_bin
)

from gitdb.utils.encoding import force_text
from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from itertools import chain
from functools import reduce


__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')


class ObjectDBR(object):

    """Defines an interface for object database lookup.
    Objects are identified by their 20 byte binary sha"""

    def __contains__(self, sha):
        """Support ``sha in db`` by delegating to :meth:`has_object`.

        :param sha: 20 bytes binary sha
        :return: whatever has_object returns for the given sha"""
        # Must *call* has_object with the sha; returning a bound attribute
        # would neither perform the lookup nor use the argument.
        return self.has_object(sha)

    #{ Query Interface
    def has_object(self, sha):
        """
        Whether the object identified by the given 20 bytes
        binary sha is contained in the database

        :param sha: 20 bytes binary sha
        :return: True if the object identified by the given 20 bytes
            binary sha is contained in the database"""
        raise NotImplementedError("To be implemented in subclass")

    def info(self, sha):
        """ :return: OInfo instance
        :param sha: 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def stream(self, sha):
        """:return: OStream instance
        :param sha: 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def size(self):
        """:return: amount of objects in this database"""
        raise NotImplementedError()

    def sha_iter(self):
        """Return iterator yielding 20 byte shas for all objects in this data base"""
        raise NotImplementedError()

    #} END query interface


class ObjectDBW(object):

    """Defines an interface to create objects in the database"""

    def __init__(self, *args, **kwargs):
        """Initialize the instance without an output stream override.

        Positional and keyword arguments are accepted but not used here."""
        self._ostream = None

    #{ Edit Interface
    def set_ostream(self, stream):
        """
        Adjusts the stream to which all data should be sent when storing new objects

        :param stream: if not None, the stream to use, if None the default stream
            will be used.
        :return: previously installed stream, or None if there was no override
        :raise TypeError: if the stream doesn't have the supported functionality.
            Note that this base implementation performs no validation itself."""
        cstream = self._ostream
        self._ostream = stream
        return cstream

    def ostream(self):
        """
        Return the output stream

        :return: overridden output stream this instance will write to, or None
            if it will write to the default stream"""
        return self._ostream

    def store(self, istream):
        """
        Create a new object in the database
        :return: the input istream object with its sha set to its corresponding value

        :param istream: IStream compatible instance. If its sha is already set
            to a value, the object will just be stored in our database format,
            in which case the input stream is expected to be in object format ( header + contents ).
        :raise IOError: if data could not be written"""
        raise NotImplementedError("To be implemented in subclass")

    #} END edit interface


class FileDBBase(object):

    """Provides basic facilities to retrieve files of interest, including
    caching facilities to help mapping hexsha's to objects"""

    def __init__(self, root_path):
        """Initialize this instance to look for its files at the given root path
        All subsequent operations will be relative to this path

        :param root_path: path that is stored as the root of this database
        :raise InvalidDBRoot:
        **Note:** The base will not perform any accessibility checking as the base
            might not yet be accessible, but become accessible before the first
            access."""
        super(FileDBBase, self).__init__()
        self._root_path = root_path

    #{ Interface
    def root_path(self):
        """:return: path at which this db operates"""
        return self._root_path

    def db_path(self, rela_path):
        """
        :param rela_path: path relative to the database root; it is passed
            through force_text before being joined
        :return: the given relative path relative to our database root, allowing
            to potentially access datafiles"""
        return join(self._root_path, force_text(rela_path))
    #} END interface


class CachingDB(object):

    """A database which uses caches to speed-up access"""

    #{ Interface
    def update_cache(self, force=False):
        """
        Call this method if the underlying data changed to trigger an update
        of the internal caching structures.

        This base implementation has no body beyond this docstring and
        therefore returns None; subclasses are expected to override it.

        :param force: if True, the update must be performed. Otherwise the implementation
            may decide not to perform an update if it thinks nothing has changed.
        :return: True if an update was performed as something changed indeed"""

    # END interface


def _databases_recursive(database, output):
    """Fill output list with database from db, in order. Deals with Loose, Packed
    and compound databases.

    For a CompoundDB, its direct non-compound sub-databases are appended first,
    then each compound sub-database is flattened recursively. Any other
    database is appended as is.

    :param database: database instance to flatten
    :param output: list which is extended in place"""
    if isinstance(database, CompoundDB):
        dbs = database.databases()
        output.extend(db for db in dbs if not isinstance(db, CompoundDB))
        for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
            _databases_recursive(cdb, output)
    else:
        output.append(database)
    # END handle database type


class CompoundDB(ObjectDBR, LazyMixin, CachingDB):

    """A database which delegates calls to sub-databases.

    Databases are stored in the lazy-loaded _dbs attribute.
    Define _set_cache_ to update it with your databases"""

    def _set_cache_(self, attr):
        """Lazily create the requested attribute.

        '_dbs' becomes an empty list and '_db_cache' an empty dict; any other
        attribute name is forwarded to the parent implementation."""
        if attr == '_dbs':
            self._dbs = list()
        elif attr == '_db_cache':
            self._db_cache = dict()
        else:
            super(CompoundDB, self)._set_cache_(attr)

    def _db_query(self, sha):
        """:return: database containing the given 20 byte sha
        :raise BadObject: if none of the sub-databases has the object"""
        # most databases use binary representations, prevent converting
        # it every time a database is being queried
        try:
            return self._db_cache[sha]
        except KeyError:
            pass
        # END first level cache

        for db in self._dbs:
            if db.has_object(sha):
                # remember which database answered for subsequent lookups
                self._db_cache[sha] = db
                return db
        # END for each database
        raise BadObject(sha)

    #{ ObjectDBR interface

    def has_object(self, sha):
        """:return: True if any sub-database contains the given 20 byte binary sha"""
        try:
            self._db_query(sha)
            return True
        except BadObject:
            return False
        # END handle exceptions

    def info(self, sha):
        """:return: result of info(sha) of the sub-database containing sha
        :raise BadObject: if no sub-database contains the object"""
        return self._db_query(sha).info(sha)

    def stream(self, sha):
        """:return: result of stream(sha) of the sub-database containing sha
        :raise BadObject: if no sub-database contains the object"""
        return self._db_query(sha).stream(sha)

    def size(self):
        """:return: total size of all contained databases"""
        return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)

    def sha_iter(self):
        """:return: iterator chaining the sha_iter() results of all sub-databases,
            in the order of the sub-databases"""
        return chain(*(db.sha_iter() for db in self._dbs))

    #} END object DBR Interface

    #{ Interface

    def databases(self):
        """:return: tuple of database instances we use for lookups"""
        return tuple(self._dbs)

    def update_cache(self, force=False):
        """Clear the sha-to-database cache and update all caching sub-databases.

        :param force: passed on to update_cache of each sub-database which is
            a CachingDB
        :return: True if at least one sub-database reported an update"""
        # something might have changed, clear everything
        self._db_cache.clear()
        stat = False
        for db in self._dbs:
            if isinstance(db, CachingDB):
                # Every sub-database must be updated, so never short-circuit
                # the call. Evaluate truthiness instead of using '|=' as an
                # implementation may return None (e.g. the CachingDB default),
                # which cannot be or-ed with a bool.
                if db.update_cache(force):
                    stat = True
            # END if is caching db
        # END for each database to update
        return stat

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """
        :return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
        :param partial_hexsha: hexsha with less than 40 byte
        :raise AmbiguousObjectName: if sub-databases resolve the prefix to
            different shas
        :raise BadObject: if no sub-database could resolve the prefix"""
        databases = list()
        _databases_recursive(self, databases)
        partial_hexsha = force_text(partial_hexsha)
        len_partial_hexsha = len(partial_hexsha)
        if len_partial_hexsha % 2 != 0:
            # pad to an even amount of hex digits to allow binary conversion;
            # the original length is passed along separately
            partial_binsha = hex_to_bin(partial_hexsha + "0")
        else:
            partial_binsha = hex_to_bin(partial_hexsha)
        # END assure successful binary conversion

        candidate = None
        for db in databases:
            full_bin_sha = None
            try:
                if hasattr(db, 'partial_to_complete_sha_hex'):
                    full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
                else:
                    full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
                # END handle database type
            except BadObject:
                continue
            # END ignore bad objects
            if full_bin_sha:
                # two databases may know the same object, which is fine;
                # only differing matches make the prefix ambiguous
                if candidate and candidate != full_bin_sha:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = full_bin_sha
            # END handle candidate
        # END for each db
        if not candidate:
            raise BadObject(partial_binsha)
        return candidate

    #} END interface