1 #ifndef PACK_OBJECTS_H
2 #define PACK_OBJECTS_H
3 
4 #include "object-store.h"
5 #include "thread-utils.h"
6 #include "pack.h"
7 
8 struct repository;
9 
10 #define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
11 
12 #define OE_DFS_STATE_BITS	2
13 #define OE_DEPTH_BITS		12
14 #define OE_IN_PACK_BITS		10
15 #define OE_Z_DELTA_BITS		20
16 /*
17  * Note that oe_set_size() becomes expensive when the given size is
18  * above this limit. Don't lower it too much.
19  */
20 #define OE_SIZE_BITS		31
21 #define OE_DELTA_SIZE_BITS	23
22 
23 /*
24  * State flags for depth-first search used for analyzing delta cycles.
25  *
26  * The depth is measured in delta-links to the base (so if A is a delta
27  * against B, then A has a depth of 1, and B a depth of 0).
28  */
29 enum dfs_state {
30 	DFS_NONE = 0,
31 	DFS_ACTIVE,
32 	DFS_DONE,
33 	DFS_NUM_STATES
34 };
35 
36 /*
37  * The size of struct nearly determines pack-objects's memory
38  * consumption. This struct is packed tight for that reason. When you
39  * add or reorder something in this struct, think a bit about this.
40  *
41  * basic object info
42  * -----------------
43  * idx.oid is filled up before delta searching starts. idx.crc32 is
44  * only valid after the object is written out and will be used for
45  * generating the index. idx.offset will be both gradually set and
46  * used in writing phase (base objects get offset first, then deltas
47  * refer to them)
48  *
49  * "size" is the uncompressed object size. Compressed size of the raw
50  * data for an object in a pack is not stored anywhere but is computed
51  * and made available when reverse .idx is made. Note that when a
52  * delta is reused, "size" is the uncompressed _delta_ size, not the
53  * canonical one after the delta has been applied.
54  *
55  * "hash" contains a path name hash which is used for sorting the
56  * delta list and also during delta searching. Once prepare_pack()
57  * returns it's no longer needed.
58  *
59  * source pack info
60  * ----------------
61  * The (in_pack, in_pack_offset) tuple contains the location of the
62  * object in the source pack. in_pack_header_size allows quickly
63  * skipping the header and going straight to the zlib stream.
64  *
65  * "type" and "in_pack_type" both describe object type. in_pack_type
66  * may contain a delta type, while type is always the canonical type.
67  *
68  * deltas
69  * ------
70  * Delta links (delta, delta_child and delta_sibling) are created to
71  * reflect that delta graph from the source pack then updated or added
72  * during delta searching phase when we find better deltas.
73  *
74  * delta_child and delta_sibling are last needed in
75  * compute_write_order(). "delta" and "delta_size" must remain valid
76  * at object writing phase in case the delta is not cached.
77  *
78  * If a delta is cached in memory and is compressed, delta_data points
79  * to the data and z_delta_size contains the compressed size. If it's
80  * uncompressed [1], z_delta_size must be zero. delta_size is always
81  * the uncompressed size and must be valid even if the delta is not
82  * cached.
83  *
84  * [1] during try_delta phase we don't bother with compressing because
85  * the delta could be quickly replaced with a better one.
86  */
87 struct object_entry {
88 	struct pack_idx_entry idx;
89 	void *delta_data;	/* cached delta (uncompressed) */
90 	off_t in_pack_offset;
91 	uint32_t hash;			/* name hint hash */
92 	unsigned size_:OE_SIZE_BITS;
93 	unsigned size_valid:1;
94 	uint32_t delta_idx;	/* delta base object */
95 	uint32_t delta_child_idx; /* deltified objects who bases me */
96 	uint32_t delta_sibling_idx; /* other deltified objects who
97 				     * uses the same base as me
98 				     */
99 	unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
100 	unsigned delta_size_valid:1;
101 	unsigned char in_pack_header_size;
102 	unsigned in_pack_idx:OE_IN_PACK_BITS;	/* already in pack */
103 	unsigned z_delta_size:OE_Z_DELTA_BITS;
104 	unsigned type_valid:1;
105 	unsigned no_try_delta:1;
106 	unsigned type_:TYPE_BITS;
107 	unsigned in_pack_type:TYPE_BITS; /* could be delta */
108 
109 	unsigned preferred_base:1; /*
110 				    * we do not pack this, but is available
111 				    * to be used as the base object to delta
112 				    * objects against.
113 				    */
114 	unsigned tagged:1; /* near the very tip of refs */
115 	unsigned filled:1; /* assigned write-order */
116 	unsigned dfs_state:OE_DFS_STATE_BITS;
117 	unsigned depth:OE_DEPTH_BITS;
118 	unsigned ext_base:1; /* delta_idx points outside packlist */
119 
120 	/*
121 	 * pahole results on 64-bit linux (gcc and clang)
122 	 *
123 	 *   size: 80, bit_padding: 9 bits
124 	 *
125 	 * and on 32-bit (gcc)
126 	 *
127 	 *   size: 76, bit_padding: 9 bits
128 	 */
129 };
130 
131 struct packing_data {
132 	struct repository *repo;
133 	struct object_entry *objects;
134 	uint32_t nr_objects, nr_alloc;
135 
136 	int32_t *index;
137 	uint32_t index_size;
138 
139 	unsigned int *in_pack_pos;
140 	unsigned long *delta_size;
141 
142 	/*
143 	 * Only one of these can be non-NULL and they have different
144 	 * sizes. if in_pack_by_idx is allocated, oe_in_pack() returns
145 	 * the pack of an object using in_pack_idx field. If not,
146 	 * in_pack[] array is used the same way as in_pack_pos[]
147 	 */
148 	struct packed_git **in_pack_by_idx;
149 	struct packed_git **in_pack;
150 
151 	/*
152 	 * During packing with multiple threads, protect the in-core
153 	 * object database from concurrent accesses.
154 	 */
155 	pthread_mutex_t odb_lock;
156 
157 	/*
158 	 * This list contains entries for bases which we know the other side
159 	 * has (e.g., via reachability bitmaps), but which aren't in our
160 	 * "objects" list.
161 	 */
162 	struct object_entry *ext_bases;
163 	uint32_t nr_ext, alloc_ext;
164 
165 	uintmax_t oe_size_limit;
166 	uintmax_t oe_delta_size_limit;
167 
168 	/* delta islands */
169 	unsigned int *tree_depth;
170 	unsigned char *layer;
171 };
172 
173 void prepare_packing_data(struct repository *r, struct packing_data *pdata);
174 
175 /* Protect access to object database */
packing_data_lock(struct packing_data * pdata)176 static inline void packing_data_lock(struct packing_data *pdata)
177 {
178 	pthread_mutex_lock(&pdata->odb_lock);
179 }
packing_data_unlock(struct packing_data * pdata)180 static inline void packing_data_unlock(struct packing_data *pdata)
181 {
182 	pthread_mutex_unlock(&pdata->odb_lock);
183 }
184 
185 struct object_entry *packlist_alloc(struct packing_data *pdata,
186 				    const struct object_id *oid);
187 
188 struct object_entry *packlist_find(struct packing_data *pdata,
189 				   const struct object_id *oid);
190 
pack_name_hash(const char * name)191 static inline uint32_t pack_name_hash(const char *name)
192 {
193 	uint32_t c, hash = 0;
194 
195 	if (!name)
196 		return 0;
197 
198 	/*
199 	 * This effectively just creates a sortable number from the
200 	 * last sixteen non-whitespace characters. Last characters
201 	 * count "most", so things that end in ".c" sort together.
202 	 */
203 	while ((c = *name++) != 0) {
204 		if (isspace(c))
205 			continue;
206 		hash = (hash >> 2) + (c << 24);
207 	}
208 	return hash;
209 }
210 
oe_type(const struct object_entry * e)211 static inline enum object_type oe_type(const struct object_entry *e)
212 {
213 	return e->type_valid ? e->type_ : OBJ_BAD;
214 }
215 
oe_set_type(struct object_entry * e,enum object_type type)216 static inline void oe_set_type(struct object_entry *e,
217 			       enum object_type type)
218 {
219 	if (type >= OBJ_ANY)
220 		BUG("OBJ_ANY cannot be set in pack-objects code");
221 
222 	e->type_valid = type >= OBJ_NONE;
223 	e->type_ = (unsigned)type;
224 }
225 
oe_in_pack_pos(const struct packing_data * pack,const struct object_entry * e)226 static inline unsigned int oe_in_pack_pos(const struct packing_data *pack,
227 					  const struct object_entry *e)
228 {
229 	return pack->in_pack_pos[e - pack->objects];
230 }
231 
oe_set_in_pack_pos(const struct packing_data * pack,const struct object_entry * e,unsigned int pos)232 static inline void oe_set_in_pack_pos(const struct packing_data *pack,
233 				      const struct object_entry *e,
234 				      unsigned int pos)
235 {
236 	pack->in_pack_pos[e - pack->objects] = pos;
237 }
238 
oe_in_pack(const struct packing_data * pack,const struct object_entry * e)239 static inline struct packed_git *oe_in_pack(const struct packing_data *pack,
240 					    const struct object_entry *e)
241 {
242 	if (pack->in_pack_by_idx)
243 		return pack->in_pack_by_idx[e->in_pack_idx];
244 	else
245 		return pack->in_pack[e - pack->objects];
246 }
247 
248 void oe_map_new_pack(struct packing_data *pack);
249 
oe_set_in_pack(struct packing_data * pack,struct object_entry * e,struct packed_git * p)250 static inline void oe_set_in_pack(struct packing_data *pack,
251 				  struct object_entry *e,
252 				  struct packed_git *p)
253 {
254 	if (pack->in_pack_by_idx) {
255 		if (p->index) {
256 			e->in_pack_idx = p->index;
257 			return;
258 		}
259 		/*
260 		 * We're accessing packs by index, but this pack doesn't have
261 		 * an index (e.g., because it was added since we created the
262 		 * in_pack_by_idx array). Bail to oe_map_new_pack(), which
263 		 * will convert us to using the full in_pack array, and then
264 		 * fall through to our in_pack handling.
265 		 */
266 		oe_map_new_pack(pack);
267 	}
268 	pack->in_pack[e - pack->objects] = p;
269 }
270 
271 void oe_set_delta_ext(struct packing_data *pack,
272 		      struct object_entry *e,
273 		      const struct object_id *oid);
274 
oe_tree_depth(struct packing_data * pack,struct object_entry * e)275 static inline unsigned int oe_tree_depth(struct packing_data *pack,
276 					 struct object_entry *e)
277 {
278 	if (!pack->tree_depth)
279 		return 0;
280 	return pack->tree_depth[e - pack->objects];
281 }
282 
oe_set_layer(struct packing_data * pack,struct object_entry * e,unsigned char layer)283 static inline void oe_set_layer(struct packing_data *pack,
284 				struct object_entry *e,
285 				unsigned char layer)
286 {
287 	if (!pack->layer)
288 		CALLOC_ARRAY(pack->layer, pack->nr_alloc);
289 	pack->layer[e - pack->objects] = layer;
290 }
291 
292 #endif
293