1 // Copyright 2013 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 //
16 // Author: dsites@google.com (Dick Sites)
17 //
18 //
19
20 #include "offsetmap.h"
21
22 #include <string.h> // for strcmp
23 #include <stdio.h> // for fprintf, stderr, fclose, etc
24 #include <algorithm> // for min
25
26 using namespace std;
27
28 namespace CLD2 {
29
30 // Constructor, destructor
OffsetMap()31 OffsetMap::OffsetMap() {
32 Clear();
33 }
34
~OffsetMap()35 OffsetMap::~OffsetMap() {
36 }
37
38 // Clear the map
39 // After:
40 // next_diff_sub_ is 0
41 // Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
42 // which is a fake range of width 0 mapping 0=>0
Clear()43 void OffsetMap::Clear() {
44 diffs_.clear();
45 pending_op_ = COPY_OP;
46 pending_length_ = 0;
47 next_diff_sub_ = 0;
48 current_lo_aoffset_ = 0;
49 current_hi_aoffset_ = 0;
50 current_lo_aprimeoffset_ = 0;
51 current_hi_aprimeoffset_ = 0;
52 current_diff_ = 0;
53 max_aoffset_ = 0; // Largest seen so far
54 max_aprimeoffset_ = 0; // Largest seen so far
55 }
56
OpPart(const char c)57 static inline char OpPart(const char c) {
58 return (c >> 6) & 3;
59 }
LenPart(const char c)60 static inline char LenPart(const char c) {
61 return c & 0x3f;
62 }
63
64 // Print map to file, for debugging
Printmap(const char * filename)65 void OffsetMap::Printmap(const char* filename) {
66 FILE* fout;
67 bool needs_close = false;
68 if (strcmp(filename, "stdout") == 0) {
69 fout = stdout;
70 } else if (strcmp(filename, "stderr") == 0) {
71 fout = stderr;
72 } else {
73 fout = fopen(filename, "w");
74 needs_close = true;
75 }
76 if (fout == NULL) {
77 fprintf(stderr, "%s did not open\n", filename);
78 return;
79 }
80
81 Flush(); // Make sure any pending entry gets printed
82 fprintf(fout, "Offsetmap: %ld bytes\n", diffs_.size());
83 for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
84 fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
85 if ((i % 20) == 19) {fprintf(fout, "\n");}
86 }
87 fprintf(fout, "\n");
88 if (needs_close) {
89 fclose(fout);
90 }
91 }
92
93 // Reset to offset 0
Reset()94 void OffsetMap::Reset() {
95 MaybeFlushAll();
96
97 next_diff_sub_ = 0;
98 current_lo_aoffset_ = 0;
99 current_hi_aoffset_ = 0;
100 current_lo_aprimeoffset_ = 0;
101 current_hi_aprimeoffset_ = 0;
102 current_diff_ = 0;
103 }
104
105 // Add to mapping from A to A', specifying how many next bytes are
106 // identical in A and A'
Copy(int bytes)107 void OffsetMap::Copy(int bytes) {
108 if (bytes == 0) {return;}
109 max_aoffset_ += bytes; // Largest seen so far
110 max_aprimeoffset_ += bytes; // Largest seen so far
111 if (pending_op_ == COPY_OP) {
112 pending_length_ += bytes;
113 } else {
114 Flush();
115 pending_op_ = COPY_OP;
116 pending_length_ = bytes;
117 }
118 }
119
120 // Add to mapping from A to A', specifying how many next bytes are
121 // inserted in A' while not advancing in A at all
Insert(int bytes)122 void OffsetMap::Insert(int bytes){
123 if (bytes == 0) {return;}
124 max_aprimeoffset_ += bytes; // Largest seen so far
125 if (pending_op_ == INSERT_OP) {
126 pending_length_ += bytes;
127 } else if ((bytes == 1) &&
128 (pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
129 // Special-case exactly delete(1) insert(1) +> copy(1);
130 // all others backmap inserts to after deletes
131 pending_op_ = COPY_OP;
132 } else {
133 Flush();
134 pending_op_ = INSERT_OP;
135 pending_length_ = bytes;
136 }
137 }
138
139 // Add to mapping from A to A', specifying how many next bytes are
140 // deleted from A while not advancing in A' at all
Delete(int bytes)141 void OffsetMap::Delete(int bytes){
142 if (bytes == 0) {return;}
143 max_aoffset_ += bytes; // Largest seen so far
144 if (pending_op_ == DELETE_OP) {
145 pending_length_ += bytes;
146 } else if ((bytes == 1) &&
147 (pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
148 // Special-case exactly insert(1) delete(1) => copy(1);
149 // all others backmap deletes to after insertss
150 pending_op_ = COPY_OP;
151 } else {
152 Flush();
153 pending_op_ = DELETE_OP;
154 pending_length_ = bytes;
155 }
156 }
157
Flush()158 void OffsetMap::Flush() {
159 if (pending_length_ == 0) {
160 return;
161 }
162 // We may be emitting a copy op just after a copy op because +1 -1 cancelled
163 // inbetween. If the lengths don't need a prefix byte, combine them
164 if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
165 char c = diffs_[diffs_.size() - 1];
166 MapOp prior_op = static_cast<MapOp>(OpPart(c));
167 int prior_len = LenPart(c);
168 if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
169 diffs_[diffs_.size() - 1] += pending_length_;
170 pending_length_ = 0;
171 return;
172 }
173 }
174 if (pending_length_ > 0x3f) {
175 bool non_zero_emitted = false;
176 for (int shift = 30; shift > 0; shift -= 6) {
177 int prefix = (pending_length_ >> shift) & 0x3f;
178 if ((prefix > 0) || non_zero_emitted) {
179 Emit(PREFIX_OP, prefix);
180 non_zero_emitted = true;
181 }
182 }
183 }
184 Emit(pending_op_, pending_length_ & 0x3f);
185 pending_length_ = 0;
186 }
187
188
189 // Add one more entry to copy one byte off the end, then flush
FlushAll()190 void OffsetMap::FlushAll() {
191 Copy(1);
192 Flush();
193 }
194
195 // Flush all if necessary
MaybeFlushAll()196 void OffsetMap::MaybeFlushAll() {
197 if ((0 < pending_length_) || diffs_.empty()) {
198 FlushAll();
199 }
200 }
201
202 // Len may be 0, for example as the low piece of length=64
Emit(MapOp op,int len)203 void OffsetMap::Emit(MapOp op, int len) {
204 char c = (static_cast<char>(op) << 6) | (len & 0x3f);
205 diffs_.push_back(c);
206 }
207
DumpString()208 void OffsetMap::DumpString() {
209 for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
210 fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
211 }
212 fprintf(stderr, "\n");
213
214 // Print running table of correspondences
215 fprintf(stderr, " op A => A' (A forward-maps to A')\n");
216 int aoffset = 0;
217 int aprimeoffset = 0;
218 int length = 0;
219 for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
220 char c = diffs_[i];
221 MapOp op = static_cast<MapOp>(OpPart(c));
222 int len = LenPart(c);
223 length = (length << 6) + len;
224 if (op == COPY_OP) {
225 aoffset += length;
226 aprimeoffset += length;
227 length = 0;
228 } else if (op == INSERT_OP) {
229 aoffset += 0;
230 aprimeoffset += length;
231 length = 0;
232 } else if (op == DELETE_OP) {
233 aoffset += length;
234 aprimeoffset += 0;
235 length = 0;
236 } else { // (op == PREFIX_OP)
237 // Do nothing else
238 }
239 fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n",
240 i, "&=+-"[op], len,
241 aoffset, aprimeoffset,
242 (next_diff_sub_ == i) ? " <==next_diff_sub_" : "");
243
244 }
245 fprintf(stderr, "\n");
246 }
247
DumpWindow()248 void OffsetMap::DumpWindow() {
249 fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, "
250 "max_aprimeoffset_ = %d, next_diff_sub_ = %d<br>\n",
251 max_aoffset_, max_aprimeoffset_, next_diff_sub_);
252 fprintf(stderr, "A [%u..%u)\n",
253 current_lo_aoffset_, current_hi_aoffset_);
254 fprintf(stderr, "A' [%u..%u)\n",
255 current_lo_aprimeoffset_, current_hi_aprimeoffset_);
256 fprintf(stderr, " diff = %d\n", current_diff_);
257 DumpString();
258 }
259
260 //----------------------------------------------------------------------------//
261 // The guts of the 2013 design //
262 // If there are three ranges a b c in diffs_, we can be in one of five //
263 // states: LEFT of a, in ranges a b c, or RIGHT of c //
264 // In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ //
265 // position next_diff_sub_ //
266 // There also are mapping constants max_aoffset_ and max_aprimeoffset_ //
267 // If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 //
268 // If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and //
269 // next_diff_sub_=diffs_.size() //
270 // Otherwise, at least one of A[) and A'[) is non-empty and the first bytes //
271 // correspond to each other. If range i is active, next_diff_sub_ is at //
272 // the first byte of range i+1. Because of the length-prefix operator, //
273 // an individual range item in diffs_ may be multiple bytes //
274 // In all cases aprimeoffset = aoffset + current_diff_ //
275 // i.e. current_diff_ = aprimeoffset - aoffset //
276 // //
277 // In the degenerate case of diffs_.empty(), there are only two states //
278 // LEFT and RIGHT and the mapping is the identity mapping. //
279 // The initial state is LEFT. //
280 // It is an error to move left into LEFT or right into RIGHT, but the code //
281 // below is robust in these cases. //
282 //----------------------------------------------------------------------------//
283
SetLeft()284 void OffsetMap::SetLeft() {
285 current_lo_aoffset_ = 0;
286 current_hi_aoffset_ = 0;
287 current_lo_aprimeoffset_ = 0;
288 current_hi_aprimeoffset_ = 0;
289 current_diff_ = 0;
290 next_diff_sub_ = 0;
291 }
292
SetRight()293 void OffsetMap::SetRight() {
294 current_lo_aoffset_ = max_aoffset_;
295 current_hi_aoffset_ = max_aoffset_;
296 current_lo_aprimeoffset_ = max_aprimeoffset_;
297 current_hi_aprimeoffset_ = max_aprimeoffset_;
298 current_diff_ = max_aprimeoffset_ - max_aoffset_;
299 next_diff_sub_ = 0;
300 }
301
302 // Back up over previous range, 1..5 bytes
303 // Return subscript at the beginning of that. Pins at 0
Backup(int sub)304 int OffsetMap::Backup(int sub) {
305 if (sub <= 0) {return 0;}
306 --sub;
307 while ((0 < sub) &&
308 (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
309 --sub;
310 }
311 return sub;
312 }
313
314 // Parse next range, 1..5 bytes
315 // Return subscript just off the end of that
ParseNext(int sub,MapOp * op,int * length)316 int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
317 *op = PREFIX_OP;
318 *length = 0;
319 char c;
320 while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
321 c = diffs_[sub++];
322 *op = static_cast<MapOp>(OpPart(c));
323 int len = LenPart(c);
324 *length = (*length << 6) + len;
325 }
326 // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
327 // Mal-formed can include a trailing prefix byte with no following op
328 return sub;
329 }
330
331 // Parse previous range, 1..5 bytes
332 // Return current subscript
ParsePrevious(int sub,MapOp * op,int * length)333 int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
334 sub = Backup(sub);
335 return ParseNext(sub, op, length);
336 }
337
338 // Quick debugging dump; does not parse multi-byte items, so just length & 0x3f
PrintPosition(const char * str)339 void OffsetMap::PrintPosition(const char* str) {
340 MapOp op = PREFIX_OP;
341 int length = 0;
342 if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast<int>(diffs_.size()))) {
343 op = static_cast<MapOp>(OpPart(diffs_[next_diff_sub_ - 1]));
344 length = LenPart(diffs_[next_diff_sub_ - 1]);
345 }
346 fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n",
347 str,
348 next_diff_sub_, "&=+-"[op], length,
349 current_lo_aoffset_, current_hi_aoffset_,
350 current_lo_aprimeoffset_, current_hi_aprimeoffset_);
351 }
352
353 // Move active window one range to the right
354 // Return true if move was OK
MoveRight()355 bool OffsetMap::MoveRight() {
356 // If at last range or RIGHT, set to RIGHT, return error
357 if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
358 SetRight();
359 return false;
360 }
361 // Actually OK to move right
362 MapOp op;
363 int length;
364 bool retval = true;
365 // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
366 next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
367
368 current_lo_aoffset_ = current_hi_aoffset_;
369 current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
370 if (op == COPY_OP) {
371 current_hi_aoffset_ = current_lo_aoffset_ + length;
372 current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
373 } else if (op == INSERT_OP) {
374 current_hi_aoffset_ = current_lo_aoffset_ + 0;
375 current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
376 } else if (op == DELETE_OP) {
377 current_hi_aoffset_ = current_lo_aoffset_ + length;
378 current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
379 } else {
380 SetRight();
381 retval = false;
382 }
383 current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
384 return retval;
385 }
386
387 // Move active window one range to the left
388 // Return true if move was OK
MoveLeft()389 bool OffsetMap::MoveLeft() {
390 // If at first range or LEFT, set to LEFT, return error
391 if (next_diff_sub_ <= 0) {
392 SetLeft();
393 return false;
394 }
395 // Back up over current active window
396 next_diff_sub_ = Backup(next_diff_sub_);
397 if (next_diff_sub_ <= 0) {
398 SetLeft();
399 return false;
400 }
401 // Actually OK to move left
402 MapOp op;
403 int length;
404 bool retval = true;
405 // If mal-formed or in LEFT, this will return with op = PREFIX_OP
406 next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
407
408 current_hi_aoffset_ = current_lo_aoffset_;
409 current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
410 if (op == COPY_OP) {
411 current_lo_aoffset_ = current_hi_aoffset_ - length;
412 current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
413 } else if (op == INSERT_OP) {
414 current_lo_aoffset_ = current_hi_aoffset_ - 0;
415 current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
416 } else if (op == DELETE_OP) {
417 current_lo_aoffset_ = current_hi_aoffset_ - length;
418 current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
419 } else {
420 SetLeft();
421 retval = false;
422 }
423 current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
424 return true;
425 }
426
427 // Map an offset in A' to the corresponding offset in A
MapBack(int aprimeoffset)428 int OffsetMap::MapBack(int aprimeoffset){
429 MaybeFlushAll();
430 if (aprimeoffset < 0) {return 0;}
431 if (max_aprimeoffset_ <= aprimeoffset) {
432 return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
433 }
434
435 // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
436 // use current mapping, else move window left/right
437 bool ok = true;
438 while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
439 ok = MoveLeft();
440 }
441 while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
442 ok = MoveRight();
443 }
444 // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
445
446 int aoffset = aprimeoffset - current_diff_;
447 if (aoffset >= current_hi_aoffset_) {
448 // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
449 aoffset = current_hi_aoffset_;
450 }
451 return aoffset;
452 }
453
454 // Map an offset in A to the corresponding offset in A'
MapForward(int aoffset)455 int OffsetMap::MapForward(int aoffset){
456 MaybeFlushAll();
457 if (aoffset < 0) {return 0;}
458 if (max_aoffset_ <= aoffset) {
459 return (aoffset - max_aoffset_) + max_aprimeoffset_;
460 }
461
462 // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
463 // use current mapping, else move window left/right
464 bool ok = true;
465 while (ok && (aoffset < current_lo_aoffset_)) {
466 ok = MoveLeft();
467 }
468 while (ok && (current_hi_aoffset_ <= aoffset)) {
469 ok = MoveRight();
470 }
471
472 int aprimeoffset = aoffset + current_diff_;
473 if (aprimeoffset >= current_hi_aprimeoffset_) {
474 // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
475 aprimeoffset = current_hi_aprimeoffset_;
476 }
477 return aprimeoffset;
478 }
479
480
481 // static
CopyInserts(OffsetMap * source,OffsetMap * dest)482 bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
483 bool ok = true;
484 while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
485 ok = source->MoveRight();
486 if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
487 return false;
488 }
489 dest->Insert(
490 source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
491 }
492 return true;
493 }
494
495 // static
CopyDeletes(OffsetMap * source,OffsetMap * dest)496 bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
497 bool ok = true;
498 while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
499 ok = source->MoveRight();
500 if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
501 return false;
502 }
503 dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
504 }
505 return true;
506 }
507
508 // static
ComposeOffsetMap(OffsetMap * g,OffsetMap * f,OffsetMap * h)509 void OffsetMap::ComposeOffsetMap(
510 OffsetMap* g, OffsetMap* f, OffsetMap* h) {
511 h->Clear();
512 f->Reset();
513 g->Reset();
514
515 int lo = 0;
516 for (;;) {
517 // Consume delete operations in f. This moves A without moving
518 // A' and A''.
519 if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
520 if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
521 // fprintf(stderr,
522 // "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
523 }
524
525 // FlushAll(), called by Reset(), MapForward() or MapBack(), has
526 // added an extra COPY_OP to f and g, so this function has
527 // composed an extra COPY_OP in h from those. To avoid
528 // FlushAll() adds one more extra COPY_OP to h later, dispatch
529 // Flush() right now.
530 h->Flush();
531 return;
532 }
533
534 // Consume insert operations in g. This moves A'' without moving A
535 // and A'.
536 if (lo >= f->current_hi_aprimeoffset_) {
537 if (!CopyDeletes(f, h)) {
538 // fprintf(stderr,
539 // "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
540 }
541 }
542
543 // Compose one operation which moves A' from lo to hi.
544 int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
545 if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
546 g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
547 h->Copy(hi - lo);
548 } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
549 h->Delete(hi - lo);
550 } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
551 h->Insert(hi - lo);
552 }
553
554 lo = hi;
555 }
556 }
557
558 // For testing only -- force a mapping
StuffIt(const string & diffs,int max_aoffset,int max_aprimeoffset)559 void OffsetMap::StuffIt(const string& diffs,
560 int max_aoffset, int max_aprimeoffset) {
561 Clear();
562 diffs_ = diffs;
563 max_aoffset_ = max_aoffset;
564 max_aprimeoffset_ = max_aprimeoffset;
565 }
566
567
568 } // namespace CLD2
569
570