1 // Copyright 2013 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 //
16 // Author: dsites@google.com (Dick Sites)
17 //
18 //
19 
20 #include "offsetmap.h"
21 
22 #include <string.h>                     // for strcmp
23 #include <stdio.h>                      // for fprintf, stderr, fclose, etc
24 #include <algorithm>                    // for min
25 
26 using namespace std;
27 
28 namespace CLD2 {
29 
30 // Constructor, destructor
OffsetMap()31 OffsetMap::OffsetMap() {
32   Clear();
33 }
34 
~OffsetMap()35 OffsetMap::~OffsetMap() {
36 }
37 
38 // Clear the map
39 // After:
40 //   next_diff_sub_ is 0
41 //   Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
42 //   which is a fake range of width 0 mapping 0=>0
Clear()43 void OffsetMap::Clear() {
44   diffs_.clear();
45   pending_op_ = COPY_OP;
46   pending_length_ = 0;
47   next_diff_sub_ = 0;
48   current_lo_aoffset_ = 0;
49   current_hi_aoffset_ = 0;
50   current_lo_aprimeoffset_ = 0;
51   current_hi_aprimeoffset_ = 0;
52   current_diff_ = 0;
53   max_aoffset_ = 0;           // Largest seen so far
54   max_aprimeoffset_ = 0;      // Largest seen so far
55 }
56 
OpPart(const char c)57 static inline char OpPart(const char c) {
58   return (c >> 6) & 3;
59 }
LenPart(const char c)60 static inline char LenPart(const char c) {
61   return c & 0x3f;
62 }
63 
64 // Print map to file, for debugging
Printmap(const char * filename)65 void OffsetMap::Printmap(const char* filename) {
66   FILE* fout;
67   bool needs_close = false;
68   if (strcmp(filename, "stdout") == 0) {
69     fout = stdout;
70   } else if (strcmp(filename, "stderr") == 0) {
71     fout = stderr;
72   } else {
73     fout = fopen(filename, "w");
74     needs_close = true;
75   }
76   if (fout == NULL) {
77     fprintf(stderr, "%s did not open\n", filename);
78     return;
79   }
80 
81   Flush();    // Make sure any pending entry gets printed
82   fprintf(fout, "Offsetmap: %ld bytes\n", diffs_.size());
83   for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
84     fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
85     if ((i % 20) == 19) {fprintf(fout, "\n");}
86   }
87   fprintf(fout, "\n");
88   if (needs_close) {
89     fclose(fout);
90   }
91 }
92 
93 // Reset to offset 0
Reset()94 void OffsetMap::Reset() {
95   MaybeFlushAll();
96 
97   next_diff_sub_ = 0;
98   current_lo_aoffset_ = 0;
99   current_hi_aoffset_ = 0;
100   current_lo_aprimeoffset_ = 0;
101   current_hi_aprimeoffset_ = 0;
102   current_diff_ = 0;
103 }
104 
105 // Add to  mapping from A to A', specifying how many next bytes are
106 // identical in A and A'
Copy(int bytes)107 void OffsetMap::Copy(int bytes) {
108   if (bytes == 0) {return;}
109   max_aoffset_ += bytes;           // Largest seen so far
110   max_aprimeoffset_ += bytes;      // Largest seen so far
111   if (pending_op_ == COPY_OP) {
112     pending_length_ += bytes;
113   } else {
114     Flush();
115     pending_op_ = COPY_OP;
116     pending_length_ = bytes;
117   }
118 }
119 
120 // Add to mapping from A to A', specifying how many next bytes are
121 // inserted in A' while not advancing in A at all
Insert(int bytes)122 void OffsetMap::Insert(int bytes){
123   if (bytes == 0) {return;}
124   max_aprimeoffset_ += bytes;      // Largest seen so far
125   if (pending_op_ == INSERT_OP) {
126     pending_length_ += bytes;
127   } else if ((bytes == 1) &&
128              (pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
129     // Special-case exactly delete(1) insert(1) +> copy(1);
130     // all others backmap inserts to after deletes
131     pending_op_ = COPY_OP;
132   } else {
133     Flush();
134     pending_op_ = INSERT_OP;
135     pending_length_ = bytes;
136   }
137 }
138 
139 // Add to mapping from A to A', specifying how many next bytes are
140 // deleted from A while not advancing in A' at all
Delete(int bytes)141 void OffsetMap::Delete(int bytes){
142   if (bytes == 0) {return;}
143   max_aoffset_ += bytes;           // Largest seen so far
144   if (pending_op_ == DELETE_OP) {
145     pending_length_ += bytes;
146     } else if ((bytes == 1) &&
147                (pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
148       // Special-case exactly insert(1) delete(1) => copy(1);
149       // all others backmap deletes to after insertss
150       pending_op_ = COPY_OP;
151   } else {
152     Flush();
153     pending_op_ = DELETE_OP;
154     pending_length_ = bytes;
155   }
156 }
157 
Flush()158 void OffsetMap::Flush() {
159   if (pending_length_ == 0) {
160     return;
161   }
162   // We may be emitting a copy op just after a copy op because +1 -1 cancelled
163   // inbetween. If the lengths don't need a prefix byte, combine them
164   if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
165     char c = diffs_[diffs_.size() - 1];
166     MapOp prior_op = static_cast<MapOp>(OpPart(c));
167     int prior_len = LenPart(c);
168     if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
169       diffs_[diffs_.size() - 1] += pending_length_;
170       pending_length_ = 0;
171       return;
172     }
173   }
174   if (pending_length_ > 0x3f) {
175     bool non_zero_emitted = false;
176     for (int shift = 30; shift > 0; shift -= 6) {
177       int prefix = (pending_length_ >> shift) & 0x3f;
178       if ((prefix > 0) || non_zero_emitted) {
179         Emit(PREFIX_OP, prefix);
180         non_zero_emitted = true;
181       }
182     }
183   }
184   Emit(pending_op_, pending_length_ & 0x3f);
185   pending_length_ = 0;
186 }
187 
188 
189 // Add one more entry to copy one byte off the end, then flush
FlushAll()190 void OffsetMap::FlushAll() {
191   Copy(1);
192   Flush();
193 }
194 
195 // Flush all if necessary
MaybeFlushAll()196 void OffsetMap::MaybeFlushAll() {
197   if ((0 < pending_length_) || diffs_.empty()) {
198     FlushAll();
199   }
200 }
201 
202 // Len may be 0, for example as the low piece of length=64
Emit(MapOp op,int len)203 void OffsetMap::Emit(MapOp op, int len) {
204   char c = (static_cast<char>(op) << 6) | (len & 0x3f);
205   diffs_.push_back(c);
206 }
207 
DumpString()208 void OffsetMap::DumpString() {
209   for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
210     fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
211   }
212   fprintf(stderr, "\n");
213 
214   // Print running table of correspondences
215   fprintf(stderr, "       op      A =>  A'     (A forward-maps to A')\n");
216   int aoffset = 0;
217   int aprimeoffset = 0;
218   int length = 0;
219   for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
220     char c = diffs_[i];
221     MapOp op = static_cast<MapOp>(OpPart(c));
222     int len = LenPart(c);
223     length = (length << 6) + len;
224     if (op == COPY_OP) {
225       aoffset += length;
226       aprimeoffset += length;
227       length = 0;
228     } else if (op == INSERT_OP) {
229       aoffset += 0;
230       aprimeoffset += length;
231       length = 0;
232     } else if (op == DELETE_OP) {
233       aoffset += length;
234       aprimeoffset += 0;
235       length = 0;
236     } else {              // (op == PREFIX_OP)
237       // Do nothing else
238     }
239     fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n",
240             i, "&=+-"[op], len,
241             aoffset, aprimeoffset,
242             (next_diff_sub_ == i) ? " <==next_diff_sub_" : "");
243 
244   }
245   fprintf(stderr, "\n");
246 }
247 
DumpWindow()248 void OffsetMap::DumpWindow() {
249   fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, "
250                   "max_aprimeoffset_ = %d, next_diff_sub_ = %d<br>\n",
251           max_aoffset_, max_aprimeoffset_, next_diff_sub_);
252   fprintf(stderr, "A  [%u..%u)\n",
253           current_lo_aoffset_, current_hi_aoffset_);
254   fprintf(stderr, "A' [%u..%u)\n",
255           current_lo_aprimeoffset_, current_hi_aprimeoffset_);
256   fprintf(stderr, "  diff = %d\n", current_diff_);
257   DumpString();
258 }
259 
260 //----------------------------------------------------------------------------//
261 // The guts of the 2013 design                                                //
262 // If there are three ranges a b c in  diffs_, we can be in one of five       //
263 // states: LEFT of a, in ranges a b c, or RIGHT of c                          //
264 // In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_    //
265 // position next_diff_sub_                                                    //
266 // There also are mapping constants max_aoffset_ and max_aprimeoffset_        //
267 // If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0                       //
268 // If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and            //
269 //   next_diff_sub_=diffs_.size()                                             //
270 // Otherwise, at least one of A[) and A'[) is non-empty and the first bytes   //
271 //   correspond to each other. If range i is active, next_diff_sub_ is at     //
272 //   the first byte of range i+1. Because of the length-prefix operator,      //
273 //   an individual range item in diffs_ may be multiple bytes                 //
274 // In all cases aprimeoffset = aoffset + current_diff_                        //
275 //   i.e. current_diff_ = aprimeoffset - aoffset                              //
276 //                                                                            //
277 // In the degenerate case of diffs_.empty(), there are only two states        //
278 //   LEFT and RIGHT and the mapping is the identity mapping.                  //
279 // The initial state is LEFT.                                                 //
280 // It is an error to move left into LEFT or right into RIGHT, but the code    //
281 //   below is robust in these cases.                                          //
282 //----------------------------------------------------------------------------//
283 
SetLeft()284 void OffsetMap::SetLeft() {
285    current_lo_aoffset_ = 0;
286    current_hi_aoffset_ = 0;
287    current_lo_aprimeoffset_ = 0;
288    current_hi_aprimeoffset_ = 0;
289    current_diff_ = 0;
290    next_diff_sub_ = 0;
291 }
292 
SetRight()293 void OffsetMap::SetRight() {
294    current_lo_aoffset_ = max_aoffset_;
295    current_hi_aoffset_ = max_aoffset_;
296    current_lo_aprimeoffset_ = max_aprimeoffset_;
297    current_hi_aprimeoffset_ = max_aprimeoffset_;
298    current_diff_ = max_aprimeoffset_ - max_aoffset_;
299    next_diff_sub_ = 0;
300 }
301 
302 // Back up over previous range, 1..5 bytes
303 // Return subscript at the beginning of that. Pins at 0
Backup(int sub)304 int OffsetMap::Backup(int sub) {
305   if (sub <= 0) {return 0;}
306   --sub;
307   while ((0 < sub) &&
308          (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
309     --sub;
310   }
311   return sub;
312 }
313 
314 // Parse next range, 1..5 bytes
315 // Return subscript just off the end of that
ParseNext(int sub,MapOp * op,int * length)316 int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
317    *op = PREFIX_OP;
318    *length = 0;
319    char c;
320    while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
321      c = diffs_[sub++];
322      *op = static_cast<MapOp>(OpPart(c));
323      int len = LenPart(c);
324      *length = (*length << 6) + len;
325    }
326    // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
327    // Mal-formed can include a trailing prefix byte with no following op
328    return sub;
329 }
330 
331 // Parse previous range, 1..5 bytes
332 // Return current subscript
ParsePrevious(int sub,MapOp * op,int * length)333 int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
334   sub = Backup(sub);
335   return ParseNext(sub, op, length);
336 }
337 
338 // Quick debugging dump; does not parse multi-byte items, so just length & 0x3f
PrintPosition(const char * str)339 void OffsetMap::PrintPosition(const char* str) {
340   MapOp op = PREFIX_OP;
341   int length = 0;
342   if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast<int>(diffs_.size()))) {
343     op = static_cast<MapOp>(OpPart(diffs_[next_diff_sub_ - 1]));
344     length = LenPart(diffs_[next_diff_sub_ - 1]);
345   }
346   fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n",
347           str,
348           next_diff_sub_, "&=+-"[op], length,
349           current_lo_aoffset_, current_hi_aoffset_,
350           current_lo_aprimeoffset_, current_hi_aprimeoffset_);
351 }
352 
353 // Move active window one range to the right
354 // Return true if move was OK
MoveRight()355 bool OffsetMap::MoveRight() {
356   // If at last range or RIGHT, set to RIGHT, return error
357   if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
358     SetRight();
359     return false;
360   }
361   // Actually OK to move right
362   MapOp op;
363   int length;
364   bool retval = true;
365   // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
366   next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
367 
368   current_lo_aoffset_ = current_hi_aoffset_;
369   current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
370   if (op == COPY_OP) {
371     current_hi_aoffset_ = current_lo_aoffset_ + length;
372     current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
373   } else if (op == INSERT_OP) {
374     current_hi_aoffset_ = current_lo_aoffset_ + 0;
375     current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
376   } else if (op == DELETE_OP) {
377     current_hi_aoffset_ = current_lo_aoffset_ + length;
378     current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
379   } else {
380     SetRight();
381     retval = false;
382   }
383   current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
384   return retval;
385 }
386 
387 // Move active window one range to the left
388 // Return true if move was OK
MoveLeft()389 bool OffsetMap::MoveLeft() {
390   // If at first range or LEFT, set to LEFT, return error
391   if (next_diff_sub_ <= 0) {
392     SetLeft();
393     return false;
394   }
395   // Back up over current active window
396   next_diff_sub_ = Backup(next_diff_sub_);
397   if (next_diff_sub_ <= 0) {
398     SetLeft();
399     return false;
400   }
401   // Actually OK to move left
402   MapOp op;
403   int length;
404   bool retval = true;
405   // If mal-formed or in LEFT, this will return with op = PREFIX_OP
406   next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
407 
408   current_hi_aoffset_ = current_lo_aoffset_;
409   current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
410   if (op == COPY_OP) {
411     current_lo_aoffset_ = current_hi_aoffset_ - length;
412     current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
413   } else if (op == INSERT_OP) {
414     current_lo_aoffset_ = current_hi_aoffset_ - 0;
415     current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
416   } else if (op == DELETE_OP) {
417     current_lo_aoffset_ = current_hi_aoffset_ - length;
418     current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
419   } else {
420     SetLeft();
421     retval = false;
422   }
423   current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
424   return true;
425 }
426 
427 // Map an offset in A' to the corresponding offset in A
MapBack(int aprimeoffset)428 int OffsetMap::MapBack(int aprimeoffset){
429   MaybeFlushAll();
430   if (aprimeoffset < 0) {return 0;}
431   if (max_aprimeoffset_ <= aprimeoffset) {
432      return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
433   }
434 
435   // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
436   // use current mapping, else move window left/right
437   bool ok = true;
438   while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
439     ok = MoveLeft();
440   }
441   while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
442     ok = MoveRight();
443   }
444   // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
445 
446   int aoffset = aprimeoffset - current_diff_;
447   if (aoffset >= current_hi_aoffset_) {
448     // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
449     aoffset = current_hi_aoffset_;
450   }
451   return aoffset;
452 }
453 
454 // Map an offset in A to the corresponding offset in A'
MapForward(int aoffset)455 int OffsetMap::MapForward(int aoffset){
456   MaybeFlushAll();
457   if (aoffset < 0) {return 0;}
458   if (max_aoffset_ <= aoffset) {
459      return (aoffset - max_aoffset_) + max_aprimeoffset_;
460   }
461 
462   // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
463   // use current mapping, else move window left/right
464   bool ok = true;
465   while (ok && (aoffset < current_lo_aoffset_)) {
466     ok = MoveLeft();
467   }
468   while (ok && (current_hi_aoffset_ <= aoffset)) {
469     ok = MoveRight();
470   }
471 
472   int aprimeoffset = aoffset + current_diff_;
473   if (aprimeoffset >= current_hi_aprimeoffset_) {
474     // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
475     aprimeoffset = current_hi_aprimeoffset_;
476   }
477   return aprimeoffset;
478 }
479 
480 
481 // static
CopyInserts(OffsetMap * source,OffsetMap * dest)482 bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
483   bool ok = true;
484   while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
485     ok = source->MoveRight();
486     if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
487       return false;
488     }
489     dest->Insert(
490         source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
491   }
492   return true;
493 }
494 
495 // static
CopyDeletes(OffsetMap * source,OffsetMap * dest)496 bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
497   bool ok = true;
498   while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
499     ok = source->MoveRight();
500     if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
501       return false;
502     }
503     dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
504   }
505   return true;
506 }
507 
508 // static
ComposeOffsetMap(OffsetMap * g,OffsetMap * f,OffsetMap * h)509 void OffsetMap::ComposeOffsetMap(
510     OffsetMap* g, OffsetMap* f, OffsetMap* h) {
511   h->Clear();
512   f->Reset();
513   g->Reset();
514 
515   int lo = 0;
516   for (;;) {
517      // Consume delete operations in f. This moves A without moving
518      // A' and A''.
519      if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
520        if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
521           // fprintf(stderr,
522           //         "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
523        }
524 
525        // FlushAll(), called by Reset(), MapForward() or MapBack(), has
526        // added an extra COPY_OP to f and g, so this function has
527        // composed an extra COPY_OP in h from those. To avoid
528        // FlushAll() adds one more extra COPY_OP to h later, dispatch
529        // Flush() right now.
530        h->Flush();
531        return;
532      }
533 
534      // Consume insert operations in g. This moves A'' without moving A
535      // and A'.
536      if (lo >= f->current_hi_aprimeoffset_) {
537        if (!CopyDeletes(f, h)) {
538           // fprintf(stderr,
539           //         "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
540        }
541      }
542 
543      // Compose one operation which moves A' from lo to hi.
544      int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
545      if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
546          g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
547        h->Copy(hi - lo);
548      } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
549        h->Delete(hi - lo);
550      } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
551        h->Insert(hi - lo);
552      }
553 
554      lo = hi;
555   }
556 }
557 
558 // For testing only -- force a mapping
StuffIt(const string & diffs,int max_aoffset,int max_aprimeoffset)559 void OffsetMap::StuffIt(const string& diffs,
560                         int max_aoffset, int max_aprimeoffset) {
561   Clear();
562   diffs_ = diffs;
563   max_aoffset_ = max_aoffset;
564   max_aprimeoffset_ = max_aprimeoffset;
565 }
566 
567 
568 }  // namespace CLD2
569 
570