1 // viewx.cpp --
2 // This is part of Metakit, see http://www.equi4.com/metakit.html
3 
/** @file
 * Implements c4_Sequence, c4_Reference, the c4_...Ref classes,
 * c4_Stream, and c4_Strategy
 */
7 
8 #include "header.h"
9 #include "handler.h"
10 #include "store.h"
11 #include "column.h"
12 
13 /////////////////////////////////////////////////////////////////////////////
14 
c4_Sequence()15 c4_Sequence::c4_Sequence() : _refCount(0)
16     , _dependencies(nullptr)
17     , _propertyLimit(0)
18     , _propertyMap(nullptr)
19     , _tempBuf(nullptr)
20 {
21 }
22 
~c4_Sequence()23 c4_Sequence::~c4_Sequence()
24 {
25     d4_assert(_refCount == 0);
26 
27     d4_assert(!_dependencies); // there can be no dependencies left
28 
29     ClearCache();
30 
31     delete _tempBuf;
32 }
33 
Persist() const34 c4_Persist *c4_Sequence::Persist() const
35 {
36     return nullptr;
37 }
38 
39 /// Increment the reference count of this sequence
IncRef()40 void c4_Sequence::IncRef()
41 {
42     ++_refCount;
43 
44     d4_assert(_refCount != 0);
45 }
46 
47 /// Decrement the reference count, delete objects when last
DecRef()48 void c4_Sequence::DecRef()
49 {
50     d4_assert(_refCount != 0);
51 
52     if (--_refCount == 0) {
53         delete this;
54     }
55 }
56 
57 /// Return the current reference count
NumRefs() const58 int c4_Sequence::NumRefs() const
59 {
60     return _refCount;
61 }
62 
63 /// Compare the specified row with another one
Compare(int index_,c4_Cursor cursor_) const64 int c4_Sequence::Compare(int index_, c4_Cursor cursor_) const
65 {
66     d4_assert(cursor_._seq != 0);
67 
68     c4_Bytes data;
69 
70     for (int colNum = 0; colNum < NumHandlers(); ++colNum) {
71         c4_Handler &h = NthHandler(colNum);
72 
73         const c4_Sequence *hc = HandlerContext(colNum);
74         int i = RemapIndex(index_, hc);
75 
76         if (!cursor_._seq->Get(cursor_._index, h.PropId(), data)) {
77             h.ClearBytes(data);
78         }
79 
80         int f = h.Compare(i, data);
81         if (f != 0) {
82             return f;
83         }
84     }
85 
86     return 0;
87 }
88 
89 /// Restrict the search range for rows
RestrictSearch(c4_Cursor,int &,int &)90 bool c4_Sequence::RestrictSearch(c4_Cursor, int &, int &)
91 {
92     return true;
93 }
94 
95 /// Replace the contents of a specified row
SetAt(int index_,c4_Cursor newElem_)96 void c4_Sequence::SetAt(int index_, c4_Cursor newElem_)
97 {
98     d4_assert(newElem_._seq != 0);
99 
100     c4_Bytes data;
101 
102     c4_Notifier change(this);
103     if (GetDependencies()) {
104         change.StartSetAt(index_, newElem_);
105     }
106 
107     for (int i = 0; i < newElem_._seq->NumHandlers(); ++i) {
108         c4_Handler &h = newElem_._seq->NthHandler(i);
109 
110         // added 06-12-1999 to do index remapping for derived seq's
111         const c4_Sequence *hc = newElem_._seq->HandlerContext(i);
112         int ri = newElem_._seq->RemapIndex(newElem_._index, hc);
113 
114         h.GetBytes(ri, data);
115 
116         //    Set(index_, cursor._seq->NthProperty(i), data);
117         int colNum = PropIndex(h.Property());
118         d4_assert(colNum >= 0);
119 
120         NthHandler(colNum).Set(index_, data);
121     }
122 
123     // if number of props in dest is larger after adding, clear the rest
124     // this way, new props get copied and undefined props get cleared
125     if (newElem_._seq->NumHandlers() < NumHandlers()) {
126         for (int j = 0; j < NumHandlers(); ++j) {
127             c4_Handler &h = NthHandler(j);
128 
129             // if the property does not appear in the source
130             if (newElem_._seq->PropIndex(h.PropId()) < 0) {
131                 h.ClearBytes(data);
132                 h.Set(index_, data);
133             }
134         }
135     }
136 }
137 
138 /// Remap the index to an underlying view
RemapIndex(int index_,const c4_Sequence * seq_) const139 int c4_Sequence::RemapIndex(int index_, const c4_Sequence *seq_) const
140 {
141     return seq_ == this ? index_ : -1;
142 }
143 
144 /// Gives access to a general purpose temporary buffer
Buffer()145 c4_Bytes &c4_Sequence::Buffer()
146 {
147     if (_tempBuf == nullptr) {
148         _tempBuf = d4_new c4_Bytes;
149     }
150     return *_tempBuf;
151 }
152 
153 // 1.8.5: extra buffer to hold returned description strings
UseTempBuffer(const char * str_)154 const char *c4_Sequence::UseTempBuffer(const char *str_)
155 {
156     return strcpy((char *)Buffer().SetBuffer(strlen(str_) + 1), str_);
157 }
158 
159 /// Change number of rows, either by inserting or removing them
Resize(int newSize_,int)160 void c4_Sequence::Resize(int newSize_, int)
161 {
162     if (NumHandlers() > 0) {
163         int diff = newSize_ - NumRows();
164 
165         if (diff > 0) {
166             c4_Row empty; // make sure this doesn't recurse, see below
167             InsertAt(NumRows(), &empty, diff);
168         } else if (diff < 0) {
169             RemoveAt(newSize_, -diff);
170         }
171     } else {
172         // need special case to avoid recursion for c4_Row allocations
173         SetNumRows(newSize_);
174     }
175 }
176 
177 /// Insert one or more rows into this sequence
InsertAt(int index_,c4_Cursor newElem_,int count_)178 void c4_Sequence::InsertAt(int index_, c4_Cursor newElem_, int count_)
179 {
180     d4_assert(newElem_._seq != 0);
181 
182     c4_Notifier change(this);
183     if (GetDependencies()) {
184         change.StartInsertAt(index_, newElem_, count_);
185     }
186 
187     SetNumRows(NumRows() + count_);
188 
189     c4_Bytes data;
190 
191     for (int i = 0; i < newElem_._seq->NumHandlers(); ++i) {
192         c4_Handler &h = newElem_._seq->NthHandler(i);
193 
194         // added 06-12-1999 to do index remapping for derived seq's
195         const c4_Sequence *hc = newElem_._seq->HandlerContext(i);
196         int ri = newElem_._seq->RemapIndex(newElem_._index, hc);
197 
198         int colNum = PropIndex(h.Property());
199         d4_assert(colNum >= 0);
200 
201         if (h.Property().Type() == 'V') {
202             // If inserting from self: Make sure we get a copy of the bytes,
203             // so we don't get an invalid pointer if the memory get realloc'ed
204             h.GetBytes(ri, data, newElem_._seq == this);
205 
206             // special treatment for subviews, insert empty, then overwrite
207             // changed 19990904 - probably fixes a long-standing limitation
208             c4_Bytes temp;
209             h.ClearBytes(temp);
210 
211             c4_Handler &h2 = NthHandler(colNum);
212             h2.Insert(index_, temp, count_);
213 
214             for (int j = 0; j < count_; ++j) {
215                 h2.Set(index_ + j, data);
216             }
217         } else {
218             h.GetBytes(ri, data);
219             NthHandler(colNum).Insert(index_, data, count_);
220         }
221     }
222 
223     // if number of props in dest is larger after adding, clear the rest
224     // this way, new props get copied and undefined props get cleared
225     if (newElem_._seq->NumHandlers() < NumHandlers()) {
226         for (int j = 0; j < NumHandlers(); ++j) {
227             c4_Handler &h = NthHandler(j);
228 
229             // if the property does not appear in the source
230             if (newElem_._seq->PropIndex(h.PropId()) < 0) {
231                 h.ClearBytes(data);
232                 h.Insert(index_, data, count_);
233             }
234         }
235     }
236 }
237 
238 /// Remove one or more rows from this sequence
RemoveAt(int index_,int count_)239 void c4_Sequence::RemoveAt(int index_, int count_)
240 {
241     c4_Notifier change(this);
242     if (GetDependencies()) {
243         change.StartRemoveAt(index_, count_);
244     }
245 
246     SetNumRows(NumRows() - count_);
247 
248     //! careful, this does no index remapping, wrong for derived seq's
249     for (int i = 0; i < NumHandlers(); ++i) {
250         NthHandler(i).Remove(index_, count_);
251     }
252 }
253 
254 /// Move a row to another position
Move(int from_,int to_)255 void c4_Sequence::Move(int from_, int to_)
256 {
257     c4_Notifier change(this);
258     if (GetDependencies()) {
259         change.StartMove(from_, to_);
260     }
261 
262     //! careful, this does no index remapping, wrong for derived seq's
263     for (int i = 0; i < NumHandlers(); ++i) {
264         NthHandler(i).Move(from_, to_);
265     }
266 }
267 
268 /// Return the id of the N-th property
NthPropId(int index_) const269 int c4_Sequence::NthPropId(int index_) const
270 {
271     return NthHandler(index_).PropId();
272 }
273 
ClearCache()274 void c4_Sequence::ClearCache()
275 {
276     if (_propertyLimit > 0) {
277         delete [] _propertyMap; // property indexes may change
278         _propertyLimit = 0;
279     }
280 }
281 
282 /// Find the index of a property by its id
PropIndex(int propId_)283 int c4_Sequence::PropIndex(int propId_)
284 {
285     //! CACHING NOTE: derived views will fail if underlying view is restructured
286     //          still, this cache is kept, since sort will fail anyway...
287     //  The only safe change in these cases is adding new properties at the end.
288 
289     // use the map for the fastest result once known
290     if (propId_ < _propertyLimit && _propertyMap[propId_] >= 0) {
291         return _propertyMap[propId_];
292     }
293 
294     // locate the property using a linear search, return if not present
295     int n = NumHandlers();
296     do {
297         if (--n < 0) {
298             return -1;
299         }
300     } while (NthPropId(n) != propId_);
301 
302     // if the map is too small, resize it (with a little slack)
303     if (propId_ >= _propertyLimit) {
304         int round = (propId_ + 8) & ~0x07;
305         short *vec = d4_new short[round];
306 
307         for (int i = 0; i < round; ++i) {
308             vec[i] = i < _propertyLimit ? _propertyMap[i] : -1;
309         }
310 
311         if (_propertyLimit > 0) {
312             delete [] _propertyMap;
313         }
314 
315         _propertyMap = vec;
316         _propertyLimit = round;
317     }
318 
319     // we have a map, adjust the entry and return
320     return _propertyMap[propId_] = n;
321 }
322 
323 /// Find the index of a property, or create a new entry
PropIndex(const c4_Property & prop_)324 int c4_Sequence::PropIndex(const c4_Property &prop_)
325 {
326     int pos = PropIndex(prop_.GetId());
327     if (pos >= 0) {
328         d4_assert(NthHandler(pos).Property().Type() == prop_.Type());
329         return pos;
330     }
331 
332     c4_Handler *h = CreateHandler(prop_);
333     d4_assert(h != 0);
334 
335     int i = AddHandler(h);
336     if (i >= 0 && NumRows() > 0) {
337         c4_Bytes data;
338         h->ClearBytes(data);
339         h->Insert(0, data, NumRows());
340     }
341 
342     return i;
343 }
344 
Description()345 const char *c4_Sequence::Description()
346 {
347     return nullptr;
348 }
349 
ItemSize(int index_,int propId_)350 int c4_Sequence::ItemSize(int index_, int propId_)
351 {
352     int colNum = PropIndex(propId_);
353     return colNum >= 0 ? NthHandler(colNum).ItemSize(index_) : -1;
354 }
355 
Get(int index_,int propId_,c4_Bytes & buf_)356 bool c4_Sequence::Get(int index_, int propId_, c4_Bytes &buf_)
357 {
358     int colNum = PropIndex(propId_);
359     if (colNum < 0) {
360         return false;
361     }
362 
363     NthHandler(colNum).GetBytes(index_, buf_);
364     return true;
365 }
366 
Set(int index_,const c4_Property & prop_,const c4_Bytes & buf_)367 void c4_Sequence::Set(int index_, const c4_Property &prop_, const c4_Bytes
368                       &buf_)
369 {
370     int colNum = PropIndex(prop_);
371     d4_assert(colNum >= 0);
372 
373     c4_Handler &h = NthHandler(colNum);
374 
375     c4_Notifier change(this);
376     if (GetDependencies()) {
377         change.StartSet(index_, prop_.GetId(), buf_);
378     }
379 
380     if (buf_.Size()) {
381         h.Set(index_, buf_);
382     } else {
383         c4_Bytes empty;
384         h.ClearBytes(empty);
385         h.Set(index_, empty);
386     }
387 }
388 
389 /// Register a sequence to receive change notifications
Attach(c4_Sequence * child_)390 void c4_Sequence::Attach(c4_Sequence *child_)
391 {
392     IncRef();
393 
394     if (!_dependencies) {
395         _dependencies = d4_new c4_Dependencies;
396     }
397 
398     _dependencies->Add(child_);
399 }
400 
401 /// Unregister a sequence which received change notifications
Detach(c4_Sequence * child_)402 void c4_Sequence::Detach(c4_Sequence *child_)
403 {
404     d4_assert(_dependencies != 0);
405 
406     if (!_dependencies->Remove(child_)) {
407         delete _dependencies;
408         _dependencies = nullptr;
409     }
410 
411     DecRef();
412 }
413 
414 /// Called just before a change is made to the sequence
PreChange(c4_Notifier &)415 c4_Notifier *c4_Sequence::PreChange(c4_Notifier &)
416 {
417     d4_assert(0); // should not be called, because it should not attach
418     return nullptr;
419 }
420 
421 /// Called after changes have been made to the sequence
PostChange(c4_Notifier &)422 void c4_Sequence::PostChange(c4_Notifier &)
423 {
424 }
425 
426 /////////////////////////////////////////////////////////////////////////////
427 
/// Assign the value which value_ refers to: its data is fetched and then
/// stored through this reference.
c4_Reference &c4_Reference::operator =(const c4_Reference &value_)
{
    c4_Bytes copy;

    value_.GetData(copy);
    SetData(copy);

    return *this;
}
436 
operator ==(const c4_Reference & a_,const c4_Reference & b_)437 bool operator ==(const c4_Reference &a_, const c4_Reference &b_)
438 {
439     c4_Bytes buf1;
440     bool f1 = a_.GetData(buf1);
441 
442     c4_Bytes buf2;
443     bool f2 = b_.GetData(buf2);
444 
445     // if absent, fill either with zero bytes to match length
446     if (!f1) {
447         buf1.SetBufferClear(buf2.Size());
448     }
449     if (!f2) {
450         buf2.SetBufferClear(buf1.Size());
451     }
452 
453     return buf1 == buf2;
454 }
455 
456 /////////////////////////////////////////////////////////////////////////////
457 
operator t4_i32() const458 c4_IntRef::operator t4_i32() const
459 {
460     c4_Bytes result;
461     if (!GetData(result)) {
462         return 0;
463     }
464 
465     d4_assert(result.Size() == sizeof(t4_i32));
466     return *(const t4_i32 *)result.Contents();
467 }
468 
operator =(t4_i32 value_)469 c4_IntRef &c4_IntRef::operator =(t4_i32 value_)
470 {
471     SetData(c4_Bytes(&value_, sizeof value_));
472     return *this;
473 }
474 
475 /////////////////////////////////////////////////////////////////////////////
476 #if !defined(q4_TINY) || !q4_TINY
477 /////////////////////////////////////////////////////////////////////////////
478 
operator t4_i64() const479 c4_LongRef::operator t4_i64() const
480 {
481     c4_Bytes result;
482     if (!GetData(result)) {
483         static t4_i64 zero;
484         return zero;
485     }
486 
487     d4_assert(result.Size() == sizeof(t4_i64));
488     return *(const t4_i64 *)result.Contents();
489 }
490 
operator =(t4_i64 value_)491 c4_LongRef &c4_LongRef::operator =(t4_i64 value_)
492 {
493     SetData(c4_Bytes(&value_, sizeof value_));
494     return *this;
495 }
496 
497 /////////////////////////////////////////////////////////////////////////////
498 
operator double() const499 c4_FloatRef::operator double() const
500 {
501     c4_Bytes result;
502     if (!GetData(result)) {
503         return 0;
504     }
505 
506     d4_assert(result.Size() == sizeof(float));
507     return *(const float *)result.Contents();
508 }
509 
operator =(double value_)510 c4_FloatRef &c4_FloatRef::operator =(double value_)
511 {
512     float v = (float)value_; // loses precision
513     SetData(c4_Bytes(&v, sizeof v));
514     return *this;
515 }
516 
517 /////////////////////////////////////////////////////////////////////////////
518 
operator double() const519 c4_DoubleRef::operator double() const
520 {
521     c4_Bytes result;
522     if (!GetData(result)) {
523         return 0;
524     }
525 
526     d4_assert(result.Size() == sizeof(double));
527     return *(const double *)result.Contents();
528 }
529 
operator =(double value_)530 c4_DoubleRef &c4_DoubleRef::operator =(double value_)
531 {
532     SetData(c4_Bytes(&value_, sizeof value_));
533     return *this;
534 }
535 
536 /////////////////////////////////////////////////////////////////////////////
537 #endif // !q4_TINY
538 /////////////////////////////////////////////////////////////////////////////
539 
operator c4_Bytes() const540 c4_BytesRef::operator c4_Bytes() const
541 {
542     c4_Bytes result;
543     GetData(result);
544 
545     // the result must immediately be used, its lifetime may be limited
546     return result;
547 }
548 
operator =(const c4_Bytes & value_)549 c4_BytesRef &c4_BytesRef::operator =(const c4_Bytes &value_)
550 {
551     SetData(value_);
552     return *this;
553 }
554 
Access(t4_i32 off_,int len_,bool noCopy_) const555 c4_Bytes c4_BytesRef::Access(t4_i32 off_, int len_, bool noCopy_) const
556 {
557     c4_Bytes &buffer = _cursor._seq->Buffer();
558 
559     int colNum = _cursor._seq->PropIndex(_property.GetId());
560     if (colNum >= 0) {
561         c4_Handler &h = _cursor._seq->NthHandler(colNum);
562         int sz = h.ItemSize(_cursor._index);
563         if (len_ == 0 || off_ + len_ > sz) {
564             len_ = sz - off_;
565         }
566 
567         if (len_ > 0) {
568             c4_Column *col = h.GetNthMemoCol(_cursor._index, true);
569             if (col != nullptr) {
570                 if (noCopy_) {
571                     // 21-11-2005 optimization by A. Stigsen
572                     // return just the first segment (even if it is smaller than
573                     // len). this avoids any expensive memcopies, but you have to
574                     // remember to check length of the returned bytes.
575                     c4_ColIter iter(*col, off_, off_ + len_);
576                     iter.Next();
577                     return c4_Bytes(iter.BufLoad(), iter.BufLen() < len_ ? iter.BufLen()
578                                     : len_);
579                 } else {
580                     const t4_byte *bytes = col->FetchBytes(off_, len_, buffer, false);
581                     if (bytes == buffer.Contents()) {
582                         return buffer;
583                     }
584                     return c4_Bytes(bytes, len_);
585                 }
586             } else {
587                 // do it the hard way for custom/mapped views (2002-03-13)
588                 c4_Bytes result;
589                 GetData(result);
590                 d4_assert(off_ + len_ <= result.Size());
591                 return c4_Bytes(result.Contents() + off_, len_, true);
592             }
593         }
594     }
595 
596     return c4_Bytes();
597 }
598 
Modify(const c4_Bytes & buf_,t4_i32 off_,int diff_) const599 bool c4_BytesRef::Modify(const c4_Bytes &buf_, t4_i32 off_, int diff_) const
600 {
601     int colNum = _cursor._seq->PropIndex(_property.GetId());
602     if (colNum >= 0) {
603         c4_Handler &h = _cursor._seq->NthHandler(colNum);
604         const int n = buf_.Size();
605         const t4_i32 limit = off_ + n; // past changed bytes
606         const t4_i32 overshoot = limit - h.ItemSize(_cursor._index);
607 
608         if (diff_ < overshoot) {
609             diff_ = overshoot;
610         }
611 
612         c4_Column *col = h.GetNthMemoCol(_cursor._index, true);
613         if (col != nullptr) {
614             if (diff_ < 0) {
615                 col->Shrink(limit, -diff_);
616             } else if (diff_ > 0) {
617                 // insert bytes in the highest possible spot
618                 // if a gap is created, it will contain garbage
619                 col->Grow(overshoot > 0 ? col->ColSize() : diff_ > n ? off_ : limit
620                           -diff_, diff_);
621             }
622 
623             col->StoreBytes(off_, buf_);
624         } else {
625             // do it the hard way for custom/mapped views (2002-03-13)
626             c4_Bytes orig;
627             GetData(orig);
628 
629             c4_Bytes result;
630             t4_byte *ptr = result.SetBuffer(orig.Size() + diff_);
631 
632             memcpy(ptr, orig.Contents(), off_);
633             memcpy(ptr + off_, buf_.Contents(), n);
634             memcpy(ptr + off_ + n, orig.Contents() + off_, orig.Size() - off_);
635 
636             SetData(result);
637         }
638         return true;
639     }
640 
641     return false;
642 }
643 
644 /////////////////////////////////////////////////////////////////////////////
645 
operator const char*() const646 c4_StringRef::operator const char *() const
647 {
648     c4_Bytes result;
649     GetData(result);
650 
651     return result.Size() > 0 ? (const char *)result.Contents() : "";
652 }
653 
operator =(const char * value_)654 c4_StringRef &c4_StringRef::operator =(const char *value_)
655 {
656     SetData(c4_Bytes(value_, strlen(value_) + 1));
657     return *this;
658 }
659 
660 /////////////////////////////////////////////////////////////////////////////
661 
operator c4_View() const662 c4_ViewRef::operator c4_View() const
663 {
664     c4_Bytes result;
665     if (!GetData(result)) {
666         return (c4_Sequence *)nullptr;
667     }
668     // resolve ambiguity
669 
670     d4_assert(result.Size() == sizeof(c4_Sequence *));
671     return *(c4_Sequence *const *)result.Contents();
672 }
673 
operator =(const c4_View & value_)674 c4_ViewRef &c4_ViewRef::operator =(const c4_View &value_)
675 {
676     SetData(c4_Bytes(&value_._seq, sizeof value_._seq));
677     return *this;
678 }
679 
680 /////////////////////////////////////////////////////////////////////////////
681 
~c4_Stream()682 c4_Stream::~c4_Stream()
683 {
684 }
685 
686 /////////////////////////////////////////////////////////////////////////////
687 
c4_Strategy()688 c4_Strategy::c4_Strategy() : _bytesFlipped(false)
689     , _failure(0)
690     , _mapStart(nullptr)
691     , _dataSize(0)
692     , _baseOffset(0)
693     , _rootPos(-1)
694     , _rootLen(-1)
695 {
696 }
697 
~c4_Strategy()698 c4_Strategy::~c4_Strategy()
699 {
700     d4_assert(_mapStart == 0);
701 }
702 
703 /// Read a number of bytes
DataRead(t4_i32,void *,int)704 int c4_Strategy::DataRead(t4_i32, void *, int)
705 {
706     /*
707     if (_mapStart != 0 && pos_ + length_ <= _dataSize)
708     {
709     memcpy(buffer_, _mapStart + pos_, length_);
710     return length_;
711     }
712      */
713     ++_failure;
714     return -1;
715 }
716 
717 /// Write a number of bytes, return true if successful
DataWrite(t4_i32,const void *,int)718 void c4_Strategy::DataWrite(t4_i32, const void *, int)
719 {
720     ++_failure;
721 }
722 
723 /// Flush and truncate file
DataCommit(t4_i32)724 void c4_Strategy::DataCommit(t4_i32)
725 {
726 }
727 
728 /// Override to support memory-mapped files
ResetFileMapping()729 void c4_Strategy::ResetFileMapping()
730 {
731 }
732 
733 /// Report total size of the datafile
FileSize()734 t4_i32 c4_Strategy::FileSize()
735 {
736     return _dataSize;
737 }
738 
739 /// Return a value to use as fresh generation counter
FreshGeneration()740 t4_i32 c4_Strategy::FreshGeneration()
741 {
742     return 1;
743 }
744 
745 /// Define the base offset where data is stored
SetBase(t4_i32 base_)746 void c4_Strategy::SetBase(t4_i32 base_)
747 {
748     t4_i32 off = base_ - _baseOffset;
749     _baseOffset = base_;
750     _dataSize -= off;
751     if (_mapStart != nullptr) {
752         _mapStart += off;
753     }
754 }
755 
/*
end_ is the file position to start scanning from (a negative value
defaults to FileSize())

the result is the logical end of the datafile (or -1 if no valid data
was found)

This code uses a tiny state machine so that all the code to read and decode
file marks is in one place within the loop.
 */
764 
/// Scan datafile head/tail markers, return logical end of data
///
/// @param end_ file position to start scanning backwards from; a negative
///             value means FileSize(). Only when end_ is not negative are
///             the base offset, file mapping, root position and root
///             length of this strategy adjusted to what was found.
/// @return the logical end of the data, or -1 when a marker cannot be
///         read or no valid marker sequence is found
t4_i32 c4_Strategy::EndOfData(t4_i32 end_)
{
    // states of the scanner, kStateDone ends the loop
    enum {
        kStateAtEnd, kStateCommit, kStateHead, kStateOld, kStateDone
    };

    // all positions below are relative to the current base offset
    t4_i32 pos = (end_ >= 0 ? end_ : FileSize()) - _baseOffset;
    t4_i32 last = pos;
    t4_i32 rootPos = 0;
    t4_i32 rootLen = -1;   // impossible value, flags old-style header
    t4_byte mark[8];

    for (int state = kStateAtEnd; state != kStateDone;) {
        // every pass reads the 8-byte marker just before the current position
        pos -= 8;
        if (pos + _baseOffset < 0 && state != kStateOld) {
            // bad offset, try old-style header from start of file
            pos = -_baseOffset;
            state = kStateOld;
        }

        if (DataRead(pos, &mark, sizeof mark) != sizeof mark) {
            return -1;
        }

        // bytes 1..3 hold a count, most significant byte first
        t4_i32 count = 0;
        for (int i = 1; i < 4; ++i) {
            count = (count << 8) + mark[i];
        }

        // bytes 4..7 hold an offset, most significant byte first
        t4_i32 offset = 0;
        for (int j = 4; j < 8; ++j) {
            offset = (offset << 8) + mark[j];
        }

        // classify the marker; the current state decides which kinds are valid
        const bool isSkipTail = ((mark[0] & 0xF0) == 0x90 /* 2006-11-11 */
                                 || (mark[0] == 0x80 && count == 0)) && offset > 0;
        const bool isCommitTail = mark[0] == 0x80 && count > 0 && offset > 0;
        const bool isHeader = (mark[0] == 'J' || mark[0] == 'L') && (mark[0]
                                                                     ^mark[1]) == ('J' ^ 'L') && mark[2] == 0x1A && (mark[3] & 0x40) == 0;

        switch (state) {
        case kStateAtEnd:
            // no commit tail found yet

            if (isSkipTail) {
                // step over the skipped range, which also moves the logical end
                pos -= offset;
                last = pos;
            } else if (isCommitTail) {
                rootPos = offset;
                rootLen = count;
                state = kStateCommit;
            } else {
                // restart from the beginning; the next pass reads position 0
                pos = 8;
                state = kStateOld;
            }
            break;

        case kStateCommit:
            // commit tail must be preceded by skip tail

            if (!isSkipTail) {
                return -1;
            }
            // the 8 added here cancels the step back at the top of the loop
            pos -= offset - 8;
            state = kStateHead;
            break;

        case kStateHead:
            // fetch the header

            if (!isHeader) {
                // restart from the beginning; the next pass reads position 0
                pos = 8;
                state = kStateOld;
            } else {
                state = kStateDone;
            }
            break;

        case kStateOld:
            // old format, look for header in first 4 Kb

            if (isHeader && mark[3] == 0x80) {
                d4_assert(rootPos == 0);
                for (int k = 8; --k >= 4;) {
                    // old header is little-endian
                    rootPos = (rootPos << 8) + mark[k];
                }
                state = kStateDone;
            } else {
                // together with the step back above, this advances by 8 bytes
                pos += 16;
                if (pos > 4096) {
                    return -1;
                }
            }
            break;
        }
    }

    last += _baseOffset; // all seeks were relative to current offset

    if (end_ >= 0) {
        // if end was specified, then adjust this strategy object
        _baseOffset += pos;
        d4_assert(_baseOffset >= 0);
        if (_mapStart != nullptr) {
            _mapStart += pos;
            _dataSize -= pos;
        }

        _rootPos = rootPos;
        _rootLen = rootLen;
    }

    // mark still holds the header which ended the scan; its first two bytes,
    // read as a native short and truncated to a char, tell whether the byte
    // order of the file differs from that of this machine
    d4_assert(mark[0] == 'J' || mark[1] == 'J');
    _bytesFlipped = (char)*(const short *)mark != 'J';

    return last;
}
884 
885 /////////////////////////////////////////////////////////////////////////////
886