1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-  */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #ifndef _MORKATOM_
7 #define _MORKATOM_ 1
8 
9 #ifndef _MORK_
10 #  include "mork.h"
11 #endif
12 
13 // 456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789
14 
/* Limits for the one-byte slots of morkAtom. */
#define morkAtom_kMaxByteSize 255      /* largest 8-bit unsigned value */
#define morkAtom_kForeverCellUses 0xFF /* largest 8-bit unsigned value */
#define morkAtom_kMaxCellUses 0x7F     /* largest 7-bit unsigned value */

/* Tags stored in mAtom_Kind; each one names the subclass of the atom. */
#define morkAtom_kKindWeeAnon 'a'  /* atom is a morkWeeAnonAtom */
#define morkAtom_kKindBigAnon 'A'  /* atom is a morkBigAnonAtom */
#define morkAtom_kKindWeeBook 'b'  /* atom is a morkWeeBookAtom */
#define morkAtom_kKindBigBook 'B'  /* atom is a morkBigBookAtom */
#define morkAtom_kKindFarBook 'f'  /* atom is a morkFarBookAtom */
#define morkAtom_kKindRowOid 'r'   /* atom is a morkOidAtom (row) */
#define morkAtom_kKindTableOid 't' /* atom is a morkOidAtom (table) */
26 
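/*| Atom: the common base of every atom subclass.  It holds only the four
**| leading slots (kind tag, cell-use count, change status, one-byte size);
**| the kind tag tells which subclass a given atom really is.
|*/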
29 class morkAtom {  //
30 
31  public:
32   mork_u1 mAtom_Kind;        // identifies a specific atom subclass
33   mork_u1 mAtom_CellUses;    // number of persistent uses in a cell
34   mork_change mAtom_Change;  // how has this atom been changed?
35   mork_u1 mAtom_Size;        // only for atoms smaller than 256 bytes
36 
37  public:
38   morkAtom(mork_aid inAid, mork_u1 inKind);
39 
IsWeeAnon()40   mork_bool IsWeeAnon() const { return mAtom_Kind == morkAtom_kKindWeeAnon; }
IsBigAnon()41   mork_bool IsBigAnon() const { return mAtom_Kind == morkAtom_kKindBigAnon; }
IsWeeBook()42   mork_bool IsWeeBook() const { return mAtom_Kind == morkAtom_kKindWeeBook; }
IsBigBook()43   mork_bool IsBigBook() const { return mAtom_Kind == morkAtom_kKindBigBook; }
IsFarBook()44   mork_bool IsFarBook() const { return mAtom_Kind == morkAtom_kKindFarBook; }
IsRowOid()45   mork_bool IsRowOid() const { return mAtom_Kind == morkAtom_kKindRowOid; }
IsTableOid()46   mork_bool IsTableOid() const { return mAtom_Kind == morkAtom_kKindTableOid; }
47 
IsBook()48   mork_bool IsBook() const { return this->IsWeeBook() || this->IsBigBook(); }
49 
50  public:  // clean vs dirty
SetAtomClean()51   void SetAtomClean() { mAtom_Change = morkChange_kNil; }
SetAtomDirty()52   void SetAtomDirty() { mAtom_Change = morkChange_kAdd; }
53 
IsAtomClean()54   mork_bool IsAtomClean() const { return mAtom_Change == morkChange_kNil; }
IsAtomDirty()55   mork_bool IsAtomDirty() const { return mAtom_Change == morkChange_kAdd; }
56 
57  public:  // atom space scope if IsBook() is true, or else zero:
58   mork_scope GetBookAtomSpaceScope(morkEnv* ev) const;
59   // zero or book's space's scope
60 
61   mork_aid GetBookAtomAid() const;
62   // zero or book atom's ID
63 
64  public:  // empty construction does nothing
morkAtom()65   morkAtom() {}
66 
67  public:  // one-byte refcounting, freezing at maximum
68   void MakeCellUseForever(morkEnv* ev);
69   mork_u1 AddCellUse(morkEnv* ev);
70   mork_u1 CutCellUse(morkEnv* ev);
71 
IsCellUseForever()72   mork_bool IsCellUseForever() const {
73     return mAtom_CellUses == morkAtom_kForeverCellUses;
74   }
75 
76  private:  // warnings
77   static void CellUsesUnderflowWarning(morkEnv* ev);
78 
79  public:  // errors
80   static void BadAtomKindError(morkEnv* ev);
81   static void ZeroAidError(morkEnv* ev);
82   static void AtomSizeOverflowError(morkEnv* ev);
83 
84  public:  // yarns
85   static mork_bool AliasYarn(const morkAtom* atom, mdbYarn* outYarn);
86   static mork_bool GetYarn(const morkAtom* atom, mdbYarn* outYarn);
87 
88  private:  // copying is not allowed
89   morkAtom(const morkAtom& other);
90   morkAtom& operator=(const morkAtom& other);
91 };
92 
93 /*| OidAtom: an atom that references a row or table by identity.
94 |*/
95 class morkOidAtom : public morkAtom {  //
96 
97   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
98   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
99   // mork_change   mAtom_Change;    // how has this atom been changed?
100   // mork_u1       mAtom_Size;      // NOT USED IN "BIG" format atoms
101 
102  public:
103   mdbOid mOidAtom_Oid;  // identity of referenced object
104 
105  public:  // empty construction does nothing
morkOidAtom()106   morkOidAtom() {}
107   void InitRowOidAtom(morkEnv* ev, const mdbOid& inOid);
108   void InitTableOidAtom(morkEnv* ev, const mdbOid& inOid);
109 
110  private:  // copying is not allowed
111   morkOidAtom(const morkOidAtom& other);
112   morkOidAtom& operator=(const morkOidAtom& other);
113 };
114 
115 /*| WeeAnonAtom: an atom whose content immediately follows morkAtom slots
116 **| in an inline fashion, so that morkWeeAnonAtom contains both leading
117 **| atom slots and then the content bytes without further overhead.  Note
118 **| that charset encoding is not indicated, so zero is implied for Latin1.
119 **| (Non-Latin1 content must be stored in a morkBigAnonAtom with a charset.)
120 **|
121 **|| An anon (anonymous) atom has no identity, with no associated bookkeeping
122 **| for lookup needed for sharing like a book atom.
123 **|
124 **|| A wee anon atom is immediate but not shared with any other users of this
125 **| atom, so no bookkeeping for sharing is needed.  This means the atom has
126 **| no ID, because the atom has no identity other than this immediate content,
127 **| and no hash table is needed to look up this particular atom.  This also
128 **| applies to the larger format morkBigAnonAtom, which has more slots.
129 |*/
130 class morkWeeAnonAtom : public morkAtom {  //
131 
132   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
133   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
134   // mork_change   mAtom_Change;    // how has this atom been changed?
135   // mork_u1       mAtom_Size;      // only for atoms smaller than 256 bytes
136 
137  public:
138   mork_u1 mWeeAnonAtom_Body[1];  // 1st byte of immediate content vector
139 
140  public:  // empty construction does nothing
morkWeeAnonAtom()141   morkWeeAnonAtom() {}
142   void InitWeeAnonAtom(morkEnv* ev, const morkBuf& inBuf);
143 
144   // allow extra trailing byte for a null byte:
SizeForFill(mork_fill inFill)145   static mork_size SizeForFill(mork_fill inFill) {
146     return sizeof(morkWeeAnonAtom) + inFill;
147   }
148 
149  private:  // copying is not allowed
150   morkWeeAnonAtom(const morkWeeAnonAtom& other);
151   morkWeeAnonAtom& operator=(const morkWeeAnonAtom& other);
152 };
153 
154 /*| BigAnonAtom: another immediate atom that cannot be encoded as the smaller
155 **| morkWeeAnonAtom format because either the size is too great, and/or the
156 **| charset is not the default zero for Latin1 and must be explicitly noted.
157 **|
158 **|| An anon (anonymous) atom has no identity, with no associated bookkeeping
159 **| for lookup needed for sharing like a book atom.
160 |*/
161 class morkBigAnonAtom : public morkAtom {  //
162 
163   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
164   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
165   // mork_change   mAtom_Change;    // how has this atom been changed?
166   // mork_u1       mAtom_Size;      // NOT USED IN "BIG" format atoms
167 
168  public:
169   mork_cscode mBigAnonAtom_Form;  // charset format encoding
170   mork_size mBigAnonAtom_Size;    // size of content vector
171   mork_u1 mBigAnonAtom_Body[1];   // 1st byte of immed content vector
172 
173  public:  // empty construction does nothing
morkBigAnonAtom()174   morkBigAnonAtom() {}
175   void InitBigAnonAtom(morkEnv* ev, const morkBuf& inBuf, mork_cscode inForm);
176 
177   // allow extra trailing byte for a null byte:
SizeForFill(mork_fill inFill)178   static mork_size SizeForFill(mork_fill inFill) {
179     return sizeof(morkBigAnonAtom) + inFill;
180   }
181 
182  private:  // copying is not allowed
183   morkBigAnonAtom(const morkBigAnonAtom& other);
184   morkBigAnonAtom& operator=(const morkBigAnonAtom& other);
185 };
186 
187 #define morkBookAtom_kMaxBodySize 1024 /* if larger, cannot be shared */
188 
189 /*| BookAtom: the common subportion of wee book atoms and big book atoms that
190 **| includes the atom ID and the pointer to the space referencing this atom
191 **| through a hash table.
192 |*/
193 class morkBookAtom : public morkAtom {  //
194   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
195   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
196   // mork_change   mAtom_Change;    // how has this atom been changed?
197   // mork_u1       mAtom_Size;      // only for atoms smaller than 256 bytes
198 
199  public:
200   morkAtomSpace*
201       mBookAtom_Space;    // mBookAtom_Space->SpaceScope() is atom scope
202   mork_aid mBookAtom_Id;  // identity token for this shared atom
203 
204  public:  // empty construction does nothing
morkBookAtom()205   morkBookAtom() {}
206 
207   static void NonBookAtomTypeError(morkEnv* ev);
208 
209  public:  // Hash() and Equal() for atom ID maps are same for all subclasses:
HashAid()210   mork_u4 HashAid() const { return mBookAtom_Id; }
EqualAid(const morkBookAtom * inAtom)211   mork_bool EqualAid(const morkBookAtom* inAtom) const {
212     return (mBookAtom_Id == inAtom->mBookAtom_Id);
213   }
214 
215  public:  // Hash() and Equal() for atom body maps know about subclasses:
216   // YOU CANNOT SUBCLASS morkBookAtom WITHOUT FIXING Hash and Equal METHODS:
217 
218   mork_u4 HashFormAndBody(morkEnv* ev) const;
219   mork_bool EqualFormAndBody(morkEnv* ev, const morkBookAtom* inAtom) const;
220 
221  public:  // separation from containing space
222   void CutBookAtomFromSpace(morkEnv* ev);
223 
224  private:  // copying is not allowed
225   morkBookAtom(const morkBookAtom& other);
226   morkBookAtom& operator=(const morkBookAtom& other);
227 };
228 
229 /*| FarBookAtom: this alternative format for book atoms was introduced
230 **| in May 2000 in order to support finding atoms in hash tables without
231 **| first copying the strings from original parsing buffers into a new
232 **| atom format.  This was consuming too much time.  However, we can
233 **| use morkFarBookAtom to stage a hash table query, as long as we then
234 **| fix HashFormAndBody() and EqualFormAndBody() to use morkFarBookAtom
235 **| correctly.
236 **|
237 **|| Note we do NOT intend that instances of morkFarBookAtom will ever
238 **| be installed in hash tables, because this is not space efficient.
239 **| We only expect to create temp instances for table lookups.
240 |*/
241 class morkFarBookAtom : public morkBookAtom {  //
242 
243   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
244   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
245   // mork_change   mAtom_Change;    // how has this atom been changed?
246   // mork_u1       mAtom_Size;      // NOT USED IN "BIG" format atoms
247 
248   // morkAtomSpace* mBookAtom_Space; // mBookAtom_Space->SpaceScope() is scope
249   // mork_aid       mBookAtom_Id;    // identity token for this shared atom
250 
251  public:
252   mork_cscode mFarBookAtom_Form;  // charset format encoding
253   mork_size mFarBookAtom_Size;    // size of content vector
254   mork_u1* mFarBookAtom_Body;     // bytes are elsewhere, out of line
255 
256  public:  // empty construction does nothing
morkFarBookAtom()257   morkFarBookAtom() {}
258   void InitFarBookAtom(morkEnv* ev, const morkBuf& inBuf, mork_cscode inForm,
259                        morkAtomSpace* ioSpace, mork_aid inAid);
260 
261  private:  // copying is not allowed
262   morkFarBookAtom(const morkFarBookAtom& other);
263   morkFarBookAtom& operator=(const morkFarBookAtom& other);
264 };
265 
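/*| WeeBookAtom: a book atom whose content bytes are stored inline, starting
**| at mWeeBookAtom_Body immediately after the morkBookAtom slots.  It adds
**| no charset form or size slots of its own.
|*/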
268 class morkWeeBookAtom : public morkBookAtom {  //
269   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
270   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
271   // mork_change   mAtom_Change;    // how has this atom been changed?
272   // mork_u1       mAtom_Size;      // only for atoms smaller than 256 bytes
273 
274   // morkAtomSpace* mBookAtom_Space; // mBookAtom_Space->SpaceScope() is scope
275   // mork_aid       mBookAtom_Id;    // identity token for this shared atom
276 
277  public:
278   mork_u1 mWeeBookAtom_Body[1];  // 1st byte of immed content vector
279 
280  public:  // empty construction does nothing
morkWeeBookAtom()281   morkWeeBookAtom() {}
282   explicit morkWeeBookAtom(mork_aid inAid);
283 
284   void InitWeeBookAtom(morkEnv* ev, const morkBuf& inBuf,
285                        morkAtomSpace* ioSpace, mork_aid inAid);
286 
287   // allow extra trailing byte for a null byte:
SizeForFill(mork_fill inFill)288   static mork_size SizeForFill(mork_fill inFill) {
289     return sizeof(morkWeeBookAtom) + inFill;
290   }
291 
292  private:  // copying is not allowed
293   morkWeeBookAtom(const morkWeeBookAtom& other);
294   morkWeeBookAtom& operator=(const morkWeeBookAtom& other);
295 };
296 
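/*| BigBookAtom: a book atom that adds an explicit charset form and a
**| mork_size content size, followed by the content bytes stored inline
**| starting at mBigBookAtom_Body.
|*/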
299 class morkBigBookAtom : public morkBookAtom {  //
300 
301   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
302   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
303   // mork_change   mAtom_Change;    // how has this atom been changed?
304   // mork_u1       mAtom_Size;      // NOT USED IN "BIG" format atoms
305 
306   // morkAtomSpace* mBookAtom_Space; // mBookAtom_Space->SpaceScope() is scope
307   // mork_aid       mBookAtom_Id;    // identity token for this shared atom
308 
309  public:
310   mork_cscode mBigBookAtom_Form;  // charset format encoding
311   mork_size mBigBookAtom_Size;    // size of content vector
312   mork_u1 mBigBookAtom_Body[1];   // 1st byte of immed content vector
313 
314  public:  // empty construction does nothing
morkBigBookAtom()315   morkBigBookAtom() {}
316   void InitBigBookAtom(morkEnv* ev, const morkBuf& inBuf, mork_cscode inForm,
317                        morkAtomSpace* ioSpace, mork_aid inAid);
318 
319   // allow extra trailing byte for a null byte:
SizeForFill(mork_fill inFill)320   static mork_size SizeForFill(mork_fill inFill) {
321     return sizeof(morkBigBookAtom) + inFill;
322   }
323 
324  private:  // copying is not allowed
325   morkBigBookAtom(const morkBigBookAtom& other);
326   morkBigBookAtom& operator=(const morkBigBookAtom& other);
327 };
328 
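/*| MaxBookAtom: a morkBigBookAtom extended with a trailing buffer of
**| morkBookAtom_kMaxBodySize + 3 bytes.  (Presumably this reserves room for
**| the largest shareable body directly after mBigBookAtom_Body; confirm
**| against the callers before relying on it.)
|*/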
331 class morkMaxBookAtom : public morkBigBookAtom {  //
332 
333   // mork_u1       mAtom_Kind;      // identifies a specific atom subclass
334   // mork_u1       mAtom_CellUses;  // number of persistent uses in a cell
335   // mork_change   mAtom_Change;    // how has this atom been changed?
336   // mork_u1       mAtom_Size;      // NOT USED IN "BIG" format atoms
337 
338   // morkAtomSpace* mBookAtom_Space; // mBookAtom_Space->SpaceScope() is scope
339   // mork_aid       mBookAtom_Id;    // identity token for this shared atom
340 
341   // mork_cscode   mBigBookAtom_Form;      // charset format encoding
342   // mork_size     mBigBookAtom_Size;      // size of content vector
343   // mork_u1       mBigBookAtom_Body[ 1 ]; // 1st byte of immed content vector
344 
345  public:
346   mork_u1 mMaxBookAtom_Body[morkBookAtom_kMaxBodySize + 3];  // max bytes
347 
348  public:  // empty construction does nothing
morkMaxBookAtom()349   morkMaxBookAtom() {}
InitMaxBookAtom(morkEnv * ev,const morkBuf & inBuf,mork_cscode inForm,morkAtomSpace * ioSpace,mork_aid inAid)350   void InitMaxBookAtom(morkEnv* ev, const morkBuf& inBuf, mork_cscode inForm,
351                        morkAtomSpace* ioSpace, mork_aid inAid) {
352     this->InitBigBookAtom(ev, inBuf, inForm, ioSpace, inAid);
353   }
354 
355  private:  // copying is not allowed
356   morkMaxBookAtom(const morkMaxBookAtom& other);
357   morkMaxBookAtom& operator=(const morkMaxBookAtom& other);
358 };
359 
360 // 456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789
361 
362 #endif /* _MORKATOM_ */
363