1 // Copyright 2016-2021 Doug Moen
2 // Licensed under the Apache License, version 2.0
3 // See accompanying file LICENSE or https://www.apache.org/licenses/LICENSE-2.0
4 
5 #ifndef LIBCURV_VALUE_H
6 #define LIBCURV_VALUE_H
7 
8 #include <libcurv/fail.h>
9 #include <libcurv/shared.h>
10 #include <libcurv/ternary.h>
11 #include <cstdint>
12 #include <ostream>
13 
14 namespace curv {
15 
16 union Value;
17 struct Symbol_Ref;
18 struct Context;
19 
20 /// Base class for the object referenced by a curv reference value.
21 ///
22 /// The memory layout for Ref_Value is:
23 /// * vtable_pointer -- 64 bits
24 /// * use_count -- 32 bits
25 /// * type -- 32 bits
26 ///
27 /// The next data member has 128 bit alignment with no hole on 64 bit platforms.
28 ///
29 /// The type_ and subtype_ fields enable us to query the type by loading the
30 /// first 128 bits of the object into a cache line, without indirecting through
31 /// the vtable, which would cost a 2nd cache line hit and either a virtual
32 /// function call or an RTTI lookup, both relatively costly.
33 ///
34 /// Why not put the type code into Value? Right now, there are an extra 3 bits
35 /// in curv::Value where I could store a type code, similar to LuaJIT and most
36 /// Javascript VMs. I chose not to do this, for simplicity, and to leave room
37 /// for updating later to support 52 bit pointers.
38 ///
39 /// All Ref_Values must be allocated on the heap: see Shared_Base.
40 struct Ref_Value : public Shared_Base
41 {
42     uint16_t type_;
43     uint16_t subtype_;
44     enum {
45         ty_symbol,
46         ty_abstract_list,
47             sty_list,
48             sty_string,
49         ty_record,
50             sty_drecord,
51             sty_module,
52             sty_dir_record,
53         ty_function,
54         ty_lambda,
55         ty_reactive,
56             sty_uniform_variable,
57             sty_reactive_expression,
58         ty_type,
59             sty_error_type,
60             sty_bool_type,
61             sty_num_type,
62             sty_list_type,
63         ty_index,
64             sty_this,
65             sty_tpath,
66             sty_tslice
67     };
Ref_ValueRef_Value68     Ref_Value(int type) : Shared_Base(), type_(type), subtype_(type) {}
Ref_ValueRef_Value69     Ref_Value(int type, int subtype)
70     :
71         Shared_Base(), type_(type), subtype_(subtype)
72     {}
73 
74     /// Print a value like a Curv expression.
75     virtual void print_repr(std::ostream&) const = 0;
76 
77     /// Print a value like a string.
78     virtual void print_string(std::ostream&) const;
79 
80     virtual void print_help(std::ostream&) const;
81 };
82 
83 /// A boxed, dynamically typed value in the Curv runtime.
84 ///
85 /// A Value is 64 bits. 64 bit IEEE floats are represented as themselves
86 /// (same bit pattern), while all other types are represented as NaNs,
87 /// with the data stored in the low order 48 bits of the NaN. This is called
88 /// "NaN boxing", a technique also used by LuaJIT and JavaScriptCore.
89 ///
90 /// "Immediate" values are stored entirely in the 64 bit pattern of a Value.
91 /// These are numbers, booleans, characters, and the "missing" pseudovalue
92 /// that denotes the absence of a value within libcurv APIs.
93 ///
94 /// String, List, Record and Function values are "reference" values:
95 /// a Ref_Value* pointer is stored in the low order 48 bits of the Value.
96 /// This works on 64 bit Intel and ARM systems because those architectures
97 /// use 48 bit virtual addresses, with the upper 16 bits of a 64 bit pointer
98 /// being wasted space.
99 ///
100 /// Since this code was written, ARM and Intel have added support for pointers
101 /// with more than 48 significant bits. The Linux kernel does not enable this
102 /// feature by default, because it breaks all of the modern web browsers,
103 /// which use NaN boxing. So my implementation continues to be viable, for now.
104 /// I could upgrade my Nan boxing implementation to support 52 bit pointers,
105 /// but it's not clear if that would actually help anybody.
106 ///
107 /// Reference values have Shared semantics. The copy constructor increments
108 /// the reference count, the destructor decrements the reference count and
109 /// deletes the object if the refcount reaches 0.
110 ///
111 /// Each Value has a unique bit pattern (not a given: I'm forcing the values
112 /// of unused bits in the NaN box to ensure this). Only positive NaNs are used.
113 /// This speeds up is_ref() and some equality tests.
114 union Value
115 {
116 private:
117     // internal representation
118     double number_;
119     uint64_t bits_;
120     int64_t signed_bits_;
121 
122     // A double whose upper 16 bits is 0x7FFF is a quiet NaN on both Intel
123     // and ARM. The low order 48 bits can store arbitrary data which the FPU
124     // will ignore. (On PA-RISC and MIPS we'd use 0x7FF7 for a quiet NaN, but
125     // we don't currently support those architectures.)
126     static constexpr uint64_t k_nanbits = 0x7FFF'0000'0000'0000;
127     // The missing value has a 48 bit payload of 0, same representation as the
128     // null pointer. So Value((Ref_Value*)0) is missing.
129     static constexpr uint64_t k_missingbits = k_nanbits|0;
130     // false and true have a payload of 2 and 3. The '2' bit is only set in the
131     // payload for a boolean value (assuming all Ref_Value* pointers have
132     // at least 4 byte alignment). The '1' bit is 0 or 1 for false and true.
133     static constexpr uint64_t k_boolbits = k_nanbits|2;
134     static constexpr uint64_t k_boolmask = 0xFFFF'FFFF'FFFF'FFFE;
135 
136     /// In a character, the low order two bits are 01.
137     /// The 8 bits adjacent to this are the character value.
138     static constexpr uint64_t k_charbits = k_nanbits|1;
139     static constexpr uint64_t k_charmask = 0xFFFF'FFFF'FFFF'FC03;
140     static constexpr unsigned k_charoffset = 2;
141 
142     // Note: the corresponding public constructor takes a Shared argument.
Value(const Ref_Value * r)143     inline Value(const Ref_Value* r)
144     {
145         #if UINTPTR_MAX == UINT64_MAX
146             // 64 bit pointers
147             bits_ = ((uint64_t)r & 0x0000'FFFF'FFFF'FFFF) | Value::k_nanbits;
148         #elif UINTPTR_MAX == UINT32_MAX
149             // 32 bit pointers
150             bits_ = (uint64_t)(uint32_t)r | Value::k_nanbits;
151         #else
152             static_assert(false, "only 32 and 64 bit architectures supported");
153         #endif
154     }
155     template<class T, class... Args> friend Value make_ref_value(Args&&...);
156 public:
157     /// Construct the `missing` value (default constructor).
158     ///
159     /// There is no way for a Curv program to construct the missing value.
160     /// It's kind of like a null pointer: it can be used as a special value
161     /// to signify that no value is present.
Value()162     inline constexpr Value() noexcept : bits_{k_missingbits} {}
163 
164     /// True if value is missing.
is_missing()165     inline bool is_missing() const noexcept
166     {
167         return bits_ == k_missingbits;
168     }
169     explicit operator bool () const noexcept
170     {
171         return !is_missing();
172     }
173 
174     /// Construct a boolean value.
Value(bool b)175     inline constexpr Value(bool b) : bits_{k_boolbits|(uint64_t)b} {}
176 
177     /// True if the value is boolean.
is_bool()178     inline bool is_bool() const noexcept
179     {
180         return (bits_ & k_boolmask) == k_boolbits;
181     }
182     /// Convert a boxed boolean Value to `bool`.
183     ///
184     /// Only defined if is_bool() is true.
to_bool_unsafe()185     inline bool to_bool_unsafe() const noexcept
186     {
187         return (bool)(bits_ & 1);
188     }
189     /// Convert a Value to `bool`, throw an exception if wrong type.
190     bool to_bool(const Context&) const;
191 
192     /**************
193      * characters *
194      **************/
195     /// Construct a character value.
Value(char c)196     inline constexpr Value(char c)
197     : bits_{k_charbits|(uint64_t((unsigned char)c) << k_charoffset)}
198     {}
199     /// True if the value is a character.
is_char()200     inline bool is_char() const noexcept
201     {
202         return (bits_ & k_charmask) == k_charbits;
203     }
204     /// Convert a boxed character Value to type 'char'.
205     /// Only defined if is_char() is true.
to_char_unsafe()206     inline char to_char_unsafe() const noexcept
207     {
208         return char(bits_ >> k_charoffset);
209     }
210     /// Convert a Value to `char`, throw an exception if wrong type.
211     char to_char(const Context&) const;
212 
213     /// Construct a number value.
214     ///
215     /// The Curv Number type includes all of the IEEE 64 bit floating point
216     /// values except for NaN.
217     /// If the argument is NaN, construct the missing value.
Value(double n)218     inline Value(double n)
219     {
220         if (n == n)
221             number_ = n;
222         else
223             bits_ = k_missingbits;
224     }
225 
226     /// True if the value is a number.
is_num()227     inline bool is_num() const noexcept
228     {
229         return number_ == number_;
230     }
231 
232     bool is_int() const noexcept;
233 
234     /// Convert a number value to `double`.
235     ///
236     /// Only defined if `is_num()` is true.
237     /// Potentially faster than `to_num_or_nan()`,
238     /// so call this version when guarded by `if(v.is_num())`.
to_num_unsafe()239     inline double to_num_unsafe() const noexcept
240     {
241         return number_;
242     }
243 
244     /// Convert a number value to `double`.
245     ///
246     /// If is_num() is false then NaN is returned.
to_num_or_nan()247     inline double to_num_or_nan() const noexcept
248     {
249         return number_;
250     }
251 
252     /// Convert a Value to `double`, throw an exception if wrong type.
253     double to_num(const Context&) const;
254 
255     // Convert a Value to `int`.
256     // Throw an exception if wrong type or out of range.
257     int to_int(int lo, int hi, const Context&) const;
258 
259     /// Construct a reference value.
260     ///
261     /// If the argument is nullptr, construct the null value.
Value(Shared<const Ref_Value> ptr)262     inline Value(Shared<const Ref_Value> ptr) : Value(ptr.detach())
263     {
264     }
265 
266     /// True if the value is a reference value.
is_ref()267     inline bool is_ref() const noexcept
268     {
269         // Negative numbers will have the sign bit set, which means
270         // signed_bits_ < 0. Positive infinity has all 1s in the exponent,
271         // and is 0x7FF0'0000'0000'0000. Positive numbers have a smaller
272         // exponent than this, so will test < +inf as an integer bit pattern.
273         // The positive NaNs have the largest signed integer magnitude,
274         // and all non-numeric values are encoded as positive NaNs.
275         // The 3 special immediate values (missing, false and true)
276         // are encoded like pointer values in the range 0...3.
277         return signed_bits_ > (int64_t)(k_nanbits|3)
278             && (bits_ & 3) == 0;
279         // TODO: the second test rules out characters; make this faster.
280     }
281 
282     /// Convert a reference value to `Ref_Value&`.
283     ///
284     /// Unsafe unless `is_ref()` is true, otherwise it returns a bad reference,
285     /// causing memory corruption or a crash if dereferenced.
286     /// Another reason it's unsafe is that the returned reference becomes
287     /// invalid if the original Value is destroyed.
288     /// See `maybe` for a completely safe alternative.
to_ref_unsafe()289     inline Ref_Value& to_ref_unsafe() const noexcept
290     {
291         #if UINTPTR_MAX == UINT64_MAX
292             // 64 bit pointers.
293 
294             // Intel: "bits 48 through 63 of any virtual address must be copies
295             // of bit 47 (in a manner akin to sign extension), or the processor
296             // will raise an exception." https://en.wikipedia.org/wiki/X86-64
297 
298             // Arm-64: also has 48 bit virtual addresses, and also seems to
299             // require the sign extension logic. "AArch64 features two 48-bit
300             // virtual address spaces, one for the kernel and one for
301             // applications. Application addressing starts at 0 and grows
302             // upwards, while kernel space grows down from 2^64; any references
303             // to unmapped addresses in between will trigger a fault. Pointers
304             // are sign extended to 64-bits, and can optionally be configured
305             // to use the upper 8-bits for tagging pointers with additional
306             // information." http://www.realworldtech.com/arm64/4/
307 
308             // The following code truncates to 48 bits, then fills the upper
309             // 16 bits using sign extension. Is this sign extension necessary?
310             // LuaJIT and SpiderMonkey both assume 47 bit pointers (not 48)
311             // in their NaN boxes. They assume all pointers are positive.
312             //
313             // The sign extension is necessary. Here's a LuaJIT bug where
314             // a negative 48 bit pointer on ARM64 leads to a crash.
315             // https://github.com/LuaJIT/LuaJIT/issues/49
316             //
317             // ARMv8.2 will support 52 bit virtual addresses.
318             // That can still work (a NaN has a 52 bit payload). We'll need
319             // changes to the Value internals but the API won't change.
320             // I don't store type codes in the NaN box (unlike LuaJIT)
321             // which will make this change easier.
322             return *(Ref_Value*)((signed_bits_ << 16) >> 16);
323         #elif UINTPTR_MAX == UINT32_MAX
324             // 32 bit pointers
325             return *(Ref_Value*)(uint32_t)bits_;
326         #else
327             static_assert(false, "only 32 and 64 bit architectures supported");
328         #endif
329     }
330 
331     /// Like dynamic_cast for a Value.
332     template <class T>
maybe()333     inline Shared<T> maybe() const noexcept
334     {
335         if (is_ref()) {
336             T* p = dynamic_cast<T*>(&to_ref_unsafe());
337             if (p != nullptr)
338                 return share(*p);
339         }
340         return nullptr;
341     }
342     template <class T>
to(const Context & cx)343     inline Shared<T> to(const Context& cx) const
344     {
345         if (is_ref()) {
346             T* p = dynamic_cast<T*>(&to_ref_unsafe());
347             if (p != nullptr)
348                 return share(*p);
349         }
350         to_abort(cx, T::name);
351     }
352     void to_abort [[noreturn]] (const Context&, const char*) const;
353     template <class T>
to(Fail fl,const Context & cx)354     inline Shared<T> to(Fail fl, const Context& cx) const
355     {
356         if (is_ref()) {
357             T* p = dynamic_cast<T*>(&to_ref_unsafe());
358             if (p != nullptr)
359                 return share(*p);
360         }
361         if (fl == Fail::soft) return nullptr; else to_abort(cx, T::name);
362     }
363 
364     Value at(Symbol_Ref fieldname, const Context& cx) const;
365 
366     /// The copy constructor increments the use_count of a ref value.
Value(const Value & val)367     inline Value(const Value& val) noexcept
368     {
369         bits_ = val.bits_;
370         if (is_ref())
371             intrusive_ptr_add_ref(&to_ref_unsafe());
372     }
373 
374     /// The move constructor.
Value(Value && val)375     inline Value(Value&& val) noexcept
376     {
377         bits_ = val.bits_;
378         val.bits_ = k_missingbits;
379     }
380 
381     /// The assignment operator.
382     ///
383     /// There's just one assignment operator, and it's by-value.
384     /// This is simpler than `const Value&` and `const Value&&`
385     /// copy and move assignments, and allegedly the most efficient.
386     /// When possible, use `val = move(rhs);` for a non-rvalue rhs
387     /// for efficiency.
388     inline Value& operator=(Value rhs) noexcept
389     {
390         rhs.swap(*this);
391         return *this;
392     }
393 
394     /// The swap operation.
swap(Value & rhs)395     void swap(Value& rhs) noexcept
396     {
397         auto tmpbits = bits_;
398         bits_ = rhs.bits_;
399         rhs.bits_ = tmpbits;
400     }
401 
402     /// The destructor.
~Value()403     ~Value()
404     {
405         if (is_ref())
406             intrusive_ptr_release(&to_ref_unsafe());
407     }
408 
409     /// Print a value like a Curv expression.
410     void print_repr(std::ostream&) const;
411 
412     /// Print a value like a string.
413     void print_string(std::ostream&) const;
414 
415     // Deep equality, which traverses the value tree and forces thunks.
416     // May throw an Exception if forcing a thunk fails.
417     // May return Ternary::Unknown when comparing reactive values.
418     // Used to implement `a == b` in the Curv language.
419     // Optimized for numbers, may be expensive for lists and records.
420     Ternary equal(Value, const Context&) const;
421 
422     // Shallow equality, compares the bit patterns of two Values. Fast.
eq(Value rhs)423     bool eq(Value rhs) const
424     {
425         return bits_ == rhs.bits_;
426     }
427 
428     size_t hash() const noexcept;
429     bool hash_eq(Value) const noexcept;
430 
431     struct Hash
432     {
operatorValue::Hash433         size_t operator()(Value val) const noexcept
434         {
435             return val.hash();
436         }
437     };
438     struct Hash_Eq
439     {
operatorValue::Hash_Eq440         bool operator()(Value v1, Value v2) const noexcept
441         {
442             return v1.hash_eq(v2);
443         }
444     };
445 };
446 
447 /// Special marker that denotes the absence of a value
448 extern const Value missing;
449 
450 inline std::ostream&
451 operator<<(std::ostream& out, Value val)
452 {
453     val.print_repr(out);
454     return out;
455 }
456 
457 /// Allocate a reference value with given class and constructor arguments.
make_ref_value(Args &&...args)458 template<typename T, class... Args> Value make_ref_value(Args&&... args)
459 {
460     T* ptr = new T(args...);
461     intrusive_ptr_add_ref(ptr);
462     return Value(ptr);
463 }
464 
465 void help(Value, std::ostream&);
466 
467 } // namespace curv
468 #endif // header guard
469