1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_DB_COLUMN_H_
18 #define SRC_TRACE_PROCESSOR_DB_COLUMN_H_
19 
20 #include <stdint.h>
21 
22 #include "perfetto/base/logging.h"
23 #include "perfetto/ext/base/optional.h"
24 #include "perfetto/trace_processor/basic_types.h"
25 #include "src/trace_processor/containers/nullable_vector.h"
26 #include "src/trace_processor/containers/row_map.h"
27 #include "src/trace_processor/containers/string_pool.h"
28 #include "src/trace_processor/db/compare.h"
29 
30 namespace perfetto {
31 namespace trace_processor {
32 
33 // Id type which can be used as a base for strongly typed ids.
34 // TypedColumn has support for storing descendents of BaseId seamlessly
35 // in a Column.
36 struct BaseId {
37   BaseId() = default;
BaseIdBaseId38   explicit constexpr BaseId(uint32_t v) : value(v) {}
39 
40   bool operator==(const BaseId& o) const { return o.value == value; }
41   bool operator<(const BaseId& o) const { return value < o.value; }
42 
43   uint32_t value;
44 };
45 
46 // Represents the possible filter operations on a column.
47 enum class FilterOp {
48   kEq,
49   kNe,
50   kGt,
51   kLt,
52   kGe,
53   kLe,
54   kIsNull,
55   kIsNotNull,
56 };
57 
58 // Represents a constraint on a column.
59 struct Constraint {
60   uint32_t col_idx;
61   FilterOp op;
62   SqlValue value;
63 };
64 
65 // Represents an order by operation on a column.
66 struct Order {
67   uint32_t col_idx;
68   bool desc;
69 };
70 
71 // Represents a column which is to be joined on.
72 struct JoinKey {
73   uint32_t col_idx;
74 };
75 
76 class Table;
77 
78 // Represents a named, strongly typed list of data.
79 class Column {
80  public:
81   // Flags which indicate properties of the data in the column. These features
82   // are used to speed up column methods like filtering/sorting.
83   enum Flag : uint32_t {
84     // Indicates that this column has no special properties.
85     kNoFlag = 0,
86 
87     // Indicates the data in the column is sorted. This can be used to speed
88     // up filtering and skip sorting.
89     kSorted = 1 << 0,
90 
91     // Indicates the data in the column is non-null. That is, the NullableVector
92     // passed in will never have any null entries. This is only used for
93     // numeric columns (string columns and id columns both have special
94     // handling which ignores this flag).
95     //
96     // This is used to speed up filters as we can safely index NullableVector
97     // directly if this flag is set.
98     kNonNull = 1 << 1,
99 
100     // Indicates that the data in the column is "hidden". This can by used to
101     // hint to users of Table and Column that this column should not be
102     // displayed to the user as it is part of the internal implementation
103     // details of the table.
104     kHidden = 1 << 2,
105 
106     // Indicates that the data in this column is stored densely. This
107     // allows for fast Set calls to change the data in the column.
108     //
109     // This flag is only meaningful for nullable columns has no effect for
110     // non-null columns.
111     kDense = 1 << 3,
112   };
113 
114   // Iterator over a column which conforms to std iterator interface
115   // to allow using std algorithms (e.g. upper_bound, lower_bound etc.).
116   class Iterator {
117    public:
118     using iterator_category = std::random_access_iterator_tag;
119     using value_type = SqlValue;
120     using difference_type = uint32_t;
121     using pointer = uint32_t*;
122     using reference = uint32_t&;
123 
Iterator(const Column * col,uint32_t row)124     Iterator(const Column* col, uint32_t row) : col_(col), row_(row) {}
125 
126     Iterator(const Iterator&) = default;
127     Iterator& operator=(const Iterator&) = default;
128 
129     bool operator==(const Iterator& other) const { return other.row_ == row_; }
130     bool operator!=(const Iterator& other) const { return !(*this == other); }
131     bool operator<(const Iterator& other) const { return row_ < other.row_; }
132     bool operator>(const Iterator& other) const { return other < *this; }
133     bool operator<=(const Iterator& other) const { return !(other < *this); }
134     bool operator>=(const Iterator& other) const { return !(*this < other); }
135 
136     SqlValue operator*() const { return col_->Get(row_); }
137     Iterator& operator++() {
138       row_++;
139       return *this;
140     }
141     Iterator& operator--() {
142       row_--;
143       return *this;
144     }
145 
146     Iterator& operator+=(uint32_t diff) {
147       row_ += diff;
148       return *this;
149     }
150     uint32_t operator-(const Iterator& other) const {
151       return row_ - other.row_;
152     }
153 
row()154     uint32_t row() const { return row_; }
155 
156    private:
157     const Column* col_ = nullptr;
158     uint32_t row_ = 0;
159   };
160 
161   // Flags specified for an id column.
162   static constexpr uint32_t kIdFlags = Flag::kSorted | Flag::kNonNull;
163 
164   template <typename T>
Column(const char * name,NullableVector<T> * storage,uint32_t flags,Table * table,uint32_t col_idx_in_table,uint32_t row_map_idx)165   Column(const char* name,
166          NullableVector<T>* storage,
167          /* Flag */ uint32_t flags,
168          Table* table,
169          uint32_t col_idx_in_table,
170          uint32_t row_map_idx)
171       : Column(name,
172                ToColumnType<T>(),
173                flags,
174                table,
175                col_idx_in_table,
176                row_map_idx,
177                storage,
178                nullptr) {}
179 
180   // Create a Column has the same name and is backed by the same data as
181   // |column| but is associated to a different table.
182   Column(const Column& column,
183          Table* table,
184          uint32_t col_idx_in_table,
185          uint32_t row_map_idx);
186 
187   // Columns are movable but not copyable.
188   Column(Column&&) noexcept = default;
189   Column& operator=(Column&&) = default;
190 
191   template <typename T>
WithOwnedStorage(const char * name,std::unique_ptr<NullableVector<T>> storage,uint32_t flags,Table * table,uint32_t col_idx_in_table,uint32_t row_map_idx)192   static Column WithOwnedStorage(const char* name,
193                                  std::unique_ptr<NullableVector<T>> storage,
194                                  /* Flag */ uint32_t flags,
195                                  Table* table,
196                                  uint32_t col_idx_in_table,
197                                  uint32_t row_map_idx) {
198     NullableVector<T>* ptr = storage.get();
199     return Column(name, ToColumnType<T>(), flags, table, col_idx_in_table,
200                   row_map_idx, ptr, std::move(storage));
201   }
202 
203   // Creates a Column which returns the index as the value of the row.
204   static Column IdColumn(Table* table,
205                          uint32_t col_idx_in_table,
206                          uint32_t row_map_idx);
207 
208   // Gets the value of the Column at the given |row|.
Get(uint32_t row)209   SqlValue Get(uint32_t row) const { return GetAtIdx(row_map().Get(row)); }
210 
211   // Returns the row containing the given value in the Column.
IndexOf(SqlValue value)212   base::Optional<uint32_t> IndexOf(SqlValue value) const {
213     switch (type_) {
214       // TODO(lalitm): investigate whether we could make this more efficient
215       // by first checking the type of the column and comparing explicitly
216       // based on that type.
217       case ColumnType::kInt32:
218       case ColumnType::kUint32:
219       case ColumnType::kInt64:
220       case ColumnType::kDouble:
221       case ColumnType::kString: {
222         for (uint32_t i = 0; i < row_map().size(); i++) {
223           if (compare::SqlValue(Get(i), value) == 0)
224             return i;
225         }
226         return base::nullopt;
227       }
228       case ColumnType::kId: {
229         if (value.type != SqlValue::Type::kLong)
230           return base::nullopt;
231         return row_map().IndexOf(static_cast<uint32_t>(value.long_value));
232       }
233     }
234     PERFETTO_FATAL("For GCC");
235   }
236 
237   // Sets the value of the column at the given |row|.
Set(uint32_t row,SqlValue value)238   void Set(uint32_t row, SqlValue value) {
239     PERFETTO_CHECK(value.type == type());
240     switch (type_) {
241       case ColumnType::kInt32: {
242         mutable_nullable_vector<int32_t>()->Set(
243             row, static_cast<int32_t>(value.long_value));
244         break;
245       }
246       case ColumnType::kUint32: {
247         mutable_nullable_vector<uint32_t>()->Set(
248             row, static_cast<uint32_t>(value.long_value));
249         break;
250       }
251       case ColumnType::kInt64: {
252         mutable_nullable_vector<int64_t>()->Set(
253             row, static_cast<int64_t>(value.long_value));
254         break;
255       }
256       case ColumnType::kDouble: {
257         mutable_nullable_vector<double>()->Set(row, value.double_value);
258         break;
259       }
260       case ColumnType::kString: {
261         PERFETTO_FATAL(
262             "Setting a generic value on a string column is not implemented");
263       }
264       case ColumnType::kId: {
265         PERFETTO_FATAL("Cannot set value on a id column");
266       }
267     }
268   }
269 
270   // Sorts |idx| in ascending or descending order (determined by |desc|) based
271   // on the contents of this column.
272   void StableSort(bool desc, std::vector<uint32_t>* idx) const;
273 
274   // Updates the given RowMap by only keeping rows where this column meets the
275   // given filter constraint.
FilterInto(FilterOp op,SqlValue value,RowMap * rm)276   void FilterInto(FilterOp op, SqlValue value, RowMap* rm) const {
277     if (IsId() && op == FilterOp::kEq) {
278       // If this is an equality constraint on an id column, try and find the
279       // single row with the id (if it exists).
280       auto opt_idx = IndexOf(value);
281       if (opt_idx) {
282         rm->Intersect(RowMap::SingleRow(*opt_idx));
283       } else {
284         rm->Intersect(RowMap());
285       }
286       return;
287     }
288 
289     if (IsSorted() && value.type == type()) {
290       // If the column is sorted and the value has the same type as the column,
291       // we should be able to just do a binary search to find the range of rows
292       // instead of a full table scan.
293       bool handled = FilterIntoSorted(op, value, rm);
294       if (handled)
295         return;
296     }
297 
298     FilterIntoSlow(op, value, rm);
299   }
300 
301   // Returns the minimum value in this column. Returns nullopt if this column
302   // is empty.
Min()303   base::Optional<SqlValue> Min() const {
304     if (row_map().empty())
305       return base::nullopt;
306 
307     if (IsSorted())
308       return Get(0);
309 
310     Iterator b(this, 0);
311     Iterator e(this, row_map().size());
312     return *std::min_element(b, e, &compare::SqlValueComparator);
313   }
314 
315   // Returns the minimum value in this column. Returns nullopt if this column
316   // is empty.
Max()317   base::Optional<SqlValue> Max() const {
318     if (row_map().empty())
319       return base::nullopt;
320 
321     if (IsSorted())
322       return Get(row_map().size() - 1);
323 
324     Iterator b(this, 0);
325     Iterator e(this, row_map().size());
326     return *std::max_element(b, e, &compare::SqlValueComparator);
327   }
328 
329   // Returns true if this column is considered an id column.
IsId()330   bool IsId() const { return type_ == ColumnType::kId; }
331 
332   // Returns true if this column is a nullable column.
IsNullable()333   bool IsNullable() const { return (flags_ & Flag::kNonNull) == 0; }
334 
335   // Returns true if this column is a sorted column.
IsSorted()336   bool IsSorted() const { return (flags_ & Flag::kSorted) != 0; }
337 
338   // Returns true if this column is a dense column.
IsDense()339   bool IsDense() const { return (flags_ & Flag::kDense) != 0; }
340 
341   // Returns the backing RowMap for this Column.
342   // This function is defined out of line because of a circular dependency
343   // between |Table| and |Column|.
344   const RowMap& row_map() const;
345 
346   // Returns the name of the column.
name()347   const char* name() const { return name_; }
348 
349   // Returns the type of this Column in terms of SqlValue::Type.
type()350   SqlValue::Type type() const { return ToSqlValueType(type_); }
351 
352   // Test the type of this Column.
353   template <typename T>
IsColumnType()354   bool IsColumnType() const {
355     return ToColumnType<T>() == type_;
356   }
357 
358   // Returns the index of the current column in the containing table.
index_in_table()359   uint32_t index_in_table() const { return col_idx_in_table_; }
360 
361   // Returns a Constraint for each type of filter operation for this Column.
eq_value(SqlValue value)362   Constraint eq_value(SqlValue value) const {
363     return Constraint{col_idx_in_table_, FilterOp::kEq, value};
364   }
gt_value(SqlValue value)365   Constraint gt_value(SqlValue value) const {
366     return Constraint{col_idx_in_table_, FilterOp::kGt, value};
367   }
lt_value(SqlValue value)368   Constraint lt_value(SqlValue value) const {
369     return Constraint{col_idx_in_table_, FilterOp::kLt, value};
370   }
ne_value(SqlValue value)371   Constraint ne_value(SqlValue value) const {
372     return Constraint{col_idx_in_table_, FilterOp::kNe, value};
373   }
ge_value(SqlValue value)374   Constraint ge_value(SqlValue value) const {
375     return Constraint{col_idx_in_table_, FilterOp::kGe, value};
376   }
le_value(SqlValue value)377   Constraint le_value(SqlValue value) const {
378     return Constraint{col_idx_in_table_, FilterOp::kLe, value};
379   }
is_not_null()380   Constraint is_not_null() const {
381     return Constraint{col_idx_in_table_, FilterOp::kIsNotNull, SqlValue()};
382   }
is_null()383   Constraint is_null() const {
384     return Constraint{col_idx_in_table_, FilterOp::kIsNull, SqlValue()};
385   }
386 
387   // Returns an Order for each Order type for this Column.
ascending()388   Order ascending() const { return Order{col_idx_in_table_, false}; }
descending()389   Order descending() const { return Order{col_idx_in_table_, true}; }
390 
391   // Returns the JoinKey for this Column.
join_key()392   JoinKey join_key() const { return JoinKey{col_idx_in_table_}; }
393 
394   // Returns an iterator to the first entry in this column.
begin()395   Iterator begin() const { return Iterator(this, 0); }
396 
397   // Returns an iterator pointing beyond the last entry in this column.
end()398   Iterator end() const { return Iterator(this, row_map().size()); }
399 
400  protected:
401   // Returns the backing sparse vector cast to contain data of type T.
402   // Should only be called when |type_| == ToColumnType<T>().
403   template <typename T>
mutable_nullable_vector()404   NullableVector<T>* mutable_nullable_vector() {
405     PERFETTO_DCHECK(ToColumnType<T>() == type_);
406     return static_cast<NullableVector<T>*>(nullable_vector_);
407   }
408 
409   // Returns the backing sparse vector cast to contain data of type T.
410   // Should only be called when |type_| == ToColumnType<T>().
411   template <typename T>
nullable_vector()412   const NullableVector<T>& nullable_vector() const {
413     PERFETTO_DCHECK(ToColumnType<T>() == type_);
414     return *static_cast<const NullableVector<T>*>(nullable_vector_);
415   }
416 
417   // Returns the type of this Column in terms of SqlValue::Type.
418   template <typename T>
ToSqlValueType()419   static SqlValue::Type ToSqlValueType() {
420     return ToSqlValueType(ToColumnType<T>());
421   }
422 
string_pool()423   const StringPool& string_pool() const { return *string_pool_; }
424 
425  private:
426   enum class ColumnType {
427     // Standard primitive types.
428     kInt32,
429     kUint32,
430     kInt64,
431     kDouble,
432     kString,
433 
434     // Types generated on the fly.
435     kId,
436   };
437 
438   friend class Table;
439 
440   // Base constructor for this class which all other constructors call into.
441   Column(const char* name,
442          ColumnType type,
443          uint32_t flags,
444          Table* table,
445          uint32_t col_idx_in_table,
446          uint32_t row_map_idx,
447          NullableVectorBase* nullable_vector,
448          std::shared_ptr<NullableVectorBase> owned_nullable_vector);
449 
450   Column(const Column&) = delete;
451   Column& operator=(const Column&) = delete;
452 
453   // Gets the value of the Column at the given |row|.
GetAtIdx(uint32_t idx)454   SqlValue GetAtIdx(uint32_t idx) const {
455     switch (type_) {
456       case ColumnType::kInt32: {
457         auto opt_value = nullable_vector<int32_t>().Get(idx);
458         return opt_value ? SqlValue::Long(*opt_value) : SqlValue();
459       }
460       case ColumnType::kUint32: {
461         auto opt_value = nullable_vector<uint32_t>().Get(idx);
462         return opt_value ? SqlValue::Long(*opt_value) : SqlValue();
463       }
464       case ColumnType::kInt64: {
465         auto opt_value = nullable_vector<int64_t>().Get(idx);
466         return opt_value ? SqlValue::Long(*opt_value) : SqlValue();
467       }
468       case ColumnType::kDouble: {
469         auto opt_value = nullable_vector<double>().Get(idx);
470         return opt_value ? SqlValue::Double(*opt_value) : SqlValue();
471       }
472       case ColumnType::kString: {
473         auto str = GetStringPoolStringAtIdx(idx).c_str();
474         return str == nullptr ? SqlValue() : SqlValue::String(str);
475       }
476       case ColumnType::kId:
477         return SqlValue::Long(idx);
478     }
479     PERFETTO_FATAL("For GCC");
480   }
481 
482   // Optimized filter method for sorted columns.
483   // Returns whether the constraint was handled by the method.
FilterIntoSorted(FilterOp op,SqlValue value,RowMap * rm)484   bool FilterIntoSorted(FilterOp op, SqlValue value, RowMap* rm) const {
485     PERFETTO_DCHECK(IsSorted());
486     PERFETTO_DCHECK(value.type == type());
487 
488     Iterator b(this, 0);
489     Iterator e(this, row_map().size());
490     switch (op) {
491       case FilterOp::kEq: {
492         uint32_t beg = std::distance(
493             b, std::lower_bound(b, e, value, &compare::SqlValueComparator));
494         uint32_t end = std::distance(
495             b, std::upper_bound(b, e, value, &compare::SqlValueComparator));
496         rm->Intersect(RowMap(beg, end));
497         return true;
498       }
499       case FilterOp::kLe: {
500         uint32_t end = std::distance(
501             b, std::upper_bound(b, e, value, &compare::SqlValueComparator));
502         rm->Intersect(RowMap(0, end));
503         return true;
504       }
505       case FilterOp::kLt: {
506         uint32_t end = std::distance(
507             b, std::lower_bound(b, e, value, &compare::SqlValueComparator));
508         rm->Intersect(RowMap(0, end));
509         return true;
510       }
511       case FilterOp::kGe: {
512         uint32_t beg = std::distance(
513             b, std::lower_bound(b, e, value, &compare::SqlValueComparator));
514         rm->Intersect(RowMap(beg, row_map().size()));
515         return true;
516       }
517       case FilterOp::kGt: {
518         uint32_t beg = std::distance(
519             b, std::upper_bound(b, e, value, &compare::SqlValueComparator));
520         rm->Intersect(RowMap(beg, row_map().size()));
521         return true;
522       }
523       case FilterOp::kNe:
524       case FilterOp::kIsNull:
525       case FilterOp::kIsNotNull:
526         break;
527     }
528     return false;
529   }
530 
531   // Slow path filter method which will perform a full table scan.
532   void FilterIntoSlow(FilterOp op, SqlValue value, RowMap* rm) const;
533 
534   // Slow path filter method for numerics which will perform a full table scan.
535   template <typename T, bool is_nullable>
536   void FilterIntoNumericSlow(FilterOp op, SqlValue value, RowMap* rm) const;
537 
538   // Slow path filter method for numerics with a comparator which will perform a
539   // full table scan.
540   template <typename T, bool is_nullable, typename Comparator = int(T)>
541   void FilterIntoNumericWithComparatorSlow(FilterOp op,
542                                            RowMap* rm,
543                                            Comparator cmp) const;
544 
545   // Slow path filter method for strings which will perform a full table scan.
546   void FilterIntoStringSlow(FilterOp op, SqlValue value, RowMap* rm) const;
547 
548   // Slow path filter method for ids which will perform a full table scan.
549   void FilterIntoIdSlow(FilterOp op, SqlValue value, RowMap* rm) const;
550 
551   // Stable sorts this column storing the result in |out|.
552   template <bool desc>
553   void StableSort(std::vector<uint32_t>* out) const;
554 
555   // Stable sorts this column storing the result in |out|.
556   // |T| and |is_nullable| should match the type and nullability of this column.
557   template <bool desc, typename T, bool is_nullable>
558   void StableSortNumeric(std::vector<uint32_t>* out) const;
559 
560   template <typename T>
ToColumnType()561   static ColumnType ToColumnType() {
562     if (std::is_same<T, uint32_t>::value) {
563       return ColumnType::kUint32;
564     } else if (std::is_same<T, int64_t>::value) {
565       return ColumnType::kInt64;
566     } else if (std::is_same<T, int32_t>::value) {
567       return ColumnType::kInt32;
568     } else if (std::is_same<T, StringPool::Id>::value) {
569       return ColumnType::kString;
570     } else if (std::is_same<T, double>::value) {
571       return ColumnType::kDouble;
572     } else {
573       PERFETTO_FATAL("Unsupported type of column");
574     }
575   }
576 
ToSqlValueType(ColumnType type)577   static SqlValue::Type ToSqlValueType(ColumnType type) {
578     switch (type) {
579       case ColumnType::kInt32:
580       case ColumnType::kUint32:
581       case ColumnType::kInt64:
582       case ColumnType::kId:
583         return SqlValue::Type::kLong;
584       case ColumnType::kDouble:
585         return SqlValue::Type::kDouble;
586       case ColumnType::kString:
587         return SqlValue::Type::kString;
588     }
589     PERFETTO_FATAL("For GCC");
590   }
591 
592   // Returns the string at the index |idx|.
593   // Should only be called when |type_| == ColumnType::kString.
GetStringPoolStringAtIdx(uint32_t idx)594   NullTermStringView GetStringPoolStringAtIdx(uint32_t idx) const {
595     PERFETTO_DCHECK(type_ == ColumnType::kString);
596     return string_pool_->Get(nullable_vector<StringPool::Id>().GetNonNull(idx));
597   }
598 
599   // Only filled for columns which own the data inside them. Generally this is
600   // only true for columns which are dynamically generated at runtime.
601   // Keep this before |nullable_vector_|.
602   std::shared_ptr<NullableVectorBase> owned_nullable_vector_;
603 
604   // type_ is used to cast nullable_vector_ to the correct type.
605   ColumnType type_ = ColumnType::kInt64;
606   NullableVectorBase* nullable_vector_ = nullptr;
607 
608   const char* name_ = nullptr;
609   uint32_t flags_ = Flag::kNoFlag;
610   const Table* table_ = nullptr;
611   uint32_t col_idx_in_table_ = 0;
612   uint32_t row_map_idx_ = 0;
613   const StringPool* string_pool_ = nullptr;
614 };
615 
616 }  // namespace trace_processor
617 }  // namespace perfetto
618 
619 #endif  // SRC_TRACE_PROCESSOR_DB_COLUMN_H_
620