1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_
18 #define SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_
19 
20 #include <sqlite3.h>
21 
22 #include <array>
23 #include <deque>
24 #include <limits>
25 #include <map>
26 #include <memory>
27 #include <string>
28 #include <unordered_map>
29 #include <vector>
30 
31 #include "perfetto/trace_processor/basic_types.h"
32 #include "perfetto/trace_processor/status.h"
33 #include "src/trace_processor/sqlite/scoped_db.h"
34 #include "src/trace_processor/sqlite/sqlite_table.h"
35 
36 namespace perfetto {
37 namespace trace_processor {
38 
39 // Implements the SPAN JOIN operation between two tables on a particular column.
40 //
41 // Span:
42 // A span is a row with a timestamp and a duration. It can is used to model
43 // operations which run for a particular *span* of time.
44 //
45 // We draw spans like so (time on the x-axis):
46 // start of span->[ time where opertion is running ]<- end of span
47 //
48 // Multiple spans can happen in parallel:
49 // [      ]
50 //    [        ]
51 //   [                    ]
52 //  [ ]
53 //
54 // The above for example, models scheduling activity on a 4-core computer for a
55 // short period of time.
56 //
57 // Span join:
58 // The span join operation can be thought of as the intersection of span tables.
59 // That is, the join table has a span for each pair of spans in the child tables
60 // where the spans overlap. Because many spans are possible in parallel, an
61 // extra metadata column (labelled the "join column") is used to distinguish
62 // between the spanned tables.
63 //
64 // For a given join key suppose these were the two span tables:
65 // Table 1:   [        ]              [      ]         [ ]
66 // Table 2:          [      ]            [  ]           [      ]
67 // Output :          [ ]                 [  ]           []
68 //
69 // All other columns apart from timestamp (ts), duration (dur) and the join key
70 // are passed through unchanged.
71 class SpanJoinOperatorTable : public SqliteTable {
72  public:
73   // Enum indicating whether the queries on the two inner tables should
74   // emit shadows.
75   enum class EmitShadowType {
76     // Used when the table should emit all shadow slices (both present and
77     // missing partition shadows).
78     kAll,
79 
80     // Used when the table should only emit shadows for partitions which are
81     // present.
82     kPresentPartitionOnly,
83 
84     // Used when the table should emit no shadow slices.
85     kNone,
86   };
87 
88   // Contains the definition of the child tables.
89   class TableDefinition {
90    public:
91     TableDefinition() = default;
92 
93     TableDefinition(std::string name,
94                     std::string partition_col,
95                     std::vector<SqliteTable::Column> cols,
96                     EmitShadowType emit_shadow_type,
97                     uint32_t ts_idx,
98                     uint32_t dur_idx,
99                     uint32_t partition_idx);
100 
101     // Returns whether this table should emit present partition shadow slices.
ShouldEmitPresentPartitionShadow()102     bool ShouldEmitPresentPartitionShadow() const {
103       return emit_shadow_type_ == EmitShadowType::kAll ||
104              emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly;
105     }
106 
107     // Returns whether this table should emit missing partition shadow slices.
ShouldEmitMissingPartitionShadow()108     bool ShouldEmitMissingPartitionShadow() const {
109       return emit_shadow_type_ == EmitShadowType::kAll;
110     }
111 
112     // Returns whether the table is partitioned.
IsPartitioned()113     bool IsPartitioned() const { return !partition_col_.empty(); }
114 
name()115     const std::string& name() const { return name_; }
partition_col()116     const std::string& partition_col() const { return partition_col_; }
columns()117     const std::vector<SqliteTable::Column>& columns() const { return cols_; }
118 
ts_idx()119     uint32_t ts_idx() const { return ts_idx_; }
dur_idx()120     uint32_t dur_idx() const { return dur_idx_; }
partition_idx()121     uint32_t partition_idx() const { return partition_idx_; }
122 
123    private:
124     EmitShadowType emit_shadow_type_ = EmitShadowType::kNone;
125 
126     std::string name_;
127     std::string partition_col_;
128     std::vector<SqliteTable::Column> cols_;
129 
130     uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max();
131     uint32_t dur_idx_ = std::numeric_limits<uint32_t>::max();
132     uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max();
133   };
134 
135   // Stores information about a single subquery into one of the two child
136   // tables.
137   //
138   // This class is implemented as a state machine which steps from one slice to
139   // the next.
140   class Query {
141    public:
142     // Enum encoding the current state of the query in the state machine.
143     enum class State {
144       // Encodes that the current slice is a real slice (i.e. comes directly
145       // from the cursor).
146       kReal,
147 
148       // Encodes that the current slice is on a partition for which there is a
149       // real slice present.
150       kPresentPartitionShadow,
151 
152       // Encodes that the current slice is on a paritition(s) for which there is
153       // no real slice for those partition(s).
154       kMissingPartitionShadow,
155 
156       // Encodes that this query has reached the end.
157       kEof,
158     };
159 
160     Query(SpanJoinOperatorTable*, const TableDefinition*, sqlite3* db);
161     virtual ~Query();
162 
163     Query(Query&&) noexcept = default;
164     Query& operator=(Query&&) = default;
165 
166     // Initializes the query with the given constraints and query parameters.
167     util::Status Initialize(const QueryConstraints& qc, sqlite3_value** argv);
168 
169     // Forwards the query to the next valid slice.
170     util::Status Next();
171 
172     // Rewinds the query to the first valid slice
173     // This is used in the mixed partitioning case where the query with no
174     // partitions is rewound to the start on every new partition.
175     util::Status Rewind();
176 
177     // Reports the column at the given index to given context.
178     void ReportSqliteResult(sqlite3_context* context, size_t index);
179 
180     // Returns whether the cursor has reached eof.
IsEof()181     bool IsEof() const { return state_ == State::kEof; }
182 
183     // Returns whether the current slice pointed to is a real slice.
IsReal()184     bool IsReal() const { return state_ == State::kReal; }
185 
186     // Returns the first partition this slice covers (for real/single partition
187     // shadows, this is the same as partition()).
188     // This partition encodes a [start, end] (closed at start and at end) range
189     // of partitions which works as the partitions are integers.
FirstPartition()190     int64_t FirstPartition() const {
191       PERFETTO_DCHECK(!IsEof());
192       return IsMissingPartitionShadow() ? missing_partition_start_
193                                         : partition();
194     }
195 
196     // Returns the last partition this slice covers (for real/single partition
197     // shadows, this is the same as partition()).
198     // This partition encodes a [start, end] (closed at start and at end) range
199     // of partitions which works as the partitions are integers.
LastPartition()200     int64_t LastPartition() const {
201       PERFETTO_DCHECK(!IsEof());
202       return IsMissingPartitionShadow() ? missing_partition_end_ - 1
203                                         : partition();
204     }
205 
206     // Returns the end timestamp of this slice adjusted to ensure that -1
207     // duration slices always returns ts.
AdjustedTsEnd()208     int64_t AdjustedTsEnd() const {
209       PERFETTO_DCHECK(!IsEof());
210       return ts_end_ - ts() == -1 ? ts() : ts_end_;
211     }
212 
ts()213     int64_t ts() const {
214       PERFETTO_DCHECK(!IsEof());
215       return ts_;
216     }
partition()217     int64_t partition() const {
218       PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned());
219       return partition_;
220     }
221 
raw_ts_end()222     int64_t raw_ts_end() const {
223       PERFETTO_DCHECK(!IsEof());
224       return ts_end_;
225     }
226 
definition()227     const TableDefinition* definition() const { return defn_; }
228 
229    private:
230     Query(Query&) = delete;
231     Query& operator=(const Query&) = delete;
232 
233     // Returns whether the current slice pointed to is a valid slice.
234     bool IsValidSlice();
235 
236     // Forwards the query to the next valid slice.
237     util::Status FindNextValidSlice();
238 
239     // Advances the query state machine by one slice.
240     util::Status NextSliceState();
241 
242     // Forwards the cursor to point to the next real slice.
243     util::Status CursorNext();
244 
245     // Creates an SQL query from the given set of constraint strings.
246     std::string CreateSqlQuery(const std::vector<std::string>& cs) const;
247 
248     // Returns whether the current slice pointed to is a present partition
249     // shadow.
IsPresentPartitionShadow()250     bool IsPresentPartitionShadow() const {
251       return state_ == State::kPresentPartitionShadow;
252     }
253 
254     // Returns whether the current slice pointed to is a missing partition
255     // shadow.
IsMissingPartitionShadow()256     bool IsMissingPartitionShadow() const {
257       return state_ == State::kMissingPartitionShadow;
258     }
259 
260     // Returns whether the current slice pointed to is an empty shadow.
IsEmptyShadow()261     bool IsEmptyShadow() const {
262       PERFETTO_DCHECK(!IsEof());
263       return (!IsReal() && ts_ == ts_end_) ||
264              (IsMissingPartitionShadow() &&
265               missing_partition_start_ == missing_partition_end_);
266     }
267 
CursorTs()268     int64_t CursorTs() const {
269       PERFETTO_DCHECK(!cursor_eof_);
270       auto ts_idx = static_cast<int>(defn_->ts_idx());
271       return sqlite3_column_int64(stmt_.get(), ts_idx);
272     }
273 
CursorDur()274     int64_t CursorDur() const {
275       PERFETTO_DCHECK(!cursor_eof_);
276       auto dur_idx = static_cast<int>(defn_->dur_idx());
277       return sqlite3_column_int64(stmt_.get(), dur_idx);
278     }
279 
CursorPartition()280     int64_t CursorPartition() const {
281       PERFETTO_DCHECK(!cursor_eof_);
282       PERFETTO_DCHECK(defn_->IsPartitioned());
283       auto partition_idx = static_cast<int>(defn_->partition_idx());
284       return sqlite3_column_int64(stmt_.get(), partition_idx);
285     }
286 
287     State state_ = State::kMissingPartitionShadow;
288     bool cursor_eof_ = false;
289 
290     // Only valid when |state_| != kEof.
291     int64_t ts_ = 0;
292     int64_t ts_end_ = std::numeric_limits<int64_t>::max();
293 
294     // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow.
295     int64_t partition_ = std::numeric_limits<int64_t>::min();
296 
297     // Only valid when |state_| == kMissingPartitionShadow.
298     int64_t missing_partition_start_ = 0;
299     int64_t missing_partition_end_ = 0;
300 
301     std::string sql_query_;
302     ScopedStmt stmt_;
303 
304     const TableDefinition* defn_ = nullptr;
305     sqlite3* db_ = nullptr;
306     SpanJoinOperatorTable* table_ = nullptr;
307   };
308 
309   // Base class for a cursor on the span table.
310   class Cursor : public SqliteTable::Cursor {
311    public:
312     Cursor(SpanJoinOperatorTable*, sqlite3* db);
313     ~Cursor() override = default;
314 
315     int Filter(const QueryConstraints& qc,
316                sqlite3_value** argv,
317                FilterHistory) override;
318     int Column(sqlite3_context* context, int N) override;
319     int Next() override;
320     int Eof() override;
321 
322    private:
323     Cursor(Cursor&) = delete;
324     Cursor& operator=(const Cursor&) = delete;
325 
326     Cursor(Cursor&&) noexcept = default;
327     Cursor& operator=(Cursor&&) = default;
328 
329     bool IsOverlappingSpan();
330 
331     util::Status FindOverlappingSpan();
332 
333     Query* FindEarliestFinishQuery();
334 
335     Query t1_;
336     Query t2_;
337 
338     Query* next_query_ = nullptr;
339 
340     // Only valid for kMixedPartition.
341     int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min();
342 
343     SpanJoinOperatorTable* table_;
344   };
345 
346   SpanJoinOperatorTable(sqlite3*, const TraceStorage*);
347 
348   static void RegisterTable(sqlite3* db, const TraceStorage* storage);
349 
350   // Table implementation.
351   util::Status Init(int, const char* const*, SqliteTable::Schema*) override;
352   std::unique_ptr<SqliteTable::Cursor> CreateCursor() override;
353   int BestIndex(const QueryConstraints& qc, BestIndexInfo* info) override;
354 
355  private:
356   // Columns of the span operator table.
357   enum Column {
358     kTimestamp = 0,
359     kDuration = 1,
360     kPartition = 2,
361     // All other columns are dynamic depending on the joined tables.
362   };
363 
364   // Enum indicating the possible partitionings of the two tables in span join.
365   enum class PartitioningType {
366     // Used when both tables don't have a partition specified.
367     kNoPartitioning = 0,
368 
369     // Used when both tables have the same partition specified.
370     kSamePartitioning = 1,
371 
372     // Used when one table has a partition and the other table doesn't.
373     kMixedPartitioning = 2
374   };
375 
376   // Parsed version of a table descriptor.
377   struct TableDescriptor {
378     static util::Status Parse(const std::string& raw_descriptor,
379                               TableDescriptor* descriptor);
380 
IsPartitionedTableDescriptor381     bool IsPartitioned() const { return !partition_col.empty(); }
382 
383     std::string name;
384     std::string partition_col;
385   };
386 
387   // Identifier for a column by index in a given table.
388   struct ColumnLocator {
389     const TableDefinition* defn;
390     size_t col_index;
391   };
392 
IsLeftJoin()393   bool IsLeftJoin() const { return name() == "span_left_join"; }
IsOuterJoin()394   bool IsOuterJoin() const { return name() == "span_outer_join"; }
395 
partition_col()396   const std::string& partition_col() const {
397     return t1_defn_.IsPartitioned() ? t1_defn_.partition_col()
398                                     : t2_defn_.partition_col();
399   }
400 
401   util::Status CreateTableDefinition(
402       const TableDescriptor& desc,
403       EmitShadowType emit_shadow_type,
404       SpanJoinOperatorTable::TableDefinition* defn);
405 
406   std::vector<std::string> ComputeSqlConstraintsForDefinition(
407       const TableDefinition& defn,
408       const QueryConstraints& qc,
409       sqlite3_value** argv);
410 
411   std::string GetNameForGlobalColumnIndex(const TableDefinition& defn,
412                                           int global_column);
413 
414   void CreateSchemaColsForDefn(const TableDefinition& defn,
415                                std::vector<SqliteTable::Column>* cols);
416 
417   TableDefinition t1_defn_;
418   TableDefinition t2_defn_;
419   PartitioningType partitioning_;
420   std::unordered_map<size_t, ColumnLocator> global_index_to_column_locator_;
421 
422   sqlite3* const db_;
423 };
424 
425 }  // namespace trace_processor
426 }  // namespace perfetto
427 
428 #endif  // SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_
429