1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_ 18 #define SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_ 19 20 #include <sqlite3.h> 21 22 #include <array> 23 #include <deque> 24 #include <limits> 25 #include <map> 26 #include <memory> 27 #include <string> 28 #include <unordered_map> 29 #include <vector> 30 31 #include "perfetto/trace_processor/basic_types.h" 32 #include "perfetto/trace_processor/status.h" 33 #include "src/trace_processor/sqlite/scoped_db.h" 34 #include "src/trace_processor/sqlite/sqlite_table.h" 35 36 namespace perfetto { 37 namespace trace_processor { 38 39 // Implements the SPAN JOIN operation between two tables on a particular column. 40 // 41 // Span: 42 // A span is a row with a timestamp and a duration. It can is used to model 43 // operations which run for a particular *span* of time. 44 // 45 // We draw spans like so (time on the x-axis): 46 // start of span->[ time where opertion is running ]<- end of span 47 // 48 // Multiple spans can happen in parallel: 49 // [ ] 50 // [ ] 51 // [ ] 52 // [ ] 53 // 54 // The above for example, models scheduling activity on a 4-core computer for a 55 // short period of time. 56 // 57 // Span join: 58 // The span join operation can be thought of as the intersection of span tables. 59 // That is, the join table has a span for each pair of spans in the child tables 60 // where the spans overlap. Because many spans are possible in parallel, an 61 // extra metadata column (labelled the "join column") is used to distinguish 62 // between the spanned tables. 63 // 64 // For a given join key suppose these were the two span tables: 65 // Table 1: [ ] [ ] [ ] 66 // Table 2: [ ] [ ] [ ] 67 // Output : [ ] [ ] [] 68 // 69 // All other columns apart from timestamp (ts), duration (dur) and the join key 70 // are passed through unchanged. 71 class SpanJoinOperatorTable : public SqliteTable { 72 public: 73 // Enum indicating whether the queries on the two inner tables should 74 // emit shadows. 75 enum class EmitShadowType { 76 // Used when the table should emit all shadow slices (both present and 77 // missing partition shadows). 78 kAll, 79 80 // Used when the table should only emit shadows for partitions which are 81 // present. 82 kPresentPartitionOnly, 83 84 // Used when the table should emit no shadow slices. 85 kNone, 86 }; 87 88 // Contains the definition of the child tables. 89 class TableDefinition { 90 public: 91 TableDefinition() = default; 92 93 TableDefinition(std::string name, 94 std::string partition_col, 95 std::vector<SqliteTable::Column> cols, 96 EmitShadowType emit_shadow_type, 97 uint32_t ts_idx, 98 uint32_t dur_idx, 99 uint32_t partition_idx); 100 101 // Returns whether this table should emit present partition shadow slices. ShouldEmitPresentPartitionShadow()102 bool ShouldEmitPresentPartitionShadow() const { 103 return emit_shadow_type_ == EmitShadowType::kAll || 104 emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly; 105 } 106 107 // Returns whether this table should emit missing partition shadow slices. ShouldEmitMissingPartitionShadow()108 bool ShouldEmitMissingPartitionShadow() const { 109 return emit_shadow_type_ == EmitShadowType::kAll; 110 } 111 112 // Returns whether the table is partitioned. IsPartitioned()113 bool IsPartitioned() const { return !partition_col_.empty(); } 114 name()115 const std::string& name() const { return name_; } partition_col()116 const std::string& partition_col() const { return partition_col_; } columns()117 const std::vector<SqliteTable::Column>& columns() const { return cols_; } 118 ts_idx()119 uint32_t ts_idx() const { return ts_idx_; } dur_idx()120 uint32_t dur_idx() const { return dur_idx_; } partition_idx()121 uint32_t partition_idx() const { return partition_idx_; } 122 123 private: 124 EmitShadowType emit_shadow_type_ = EmitShadowType::kNone; 125 126 std::string name_; 127 std::string partition_col_; 128 std::vector<SqliteTable::Column> cols_; 129 130 uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max(); 131 uint32_t dur_idx_ = std::numeric_limits<uint32_t>::max(); 132 uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max(); 133 }; 134 135 // Stores information about a single subquery into one of the two child 136 // tables. 137 // 138 // This class is implemented as a state machine which steps from one slice to 139 // the next. 140 class Query { 141 public: 142 // Enum encoding the current state of the query in the state machine. 143 enum class State { 144 // Encodes that the current slice is a real slice (i.e. comes directly 145 // from the cursor). 146 kReal, 147 148 // Encodes that the current slice is on a partition for which there is a 149 // real slice present. 150 kPresentPartitionShadow, 151 152 // Encodes that the current slice is on a paritition(s) for which there is 153 // no real slice for those partition(s). 154 kMissingPartitionShadow, 155 156 // Encodes that this query has reached the end. 157 kEof, 158 }; 159 160 Query(SpanJoinOperatorTable*, const TableDefinition*, sqlite3* db); 161 virtual ~Query(); 162 163 Query(Query&&) noexcept = default; 164 Query& operator=(Query&&) = default; 165 166 // Initializes the query with the given constraints and query parameters. 167 util::Status Initialize(const QueryConstraints& qc, sqlite3_value** argv); 168 169 // Forwards the query to the next valid slice. 170 util::Status Next(); 171 172 // Rewinds the query to the first valid slice 173 // This is used in the mixed partitioning case where the query with no 174 // partitions is rewound to the start on every new partition. 175 util::Status Rewind(); 176 177 // Reports the column at the given index to given context. 178 void ReportSqliteResult(sqlite3_context* context, size_t index); 179 180 // Returns whether the cursor has reached eof. IsEof()181 bool IsEof() const { return state_ == State::kEof; } 182 183 // Returns whether the current slice pointed to is a real slice. IsReal()184 bool IsReal() const { return state_ == State::kReal; } 185 186 // Returns the first partition this slice covers (for real/single partition 187 // shadows, this is the same as partition()). 188 // This partition encodes a [start, end] (closed at start and at end) range 189 // of partitions which works as the partitions are integers. FirstPartition()190 int64_t FirstPartition() const { 191 PERFETTO_DCHECK(!IsEof()); 192 return IsMissingPartitionShadow() ? missing_partition_start_ 193 : partition(); 194 } 195 196 // Returns the last partition this slice covers (for real/single partition 197 // shadows, this is the same as partition()). 198 // This partition encodes a [start, end] (closed at start and at end) range 199 // of partitions which works as the partitions are integers. LastPartition()200 int64_t LastPartition() const { 201 PERFETTO_DCHECK(!IsEof()); 202 return IsMissingPartitionShadow() ? missing_partition_end_ - 1 203 : partition(); 204 } 205 206 // Returns the end timestamp of this slice adjusted to ensure that -1 207 // duration slices always returns ts. AdjustedTsEnd()208 int64_t AdjustedTsEnd() const { 209 PERFETTO_DCHECK(!IsEof()); 210 return ts_end_ - ts() == -1 ? ts() : ts_end_; 211 } 212 ts()213 int64_t ts() const { 214 PERFETTO_DCHECK(!IsEof()); 215 return ts_; 216 } partition()217 int64_t partition() const { 218 PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned()); 219 return partition_; 220 } 221 raw_ts_end()222 int64_t raw_ts_end() const { 223 PERFETTO_DCHECK(!IsEof()); 224 return ts_end_; 225 } 226 definition()227 const TableDefinition* definition() const { return defn_; } 228 229 private: 230 Query(Query&) = delete; 231 Query& operator=(const Query&) = delete; 232 233 // Returns whether the current slice pointed to is a valid slice. 234 bool IsValidSlice(); 235 236 // Forwards the query to the next valid slice. 237 util::Status FindNextValidSlice(); 238 239 // Advances the query state machine by one slice. 240 util::Status NextSliceState(); 241 242 // Forwards the cursor to point to the next real slice. 243 util::Status CursorNext(); 244 245 // Creates an SQL query from the given set of constraint strings. 246 std::string CreateSqlQuery(const std::vector<std::string>& cs) const; 247 248 // Returns whether the current slice pointed to is a present partition 249 // shadow. IsPresentPartitionShadow()250 bool IsPresentPartitionShadow() const { 251 return state_ == State::kPresentPartitionShadow; 252 } 253 254 // Returns whether the current slice pointed to is a missing partition 255 // shadow. IsMissingPartitionShadow()256 bool IsMissingPartitionShadow() const { 257 return state_ == State::kMissingPartitionShadow; 258 } 259 260 // Returns whether the current slice pointed to is an empty shadow. IsEmptyShadow()261 bool IsEmptyShadow() const { 262 PERFETTO_DCHECK(!IsEof()); 263 return (!IsReal() && ts_ == ts_end_) || 264 (IsMissingPartitionShadow() && 265 missing_partition_start_ == missing_partition_end_); 266 } 267 CursorTs()268 int64_t CursorTs() const { 269 PERFETTO_DCHECK(!cursor_eof_); 270 auto ts_idx = static_cast<int>(defn_->ts_idx()); 271 return sqlite3_column_int64(stmt_.get(), ts_idx); 272 } 273 CursorDur()274 int64_t CursorDur() const { 275 PERFETTO_DCHECK(!cursor_eof_); 276 auto dur_idx = static_cast<int>(defn_->dur_idx()); 277 return sqlite3_column_int64(stmt_.get(), dur_idx); 278 } 279 CursorPartition()280 int64_t CursorPartition() const { 281 PERFETTO_DCHECK(!cursor_eof_); 282 PERFETTO_DCHECK(defn_->IsPartitioned()); 283 auto partition_idx = static_cast<int>(defn_->partition_idx()); 284 return sqlite3_column_int64(stmt_.get(), partition_idx); 285 } 286 287 State state_ = State::kMissingPartitionShadow; 288 bool cursor_eof_ = false; 289 290 // Only valid when |state_| != kEof. 291 int64_t ts_ = 0; 292 int64_t ts_end_ = std::numeric_limits<int64_t>::max(); 293 294 // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow. 295 int64_t partition_ = std::numeric_limits<int64_t>::min(); 296 297 // Only valid when |state_| == kMissingPartitionShadow. 298 int64_t missing_partition_start_ = 0; 299 int64_t missing_partition_end_ = 0; 300 301 std::string sql_query_; 302 ScopedStmt stmt_; 303 304 const TableDefinition* defn_ = nullptr; 305 sqlite3* db_ = nullptr; 306 SpanJoinOperatorTable* table_ = nullptr; 307 }; 308 309 // Base class for a cursor on the span table. 310 class Cursor : public SqliteTable::Cursor { 311 public: 312 Cursor(SpanJoinOperatorTable*, sqlite3* db); 313 ~Cursor() override = default; 314 315 int Filter(const QueryConstraints& qc, 316 sqlite3_value** argv, 317 FilterHistory) override; 318 int Column(sqlite3_context* context, int N) override; 319 int Next() override; 320 int Eof() override; 321 322 private: 323 Cursor(Cursor&) = delete; 324 Cursor& operator=(const Cursor&) = delete; 325 326 Cursor(Cursor&&) noexcept = default; 327 Cursor& operator=(Cursor&&) = default; 328 329 bool IsOverlappingSpan(); 330 331 util::Status FindOverlappingSpan(); 332 333 Query* FindEarliestFinishQuery(); 334 335 Query t1_; 336 Query t2_; 337 338 Query* next_query_ = nullptr; 339 340 // Only valid for kMixedPartition. 341 int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min(); 342 343 SpanJoinOperatorTable* table_; 344 }; 345 346 SpanJoinOperatorTable(sqlite3*, const TraceStorage*); 347 348 static void RegisterTable(sqlite3* db, const TraceStorage* storage); 349 350 // Table implementation. 351 util::Status Init(int, const char* const*, SqliteTable::Schema*) override; 352 std::unique_ptr<SqliteTable::Cursor> CreateCursor() override; 353 int BestIndex(const QueryConstraints& qc, BestIndexInfo* info) override; 354 355 private: 356 // Columns of the span operator table. 357 enum Column { 358 kTimestamp = 0, 359 kDuration = 1, 360 kPartition = 2, 361 // All other columns are dynamic depending on the joined tables. 362 }; 363 364 // Enum indicating the possible partitionings of the two tables in span join. 365 enum class PartitioningType { 366 // Used when both tables don't have a partition specified. 367 kNoPartitioning = 0, 368 369 // Used when both tables have the same partition specified. 370 kSamePartitioning = 1, 371 372 // Used when one table has a partition and the other table doesn't. 373 kMixedPartitioning = 2 374 }; 375 376 // Parsed version of a table descriptor. 377 struct TableDescriptor { 378 static util::Status Parse(const std::string& raw_descriptor, 379 TableDescriptor* descriptor); 380 IsPartitionedTableDescriptor381 bool IsPartitioned() const { return !partition_col.empty(); } 382 383 std::string name; 384 std::string partition_col; 385 }; 386 387 // Identifier for a column by index in a given table. 388 struct ColumnLocator { 389 const TableDefinition* defn; 390 size_t col_index; 391 }; 392 IsLeftJoin()393 bool IsLeftJoin() const { return name() == "span_left_join"; } IsOuterJoin()394 bool IsOuterJoin() const { return name() == "span_outer_join"; } 395 partition_col()396 const std::string& partition_col() const { 397 return t1_defn_.IsPartitioned() ? t1_defn_.partition_col() 398 : t2_defn_.partition_col(); 399 } 400 401 util::Status CreateTableDefinition( 402 const TableDescriptor& desc, 403 EmitShadowType emit_shadow_type, 404 SpanJoinOperatorTable::TableDefinition* defn); 405 406 std::vector<std::string> ComputeSqlConstraintsForDefinition( 407 const TableDefinition& defn, 408 const QueryConstraints& qc, 409 sqlite3_value** argv); 410 411 std::string GetNameForGlobalColumnIndex(const TableDefinition& defn, 412 int global_column); 413 414 void CreateSchemaColsForDefn(const TableDefinition& defn, 415 std::vector<SqliteTable::Column>* cols); 416 417 TableDefinition t1_defn_; 418 TableDefinition t2_defn_; 419 PartitioningType partitioning_; 420 std::unordered_map<size_t, ColumnLocator> global_index_to_column_locator_; 421 422 sqlite3* const db_; 423 }; 424 425 } // namespace trace_processor 426 } // namespace perfetto 427 428 #endif // SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_ 429