/* -*- C++ -*- This file contains a benchmark for job processing in ThreadWeaver. SPDX-FileCopyrightText: 2005-2013 Mirko Boehm SPDX-License-Identifier: LGPL-2.0-or-later */ #include #include #include #include #include #include #include #include #include #include #include class AccumulateJob : public ThreadWeaver::Job { public: explicit AccumulateJob() : m_count(0) , m_result(0) { } AccumulateJob(const AccumulateJob &a) : ThreadWeaver::Job() , m_count(a.m_count) , m_result(a.m_result) { } void setCount(quint64 count) { m_count = count; } quint64 result() const { return m_result; } void payload() { std::vector numbers(m_count); std::generate(numbers.begin(), numbers.end(), []() -> quint64 { static quint64 i = 0; return i++; }); m_result = std::accumulate(numbers.begin(), numbers.end(), 0); } protected: void run(ThreadWeaver::JobPointer, ThreadWeaver::Thread *) override { payload(); } private: quint64 m_count; quint64 m_result; }; class QueueBenchmarksTest : public QObject { Q_OBJECT public: QueueBenchmarksTest(); private Q_SLOTS: void initTestCase(); void cleanupTestCase(); void BaselineBenchmark(); void BaselineBenchmark_data(); void BaselineAsJobsBenchmark(); void BaselineAsJobsBenchmark_data(); void IndividualJobsBenchmark(); void IndividualJobsBenchmark_data(); void CollectionsBenchmark(); void CollectionsBenchmark_data(); void SequencesBenchmark(); void SequencesBenchmark_data(); private: void defaultBenchmarkData(bool singleThreaded); }; QueueBenchmarksTest::QueueBenchmarksTest() { } void QueueBenchmarksTest::initTestCase() { } void QueueBenchmarksTest::cleanupTestCase() { } /** @brief BaselineBenchmark simply performs the same operations in a loop. * * The result amounts to what time the jobs used in the benchmark need to execute without queueing or thread * synchronization overhead. */ void QueueBenchmarksTest::BaselineBenchmark() { QFETCH(int, m); QFETCH(int, c); QFETCH(int, b); QFETCH(int, t); const int n = c * b; Q_UNUSED(t); // in this case QVector jobs(n); for (int i = 0; i < n; ++i) { jobs[i].setCount(m); } // executeLocal needs to emit similar signals as execute(), to be comparable to the threaded variants. // BaselineAsJobsBenchmark does that. Compare BaselineAsJobsBenchmark and BaselineBenchmark to evaluate the overhead of executing // an operation in a job. QBENCHMARK { for (int i = 0; i < n; ++i) { jobs[i].payload(); } } } void QueueBenchmarksTest::BaselineBenchmark_data() { defaultBenchmarkData(true); } void QueueBenchmarksTest::BaselineAsJobsBenchmark() { QFETCH(int, m); QFETCH(int, c); QFETCH(int, b); QFETCH(int, t); const int n = c * b; Q_UNUSED(t); // in this case QVector jobs(n); for (int i = 0; i < n; ++i) { jobs[i].setCount(m); } QBENCHMARK { for (int i = 0; i < n; ++i) { jobs[i].blockingExecute(); } } } void QueueBenchmarksTest::BaselineAsJobsBenchmark_data() { defaultBenchmarkData(true); } void QueueBenchmarksTest::IndividualJobsBenchmark() { QFETCH(int, m); QFETCH(int, c); QFETCH(int, b); QFETCH(int, t); const int n = c * b; ThreadWeaver::Queue weaver; weaver.setMaximumNumberOfThreads(t); weaver.suspend(); QVector jobs(n); { ThreadWeaver::QueueStream stream(&weaver); for (int i = 0; i < n; ++i) { jobs[i].setCount(m); stream << jobs[i]; } } QBENCHMARK_ONCE { weaver.resume(); weaver.finish(); } } void QueueBenchmarksTest::IndividualJobsBenchmark_data() { defaultBenchmarkData(false); } void QueueBenchmarksTest::CollectionsBenchmark() { QFETCH(int, m); QFETCH(int, c); QFETCH(int, b); QFETCH(int, t); const int n = c * b; ThreadWeaver::Queue weaver; weaver.setMaximumNumberOfThreads(t); weaver.suspend(); QVector jobs(n); // FIXME currently, memory management of the job sequences (they are deleted when they go out of scope) // is measured as part of the benchmark qDebug() << b << "blocks" << c << "operations, queueing..."; // queue the jobs blockwise as collections for (int block = 0; block < b; ++block) { ThreadWeaver::Collection *collection = new ThreadWeaver::Collection(); for (int operation = 0; operation < c; ++operation) { const int index = block * b + operation; jobs[index].setCount(m); *collection << jobs[index]; } weaver.stream() << collection; } qDebug() << b << "blocks" << c << "operations, executing..."; QBENCHMARK_ONCE { weaver.resume(); weaver.finish(); } } void QueueBenchmarksTest::CollectionsBenchmark_data() { defaultBenchmarkData(false); } void QueueBenchmarksTest::SequencesBenchmark() { QFETCH(int, m); QFETCH(int, c); QFETCH(int, b); QFETCH(int, t); const int n = c * b; ThreadWeaver::Queue weaver; weaver.setMaximumNumberOfThreads(t); weaver.suspend(); QVector jobs(n); qDebug() << b << "blocks" << c << "operations, queueing..."; // queue the jobs blockwise as collections for (int block = 0; block < b; ++block) { ThreadWeaver::Sequence *sequence = new ThreadWeaver::Sequence(); for (int operation = 0; operation < c; ++operation) { const int index = block * b + operation; jobs[index].setCount(m); *sequence << jobs[index]; } weaver.stream() << sequence; } qDebug() << b << "blocks" << c << "operations, executing..."; QBENCHMARK_ONCE { weaver.resume(); weaver.finish(); } } void QueueBenchmarksTest::SequencesBenchmark_data() { defaultBenchmarkData(false); } void QueueBenchmarksTest::defaultBenchmarkData(bool singleThreaded) { QTest::addColumn("m"); // number of quint64's to accumulate QTest::addColumn("c"); // operations per block QTest::addColumn("b"); // number of blocks, number of jobs is b*c QTest::addColumn("t"); // number of worker threads const QList threads = singleThreaded ? QList() << 1 : QList() << 1 << 2 << 4 << 8 << 16 << 32 << 64 << 128; const QList ms = QList() << 1 << 10 << 100 << 1000 << 10000 << 100000; for (int m : ms) { for (int t : threads) { const QString name = QString::fromLatin1("%1 threads, %2 values").arg(t).arg(m); // newRow expects const char*, but then qstrdup's it in the QTestData constructor. Eeeew. QTest::newRow(qPrintable(name)) << m << 256 << 256 << t; } } } QTEST_MAIN(QueueBenchmarksTest) #include "QueueBenchmarks.moc"