1 /*
2     SPDX-FileCopyrightText: 2021 Jean-Baptiste Mardelle <jb@kdenlive.org>
3 
4 SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
5 */
6 
7 #include "speechdialog.h"
8 
9 #include "core.h"
10 #include "kdenlivesettings.h"
11 #include "monitor/monitor.h"
12 #include "mainwindow.h"
13 #include "bin/model/subtitlemodel.hpp"
14 #include "kdenlive_debug.h"
15 
16 #include "mlt++/MltProfile.h"
17 #include "mlt++/MltTractor.h"
18 #include "mlt++/MltConsumer.h"
19 
20 #include <KLocalizedString>
21 #include <KMessageWidget>
22 #include <QDir>
23 #include <QFontDatabase>
24 #include <QProcess>
25 #include <memory>
26 #include <utility>
27 
SpeechDialog(std::shared_ptr<TimelineItemModel> timeline,QPoint zone,bool,bool,QWidget * parent)28 SpeechDialog::SpeechDialog(std::shared_ptr<TimelineItemModel> timeline, QPoint zone, bool, bool, QWidget *parent)
29     : QDialog(parent)
30     , m_timeline(timeline)
31 
32 {
33     setFont(QFontDatabase::systemFont(QFontDatabase::SmallestReadableFont));
34     setupUi(this);
35     buttonBox->button(QDialogButtonBox::Apply)->setText(i18n("Process"));
36     speech_info->hide();
37     m_voskConfig = new QAction(i18n("Configure"), this);
38     connect(m_voskConfig, &QAction::triggered, []() {
39         pCore->window()->slotPreferences(8);
40     });
41     m_modelsConnection = connect(pCore.get(), &Core::voskModelUpdate, this, [&](QStringList models) {
42         language_box->clear();
43         language_box->addItems(models);
44         if (models.isEmpty()) {
45             speech_info->addAction(m_voskConfig);
46             speech_info->setMessageType(KMessageWidget::Information);
47             speech_info->setText(i18n("Please install speech recognition models"));
48             speech_info->animatedShow();
49         } else {
50             if (!KdenliveSettings::vosk_srt_model().isEmpty() && models.contains(KdenliveSettings::vosk_srt_model())) {
51                 int ix = language_box->findText(KdenliveSettings::vosk_srt_model());
52                 if (ix > -1) {
53                     language_box->setCurrentIndex(ix);
54                 }
55             }
56         }
57     });
58     connect(language_box, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), this, [this]() {
59         KdenliveSettings::setVosk_srt_model(language_box->currentText());
60     });
61     connect(buttonBox->button(QDialogButtonBox::Apply), &QPushButton::clicked, this, [this, zone]() {
62         slotProcessSpeech(zone);
63     });
64     parseVoskDictionaries();
65     frame_progress->setVisible(false);
66     button_abort->setIcon(QIcon::fromTheme(QStringLiteral("process-stop")));
67     connect(button_abort, &QToolButton::clicked, this, [this]() {
68         if (m_speechJob && m_speechJob->state() == QProcess::Running) {
69             m_speechJob->kill();
70         }
71     });
72 }
73 
~SpeechDialog()74 SpeechDialog::~SpeechDialog()
75 {
76     QObject::disconnect(m_modelsConnection);
77 }
78 
slotProcessSpeech(QPoint zone)79 void SpeechDialog::slotProcessSpeech(QPoint zone)
80 {
81 #ifdef Q_OS_WIN
82     QString pyExec = QStandardPaths::findExecutable(QStringLiteral("python"));
83 #else
84     QString pyExec = QStandardPaths::findExecutable(QStringLiteral("python3"));
85 #endif
86     if (pyExec.isEmpty()) {
87         speech_info->removeAction(m_voskConfig);
88         speech_info->setMessageType(KMessageWidget::Warning);
89         speech_info->setText(i18n("Cannot find python3, please install it on your system."));
90         speech_info->animatedShow();
91         return;
92     }
93     if (!KdenliveSettings::vosk_found() || !KdenliveSettings::vosk_srt_found()) {
94         speech_info->setMessageType(KMessageWidget::Warning);
95         speech_info->setText(i18n("Please configure speech to text."));
96         speech_info->animatedShow();
97         speech_info->addAction(m_voskConfig);
98         return;
99     }
100     speech_info->removeAction(m_voskConfig);
101     speech_info->setMessageType(KMessageWidget::Information);
102     speech_info->setText(i18n("Starting audio export"));
103     speech_info->show();
104     qApp->processEvents();
105     QString sceneList;
106     QString speech;
107     QString audio;
108     QTemporaryFile tmpPlaylist(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.mlt")));
109     m_tmpSrt = std::make_unique<QTemporaryFile>(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.srt")));
110     m_tmpAudio = std::make_unique<QTemporaryFile>(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.wav")));
111     if (tmpPlaylist.open()) {
112         sceneList = tmpPlaylist.fileName();
113     }
114     tmpPlaylist.close();
115     if (m_tmpSrt->open()) {
116         speech = m_tmpSrt->fileName();
117     }
118     m_tmpSrt->close();
119     if (m_tmpAudio->open()) {
120         audio = m_tmpAudio->fileName();
121     }
122     m_tmpAudio->close();
123     pCore->getMonitor(Kdenlive::ProjectMonitor)->sceneList(QDir::temp().absolutePath(), sceneList);
124     Mlt::Producer producer(*m_timeline->tractor()->profile(), "xml", sceneList.toUtf8().constData());
125     qDebug()<<"=== STARTING RENDER B";
126     Mlt::Consumer xmlConsumer(*m_timeline->tractor()->profile(), "avformat", audio.toUtf8().constData());
127     QString speechScript = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("scripts/speech.py"));
128     if (speechScript.isEmpty()) {
129         speech_info->setMessageType(KMessageWidget::Warning);
130         speech_info->setText(i18n("The speech script was not found, check your install."));
131         speech_info->animatedShow();
132         buttonBox->button(QDialogButtonBox::Apply)->setEnabled(true);
133         return;
134     }
135     if (!xmlConsumer.is_valid() || !producer.is_valid()) {
136         qDebug()<<"=== STARTING CONSUMER ERROR";
137         if (!producer.is_valid()) {
138             qDebug()<<"=== PRODUCER INVALID";
139         }
140         speech_info->setMessageType(KMessageWidget::Warning);
141         speech_info->setText(i18n("Audio export failed"));
142         qApp->processEvents();
143         return;
144     }
145     speech_progress->setValue(0);
146     frame_progress->setVisible(true);
147     buttonBox->button(QDialogButtonBox::Apply)->setEnabled(false);
148     qApp->processEvents();
149     xmlConsumer.set("terminate_on_pause", 1);
150     xmlConsumer.set("properties", "WAV");
151     producer.set_in_and_out(zone.x(), zone.y());
152     xmlConsumer.connect(producer);
153     qDebug()<<"=== STARTING RENDER C, IN:"<<zone.x()<<" - "<<zone.y();
154     m_duration = zone.y() - zone.x();
155     qApp->processEvents();
156     xmlConsumer.run();
157     qApp->processEvents();
158     qDebug()<<"=== STARTING RENDER D";
159     QString language = language_box->currentText();
160     qDebug()<<"=== RUNNING SPEECH ANALYSIS: "<<speechScript;
161     speech_info->setMessageType(KMessageWidget::Information);
162     speech_info->setText(i18n("Starting speech recognition"));
163     qApp->processEvents();
164     QString modelDirectory = KdenliveSettings::vosk_folder_path();
165     if (modelDirectory.isEmpty()) {
166         modelDirectory = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("speechmodels"), QStandardPaths::LocateDirectory);
167     }
168     qDebug()<<"==== ANALYSIS SPEECH: "<<modelDirectory<<" - "<<language<<" - "<<audio<<" - "<<speech;
169     m_speechJob = std::make_unique<QProcess>(this);
170     connect(m_speechJob.get(), &QProcess::readyReadStandardOutput, this, &SpeechDialog::slotProcessProgress);
171     connect(m_speechJob.get(), static_cast<void (QProcess::*)(int, QProcess::ExitStatus)>(&QProcess::finished), this, [this, speech, zone](int, QProcess::ExitStatus status) {
172        slotProcessSpeechStatus(status, speech, zone);
173     });
174     m_speechJob->start(pyExec, {speechScript, modelDirectory, language, audio, speech});
175 }
176 
slotProcessSpeechStatus(QProcess::ExitStatus status,const QString & srtFile,const QPoint zone)177 void SpeechDialog::slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile, const QPoint zone)
178 {
179     qDebug()<<"/// TERMINATING SPEECH JOB\n\n+++++++++++++++++++++++++++";
180     if (status == QProcess::CrashExit) {
181         speech_info->setMessageType(KMessageWidget::Warning);
182         speech_info->setText(i18n("Speech recognition aborted."));
183         speech_info->animatedShow();
184     } else {
185         if (QFile::exists(srtFile)) {
186             m_timeline->getSubtitleModel()->importSubtitle(srtFile, zone.x(), true);
187             speech_info->setMessageType(KMessageWidget::Positive);
188             speech_info->setText(i18n("Subtitles imported"));
189         } else {
190             speech_info->setMessageType(KMessageWidget::Warning);
191             speech_info->setText(i18n("Speech recognition failed"));
192         }
193     }
194     buttonBox->button(QDialogButtonBox::Apply)->setEnabled(true);
195     frame_progress->setVisible(false);
196 }
197 
slotProcessProgress()198 void SpeechDialog::slotProcessProgress()
199 {
200      QString saveData = QString::fromUtf8(m_speechJob->readAll());
201      qDebug()<<"==== GOT SPEECH DATA: "<<saveData;
202      if (saveData.startsWith(QStringLiteral("progress:"))) {
203          double prog = saveData.section(QLatin1Char(':'), 1).toInt() * 3.12;
204         qDebug()<<"=== GOT DATA:\n"<<saveData;
205         speech_progress->setValue(static_cast<int>(100 * prog / m_duration));
206      }
207 }
208 
parseVoskDictionaries()209 void SpeechDialog::parseVoskDictionaries()
210 {
211     QString modelDirectory = KdenliveSettings::vosk_folder_path();
212     QDir dir;
213     if (modelDirectory.isEmpty()) {
214         modelDirectory = QStandardPaths::writableLocation(QStandardPaths::AppDataLocation);
215         dir = QDir(modelDirectory);
216         if (!dir.cd(QStringLiteral("speechmodels"))) {
217             qDebug()<<"=== /// CANNOT ACCESS SPEECH DICTIONARIES FOLDER";
218             emit pCore->voskModelUpdate({});
219             return;
220         }
221     } else {
222         dir = QDir(modelDirectory);
223     }
224     QStringList dicts = dir.entryList(QDir::Dirs | QDir::NoDotAndDotDot);
225     QStringList final;
226     for (auto &d : dicts) {
227         QDir sub(dir.absoluteFilePath(d));
228         if (sub.exists(QStringLiteral("mfcc.conf")) || (sub.exists(QStringLiteral("conf/mfcc.conf")))) {
229             final << d;
230         }
231     }
232     emit pCore->voskModelUpdate(final);
233 }
234