1 /*
2 SPDX-FileCopyrightText: 2021 Jean-Baptiste Mardelle <jb@kdenlive.org>
3
4 SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
5 */
6
7 #include "speechdialog.h"
8
9 #include "core.h"
10 #include "kdenlivesettings.h"
11 #include "monitor/monitor.h"
12 #include "mainwindow.h"
13 #include "bin/model/subtitlemodel.hpp"
14 #include "kdenlive_debug.h"
15
16 #include "mlt++/MltProfile.h"
17 #include "mlt++/MltTractor.h"
18 #include "mlt++/MltConsumer.h"
19
20 #include <KLocalizedString>
21 #include <KMessageWidget>
22 #include <QDir>
23 #include <QFontDatabase>
24 #include <QProcess>
25 #include <memory>
26 #include <utility>
27
SpeechDialog(std::shared_ptr<TimelineItemModel> timeline,QPoint zone,bool,bool,QWidget * parent)28 SpeechDialog::SpeechDialog(std::shared_ptr<TimelineItemModel> timeline, QPoint zone, bool, bool, QWidget *parent)
29 : QDialog(parent)
30 , m_timeline(timeline)
31
32 {
33 setFont(QFontDatabase::systemFont(QFontDatabase::SmallestReadableFont));
34 setupUi(this);
35 buttonBox->button(QDialogButtonBox::Apply)->setText(i18n("Process"));
36 speech_info->hide();
37 m_voskConfig = new QAction(i18n("Configure"), this);
38 connect(m_voskConfig, &QAction::triggered, []() {
39 pCore->window()->slotPreferences(8);
40 });
41 m_modelsConnection = connect(pCore.get(), &Core::voskModelUpdate, this, [&](QStringList models) {
42 language_box->clear();
43 language_box->addItems(models);
44 if (models.isEmpty()) {
45 speech_info->addAction(m_voskConfig);
46 speech_info->setMessageType(KMessageWidget::Information);
47 speech_info->setText(i18n("Please install speech recognition models"));
48 speech_info->animatedShow();
49 } else {
50 if (!KdenliveSettings::vosk_srt_model().isEmpty() && models.contains(KdenliveSettings::vosk_srt_model())) {
51 int ix = language_box->findText(KdenliveSettings::vosk_srt_model());
52 if (ix > -1) {
53 language_box->setCurrentIndex(ix);
54 }
55 }
56 }
57 });
58 connect(language_box, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), this, [this]() {
59 KdenliveSettings::setVosk_srt_model(language_box->currentText());
60 });
61 connect(buttonBox->button(QDialogButtonBox::Apply), &QPushButton::clicked, this, [this, zone]() {
62 slotProcessSpeech(zone);
63 });
64 parseVoskDictionaries();
65 frame_progress->setVisible(false);
66 button_abort->setIcon(QIcon::fromTheme(QStringLiteral("process-stop")));
67 connect(button_abort, &QToolButton::clicked, this, [this]() {
68 if (m_speechJob && m_speechJob->state() == QProcess::Running) {
69 m_speechJob->kill();
70 }
71 });
72 }
73
~SpeechDialog()74 SpeechDialog::~SpeechDialog()
75 {
76 QObject::disconnect(m_modelsConnection);
77 }
78
slotProcessSpeech(QPoint zone)79 void SpeechDialog::slotProcessSpeech(QPoint zone)
80 {
81 #ifdef Q_OS_WIN
82 QString pyExec = QStandardPaths::findExecutable(QStringLiteral("python"));
83 #else
84 QString pyExec = QStandardPaths::findExecutable(QStringLiteral("python3"));
85 #endif
86 if (pyExec.isEmpty()) {
87 speech_info->removeAction(m_voskConfig);
88 speech_info->setMessageType(KMessageWidget::Warning);
89 speech_info->setText(i18n("Cannot find python3, please install it on your system."));
90 speech_info->animatedShow();
91 return;
92 }
93 if (!KdenliveSettings::vosk_found() || !KdenliveSettings::vosk_srt_found()) {
94 speech_info->setMessageType(KMessageWidget::Warning);
95 speech_info->setText(i18n("Please configure speech to text."));
96 speech_info->animatedShow();
97 speech_info->addAction(m_voskConfig);
98 return;
99 }
100 speech_info->removeAction(m_voskConfig);
101 speech_info->setMessageType(KMessageWidget::Information);
102 speech_info->setText(i18n("Starting audio export"));
103 speech_info->show();
104 qApp->processEvents();
105 QString sceneList;
106 QString speech;
107 QString audio;
108 QTemporaryFile tmpPlaylist(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.mlt")));
109 m_tmpSrt = std::make_unique<QTemporaryFile>(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.srt")));
110 m_tmpAudio = std::make_unique<QTemporaryFile>(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.wav")));
111 if (tmpPlaylist.open()) {
112 sceneList = tmpPlaylist.fileName();
113 }
114 tmpPlaylist.close();
115 if (m_tmpSrt->open()) {
116 speech = m_tmpSrt->fileName();
117 }
118 m_tmpSrt->close();
119 if (m_tmpAudio->open()) {
120 audio = m_tmpAudio->fileName();
121 }
122 m_tmpAudio->close();
123 pCore->getMonitor(Kdenlive::ProjectMonitor)->sceneList(QDir::temp().absolutePath(), sceneList);
124 Mlt::Producer producer(*m_timeline->tractor()->profile(), "xml", sceneList.toUtf8().constData());
125 qDebug()<<"=== STARTING RENDER B";
126 Mlt::Consumer xmlConsumer(*m_timeline->tractor()->profile(), "avformat", audio.toUtf8().constData());
127 QString speechScript = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("scripts/speech.py"));
128 if (speechScript.isEmpty()) {
129 speech_info->setMessageType(KMessageWidget::Warning);
130 speech_info->setText(i18n("The speech script was not found, check your install."));
131 speech_info->animatedShow();
132 buttonBox->button(QDialogButtonBox::Apply)->setEnabled(true);
133 return;
134 }
135 if (!xmlConsumer.is_valid() || !producer.is_valid()) {
136 qDebug()<<"=== STARTING CONSUMER ERROR";
137 if (!producer.is_valid()) {
138 qDebug()<<"=== PRODUCER INVALID";
139 }
140 speech_info->setMessageType(KMessageWidget::Warning);
141 speech_info->setText(i18n("Audio export failed"));
142 qApp->processEvents();
143 return;
144 }
145 speech_progress->setValue(0);
146 frame_progress->setVisible(true);
147 buttonBox->button(QDialogButtonBox::Apply)->setEnabled(false);
148 qApp->processEvents();
149 xmlConsumer.set("terminate_on_pause", 1);
150 xmlConsumer.set("properties", "WAV");
151 producer.set_in_and_out(zone.x(), zone.y());
152 xmlConsumer.connect(producer);
153 qDebug()<<"=== STARTING RENDER C, IN:"<<zone.x()<<" - "<<zone.y();
154 m_duration = zone.y() - zone.x();
155 qApp->processEvents();
156 xmlConsumer.run();
157 qApp->processEvents();
158 qDebug()<<"=== STARTING RENDER D";
159 QString language = language_box->currentText();
160 qDebug()<<"=== RUNNING SPEECH ANALYSIS: "<<speechScript;
161 speech_info->setMessageType(KMessageWidget::Information);
162 speech_info->setText(i18n("Starting speech recognition"));
163 qApp->processEvents();
164 QString modelDirectory = KdenliveSettings::vosk_folder_path();
165 if (modelDirectory.isEmpty()) {
166 modelDirectory = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("speechmodels"), QStandardPaths::LocateDirectory);
167 }
168 qDebug()<<"==== ANALYSIS SPEECH: "<<modelDirectory<<" - "<<language<<" - "<<audio<<" - "<<speech;
169 m_speechJob = std::make_unique<QProcess>(this);
170 connect(m_speechJob.get(), &QProcess::readyReadStandardOutput, this, &SpeechDialog::slotProcessProgress);
171 connect(m_speechJob.get(), static_cast<void (QProcess::*)(int, QProcess::ExitStatus)>(&QProcess::finished), this, [this, speech, zone](int, QProcess::ExitStatus status) {
172 slotProcessSpeechStatus(status, speech, zone);
173 });
174 m_speechJob->start(pyExec, {speechScript, modelDirectory, language, audio, speech});
175 }
176
slotProcessSpeechStatus(QProcess::ExitStatus status,const QString & srtFile,const QPoint zone)177 void SpeechDialog::slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile, const QPoint zone)
178 {
179 qDebug()<<"/// TERMINATING SPEECH JOB\n\n+++++++++++++++++++++++++++";
180 if (status == QProcess::CrashExit) {
181 speech_info->setMessageType(KMessageWidget::Warning);
182 speech_info->setText(i18n("Speech recognition aborted."));
183 speech_info->animatedShow();
184 } else {
185 if (QFile::exists(srtFile)) {
186 m_timeline->getSubtitleModel()->importSubtitle(srtFile, zone.x(), true);
187 speech_info->setMessageType(KMessageWidget::Positive);
188 speech_info->setText(i18n("Subtitles imported"));
189 } else {
190 speech_info->setMessageType(KMessageWidget::Warning);
191 speech_info->setText(i18n("Speech recognition failed"));
192 }
193 }
194 buttonBox->button(QDialogButtonBox::Apply)->setEnabled(true);
195 frame_progress->setVisible(false);
196 }
197
slotProcessProgress()198 void SpeechDialog::slotProcessProgress()
199 {
200 QString saveData = QString::fromUtf8(m_speechJob->readAll());
201 qDebug()<<"==== GOT SPEECH DATA: "<<saveData;
202 if (saveData.startsWith(QStringLiteral("progress:"))) {
203 double prog = saveData.section(QLatin1Char(':'), 1).toInt() * 3.12;
204 qDebug()<<"=== GOT DATA:\n"<<saveData;
205 speech_progress->setValue(static_cast<int>(100 * prog / m_duration));
206 }
207 }
208
parseVoskDictionaries()209 void SpeechDialog::parseVoskDictionaries()
210 {
211 QString modelDirectory = KdenliveSettings::vosk_folder_path();
212 QDir dir;
213 if (modelDirectory.isEmpty()) {
214 modelDirectory = QStandardPaths::writableLocation(QStandardPaths::AppDataLocation);
215 dir = QDir(modelDirectory);
216 if (!dir.cd(QStringLiteral("speechmodels"))) {
217 qDebug()<<"=== /// CANNOT ACCESS SPEECH DICTIONARIES FOLDER";
218 emit pCore->voskModelUpdate({});
219 return;
220 }
221 } else {
222 dir = QDir(modelDirectory);
223 }
224 QStringList dicts = dir.entryList(QDir::Dirs | QDir::NoDotAndDotDot);
225 QStringList final;
226 for (auto &d : dicts) {
227 QDir sub(dir.absoluteFilePath(d));
228 if (sub.exists(QStringLiteral("mfcc.conf")) || (sub.exists(QStringLiteral("conf/mfcc.conf")))) {
229 final << d;
230 }
231 }
232 emit pCore->voskModelUpdate(final);
233 }
234