1 /*
2 This file is part of Kiten, a KDE Japanese Reference Tool
3 SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com>
4 SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
5 SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
6 SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
7
8 SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10
11 #include "dictfileedict.h"
12
13 #include <KConfig>
14 #include <KConfigSkeleton>
15
16 #include <QDebug>
17 #include <QFile>
18 #include <QString>
19 #include <QTextCodec>
20 #include <QTextStream>
21 #include <QVector>
22
23 #include "deinflection.h"
24 #include "dictfilefieldselector.h"
25 #include "dictquery.h"
26 #include "entryedict.h"
27 #include "entrylist.h"
28 #include "kitenmacros.h"
29
30 QString *DictFileEdict::deinflectionLabel = nullptr;
31 QStringList *DictFileEdict::displayFields = nullptr;
32 QString *DictFileEdict::wordType = nullptr;
33
34 /**
35 * Per instructions in the super-class, this constructor basically sets the
36 * dictionaryType member variable to identify this as an edict-type database handler.
37 */
DictFileEdict()38 DictFileEdict::DictFileEdict()
39 : DictFile( EDICT )
40 , m_deinflection( 0 )
41 , m_hasDeinflection( false )
42 {
43 m_dictionaryType = EDICT;
44 m_searchableAttributes.insert( QStringLiteral("common"), QStringLiteral("common") );
45 }
46
47 /**
48 * The destructor... ditch our memory maps and close our files here
49 * (if they were open).
50 */
~DictFileEdict()51 DictFileEdict::~DictFileEdict()
52 {
53 delete m_deinflection;
54 m_deinflection = nullptr;
55 }
56
displayOptions() const57 QMap<QString,QString> DictFileEdict::displayOptions() const
58 {
59 QMap<QString,QString> list;
60 list[ QStringLiteral("Part of speech(type)") ] = QStringLiteral("type");
61 return list;
62 }
63
64 /**
65 * Do a search, respond with a list of entries.
66 * The general strategy will be to take the first word of the query, and do a
67 * binary search on the dictionary for that item. Take all results and filter
68 * them using the rest of the query with the validate method.
69 */
doSearch(const DictQuery & query)70 EntryList *DictFileEdict::doSearch( const DictQuery &query )
71 {
72 if( query.isEmpty() || ! m_edictFile.valid() ) //No query or dict, no results.
73 {
74 return new EntryList();
75 }
76
77 qDebug()<< "Search from : " << getName();
78
79 QString firstChoice = query.getWord();
80 if( firstChoice.length() == 0 )
81 {
82 firstChoice = query.getPronunciation();
83 if( firstChoice.length() == 0 )
84 {
85 firstChoice = query.getMeaning().split( ' ' ).first().toLower();
86 if( firstChoice.length() == 0 )
87 {
88 //The nastiest situation... we have to assemble a search string
89 //from the first property
90 QList<QString> keys = query.listPropertyKeys();
91 if( keys.size() == 0 ) //Shouldn't happen... but maybe in the future
92 {
93 return new EntryList();
94 }
95 firstChoice = keys[ 0 ];
96 firstChoice = firstChoice + query.getProperty( firstChoice );
97 //TODO: doSearch: some accommodation for searching for ranges and such of properties
98 }
99 }
100 }
101 else
102 {
103 // Only search for one kanji or the
104 // binary lookup mechanism breaks
105 firstChoice = firstChoice.at( 0 );
106 }
107
108 QVector<QString> preliminaryResults = m_edictFile.findMatches( firstChoice );
109
110 if( preliminaryResults.size() == 0 ) //If there were no matches... return an empty list
111 {
112 return new EntryList();
113 }
114
115 EntryList *results = new EntryList();
116 foreach( const QString &it, preliminaryResults )
117 {
118 // qDebug() << "result: " << it << endl;
119 Entry *result = makeEntry( it );
120 EntryEdict *resultEdict = static_cast<EntryEdict*>( result );
121 if( result->matchesQuery( query ) && resultEdict->matchesWordType( query ) )
122 {
123 results->append( result );
124 }
125 else
126 {
127 delete result;
128 }
129 }
130
131 // At this point we should have some preliminary results
132 // and if there were no matches, it probably means the user
133 // input was a verb or adjective, so we have to deinflect it.
134 bool isAnyQuery = query.getMatchWordType() == DictQuery::Any;
135 bool isVerbQuery = query.getMatchWordType() == DictQuery::Verb;
136 bool isAdjectiveQuery = query.getMatchWordType() == DictQuery::Adjective;
137 if( results->count() == 0 && ( isAnyQuery || isVerbQuery || isAdjectiveQuery ) )
138 {
139 delete results;
140 results = m_deinflection->search( query, preliminaryResults );
141 QString *label = m_deinflection->getDeinflectionLabel();
142 if( ! label->isEmpty() && ! m_hasDeinflection )
143 {
144 deinflectionLabel = label;
145 m_hasDeinflection = true;
146 wordType = m_deinflection->getWordType();
147 }
148 }
149 else
150 {
151 deinflectionLabel = nullptr;
152 wordType = nullptr;
153 m_hasDeinflection = false;
154 }
155
156 if( results )
157 {
158 EntryList *common = new EntryList();
159 EntryList *uncommon = new EntryList();
160 EntryList::EntryIterator i( *results );
161 while( i.hasNext() )
162 {
163 EntryEdict *entry = static_cast<EntryEdict*>( i.next() );
164 if( entry->isCommon() )
165 {
166 common->append( entry );
167 }
168 else
169 {
170 uncommon->append( entry );
171 }
172 }
173
174 delete results;
175 results = new EntryList();
176 results->appendList( common );
177 results->appendList( uncommon );
178 delete common;
179 delete uncommon;
180
181 EntryList *exact = new EntryList();
182 EntryList *beginning = new EntryList();
183 EntryList *ending = new EntryList();
184 EntryList *anywhere = new EntryList();
185 EntryList::EntryIterator it( *results );
186 while( it.hasNext() )
187 {
188 Entry *entry = it.next();
189
190 if( entry->getWord() == query.getWord() )
191 {
192 exact->append( entry );
193 }
194 else if( entry->getWord().startsWith( query.getWord() ) )
195 {
196 beginning->append( entry );
197 }
198 else if( entry->getWord().endsWith( query.getWord() ) )
199 {
200 ending->append( entry );
201 }
202 else
203 {
204 anywhere->append( entry );
205 }
206 }
207
208 delete results;
209 results = new EntryList();
210 results->appendList( exact );
211 results->appendList( beginning );
212 results->appendList( ending );
213 results->appendList( anywhere );
214 delete exact;
215 delete beginning;
216 delete ending;
217 delete anywhere;
218 }
219
220 return results;
221 }
222
223 /**
224 * Make a list of all the extra fields in our db.. Entry uses this to decide
225 * what goes in the interpretations it gives.
226 */
listDictDisplayOptions(QStringList x) const227 QStringList DictFileEdict::listDictDisplayOptions( QStringList x ) const
228 {
229 x += displayOptions().keys();
230 return x;
231 }
232
233 /**
234 * Load up the dictionary
235 */
loadDictionary(const QString & fileName,const QString & dictName)236 bool DictFileEdict::loadDictionary( const QString &fileName, const QString &dictName )
237 {
238 if( m_edictFile.valid() )
239 {
240 return false; //Already loaded
241 }
242
243 if( m_edictFile.loadFile( fileName ) )
244 {
245 m_dictionaryName = dictName;
246 m_dictionaryFile = fileName;
247
248 m_deinflection = new Deinflection( m_dictionaryName );
249 m_deinflection->load();
250
251 return true;
252 }
253
254 return false;
255 }
256
loadDisplayOptions() const257 QMap<QString,QString> DictFileEdict::loadDisplayOptions() const
258 {
259 QMap<QString,QString> list = displayOptions();
260 list[ QStringLiteral("Word/Kanji") ] = QStringLiteral("Word/Kanji");
261 list[ QStringLiteral("Reading") ] = QStringLiteral("Reading");
262 list[ QStringLiteral("Meaning") ] = QStringLiteral("Meaning");
263 list[ QStringLiteral("--Newline--") ] = QStringLiteral("--Newline--");
264
265 return list;
266 }
267
loadListType(KConfigSkeletonItem * item,QStringList * list,const QMap<QString,QString> & long2short)268 QStringList* DictFileEdict::loadListType( KConfigSkeletonItem *item
269 , QStringList *list
270 , const QMap<QString,QString> &long2short )
271 {
272 QStringList listFromItem;
273
274 if( item != nullptr )
275 {
276 listFromItem = item->property().toStringList();
277 }
278
279 if( ! listFromItem.isEmpty() )
280 {
281 delete list;
282
283 list = new QStringList();
284 foreach( const QString &it, listFromItem )
285 {
286 if( long2short.contains( it ) )
287 {
288 list->append( long2short[ it ] );
289 }
290 }
291 }
292
293 return list;
294 }
295
loadSettings()296 void DictFileEdict::loadSettings()
297 {
298 this->displayFields = new QStringList( loadDisplayOptions().values() );
299 }
300
loadSettings(KConfigSkeleton * config)301 void DictFileEdict::loadSettings( KConfigSkeleton *config )
302 {
303 QMap<QString,QString> long2short = displayOptions();
304 long2short[ QStringLiteral("Word/Kanji") ] = QStringLiteral("Word/Kanji");
305 long2short[ QStringLiteral("Reading") ] = QStringLiteral("Reading");
306 long2short[ QStringLiteral("Meaning") ] = QStringLiteral("Meaning");
307 long2short[ QStringLiteral("--Newline--") ] = QStringLiteral("--Newline--");
308
309 KConfigSkeletonItem *item = config->findItem( getType() + "__displayFields" );
310 this->displayFields = loadListType( item, this->displayFields, long2short );
311 }
312
makeEntry(const QString & entry)313 inline Entry* DictFileEdict::makeEntry( const QString &entry )
314 {
315 return new EntryEdict( getName(), entry );
316 }
317
preferencesWidget(KConfigSkeleton * config,QWidget * parent)318 DictionaryPreferenceDialog *DictFileEdict::preferencesWidget( KConfigSkeleton *config, QWidget *parent )
319 {
320 DictFileFieldSelector *dialog = new DictFileFieldSelector( config, getType(), parent );
321 dialog->addAvailable( listDictDisplayOptions( QStringList() ) );
322 return dialog;
323 }
324
325 /**
326 * Scan a potential file for the correct format, remembering to skip comment
327 * characters. This is not a foolproof scan, but it should be checked before adding
328 * a new dictionary.
329 * Valid EDICT format is considered:
330 * \<kanji or kana\>+ [\<kana\>] /latin characters & symbols/separated with slashes/
331 * Comment lines start with... something... not remembering now.
332 */
validDictionaryFile(const QString & filename)333 bool DictFileEdict::validDictionaryFile( const QString &filename )
334 {
335 QFile file( filename );
336 bool returnFlag = true;
337
338 if( ! file.exists() || ! file.open( QIODevice::ReadOnly ) )
339 {
340 return false;
341 }
342
343 //Now we can actually check the file
344 QTextStream fileStream( &file );
345 fileStream.setCodec( QTextCodec::codecForName( "eucJP" ) );
346 QString commentMarker( QStringLiteral("????") ); //Note: Don't touch this! vim seems to have
347 //An odd text codec error here too :(
348 QRegExp formattedLine( "^\\S+\\s+(\\[\\S+\\]\\s+)?/.*/$" );
349 while( ! fileStream.atEnd() )
350 {
351 QString line = fileStream.readLine();
352
353 if( line.left( 4 ) == commentMarker )
354 {
355 continue;
356 }
357 if( line.contains( formattedLine ) ) //If it matches our regex
358 {
359 continue;
360 }
361
362 returnFlag = false;
363 break;
364 }
365
366 file.close();
367 return returnFlag;
368 }
369
370 /**
371 * Reject queries that specify anything we don't understand
372 */
373 //TODO: Actually write this method (validQuery)
validQuery(const DictQuery & query)374 bool DictFileEdict::validQuery( const DictQuery &query )
375 {
376 Q_UNUSED( query );
377 return true;
378 }
379