1 /*
2     This file is part of Kiten, a KDE Japanese Reference Tool
3     SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com>
4     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
5     SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
6     SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
7 
8     SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 
11 #include "dictfileedict.h"
12 
13 #include <KConfig>
14 #include <KConfigSkeleton>
15 
16 #include <QDebug>
17 #include <QFile>
18 #include <QString>
19 #include <QTextCodec>
20 #include <QTextStream>
21 #include <QVector>
22 
23 #include "deinflection.h"
24 #include "dictfilefieldselector.h"
25 #include "dictquery.h"
26 #include "entryedict.h"
27 #include "entrylist.h"
28 #include "kitenmacros.h"
29 
// Class-wide (static) state shared by every DictFileEdict instance.
// All three pointers start out unset.

// Assigned by doSearch() from Deinflection::getDeinflectionLabel() when a
// deinflected search yields a non-empty label; reset to nullptr by searches
// that do not fall back to deinflection.
QString     *DictFileEdict::deinflectionLabel = nullptr;
// Field list assigned by the loadSettings() overloads.
QStringList *DictFileEdict::displayFields = nullptr;
// Assigned by doSearch() from Deinflection::getWordType() together with
// deinflectionLabel, and reset to nullptr alongside it.
QString     *DictFileEdict::wordType = nullptr;
33 
34 /**
35  * Per instructions in the super-class, this constructor basically sets the
36  * dictionaryType member variable to identify this as an edict-type database handler.
37  */
DictFileEdict()38 DictFileEdict::DictFileEdict()
39 : DictFile( EDICT )
40 , m_deinflection( 0 )
41 , m_hasDeinflection( false )
42 {
43   m_dictionaryType = EDICT;
44   m_searchableAttributes.insert( QStringLiteral("common"), QStringLiteral("common") );
45 }
46 
47 /**
48  * The destructor... ditch our memory maps and close our files here
49  * (if they were open).
50  */
~DictFileEdict()51 DictFileEdict::~DictFileEdict()
52 {
53   delete m_deinflection;
54   m_deinflection = nullptr;
55 }
56 
displayOptions() const57 QMap<QString,QString> DictFileEdict::displayOptions() const
58 {
59   QMap<QString,QString> list;
60   list[ QStringLiteral("Part of speech(type)") ] = QStringLiteral("type");
61   return list;
62 }
63 
64 /**
65  * Do a search, respond with a list of entries.
66  * The general strategy will be to take the first word of the query, and do a
67  * binary search on the dictionary for that item. Take all results and filter
68  * them using the rest of the query with the validate method.
69  */
doSearch(const DictQuery & query)70 EntryList *DictFileEdict::doSearch( const DictQuery &query )
71 {
72   if( query.isEmpty() || ! m_edictFile.valid() )	//No query or dict, no results.
73   {
74     return new EntryList();
75   }
76 
77   qDebug()<< "Search from : " << getName();
78 
79   QString firstChoice = query.getWord();
80   if( firstChoice.length() == 0 )
81   {
82     firstChoice = query.getPronunciation();
83     if( firstChoice.length() == 0 )
84     {
85       firstChoice = query.getMeaning().split( ' ' ).first().toLower();
86       if( firstChoice.length() == 0 )
87       {
88         //The nastiest situation... we have to assemble a search string
89         //from the first property
90         QList<QString> keys = query.listPropertyKeys();
91         if( keys.size() == 0 ) //Shouldn't happen... but maybe in the future
92         {
93           return new EntryList();
94         }
95         firstChoice = keys[ 0 ];
96         firstChoice = firstChoice + query.getProperty( firstChoice );
97         //TODO: doSearch: some accommodation for searching for ranges and such of properties
98       }
99     }
100   }
101   else
102   {
103     // Only search for one kanji or the
104     // binary lookup mechanism breaks
105     firstChoice = firstChoice.at( 0 );
106   }
107 
108   QVector<QString> preliminaryResults = m_edictFile.findMatches( firstChoice );
109 
110   if( preliminaryResults.size() == 0 )	//If there were no matches... return an empty list
111   {
112     return new EntryList();
113   }
114 
115   EntryList *results = new EntryList();
116   foreach( const QString &it, preliminaryResults )
117   {
118 //     qDebug() << "result: " << it << endl;
119     Entry *result = makeEntry( it );
120     EntryEdict *resultEdict = static_cast<EntryEdict*>( result );
121     if( result->matchesQuery( query ) && resultEdict->matchesWordType( query ) )
122     {
123       results->append( result );
124     }
125     else
126     {
127       delete result;
128     }
129   }
130 
131   // At this point we should have some preliminary results
132   // and if there were no matches, it probably means the user
133   // input was a verb or adjective, so we have to deinflect it.
134   bool isAnyQuery       = query.getMatchWordType() == DictQuery::Any;
135   bool isVerbQuery      = query.getMatchWordType() == DictQuery::Verb;
136   bool isAdjectiveQuery = query.getMatchWordType() == DictQuery::Adjective;
137   if( results->count() == 0 && ( isAnyQuery || isVerbQuery || isAdjectiveQuery ) )
138   {
139     delete results;
140     results = m_deinflection->search( query, preliminaryResults );
141     QString *label = m_deinflection->getDeinflectionLabel();
142     if( ! label->isEmpty() && ! m_hasDeinflection )
143     {
144       deinflectionLabel = label;
145       m_hasDeinflection = true;
146       wordType = m_deinflection->getWordType();
147     }
148   }
149   else
150   {
151     deinflectionLabel = nullptr;
152     wordType = nullptr;
153     m_hasDeinflection = false;
154   }
155 
156   if( results )
157   {
158     EntryList *common   = new EntryList();
159     EntryList *uncommon = new EntryList();
160     EntryList::EntryIterator i( *results );
161     while( i.hasNext() )
162     {
163       EntryEdict *entry = static_cast<EntryEdict*>( i.next() );
164       if( entry->isCommon() )
165       {
166         common->append( entry );
167       }
168       else
169       {
170         uncommon->append( entry );
171       }
172     }
173 
174     delete results;
175     results = new EntryList();
176     results->appendList( common );
177     results->appendList( uncommon );
178     delete common;
179     delete uncommon;
180 
181     EntryList *exact     = new EntryList();
182     EntryList *beginning = new EntryList();
183     EntryList *ending    = new EntryList();
184     EntryList *anywhere  = new EntryList();
185     EntryList::EntryIterator it( *results );
186     while( it.hasNext() )
187     {
188       Entry *entry = it.next();
189 
190       if( entry->getWord() == query.getWord() )
191       {
192         exact->append( entry );
193       }
194       else if( entry->getWord().startsWith( query.getWord() ) )
195       {
196         beginning->append( entry );
197       }
198       else if( entry->getWord().endsWith( query.getWord() ) )
199       {
200         ending->append( entry );
201       }
202       else
203       {
204         anywhere->append( entry );
205       }
206     }
207 
208     delete results;
209     results = new EntryList();
210     results->appendList( exact );
211     results->appendList( beginning );
212     results->appendList( ending );
213     results->appendList( anywhere );
214     delete exact;
215     delete beginning;
216     delete ending;
217     delete anywhere;
218   }
219 
220   return results;
221 }
222 
223 /**
224  * Make a list of all the extra fields in our db.. Entry uses this to decide
225  * what goes in the interpretations it gives.
226  */
listDictDisplayOptions(QStringList x) const227 QStringList DictFileEdict::listDictDisplayOptions( QStringList x ) const
228 {
229   x += displayOptions().keys();
230   return x;
231 }
232 
233 /**
234  * Load up the dictionary
235  */
loadDictionary(const QString & fileName,const QString & dictName)236 bool DictFileEdict::loadDictionary( const QString &fileName, const QString &dictName )
237 {
238   if( m_edictFile.valid() )
239   {
240     return false; //Already loaded
241   }
242 
243   if( m_edictFile.loadFile( fileName ) )
244   {
245     m_dictionaryName = dictName;
246     m_dictionaryFile = fileName;
247 
248     m_deinflection = new Deinflection( m_dictionaryName );
249     m_deinflection->load();
250 
251     return true;
252   }
253 
254   return false;
255 }
256 
loadDisplayOptions() const257 QMap<QString,QString> DictFileEdict::loadDisplayOptions() const
258 {
259   QMap<QString,QString> list = displayOptions();
260   list[ QStringLiteral("Word/Kanji") ]  = QStringLiteral("Word/Kanji");
261   list[ QStringLiteral("Reading") ]     = QStringLiteral("Reading");
262   list[ QStringLiteral("Meaning") ]     = QStringLiteral("Meaning");
263   list[ QStringLiteral("--Newline--") ] = QStringLiteral("--Newline--");
264 
265   return list;
266 }
267 
loadListType(KConfigSkeletonItem * item,QStringList * list,const QMap<QString,QString> & long2short)268 QStringList* DictFileEdict::loadListType(  KConfigSkeletonItem *item
269                                          , QStringList *list
270                                          , const QMap<QString,QString> &long2short )
271 {
272   QStringList listFromItem;
273 
274   if( item != nullptr )
275   {
276     listFromItem = item->property().toStringList();
277   }
278 
279   if( ! listFromItem.isEmpty() )
280   {
281     delete list;
282 
283     list = new QStringList();
284     foreach( const QString &it, listFromItem )
285     {
286       if( long2short.contains( it ) )
287       {
288         list->append( long2short[ it ] );
289       }
290     }
291   }
292 
293   return list;
294 }
295 
loadSettings()296 void DictFileEdict::loadSettings()
297 {
298   this->displayFields = new QStringList( loadDisplayOptions().values() );
299 }
300 
loadSettings(KConfigSkeleton * config)301 void DictFileEdict::loadSettings( KConfigSkeleton *config )
302 {
303   QMap<QString,QString> long2short = displayOptions();
304   long2short[ QStringLiteral("Word/Kanji") ]  = QStringLiteral("Word/Kanji");
305   long2short[ QStringLiteral("Reading") ]     = QStringLiteral("Reading");
306   long2short[ QStringLiteral("Meaning") ]     = QStringLiteral("Meaning");
307   long2short[ QStringLiteral("--Newline--") ] = QStringLiteral("--Newline--");
308 
309   KConfigSkeletonItem *item = config->findItem( getType() + "__displayFields" );
310   this->displayFields = loadListType( item, this->displayFields, long2short );
311 }
312 
makeEntry(const QString & entry)313 inline Entry* DictFileEdict::makeEntry( const QString &entry )
314 {
315   return new EntryEdict( getName(), entry );
316 }
317 
preferencesWidget(KConfigSkeleton * config,QWidget * parent)318 DictionaryPreferenceDialog *DictFileEdict::preferencesWidget( KConfigSkeleton *config, QWidget *parent )
319 {
320   DictFileFieldSelector *dialog = new DictFileFieldSelector( config, getType(), parent );
321   dialog->addAvailable( listDictDisplayOptions( QStringList() ) );
322   return dialog;
323 }
324 
325 /**
326  * Scan a potential file for the correct format, remembering to skip comment
327  * characters. This is not a foolproof scan, but it should be checked before adding
328  * a new dictionary.
329  * Valid EDICT format is considered:
330  * \<kanji or kana\>+ [\<kana\>] /latin characters & symbols/separated with slashes/
331  * Comment lines start with... something... not remembering now.
332  */
validDictionaryFile(const QString & filename)333 bool DictFileEdict::validDictionaryFile( const QString &filename )
334 {
335   QFile file( filename );
336   bool returnFlag = true;
337 
338   if( ! file.exists() || ! file.open( QIODevice::ReadOnly ) )
339   {
340     return false;
341   }
342 
343   //Now we can actually check the file
344   QTextStream fileStream( &file );
345   fileStream.setCodec( QTextCodec::codecForName( "eucJP" ) );
346   QString commentMarker( QStringLiteral("????") ); //Note: Don't touch this! vim seems to have
347                                       //An odd text codec error here too :(
348   QRegExp formattedLine( "^\\S+\\s+(\\[\\S+\\]\\s+)?/.*/$" );
349   while( ! fileStream.atEnd() )
350   {
351     QString line = fileStream.readLine();
352 
353     if( line.left( 4 ) == commentMarker )
354     {
355       continue;
356     }
357     if( line.contains( formattedLine ) ) //If it matches our regex
358     {
359       continue;
360     }
361 
362     returnFlag = false;
363     break;
364   }
365 
366   file.close();
367   return returnFlag;
368 }
369 
370 /**
371  * Reject queries that specify anything we don't understand
372  */
373 //TODO: Actually write this method (validQuery)
validQuery(const DictQuery & query)374 bool DictFileEdict::validQuery( const DictQuery &query )
375 {
376   Q_UNUSED( query );
377   return true;
378 }
379