1 /*
2     This file is part of Kiten, a KDE Japanese Reference Tool
3     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
4     SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
5     SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
6 
7     SPDX-License-Identifier: LGPL-2.0-or-later
8 */
9 
10 #include "entryedict.h"
11 
12 #include "dictfileedict.h"
13 #include "kitenmacros.h"
14 
15 #include <QDebug>
16 #include <KLocalizedString>
17 
// Yields a QStringList by value from a QStringList pointer expression:
// an empty list when the pointer is null, otherwise a copy of the pointee.
// The argument is parenthesized so that compound expressions expand safely.
#define QSTRINGLISTCHECK(x) ((x) == nullptr ? QStringList() : *(x))
19 
EntryEdict(const QString & dict)20 EntryEdict::EntryEdict( const QString &dict )
21 : Entry( dict )
22 {
23 }
24 
EntryEdict(const QString & dict,const QString & entry)25 EntryEdict::EntryEdict( const QString &dict, const QString &entry )
26 : Entry( dict )
27 {
28   loadEntry( entry );
29 }
30 
clone() const31 Entry* EntryEdict::clone() const
32 {
33   return new EntryEdict( *this );
34 }
35 
36 /**
37  * Regenerate a QString like the one we got in loadEntry()
38  */
dumpEntry() const39 QString EntryEdict::dumpEntry() const
40 {
41   QString readings = QString( Readings.isEmpty() ? QStringLiteral(" ") : " [" + Readings.first() + "] " );
42 
43   return QStringLiteral( "%1%2/%3/" ).arg( Word )
44                               .arg( readings )
45                               .arg( Meanings.join( QLatin1Char('/') ) );
46 }
47 
getDictionaryType() const48 QString EntryEdict::getDictionaryType() const
49 {
50   return EDICT;
51 }
52 
getTypes() const53 QString EntryEdict::getTypes() const
54 {
55   return m_types.join( outputListDelimiter );
56 }
57 
getTypesList() const58 QStringList EntryEdict::getTypesList() const
59 {
60   return m_types;
61 }
62 
isAdjective() const63 bool EntryEdict::isAdjective() const
64 {
65   foreach( const QString &type, EdictFormatting::Adjectives )
66   {
67     if( m_types.contains( type ) )
68     {
69       return true;
70     }
71   }
72 
73   return false;
74 }
75 
isAdverb() const76 bool EntryEdict::isAdverb() const
77 {
78   foreach( const QString &type, EdictFormatting::Adverbs )
79   {
80     if( m_types.contains( type ) )
81     {
82       return true;
83     }
84   }
85 
86   return false;
87 }
88 
isCommon() const89 bool EntryEdict::isCommon() const
90 {
91   return getExtendedInfoItem( QStringLiteral( "common" ) ) == QLatin1Char('1');
92 }
93 
isExpression() const94 bool EntryEdict::isExpression() const
95 {
96   foreach( const QString &type, EdictFormatting::Expressions )
97   {
98     if( m_types.contains( type ) )
99     {
100       return true;
101     }
102   }
103 
104   return false;
105 }
106 
isFukisokuVerb() const107 bool EntryEdict::isFukisokuVerb() const
108 {
109   foreach( const QString &type, EdictFormatting::FukisokuVerbs )
110   {
111     if( m_types.contains( type ) )
112     {
113       return true;
114     }
115   }
116 
117   return false;
118 }
119 
isGodanVerb() const120 bool EntryEdict::isGodanVerb() const
121 {
122   foreach( const QString &type, EdictFormatting::GodanVerbs )
123   {
124     if( m_types.contains( type ) )
125     {
126       return true;
127     }
128   }
129 
130   return false;
131 }
132 
isIchidanVerb() const133 bool EntryEdict::isIchidanVerb() const
134 {
135   foreach( const QString &type, EdictFormatting::IchidanVerbs )
136   {
137     if( m_types.contains( type ) )
138     {
139       return true;
140     }
141   }
142 
143   return false;
144 }
145 
isNoun() const146 bool EntryEdict::isNoun() const
147 {
148   foreach( const QString &type, EdictFormatting::Nouns )
149   {
150     if( m_types.contains( type ) )
151     {
152       return true;
153     }
154   }
155 
156   return false;
157 }
158 
isParticle() const159 bool EntryEdict::isParticle() const
160 {
161   return m_types.contains( EdictFormatting::Particle );
162 }
163 
isPrefix() const164 bool EntryEdict::isPrefix() const
165 {
166   foreach( const QString &type, EdictFormatting::Prefix )
167   {
168     if( m_types.contains( type ) )
169     {
170       return true;
171     }
172   }
173 
174   return false;
175 }
176 
isSuffix() const177 bool EntryEdict::isSuffix() const
178 {
179   foreach( const QString &type, EdictFormatting::Suffix )
180   {
181     if( m_types.contains( type ) )
182     {
183       return true;
184     }
185   }
186 
187   return false;
188 }
189 
isVerb() const190 bool EntryEdict::isVerb() const
191 {
192   foreach( const QString &type, EdictFormatting::Verbs )
193   {
194     if( m_types.contains( type ) )
195     {
196       return true;
197     }
198   }
199 
200   return false;
201 }
202 
HTMLWord() const203 QString EntryEdict::HTMLWord() const
204 {
205   return QStringLiteral( "<span class=\"Word\">%1</span>" )
206              .arg( Word.isEmpty() ? kanjiLinkify( Meanings.first() ) : kanjiLinkify( Word ) );
207 }
208 
209 /**
210  * Makes a link out of each kanji in @param inString
211  */
kanjiLinkify(const QString & inString) const212 QString EntryEdict::kanjiLinkify( const QString &inString ) const
213 {
214   QString outString;
215 
216   for( int i = 0; i < inString.length(); i++ )
217   {
218     if( isKanji( inString.at( i ) ) )
219     {
220       outString += makeLink( QString( inString.at( i ) ) );
221     }
222     else
223     {
224       outString += inString.at( i );
225     }
226   }
227 
228   return outString;
229 }
230 
231 /**
232  * Take a QString and load it into the Entry as appropriate
233  * The format is basically: KANJI [KANA] /(general information) gloss/gloss/.../
234  * Note that they can rudely place more (general information) in gloss's that are
235  * not the first one.
236  */
loadEntry(const QString & entryLine)237 bool EntryEdict::loadEntry( const QString &entryLine )
238 {
239   /* Set tempQString to be the reading and word portion of the entryLine */
240   int endOfKanjiAndKanaSection = entryLine.indexOf( '/' );
241   if( endOfKanjiAndKanaSection == -1 )
242   {
243     return false;
244   }
245   QString tempQString = entryLine.left( endOfKanjiAndKanaSection );
246   /* The actual Word is the beginning of the line */
247   int endOfKanji = tempQString.indexOf( ' ' );
248   if( endOfKanji == -1 )
249   {
250     return false;
251   }
252   Word = tempQString.left( endOfKanji );
253 
254   /* The Reading is either Word or encased in '[' */
255   Readings.clear();
256   int startOfReading = tempQString.indexOf( '[' );
257   if( startOfReading != -1 )  // This field is optional for EDICT (and kiten)
258   {
259     Readings.append( tempQString.left( tempQString.lastIndexOf( ']' ) ).mid( startOfReading + 1 ) );
260   }
261   /* TODO: use this code or not?
262   * app does not handle only reading and no word entries
263   * very well so far
264   else
265   {
266     Readings.append(Word);
267     Word.clear();
268   }
269   */
270 
271   /* set Meanings to be all of the meanings in the definition */
272   QString remainingLine = entryLine.mid( endOfKanjiAndKanaSection );
273   //Trim to last '/'
274   remainingLine = remainingLine.left( remainingLine.lastIndexOf( '/' ) );
275 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
276   Meanings = remainingLine.split( '/', QString::SkipEmptyParts );
277 #else
278   Meanings = remainingLine.split( '/', Qt::SkipEmptyParts );
279 #endif
280 
281   if( Meanings.size() == 0 )
282   {
283     return false;
284   }
285 
286   if( Meanings.last() == QLatin1String("(P)") )
287   {
288     ExtendedInfo[ QStringLiteral( "common" ) ] = QStringLiteral("1");
289     Meanings.removeLast();
290   }
291 
292   QString firstWord = Meanings.first();
293   QStringList stringTypes;
294 
295   //Pulls the various types out
296   //TODO: Remove them from the original string
297   for ( int i = firstWord.indexOf( QLatin1Char('(') );
298         i != -1;
299         i = firstWord.indexOf( QLatin1Char('('), i + 1 ) )
300   {
301     QString parentheses = firstWord.mid( i + 1, firstWord.indexOf( QLatin1Char(')'), i ) - i - 1 );
302     stringTypes += parentheses.split( ',' );
303   }
304 
305   foreach( const QString &str, stringTypes )
306   {
307     if( EdictFormatting::PartsOfSpeech.contains( str ) )
308     {
309       m_types += str;
310     }
311     else if( EdictFormatting::FieldOfApplication.contains( str ) )
312     {
313       ExtendedInfo[ QStringLiteral("field") ] = str;
314     }
315     else if( EdictFormatting::MiscMarkings.contains( str ) )
316     {
317       m_miscMarkings += str;
318     }
319   }
320 
321   return true;
322 }
323 
matchesWordType(const DictQuery & query) const324 bool EntryEdict::matchesWordType( const DictQuery &query ) const
325 {
326   if( ! query.isEmpty() )
327   {
328     if( query.getMatchWordType() == DictQuery::Verb
329         && isVerb() )
330     {
331       return true;
332     }
333     if( query.getMatchWordType() == DictQuery::Noun
334         && isNoun() )
335     {
336       return true;
337     }
338     if( query.getMatchWordType() == DictQuery::Adjective
339         && isAdjective() )
340     {
341       return true;
342     }
343     if( query.getMatchWordType() == DictQuery::Adverb
344         && isAdverb() )
345     {
346       return true;
347     }
348     if( query.getMatchWordType() == DictQuery::Expression
349         && isExpression() )
350     {
351       return true;
352     }
353     if( query.getMatchWordType() == DictQuery::Prefix
354         && isPrefix() )
355     {
356       return true;
357     }
358     if( query.getMatchWordType() == DictQuery::Suffix
359         && isSuffix() )
360     {
361       return true;
362     }
363     if( query.getMatchWordType() == DictQuery::Any )
364     {
365       return true;
366     }
367   }
368 
369   return false;
370 }
371 
372 /**
373  * Returns a HTML version of an Entry
374  */
toHTML() const375 QString EntryEdict::toHTML() const
376 {
377   QString result = QStringLiteral( "<div class=\"%1\">" ).arg( EDICT.toUpper() );
378   if( isCommon() )
379   {
380     result += QLatin1String("<div class=\"Common\">");
381   }
382 
383   foreach( const QString &field, QSTRINGLISTCHECK( DictFileEdict::displayFields ) )
384   {
385     if( field == QLatin1String("--NewLine--") )		 result += QLatin1String("<br>");
386     else if( field == QLatin1String("Word/Kanji") ) result += HTMLWord()+' ';
387     else if( field == QLatin1String("Meaning") )		 result += HTMLMeanings()+' ';
388     else if( field == QLatin1String("Reading") )		 result += HTMLReadings()+' ';
389     else qDebug() << "Unknown field: " << field;
390   }
391 
392   if( isCommon() )
393   {
394     result += QLatin1String("</div>");
395   }
396 
397   result += QLatin1String("</div>");
398   return result;
399 }
400 
401 
402 
403 #ifdef KITEN_EDICTFORMATTING
404 
405 /**
406  * The basic idea of this is to provide a mapping from possible entry types to
407  * possible things the user could enter. Then our code for the matching entry can simply
408  * use this mapping to determine if a given entry could be understood to match the user's input.
409  *
410  * There are two basic approaches we could take:
411  *   Convert the user's entry into a list of types, see if the Entry type matches any of
412  *           the conversions from this list (the list comparisons will be MANY enums).
413  *   Convert our Entry types to a list of acceptable string aliases. Then compare the
414  *           user's input to this list (the list will be a relatively small list of strings).
415  *
416  * My gut instinct is that the first case (comparison of a largish list of ints) will be
417  * faster, and so that's the one that's implemented here.
418  *
419  * The following are the minimum list of case-insensitive aliases that the user could enter:
420  *   noun
421  *   verb:
422  *     ichidan
423  *     godan
424  *   adjective
425  *   adverb
426  *   particle
427  *
428  * Note that our File Parser will also expand to general cases, if not included already:
429  * For Example: v5aru -> v5aru,v5 (so that a search for "godan" will find it)
430  */
431 namespace EdictFormatting
432 {
433   enum WordType
434   {
435     noun,
436     verb,
437     adjective,
438     adverb,
439     particle,
440     ichidanVerb,
441     godanVerb,
442     fukisokuVerb,
443     expression,
444     idiomaticExpression,
445     prefix,
446     suffix,
447     nounPrefix,
448     nounSuffix
449   };
450 
451   // Forward declarations of our functions to be used.
452   QMultiHash<WordType, QString> createPartOfSpeechCategories();
453   QSet<QString>                createPartsOfSpeech();
454   QSet<QString>                createMiscMarkings();
455   QSet<QString>                createFieldOfApplication();
456   QStringList                  createNounsList();
457   QStringList                  createVerbsList();
458   QStringList                  createExpressionsList();
459   QStringList                  createPrefixesList();
460   QStringList                  createSuffixesList();
461 
462   // Define our public variables.
463   QMultiHash<WordType, QString> PartOfSpeechCategories = createPartOfSpeechCategories();
464   QSet<QString> PartsOfSpeech      = createPartsOfSpeech();
465   QSet<QString> MiscMarkings       = createMiscMarkings();
466   QSet<QString> FieldOfApplication = createFieldOfApplication();
467 
468   // PartOfSpeechCategories needs to has some values before this line.
469   QStringList Nouns         = createNounsList();
470   QStringList Adjectives    = PartOfSpeechCategories.values( adjective );
471   QStringList Adverbs       = PartOfSpeechCategories.values( adverb );
472   QStringList IchidanVerbs  = PartOfSpeechCategories.values( ichidanVerb );
473   QStringList GodanVerbs    = PartOfSpeechCategories.values( godanVerb );
474   QStringList FukisokuVerbs = PartOfSpeechCategories.values( fukisokuVerb );
475   QStringList Verbs         = createVerbsList();
476   QStringList Expressions   = createExpressionsList();
477   QStringList Prefix        = createPrefixesList();
478   QStringList Suffix        = createSuffixesList();
479   QString     Particle      = PartOfSpeechCategories.value( particle );
480 
481 
482 
createNounsList()483   QStringList createNounsList()
484   {
485     QStringList list;
486     list.append( PartOfSpeechCategories.values( noun ) );
487     list.append( PartOfSpeechCategories.values( nounPrefix ) );
488     list.append( PartOfSpeechCategories.values( nounSuffix ) );
489     return list;
490   }
491 
createVerbsList()492   QStringList createVerbsList()
493   {
494     QStringList list;
495     list.append( PartOfSpeechCategories.values( verb ) );
496     list.append( IchidanVerbs );
497     list.append( GodanVerbs );
498     list.append( FukisokuVerbs );
499     return list;
500   }
501 
createExpressionsList()502   QStringList createExpressionsList()
503   {
504     QStringList list;
505     list.append( PartOfSpeechCategories.values( expression ) );
506     list.append( PartOfSpeechCategories.values( idiomaticExpression ) );
507     return list;
508   }
509 
createPrefixesList()510   QStringList createPrefixesList()
511   {
512     QStringList list;
513     list.append( PartOfSpeechCategories.values( prefix ) );
514     list.append( PartOfSpeechCategories.values( nounPrefix ) );
515     return list;
516   }
517 
createSuffixesList()518   QStringList createSuffixesList()
519   {
520     QStringList list;
521     list.append( PartOfSpeechCategories.values( suffix ) );
522     list.append( PartOfSpeechCategories.values( nounSuffix ) );
523     return list;
524   }
525 
createPartOfSpeechCategories()526   QMultiHash<WordType, QString> createPartOfSpeechCategories()
527   {
528     QMultiHash<WordType, QString> categories;
529 
530     // Nouns
531     categories.insert( noun, QStringLiteral("n") );
532     categories.insert( noun, QStringLiteral("n-adv") );
533     categories.insert( noun, QStringLiteral("n-t") );
534     categories.insert( noun, QStringLiteral("adv-n") );
535 
536     // Noun (used as a prefix)
537     categories.insert( nounPrefix, QStringLiteral("n-pref") );
538 
539     // Noun (used as a suffix)
540     categories.insert( nounSuffix, QStringLiteral("n-suf") );
541 
542     // Ichidan Verbs
543     categories.insert( ichidanVerb, QStringLiteral("v1") );
544     categories.insert( ichidanVerb, QStringLiteral("vz") );
545 
546     // Godan Verbs
547     categories.insert( godanVerb, QStringLiteral("v5") );
548     categories.insert( godanVerb, QStringLiteral("v5aru") );
549     categories.insert( godanVerb, QStringLiteral("v5b") );
550     categories.insert( godanVerb, QStringLiteral("v5g") );
551     categories.insert( godanVerb, QStringLiteral("v5k") );
552     categories.insert( godanVerb, QStringLiteral("v5k-s") );
553     categories.insert( godanVerb, QStringLiteral("v5m") );
554     categories.insert( godanVerb, QStringLiteral("v5n") );
555     categories.insert( godanVerb, QStringLiteral("v5r") );
556     categories.insert( godanVerb, QStringLiteral("v5r-i") );
557     categories.insert( godanVerb, QStringLiteral("v5s") );
558     categories.insert( godanVerb, QStringLiteral("v5t") );
559     categories.insert( godanVerb, QStringLiteral("v5u") );
560     categories.insert( godanVerb, QStringLiteral("v5u-s") );
561     categories.insert( godanVerb, QStringLiteral("v5uru") );
562     categories.insert( godanVerb, QStringLiteral("v5z") );
563 
564     // Fukisoku verbs
565     categories.insert( fukisokuVerb, QStringLiteral("iv") );
566     categories.insert( fukisokuVerb, QStringLiteral("vk") );
567     categories.insert( fukisokuVerb, QStringLiteral("vn") );
568     categories.insert( fukisokuVerb, QStringLiteral("vs-i") );
569     categories.insert( fukisokuVerb, QStringLiteral("vs-s") );
570 
571     // Other Verbs
572     categories.insert( verb, QStringLiteral("vi") );
573     categories.insert( verb, QStringLiteral("vs") );
574     categories.insert( verb, QStringLiteral("vt") );
575     categories.insert( verb, QStringLiteral("aux-v") );
576 
577     // Adjectives
578     categories.insert( adjective, QStringLiteral("adj-i") );
579     categories.insert( adjective, QStringLiteral("adj-na") );
580     categories.insert( adjective, QStringLiteral("adj-no") );
581     categories.insert( adjective, QStringLiteral("adj-pn") );
582     categories.insert( adjective, QStringLiteral("adj-t") );
583     categories.insert( adjective, QStringLiteral("adj-f") );
584     categories.insert( adjective, QStringLiteral("adj") );
585     categories.insert( adjective, QStringLiteral("aux-adj") );
586 
587     // Adverbs
588     categories.insert( adverb, QStringLiteral("adv") );
589     categories.insert( adverb, QStringLiteral("adv-n") );
590     categories.insert( adverb, QStringLiteral("adv-to") );
591 
592     // Particle
593     categories.insert( particle, QStringLiteral("prt") );
594 
595     // Expression
596     categories.insert( expression, QStringLiteral("exp") );
597 
598     // Idiomatic expression
599     categories.insert( idiomaticExpression, QStringLiteral("id") );
600 
601     // Prefix
602     categories.insert( prefix, QStringLiteral("pref") );
603 
604     // Suffix
605     categories.insert( suffix, QStringLiteral("suf") );
606 
607     return categories;
608   }
609 
createPartsOfSpeech()610   QSet<QString> createPartsOfSpeech()
611   {
612     QSet<QString> category;
613 
614     category << QStringLiteral("adj-i") << QStringLiteral("adj-na") << QStringLiteral("adj-no") << QStringLiteral("adj-pn") << QStringLiteral("adj-t") << QStringLiteral("adj-f")
615              << QStringLiteral("adj") << QStringLiteral("adv") << QStringLiteral("adv-n") << QStringLiteral("adv-to") << QStringLiteral("aux") << QStringLiteral("aux-v")
616              << QStringLiteral("aux-adj") << QStringLiteral("conj") << QStringLiteral("ctr") << QStringLiteral("exp") << QStringLiteral("id") << QStringLiteral("int")
617              << QStringLiteral("iv") << QStringLiteral("n") << QStringLiteral("n-adv") << QStringLiteral("n-pref") << QStringLiteral("n-suf") << QStringLiteral("n-t")
618              << QStringLiteral("num") << QStringLiteral("pn") << QStringLiteral("pref") << QStringLiteral("prt") << QStringLiteral("suf") << QStringLiteral("v1")
619              << QStringLiteral("v5") << QStringLiteral("v5aru") << QStringLiteral("v5b") << QStringLiteral("v5g") << QStringLiteral("v5k") << QStringLiteral("v5k-s")
620              << QStringLiteral("v5m") << QStringLiteral("v5n") << QStringLiteral("v5r") << QStringLiteral("v5r-i") <<  QStringLiteral("v5s") << QStringLiteral("v5t")
621              << QStringLiteral("v5u") << QStringLiteral("v5u-s") << QStringLiteral("v5uru") << QStringLiteral("v5z") << QStringLiteral("vz") << QStringLiteral("vi")
622              << QStringLiteral("vk") << QStringLiteral("vn") << QStringLiteral("vs") << QStringLiteral("vs-i") << QStringLiteral("vs-s") << QStringLiteral("vt");
623 
624     return category;
625   }
626 
createFieldOfApplication()627   QSet<QString> createFieldOfApplication()
628   {
629     QSet<QString> category;
630 
631     // Field of Application terms
632     category << QStringLiteral("Buddh") << QStringLiteral("MA")   << QStringLiteral("comp") << QStringLiteral("food") << QStringLiteral("geom")
633              << QStringLiteral("ling")  << QStringLiteral("math") << QStringLiteral("mil")  << QStringLiteral("physics");
634 
635     return category;
636   }
637 
createMiscMarkings()638   QSet<QString> createMiscMarkings()
639   {
640     QSet<QString> category;
641 
642     // Miscellaneous Markings (in EDICT terms)
643     category << QStringLiteral("X")    << QStringLiteral("abbr") << QStringLiteral("arch") << QStringLiteral("ateji")   << QStringLiteral("chn")   << QStringLiteral("col") << QStringLiteral("derog")
644              << QStringLiteral("eK")   << QStringLiteral("ek")   << QStringLiteral("fam")  << QStringLiteral("fem")     << QStringLiteral("gikun") << QStringLiteral("hon") << QStringLiteral("hum") << QStringLiteral("iK")   << QStringLiteral("id")
645              << QStringLiteral("io")   << QStringLiteral("m-sl") << QStringLiteral("male") << QStringLiteral("male-sl") << QStringLiteral("ng")    << QStringLiteral("oK")  << QStringLiteral("obs") << QStringLiteral("obsc") << QStringLiteral("ok")
646              << QStringLiteral("poet") << QStringLiteral("pol")  << QStringLiteral("rare") << QStringLiteral("sens")    << QStringLiteral("sl")    << QStringLiteral("uK")  << QStringLiteral("uk")  << QStringLiteral("vulg");
647 
648     return category;
649   }
650 }
651 
652 #endif
653