1 /*
2 This file is part of Kiten, a KDE Japanese Reference Tool
3 SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
4 SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
5 SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
6
7 SPDX-License-Identifier: LGPL-2.0-or-later
8 */
9
10 #include "entryedict.h"
11
12 #include "dictfileedict.h"
13 #include "kitenmacros.h"
14
15 #include <QDebug>
16 #include <KLocalizedString>
17
/*
 * Evaluates to a copy of the QStringList that x points to, or to an empty
 * QStringList when x is a null pointer.
 *
 * The argument is parenthesised so that compound expressions expand safely.
 * Note that x is evaluated twice, so it must not have side effects.
 */
#define QSTRINGLISTCHECK(x) ( (x) == nullptr ? QStringList() : *(x) )
19
EntryEdict(const QString & dict)20 EntryEdict::EntryEdict( const QString &dict )
21 : Entry( dict )
22 {
23 }
24
EntryEdict(const QString & dict,const QString & entry)25 EntryEdict::EntryEdict( const QString &dict, const QString &entry )
26 : Entry( dict )
27 {
28 loadEntry( entry );
29 }
30
clone() const31 Entry* EntryEdict::clone() const
32 {
33 return new EntryEdict( *this );
34 }
35
36 /**
37 * Regenerate a QString like the one we got in loadEntry()
38 */
dumpEntry() const39 QString EntryEdict::dumpEntry() const
40 {
41 QString readings = QString( Readings.isEmpty() ? QStringLiteral(" ") : " [" + Readings.first() + "] " );
42
43 return QStringLiteral( "%1%2/%3/" ).arg( Word )
44 .arg( readings )
45 .arg( Meanings.join( QLatin1Char('/') ) );
46 }
47
getDictionaryType() const48 QString EntryEdict::getDictionaryType() const
49 {
50 return EDICT;
51 }
52
getTypes() const53 QString EntryEdict::getTypes() const
54 {
55 return m_types.join( outputListDelimiter );
56 }
57
getTypesList() const58 QStringList EntryEdict::getTypesList() const
59 {
60 return m_types;
61 }
62
isAdjective() const63 bool EntryEdict::isAdjective() const
64 {
65 foreach( const QString &type, EdictFormatting::Adjectives )
66 {
67 if( m_types.contains( type ) )
68 {
69 return true;
70 }
71 }
72
73 return false;
74 }
75
isAdverb() const76 bool EntryEdict::isAdverb() const
77 {
78 foreach( const QString &type, EdictFormatting::Adverbs )
79 {
80 if( m_types.contains( type ) )
81 {
82 return true;
83 }
84 }
85
86 return false;
87 }
88
isCommon() const89 bool EntryEdict::isCommon() const
90 {
91 return getExtendedInfoItem( QStringLiteral( "common" ) ) == QLatin1Char('1');
92 }
93
isExpression() const94 bool EntryEdict::isExpression() const
95 {
96 foreach( const QString &type, EdictFormatting::Expressions )
97 {
98 if( m_types.contains( type ) )
99 {
100 return true;
101 }
102 }
103
104 return false;
105 }
106
isFukisokuVerb() const107 bool EntryEdict::isFukisokuVerb() const
108 {
109 foreach( const QString &type, EdictFormatting::FukisokuVerbs )
110 {
111 if( m_types.contains( type ) )
112 {
113 return true;
114 }
115 }
116
117 return false;
118 }
119
isGodanVerb() const120 bool EntryEdict::isGodanVerb() const
121 {
122 foreach( const QString &type, EdictFormatting::GodanVerbs )
123 {
124 if( m_types.contains( type ) )
125 {
126 return true;
127 }
128 }
129
130 return false;
131 }
132
isIchidanVerb() const133 bool EntryEdict::isIchidanVerb() const
134 {
135 foreach( const QString &type, EdictFormatting::IchidanVerbs )
136 {
137 if( m_types.contains( type ) )
138 {
139 return true;
140 }
141 }
142
143 return false;
144 }
145
isNoun() const146 bool EntryEdict::isNoun() const
147 {
148 foreach( const QString &type, EdictFormatting::Nouns )
149 {
150 if( m_types.contains( type ) )
151 {
152 return true;
153 }
154 }
155
156 return false;
157 }
158
isParticle() const159 bool EntryEdict::isParticle() const
160 {
161 return m_types.contains( EdictFormatting::Particle );
162 }
163
isPrefix() const164 bool EntryEdict::isPrefix() const
165 {
166 foreach( const QString &type, EdictFormatting::Prefix )
167 {
168 if( m_types.contains( type ) )
169 {
170 return true;
171 }
172 }
173
174 return false;
175 }
176
isSuffix() const177 bool EntryEdict::isSuffix() const
178 {
179 foreach( const QString &type, EdictFormatting::Suffix )
180 {
181 if( m_types.contains( type ) )
182 {
183 return true;
184 }
185 }
186
187 return false;
188 }
189
isVerb() const190 bool EntryEdict::isVerb() const
191 {
192 foreach( const QString &type, EdictFormatting::Verbs )
193 {
194 if( m_types.contains( type ) )
195 {
196 return true;
197 }
198 }
199
200 return false;
201 }
202
HTMLWord() const203 QString EntryEdict::HTMLWord() const
204 {
205 return QStringLiteral( "<span class=\"Word\">%1</span>" )
206 .arg( Word.isEmpty() ? kanjiLinkify( Meanings.first() ) : kanjiLinkify( Word ) );
207 }
208
209 /**
210 * Makes a link out of each kanji in @param inString
211 */
kanjiLinkify(const QString & inString) const212 QString EntryEdict::kanjiLinkify( const QString &inString ) const
213 {
214 QString outString;
215
216 for( int i = 0; i < inString.length(); i++ )
217 {
218 if( isKanji( inString.at( i ) ) )
219 {
220 outString += makeLink( QString( inString.at( i ) ) );
221 }
222 else
223 {
224 outString += inString.at( i );
225 }
226 }
227
228 return outString;
229 }
230
231 /**
232 * Take a QString and load it into the Entry as appropriate
233 * The format is basically: KANJI [KANA] /(general information) gloss/gloss/.../
234 * Note that they can rudely place more (general information) in gloss's that are
235 * not the first one.
236 */
loadEntry(const QString & entryLine)237 bool EntryEdict::loadEntry( const QString &entryLine )
238 {
239 /* Set tempQString to be the reading and word portion of the entryLine */
240 int endOfKanjiAndKanaSection = entryLine.indexOf( '/' );
241 if( endOfKanjiAndKanaSection == -1 )
242 {
243 return false;
244 }
245 QString tempQString = entryLine.left( endOfKanjiAndKanaSection );
246 /* The actual Word is the beginning of the line */
247 int endOfKanji = tempQString.indexOf( ' ' );
248 if( endOfKanji == -1 )
249 {
250 return false;
251 }
252 Word = tempQString.left( endOfKanji );
253
254 /* The Reading is either Word or encased in '[' */
255 Readings.clear();
256 int startOfReading = tempQString.indexOf( '[' );
257 if( startOfReading != -1 ) // This field is optional for EDICT (and kiten)
258 {
259 Readings.append( tempQString.left( tempQString.lastIndexOf( ']' ) ).mid( startOfReading + 1 ) );
260 }
261 /* TODO: use this code or not?
262 * app does not handle only reading and no word entries
263 * very well so far
264 else
265 {
266 Readings.append(Word);
267 Word.clear();
268 }
269 */
270
271 /* set Meanings to be all of the meanings in the definition */
272 QString remainingLine = entryLine.mid( endOfKanjiAndKanaSection );
273 //Trim to last '/'
274 remainingLine = remainingLine.left( remainingLine.lastIndexOf( '/' ) );
275 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
276 Meanings = remainingLine.split( '/', QString::SkipEmptyParts );
277 #else
278 Meanings = remainingLine.split( '/', Qt::SkipEmptyParts );
279 #endif
280
281 if( Meanings.size() == 0 )
282 {
283 return false;
284 }
285
286 if( Meanings.last() == QLatin1String("(P)") )
287 {
288 ExtendedInfo[ QStringLiteral( "common" ) ] = QStringLiteral("1");
289 Meanings.removeLast();
290 }
291
292 QString firstWord = Meanings.first();
293 QStringList stringTypes;
294
295 //Pulls the various types out
296 //TODO: Remove them from the original string
297 for ( int i = firstWord.indexOf( QLatin1Char('(') );
298 i != -1;
299 i = firstWord.indexOf( QLatin1Char('('), i + 1 ) )
300 {
301 QString parentheses = firstWord.mid( i + 1, firstWord.indexOf( QLatin1Char(')'), i ) - i - 1 );
302 stringTypes += parentheses.split( ',' );
303 }
304
305 foreach( const QString &str, stringTypes )
306 {
307 if( EdictFormatting::PartsOfSpeech.contains( str ) )
308 {
309 m_types += str;
310 }
311 else if( EdictFormatting::FieldOfApplication.contains( str ) )
312 {
313 ExtendedInfo[ QStringLiteral("field") ] = str;
314 }
315 else if( EdictFormatting::MiscMarkings.contains( str ) )
316 {
317 m_miscMarkings += str;
318 }
319 }
320
321 return true;
322 }
323
matchesWordType(const DictQuery & query) const324 bool EntryEdict::matchesWordType( const DictQuery &query ) const
325 {
326 if( ! query.isEmpty() )
327 {
328 if( query.getMatchWordType() == DictQuery::Verb
329 && isVerb() )
330 {
331 return true;
332 }
333 if( query.getMatchWordType() == DictQuery::Noun
334 && isNoun() )
335 {
336 return true;
337 }
338 if( query.getMatchWordType() == DictQuery::Adjective
339 && isAdjective() )
340 {
341 return true;
342 }
343 if( query.getMatchWordType() == DictQuery::Adverb
344 && isAdverb() )
345 {
346 return true;
347 }
348 if( query.getMatchWordType() == DictQuery::Expression
349 && isExpression() )
350 {
351 return true;
352 }
353 if( query.getMatchWordType() == DictQuery::Prefix
354 && isPrefix() )
355 {
356 return true;
357 }
358 if( query.getMatchWordType() == DictQuery::Suffix
359 && isSuffix() )
360 {
361 return true;
362 }
363 if( query.getMatchWordType() == DictQuery::Any )
364 {
365 return true;
366 }
367 }
368
369 return false;
370 }
371
372 /**
373 * Returns a HTML version of an Entry
374 */
toHTML() const375 QString EntryEdict::toHTML() const
376 {
377 QString result = QStringLiteral( "<div class=\"%1\">" ).arg( EDICT.toUpper() );
378 if( isCommon() )
379 {
380 result += QLatin1String("<div class=\"Common\">");
381 }
382
383 foreach( const QString &field, QSTRINGLISTCHECK( DictFileEdict::displayFields ) )
384 {
385 if( field == QLatin1String("--NewLine--") ) result += QLatin1String("<br>");
386 else if( field == QLatin1String("Word/Kanji") ) result += HTMLWord()+' ';
387 else if( field == QLatin1String("Meaning") ) result += HTMLMeanings()+' ';
388 else if( field == QLatin1String("Reading") ) result += HTMLReadings()+' ';
389 else qDebug() << "Unknown field: " << field;
390 }
391
392 if( isCommon() )
393 {
394 result += QLatin1String("</div>");
395 }
396
397 result += QLatin1String("</div>");
398 return result;
399 }
400
401
402
#ifdef KITEN_EDICTFORMATTING

/**
 * Lookup tables that map EDICT part-of-speech tags to the broader word
 * categories the user can search for.
 *
 * The idea is to provide a mapping from possible entry types to possible
 * things the user could enter, so that the matching code can use the mapping
 * to decide whether a given entry could be understood to match the input.
 *
 * Two approaches were considered by the original author:
 *   1. Convert the user's input into a list of types and check whether the
 *      Entry type matches any of them (comparing against MANY enums).
 *   2. Convert the Entry types into a list of acceptable string aliases and
 *      compare the user's input with that (a relatively small string list).
 * The original note states that the first approach was expected to be
 * faster and is the one chosen.
 *
 * Minimum list of case-insensitive aliases the user could enter:
 *   noun
 *   verb:
 *     ichidan
 *     godan
 *   adjective
 *   adverb
 *   particle
 *
 * According to the original note, the file parser also expands tags to their
 * general case when it is not already present,
 * e.g. v5aru -> v5aru,v5 (so that a search for "godan" will find it).
 *
 * Observations from the tables below: "adv-n" is listed under both noun and
 * adverb, and "id" is present in both PartsOfSpeech and MiscMarkings.
 */
namespace EdictFormatting
{
  enum WordType
  {
    noun,
    verb,
    adjective,
    adverb,
    particle,
    ichidanVerb,
    godanVerb,
    fukisokuVerb,
    expression,
    idiomaticExpression,
    prefix,
    suffix,
    nounPrefix,
    nounSuffix
  };

  // Builders for the tables below; declared first because the variable
  // initialisers call them before their definitions appear.
  QMultiHash<WordType, QString> createPartOfSpeechCategories();
  QSet<QString>                 createPartsOfSpeech();
  QSet<QString>                 createMiscMarkings();
  QSet<QString>                 createFieldOfApplication();
  QStringList                   createNounsList();
  QStringList                   createVerbsList();
  QStringList                   createExpressionsList();
  QStringList                   createPrefixesList();
  QStringList                   createSuffixesList();

  // Public tables. The definition order matters: each initialiser may only
  // rely on the variables defined above it.
  QMultiHash<WordType, QString> PartOfSpeechCategories = createPartOfSpeechCategories();
  QSet<QString>                 PartsOfSpeech          = createPartsOfSpeech();
  QSet<QString>                 MiscMarkings           = createMiscMarkings();
  QSet<QString>                 FieldOfApplication     = createFieldOfApplication();

  // PartOfSpeechCategories must already be populated at this point.
  QStringList Nouns         = createNounsList();
  QStringList Adjectives    = PartOfSpeechCategories.values( adjective );
  QStringList Adverbs       = PartOfSpeechCategories.values( adverb );
  QStringList IchidanVerbs  = PartOfSpeechCategories.values( ichidanVerb );
  QStringList GodanVerbs    = PartOfSpeechCategories.values( godanVerb );
  QStringList FukisokuVerbs = PartOfSpeechCategories.values( fukisokuVerb );
  // createVerbsList() reads the three verb lists defined just above.
  QStringList Verbs         = createVerbsList();
  QStringList Expressions   = createExpressionsList();
  QStringList Prefix        = createPrefixesList();
  QStringList Suffix        = createSuffixesList();
  QString     Particle      = PartOfSpeechCategories.value( particle );

  /** Tags of the noun, nounPrefix and nounSuffix categories, in that order. */
  QStringList createNounsList()
  {
    QStringList tags = PartOfSpeechCategories.values( noun );
    tags += PartOfSpeechCategories.values( nounPrefix );
    tags += PartOfSpeechCategories.values( nounSuffix );
    return tags;
  }

  /** Tags of the verb category followed by the ichidan, godan and fukisoku lists. */
  QStringList createVerbsList()
  {
    QStringList tags = PartOfSpeechCategories.values( verb );
    tags += IchidanVerbs;
    tags += GodanVerbs;
    tags += FukisokuVerbs;
    return tags;
  }

  /** Tags of the expression and idiomaticExpression categories, in that order. */
  QStringList createExpressionsList()
  {
    QStringList tags = PartOfSpeechCategories.values( expression );
    tags += PartOfSpeechCategories.values( idiomaticExpression );
    return tags;
  }

  /** Tags of the prefix and nounPrefix categories, in that order. */
  QStringList createPrefixesList()
  {
    QStringList tags = PartOfSpeechCategories.values( prefix );
    tags += PartOfSpeechCategories.values( nounPrefix );
    return tags;
  }

  /** Tags of the suffix and nounSuffix categories, in that order. */
  QStringList createSuffixesList()
  {
    QStringList tags = PartOfSpeechCategories.values( suffix );
    tags += PartOfSpeechCategories.values( nounSuffix );
    return tags;
  }

  /**
   * Builds the category -> tag mapping. Rows are inserted from top to bottom,
   * so the insertion order within each category follows the table.
   */
  QMultiHash<WordType, QString> createPartOfSpeechCategories()
  {
    struct CategoryTag
    {
      WordType    category;
      const char *tag;
    };

    static const CategoryTag table[] =
    {
      // Nouns
      { noun, "n" },
      { noun, "n-adv" },
      { noun, "n-t" },
      { noun, "adv-n" },

      // Noun (used as a prefix)
      { nounPrefix, "n-pref" },

      // Noun (used as a suffix)
      { nounSuffix, "n-suf" },

      // Ichidan Verbs
      { ichidanVerb, "v1" },
      { ichidanVerb, "vz" },

      // Godan Verbs
      { godanVerb, "v5" },
      { godanVerb, "v5aru" },
      { godanVerb, "v5b" },
      { godanVerb, "v5g" },
      { godanVerb, "v5k" },
      { godanVerb, "v5k-s" },
      { godanVerb, "v5m" },
      { godanVerb, "v5n" },
      { godanVerb, "v5r" },
      { godanVerb, "v5r-i" },
      { godanVerb, "v5s" },
      { godanVerb, "v5t" },
      { godanVerb, "v5u" },
      { godanVerb, "v5u-s" },
      { godanVerb, "v5uru" },
      { godanVerb, "v5z" },

      // Fukisoku verbs
      { fukisokuVerb, "iv" },
      { fukisokuVerb, "vk" },
      { fukisokuVerb, "vn" },
      { fukisokuVerb, "vs-i" },
      { fukisokuVerb, "vs-s" },

      // Other Verbs
      { verb, "vi" },
      { verb, "vs" },
      { verb, "vt" },
      { verb, "aux-v" },

      // Adjectives
      { adjective, "adj-i" },
      { adjective, "adj-na" },
      { adjective, "adj-no" },
      { adjective, "adj-pn" },
      { adjective, "adj-t" },
      { adjective, "adj-f" },
      { adjective, "adj" },
      { adjective, "aux-adj" },

      // Adverbs
      { adverb, "adv" },
      { adverb, "adv-n" },
      { adverb, "adv-to" },

      // Particle
      { particle, "prt" },

      // Expression
      { expression, "exp" },

      // Idiomatic expression
      { idiomaticExpression, "id" },

      // Prefix
      { prefix, "pref" },

      // Suffix
      { suffix, "suf" }
    };

    QMultiHash<WordType, QString> categories;
    for( const CategoryTag &row : table )
    {
      categories.insert( row.category, QString::fromLatin1( row.tag ) );
    }

    return categories;
  }

  /** Builds the set of tags that loadEntry() accepts as parts of speech. */
  QSet<QString> createPartsOfSpeech()
  {
    static const char *const tags[] =
    {
      "adj-i",   "adj-na", "adj-no", "adj-pn", "adj-t", "adj-f",
      "adj",     "adv",    "adv-n",  "adv-to", "aux",   "aux-v",
      "aux-adj", "conj",   "ctr",    "exp",    "id",    "int",
      "iv",      "n",      "n-adv",  "n-pref", "n-suf", "n-t",
      "num",     "pn",     "pref",   "prt",    "suf",   "v1",
      "v5",      "v5aru",  "v5b",    "v5g",    "v5k",   "v5k-s",
      "v5m",     "v5n",    "v5r",    "v5r-i",  "v5s",   "v5t",
      "v5u",     "v5u-s",  "v5uru",  "v5z",    "vz",    "vi",
      "vk",      "vn",     "vs",     "vs-i",   "vs-s",  "vt"
    };

    QSet<QString> category;
    for( const char *tag : tags )
    {
      category.insert( QString::fromLatin1( tag ) );
    }

    return category;
  }

  /** Builds the set of "field of application" tags. */
  QSet<QString> createFieldOfApplication()
  {
    // Field of Application terms
    static const char *const tags[] =
    {
      "Buddh", "MA",   "comp", "food", "geom",
      "ling",  "math", "mil",  "physics"
    };

    QSet<QString> category;
    for( const char *tag : tags )
    {
      category.insert( QString::fromLatin1( tag ) );
    }

    return category;
  }

  /** Builds the set of miscellaneous marking tags. */
  QSet<QString> createMiscMarkings()
  {
    // Miscellaneous Markings (in EDICT terms)
    static const char *const tags[] =
    {
      "X",    "abbr", "arch", "ateji", "chn",   "col",  "derog",
      "eK",   "ek",   "fam",  "fem",   "gikun", "hon",  "hum",  "iK", "id",
      "io",   "m-sl", "male", "male-sl", "ng",  "oK",   "obs",  "obsc", "ok",
      "poet", "pol",  "rare", "sens",  "sl",    "uK",   "uk",   "vulg"
    };

    QSet<QString> category;
    for( const char *tag : tags )
    {
      category.insert( QString::fromLatin1( tag ) );
    }

    return category;
  }
}

#endif
653