1 /* $Id: unit_test_fix_pub.cpp 621236 2020-12-08 19:11:33Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Alexey Dobronadezhdin
27 *
28 * File Description:
29 * Unit tests for CPubFixing.
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35
36 // This header must be included before all Boost.Test headers if there are any
37 #include <corelib/test_boost.hpp>
38
39 #include <corelib/ncbi_message.hpp>
40 #include <corelib/ncbimisc.hpp>
41
42 #include <objects/biblio/Author.hpp>
43 #include <objects/biblio/Auth_list.hpp>
44 #include <objects/biblio/Cit_art.hpp>
45 #include <objects/biblio/Cit_book.hpp>
46 #include <objects/biblio/Cit_jour.hpp>
47 #include <objects/biblio/Cit_proc.hpp>
48 #include <objects/biblio/Imprint.hpp>
49 #include <objects/biblio/Title.hpp>
50 #include <objects/biblio/ArticleIdSet.hpp>
51 #include <objects/biblio/ArticleId.hpp>
52 #include <objects/biblio/PubMedId.hpp>
53 #include <objects/general/Name_std.hpp>
54 #include <objects/general/Person_id.hpp>
55 #include <objects/general/Date.hpp>
56 #include <objects/general/Date_std.hpp>
57 #include <objects/medline/Medline_entry.hpp>
58 #include <objects/general/Dbtag.hpp>
59
60
61 #include <objects/pub/Pub.hpp>
62 #include <objects/pub/Pub_equiv.hpp>
63
64
65 #include "../fix_pub_aux.hpp"
66 #include <misc/fix_pub/fix_pub.hpp>
67
68 #include <common/test_assert.h> /* This header must go last */
69
70 USING_NCBI_SCOPE;
71 USING_SCOPE(objects);
72
// fix_pub::IsFromBook() is expected to return true only after the article's
// "from" field has been switched to the book variant.
BOOST_AUTO_TEST_CASE(Test_IsFromBook)
{
    CCit_art article;

    // Freshly constructed article: nothing is set.
    BOOST_CHECK_EQUAL(fix_pub::IsFromBook(article), false);

    // SetFrom() alone, without choosing a variant, must still give false.
    auto& source = article.SetFrom();
    BOOST_CHECK_EQUAL(fix_pub::IsFromBook(article), false);

    // Book variant selected.
    source.SetBook();
    BOOST_CHECK_EQUAL(fix_pub::IsFromBook(article), true);
}
85
// fix_pub::IsInpress() is expected to return true only when the imprint of the
// selected source (book, proceedings book, or journal) has prepub == in-press.
BOOST_AUTO_TEST_CASE(Test_IsInpress)
{
    const auto in_press = CImprint::ePrepub_in_press;

    CCit_art article;
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), false);

    auto& source = article.SetFrom();
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), false);

    // --- book ---
    source.SetBook();
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), false);

    source.SetBook().SetImp();
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), false);

    source.SetBook().SetImp().SetPrepub(in_press);
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), true);

    // --- proceedings ---
    source.SetProc();
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), false);

    source.SetProc().SetBook().SetImp().SetPrepub(in_press);
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), true);

    // --- journal ---
    source.SetJournal();
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), false);

    source.SetJournal().SetImp().SetPrepub(in_press);
    BOOST_CHECK_EQUAL(fix_pub::IsInpress(article), true);
}
116
// fix_pub::NeedToPropagateInJournal() is expected to return false only for a
// journal article that has a title, volume, pages and a std date all set.
BOOST_AUTO_TEST_CASE(Test_NeedToPropagateInJournal)
{
    CCit_art article;
    BOOST_CHECK_EQUAL(fix_pub::NeedToPropagateInJournal(article), true);

    article.SetFrom();
    BOOST_CHECK_EQUAL(fix_pub::NeedToPropagateInJournal(article), true);

    article.SetFrom().SetBook();
    BOOST_CHECK_EQUAL(fix_pub::NeedToPropagateInJournal(article), true);

    // Switch to a journal; an empty journal still needs propagation.
    auto& journal = article.SetFrom().SetJournal();
    BOOST_CHECK_EQUAL(fix_pub::NeedToPropagateInJournal(article), true);

    // A title alone is not enough.
    CRef<CTitle::C_E> journal_title(new CTitle::C_E);
    journal_title->SetName("journal");
    journal.SetTitle().Set().push_back(journal_title);
    BOOST_CHECK_EQUAL(fix_pub::NeedToPropagateInJournal(article), true);

    // Title + volume + pages + std date: nothing left to propagate.
    auto& imprint = journal.SetImp();
    imprint.SetVolume("1");
    imprint.SetPages("2");
    imprint.SetDate().SetStd();
    BOOST_CHECK_EQUAL(fix_pub::NeedToPropagateInJournal(article), false);

    // Dropping the title makes propagation necessary again.
    journal.ResetTitle();
    BOOST_CHECK_EQUAL(fix_pub::NeedToPropagateInJournal(article), true);
}
147
// Checks fix_pub::PropagateInPress(): each step snapshots the article, runs the
// function, and then compares the result against the snapshot (optionally
// patched by hand with the in-press flag that is expected to be added).
BOOST_AUTO_TEST_CASE(Test_PropagateInPress)
{
    const auto in_press = CImprint::ePrepub_in_press;

    CCit_art processed;
    CCit_art baseline;

    // Empty article: expected to stay untouched.
    fix_pub::PropagateInPress(true, processed);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), true);

    // Book: the in-press flag is expected in the book imprint.
    processed.SetFrom().SetBook();
    baseline.Assign(processed);
    fix_pub::PropagateInPress(true, processed);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), false);

    baseline.SetFrom().SetBook().SetImp().SetPrepub(in_press);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), true);

    // Journal: the in-press flag is expected in the journal imprint.
    processed.SetFrom().SetJournal();
    baseline.Assign(processed);
    fix_pub::PropagateInPress(true, processed);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), false);

    baseline.SetFrom().SetJournal().SetImp().SetPrepub(in_press);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), true);

    // Proceedings with a book: the flag is expected in that book's imprint.
    processed.SetFrom().SetProc().SetBook();
    baseline.Assign(processed);
    fix_pub::PropagateInPress(true, processed);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), false);

    baseline.SetFrom().SetProc().SetBook().SetImp().SetPrepub(in_press);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), true);

    // After touching the meeting part, a further call must change nothing.
    processed.SetFrom().SetProc().SetMeet();
    baseline.Assign(processed);
    fix_pub::PropagateInPress(true, processed);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), true);

    // With the first argument false the article must be left as is.
    processed.SetFrom().SetJournal();
    baseline.Assign(processed);
    fix_pub::PropagateInPress(false, processed);
    BOOST_CHECK_EQUAL(baseline.Equals(processed), true);
}
196
197
// Checks fix_pub::MergeNonPubmedPubIds(source, target): PubMed ids in the
// source article must be ignored, DOI and "other" ids must show up in the
// target article.
BOOST_AUTO_TEST_CASE(Test_MergeNonPubmedPubIds)
{
    CCit_art expected;  // what the target should look like
    CCit_art target;    // article receiving the ids
    CCit_art source;    // article providing the ids

    // Nothing to merge yet.
    fix_pub::MergeNonPubmedPubIds(source, target);
    BOOST_CHECK_EQUAL(expected.Equals(target), true);

    // PMID will not be merged
    static const TEntrezId PMID = ENTREZ_ID_CONST(2626);
    CRef<CArticleId> pubmed_id(new CArticleId);
    pubmed_id->SetPubmed().Set(PMID);
    source.SetIds().Set().push_back(pubmed_id);

    fix_pub::MergeNonPubmedPubIds(source, target);
    BOOST_CHECK_EQUAL(expected.Equals(target), true);

    // Doi ID should be merged
    static const string DOI_ID = "2727";
    CRef<CArticleId> doi_id(new CArticleId);
    doi_id->SetDoi().Set(DOI_ID);
    source.SetIds().Set().push_back(doi_id);

    fix_pub::MergeNonPubmedPubIds(source, target);
    BOOST_CHECK_EQUAL(expected.Equals(target), false);

    expected.SetIds().Set().push_front(doi_id);
    BOOST_CHECK_EQUAL(expected.Equals(target), true);

    // Other ID should be merged
    static const string TEST_DB = "Test DB";
    CRef<CArticleId> other_id(new CArticleId);
    other_id->SetOther().SetDb(TEST_DB);
    source.SetIds().Set().push_back(other_id);

    fix_pub::MergeNonPubmedPubIds(source, target);
    BOOST_CHECK_EQUAL(expected.Equals(target), false);

    expected.SetIds().Set().push_front(other_id);
    BOOST_CHECK_EQUAL(expected.Equals(target), true);
}
241
242
// Checks fix_pub::MedlineToISO(): Medline-style author names ("Doe J") are
// expected to be turned into structured names (last / first / initials).
// The journal-title and language parts are exercised, but their result checks
// are currently disabled.
BOOST_AUTO_TEST_CASE(Test_MedlineToISO)
{
    // Author with a structured (std) name.
    auto make_name_author = [](const string& last, const string& first, const string& initials) -> CRef<CAuthor> {
        CRef<CAuthor> result(new CAuthor);
        result->SetName().SetName().SetLast(last);
        result->SetName().SetName().SetFirst(first);
        result->SetName().SetName().SetInitials(initials);
        return result;
    };

    // Author whose name is a single Medline-format string.
    auto make_ml_author = [](const string& ml_name) -> CRef<CAuthor> {
        CRef<CAuthor> result(new CAuthor);
        result->SetName().SetMl(ml_name);
        return result;
    };

    CCit_art article;
    CCit_art expected;

    // Empty article: must remain unchanged.
    fix_pub::MedlineToISO(article);
    BOOST_CHECK_EQUAL(expected.Equals(article), true);

    // ML list of authors
    article.SetAuthors().SetNames().SetMl().push_back("Doe J");
    article.SetAuthors().SetNames().SetMl().push_back("Author S");

    fix_pub::MedlineToISO(article);

    expected.SetAuthors().SetNames().SetStd().push_back(make_name_author("Doe", "J", "J."));
    expected.SetAuthors().SetNames().SetStd().push_back(make_name_author("Author", "S", "S."));

    BOOST_CHECK_EQUAL(expected.Equals(article), true);

    // Std list of authors with ML format of authors' names
    article.ResetAuthors();
    article.SetAuthors().SetNames().SetStd().push_back(make_ml_author("Doe J"));
    article.SetAuthors().SetNames().SetStd().push_back(make_ml_author("Author S"));

    fix_pub::MedlineToISO(article);

    BOOST_CHECK_EQUAL(expected.Equals(article), true);

    // Cit_art is from a journal
    CRef<CTitle::C_E> plain_title(new CTitle::C_E);
    plain_title->SetName("Nature");
    article.SetFrom().SetJournal().SetTitle().Set().push_back(plain_title);

    CRef<CTitle::C_E> iso_title(new CTitle::C_E);
    iso_title->SetIso_jta("Nature");
    expected.SetFrom().SetJournal().SetTitle().Set().push_back(iso_title);

    fix_pub::MedlineToISO(article);
    // Disabled check:
    //BOOST_CHECK_EQUAL(expected.Equals(article), true);

    // MedlineToISO also removes the language if it is "Eng"
    article.SetFrom().SetJournal().SetImp().SetLanguage("Eng");
    fix_pub::MedlineToISO(article);

    // Disabled check:
    //BOOST_CHECK_EQUAL(expected.Equals(article), true);
}
306
307
// Checks fix_pub::SplitMedlineEntry(): a list holding one Medline pub is
// expected to become a two-item list, a pmid pub followed by an article pub.
// The comparison of the article item is currently disabled.
BOOST_AUTO_TEST_CASE(Test_SplitMedlineEntry)
{
    static const TEntrezId TEST_PMID = ENTREZ_ID_CONST(1);

    // Set medline
    CRef<CPub> medline_pub(new CPub);
    medline_pub->SetMedline().SetCit().SetAuthors().SetNames().SetMl().push_back("Doe J");

    CRef<CTitle::C_E> plain_title(new CTitle::C_E);
    plain_title->SetName("Nature");
    medline_pub->SetMedline().SetCit().SetFrom().SetJournal().SetTitle().Set().push_back(plain_title);
    medline_pub->SetMedline().SetPmid().Set(TEST_PMID);

    CPub_equiv::Tdata medlines;
    medlines.push_back(medline_pub);

    fix_pub::SplitMedlineEntry(medlines);

    // medlines should contain two items now
    BOOST_CHECK_EQUAL(medlines.size(), 2);
    if (medlines.size() != 2) {
        return;
    }

    // first one is CPub->pmid
    auto it = medlines.begin();

    CRef<CPub> expected_pmid(new CPub);
    expected_pmid->SetPmid().Set(TEST_PMID);
    BOOST_CHECK_EQUAL((*it)->Equals(*expected_pmid), true);

    // second one is CPub->cit-art
    ++it;

    CRef<CPub> expected_article(new CPub);
    CRef<CTitle::C_E> iso_title(new CTitle::C_E);
    iso_title->SetIso_jta("Nature");
    expected_article->SetArticle().SetFrom().SetJournal().SetTitle().Set().push_back(iso_title);

    CRef<CAuthor> author(new CAuthor);
    author->SetName().SetName().SetLast("Doe");
    author->SetName().SetName().SetFirst("J");
    author->SetName().SetName().SetInitials("J.");
    expected_article->SetArticle().SetAuthors().SetNames().SetStd().push_back(author);
    // Disabled check:
    //BOOST_CHECK_EQUAL((*it)->Equals(*expected_article), true);
}
352
// Table-driven check of fix_pub::MULooksLikeISSN() on a few well-formed and
// malformed candidate strings.
BOOST_AUTO_TEST_CASE(Test_MULooksLikeISSN)
{
    static const struct {
        const char* candidate;
        bool        looks_like_issn;
    } kCases[] = {
        { "1234-1234", true },
        { "1234-123X", true },

        { "X234-1234", false },
        { "123-41234", false },
        { "123341234", false }
    };

    for (const auto& test_case : kCases) {
        BOOST_CHECK_EQUAL(fix_pub::MULooksLikeISSN(test_case.candidate), test_case.looks_like_issn);
    }
}
362
// Table-driven check of fix_pub::MUIsJournalIndexed() on journal titles that
// are expected to be recognized and on titles that are not.
BOOST_AUTO_TEST_CASE(Test_MUIsJournalIndexed)
{
    static const struct {
        const char* journal;
        bool        indexed;
    } kCases[] = {
        { "Nature (Reviews Molecular Cell Biology)", true },
        { "Molecular Cell", true },
        { "Genome Biology.", true },

        { "Journal", false }, // Too many entries found
        { "Fake journal", false },
        { "Journal (which does not exist)", false }
    };

    for (const auto& test_case : kCases) {
        BOOST_CHECK_EQUAL(fix_pub::MUIsJournalIndexed(test_case.journal), test_case.indexed);
    }
}
373
374 struct STestErrorText
375 {
376 fix_pub::EFixPubErrorCategory m_err_code;
377 int m_err_subcode;
378 EDiagSev m_severity;
379
380 string m_text;
381 };
382
CheckPrintPubProblems(const IMessageListener & log,const STestErrorText * expected)383 void CheckPrintPubProblems(const IMessageListener& log, const STestErrorText* expected)
384 {
385 size_t num_of_errors = log.Count();
386 for (size_t i = 0; i < num_of_errors; ++i) {
387 const IMessage& msg = log.GetMessage(i);
388
389 BOOST_CHECK_EQUAL(msg.GetCode(), expected[i].m_err_code);
390 BOOST_CHECK_EQUAL(msg.GetSubCode(), expected[i].m_err_subcode);
391 BOOST_CHECK_EQUAL(msg.GetSeverity(), expected[i].m_severity);
392 BOOST_CHECK_EQUAL(msg.GetText(), expected[i].m_text);
393 }
394 }
395
// Runs fix_pub::PrintPub() on an article that is filled in step by step and
// compares the messages sent to the listener with the expected ones.
BOOST_AUTO_TEST_CASE(Test_PrintPub)
{
    CCit_art article;
    CMessageListener_Basic listener;

    // Step 1: completely empty article.
    fix_pub::PrintPub(article, false, false, 0, &listener);

    static const STestErrorText kEmptyArticle[] =
    {
        { fix_pub::err_Print, fix_pub::err_Print_Failed, eDiag_Warning, "Authors NULL" },
        { fix_pub::err_Reference, fix_pub::err_Reference_NoPmidJournalNotInPubMed, eDiag_Info, " |journal unknown|(0)|no volume number|no page number" }
    };
    CheckPrintPubProblems(listener, kEmptyArticle);
    listener.Clear();

    // Step 2: one structured author, still no journal.
    CRef<CAuthor> author(new CAuthor);
    author->SetName().SetName().SetLast("Doe");
    author->SetName().SetName().SetInitials("J.");
    article.SetAuthors().SetNames().SetStd().push_back(author);

    fix_pub::PrintPub(article, false, false, 0, &listener);

    static const STestErrorText kAuthorOnly[] =
    {
        { fix_pub::err_Reference, fix_pub::err_Reference_NoPmidJournalNotInPubMed, eDiag_Info, "Doe J.|journal unknown|(0)|no volume number|no page number" }
    };
    CheckPrintPubProblems(listener, kAuthorOnly);
    listener.Clear();

    // Step 3: author given as a plain string, plus a journal title.
    article.SetAuthors().SetNames().SetStr().push_back("Doe J");
    CRef<CTitle::C_E> journal_title(new CTitle::C_E);
    journal_title->SetName("Molecular Cell");
    article.SetFrom().SetJournal().SetTitle().Set().push_back(journal_title);

    fix_pub::PrintPub(article, false, false, 0, &listener);

    static const STestErrorText kWithJournal[] =
    {
        { fix_pub::err_Reference, fix_pub::err_Reference_PmidNotFound, eDiag_Warning, "Doe J |Molecular Cell|(0)|no volume number|no page number" }
    };
    CheckPrintPubProblems(listener, kWithJournal);
    listener.Clear();

    // Step 4: year, volume and pages added to the imprint.
    {
        auto& imprint = article.SetFrom().SetJournal().SetImp();
        imprint.SetDate().SetStd().SetYear(2010);
        imprint.SetVolume("1");
        imprint.SetPages("15");
    }

    fix_pub::PrintPub(article, false, false, 0, &listener);

    static const STestErrorText kWithImprint[] =
    {
        { fix_pub::err_Reference, fix_pub::err_Reference_PmidNotFound, eDiag_Warning, "Doe J |Molecular Cell|(2010)|1|15" }
    };
    CheckPrintPubProblems(listener, kWithImprint);
    listener.Clear();

    // Step 5: the same article marked as in-press.
    article.SetFrom().SetJournal().SetImp().SetPrepub(CImprint::ePrepub_in_press);
    fix_pub::PrintPub(article, false, false, 0, &listener);

    static const STestErrorText kInPress[] =
    {
        { fix_pub::err_Reference, fix_pub::err_Reference_OldInPress, eDiag_Warning, "encountered in-press article more than 2 years old: Doe J |Molecular Cell|(2010)|1|15" },
        { fix_pub::err_Reference, fix_pub::err_Reference_PmidNotFoundInPress, eDiag_Warning, "Doe J |Molecular Cell|(2010)|1|15" }
    };
    CheckPrintPubProblems(listener, kInPress);
    listener.Clear();
}
469
// Checks fix_pub::TenAuthorsCompare(old, new) on string author lists: first
// with matching lists, then with lists that differ, and finally with lists of
// more than ten names, where the old list is expected to end up in the new
// article and to be removed from the old one.
BOOST_AUTO_TEST_CASE(Test_TenAuthorsCompare)
{
    CCit_art art_new;
    CCit_art art_old;

    // Identical single-author lists.
    art_new.SetAuthors().SetNames().SetStr().push_back("Doe John");
    art_old.Assign(art_new);
    BOOST_CHECK_EQUAL(fix_pub::TenAuthorsCompare(art_old, art_new), true);

    // One extra author on the new side.
    art_new.SetAuthors().SetNames().SetStr().push_back("First Author");
    BOOST_CHECK_EQUAL(fix_pub::TenAuthorsCompare(art_old, art_new), true);

    // Different extra authors on each side.
    static const char* const kNewOnly[] = { "Second Author", "Forth Author", "Fifth Author" };
    for (const char* name : kNewOnly) {
        art_new.SetAuthors().SetNames().SetStr().push_back(name);
    }

    static const char* const kOldOnly[] = { "Sixth Author", "Seventh Author", "Eighth Author", "Ninth Author" };
    for (const char* name : kOldOnly) {
        art_old.SetAuthors().SetNames().SetStr().push_back(name);
    }
    BOOST_CHECK_EQUAL(fix_pub::TenAuthorsCompare(art_old, art_new), false);

    // make the list of authors > 10
    static const char* const kCommonTail[] = { "a b", "c d", "e f", "g h", "i j", "ii jj" };
    for (const char* name : kCommonTail) {
        art_new.SetAuthors().SetNames().SetStr().push_back(name);
    }

    art_old.SetAuthors().SetNames().SetStr().push_back("First Author");
    for (const char* name : kCommonTail) {
        art_old.SetAuthors().SetNames().SetStr().push_back(name);
    }

    // Snapshot of the old article taken before the call.
    CCit_art expected;
    expected.Assign(art_old);

    BOOST_CHECK_EQUAL(fix_pub::TenAuthorsCompare(art_old, art_new), true);
    BOOST_CHECK_EQUAL(art_old.IsSetAuthors(), false);
    BOOST_CHECK_EQUAL(art_new.GetAuthors().Equals(expected.GetAuthors()), true);
}
518
// Checks fix_pub::ExtractConsortiums(): the returned count is expected to
// cover only non-consortium authors, while consortium names are collected in
// the output list.
BOOST_AUTO_TEST_CASE(Test_ExtractConsortiums)
{
    CAuth_list_Base::C_Names::TStd authors;
    list<string> consortiums;

    // A single ordinary author: nothing to extract.
    CRef<CAuthor> person(new CAuthor);
    person->SetName().SetName().SetLast("Doe");
    person->SetName().SetName().SetInitials("J.");
    authors.push_back(person);

    size_t num_of_authors = fix_pub::ExtractConsortiums(authors, consortiums);
    BOOST_CHECK_EQUAL(num_of_authors, 1);
    BOOST_CHECK_EQUAL(consortiums.size(), 0);

    // Add a consortium entry: it must be extracted and not counted as an author.
    CRef<CAuthor> consortium(new CAuthor);
    consortium->SetName().SetConsortium("First consortium");
    authors.push_back(consortium);

    num_of_authors = fix_pub::ExtractConsortiums(authors, consortiums);
    BOOST_CHECK_EQUAL(num_of_authors, 1);
    BOOST_CHECK_EQUAL(consortiums.size(), 1);

    if (!consortiums.empty()) {
        BOOST_CHECK_EQUAL(consortiums.front(), "First consortium");
    }
}
546
// Checks fix_pub::GetFirstTenNames(): with one author the result holds that
// author's last name; with twelve authors the result is expected to hold the
// last names of the first ten, in the original order.
BOOST_AUTO_TEST_CASE(Test_GetFirstTenNames)
{
    static const size_t MAX_NUM_OF_AUTHORS = 10;

    CAuth_list_Base::C_Names::TStd authors;

    string lastname("Alsatname");
    string initials("A.");

    CRef<CAuthor> first_author(new CAuthor);
    first_author->SetName().SetName().SetLast(lastname);
    first_author->SetName().SetName().SetInitials(initials);
    authors.push_back(first_author);

    list<CTempString> names;
    fix_pub::GetFirstTenNames(authors, names);

    BOOST_CHECK_EQUAL(names.size(), 1);
    if (!names.empty()) {
        BOOST_CHECK_EQUAL(names.front(), authors.front()->GetName().GetName().GetLast());
    }
    names.clear();

    // Append MAX_NUM_OF_AUTHORS + 1 more authors; their names differ from the
    // first one only in the leading letter ('B', 'C', ...).
    for (size_t offset = 0; offset <= MAX_NUM_OF_AUTHORS; ++offset) {
        const char first_letter = static_cast<char>('B' + offset);
        lastname[0] = first_letter;
        initials[0] = first_letter;

        CRef<CAuthor> next_author(new CAuthor);
        next_author->SetName().SetName().SetLast(lastname);
        next_author->SetName().SetName().SetInitials(initials);
        authors.push_back(next_author);
    }

    fix_pub::GetFirstTenNames(authors, names);
    BOOST_CHECK_EQUAL(names.size(), MAX_NUM_OF_AUTHORS);
    if (names.size() == MAX_NUM_OF_AUTHORS) {

        // Walk both lists in parallel.
        auto expected_it = authors.begin();
        for (auto found_it = names.begin(); found_it != names.end(); ++found_it, ++expected_it) {
            BOOST_CHECK_EQUAL(*found_it, (*expected_it)->GetName().GetName().GetLast());
        }
    }
}
593
// Checks fix_pub::TenAuthorsProcess() for the case where the PubMed record is
// outdated: it lists fewer authors (ten plus "et al") than the GenBank record.
// The author lists are those of a real publication, PMID=1302004.
// The full GenBank list is expected to end up in the new article and to be
// removed from the old one.
BOOST_AUTO_TEST_CASE(Test_TenAuthorsProcess)
{
    // Fills `art` with std authors from a flat array laid out as
    // { last, initials, last, initials, ... }.
    auto append_std_authors = [](CCit_art& art, const string* last_and_initials, size_t count) {
        for (size_t i = 0; i < count; i += 2) {
            CRef<CAuthor> author(new CAuthor);
            author->SetName().SetName().SetLast(last_and_initials[i]);
            author->SetName().SetName().SetInitials(last_and_initials[i + 1]);
            art.SetAuthors().SetNames().SetStd().push_back(author);
        }
    };

    static const string GENBANK_AUTHORS[] = {
        "Waterston", "R.",
        "Martin", "C.",
        "Craxton", "M.",
        "Huynh", "C.",
        "Coulson", "A.",
        "Hillier", "L.",
        "Durbin", "R.K.",
        "Green", "P.",
        "Shownkeen", "R.",
        "Halloran", "N.",
        "Hawkins", "T.",
        "Wilson", "R.",
        "Berks", "M.",
        "Du", "Z.",
        "Thomas", "K.",
        "Thierry-Mieg", "J.",
        "Sulston", "J."
    };

    static const string PUBMED_AUTHORS[] = {
        "Waterston", "R.",
        "Martin", "C.",
        "Craxton", "M.",
        "Huynh", "C.",
        "Coulson", "A.",
        "Hillier", "L.",
        "Durbin", "R.K.",
        "Green", "P.",
        "Shownkeen", "R.",
        "Halloran", "N.",
        "et", "al"
    };

    CCit_art art_new;
    CCit_art art_old;

    append_std_authors(art_old, GENBANK_AUTHORS, ArraySize(GENBANK_AUTHORS));
    append_std_authors(art_new, PUBMED_AUTHORS, ArraySize(PUBMED_AUTHORS));

    // Snapshot of the old article taken before the call.
    CCit_art expected;
    expected.Assign(art_old);

    BOOST_CHECK_EQUAL(fix_pub::TenAuthorsProcess(art_old, art_new, nullptr), true);
    BOOST_CHECK_EQUAL(art_old.IsSetAuthors(), false);
    BOOST_CHECK_EQUAL(art_new.GetAuthors().Equals(expected.GetAuthors()), true);
}
656
// Reads a Pub-equiv (pmid + article) from ASN.1 text and runs
// CPubFixing::FixPub() on it. The result is not inspected yet.
BOOST_AUTO_TEST_CASE(Test_FixPub)
{
    static const char* TEST_PUB =
"Pub ::= \
equiv { \
  pmid 17659802, \
  article { \
    title { \
      name \"Genetic diversity and reassortments among Akabane virus field isolates.\" \
    }, \
    authors { \
      names std { \
        { \
          name name { \
            last \"Kobayashi\", \
            initials \"T.\" \
          } \
        }, \
        { \
          name name { \
            last \"Yanase\", \
            initials \"T.\" \
          } \
        }, \
        { \
          name name { \
            last \"Yamakawa\", \
            initials \"M.\" \
          } \
        }, \
        { \
          name name { \
            last \"Kato\", \
            initials \"T.\" \
          } \
        }, \
        { \
          name name { \
            last \"Yoshida\", \
            initials \"K.\" \
          } \
        }, \
        { \
          name name { \
            last \"Tsuda\", \
            initials \"T.\" \
          } \
        } \
      }, \
      affil str \"Division 1, Second Production Department, the Chemo - Sero - Therapeutic Research Institute, 1 - 6 - 1 Okubo, Kumamoto 860 - 8568, Japan.\" \
    }, \
    from journal { \
      title { \
        iso-jta \"Virus Res.\", \
        ml-jta \"Virus Res\", \
        issn \"0168-1702\", \
        name \"Virus research\" \
      }, \
      imp { \
        date std { \
          year 2007, \
          month 12 \
        }, \
        volume \"130\", \
        issue \"1-2\", \
        pages \"162-171\", \
        language \"ENG\", \
        pubstatus ppublish, \
        history { \
          { \
            pubstatus received, \
            date std { \
              year 2007, \
              month 1, \
              day 15 \
            } \
          }, \
          { \
            pubstatus revised, \
            date std { \
              year 2007, \
              month 6, \
              day 5 \
            } \
          }, \
          { \
            pubstatus accepted, \
            date std { \
              year 2007, \
              month 6, \
              day 11 \
            } \
          }, \
          { \
            pubstatus aheadofprint, \
            date std { \
              year 2007, \
              month 7, \
              day 30 \
            } \
          }, \
          { \
            pubstatus pubmed, \
            date std { \
              year 2007, \
              month 7, \
              day 31, \
              hour 9, \
              minute 0 \
            } \
          }, \
          { \
            pubstatus medline, \
            date std { \
              year 2007, \
              month 7, \
              day 31, \
              hour 9, \
              minute 0 \
            } \
          } \
        } \
      } \
    }, \
    ids { \
      pii \"S0168-1702(07)00221-3\", \
      doi \"10.1016/j.virusres.2007.06.007\", \
      pubmed 17659802 \
    } \
  } \
}";

    // Deserialize the ASN.1 text into a CPub object.
    CPub pub;
    CNcbiIstrstream asn_text(TEST_PUB);
    asn_text >> MSerial_AsnText >> pub;

    CPubFixing fixer(true, true, true, nullptr);
    fixer.FixPub(pub);

    // Uncomment to dump the fixed publication:
    // cout << MSerial_AsnText << pub;

    // There are no assertions here yet; they are to be added in the future.
}
802
803
// Reads a Pub-equiv (pmid 1302004 + article with 17 authors) from ASN.1 text
// and runs CPubFixing::FixPub() on it. The result is not inspected yet.
BOOST_AUTO_TEST_CASE(Test_FixPubPreserveOriginalListOfAuthors)
{
    static const char* TEST_PUB =
"Pub ::= \
equiv { \
  pmid 1302004, \
  article { \
    title { name \"A survey of expressed genes in Caenorhabditis elegans\" }, \
    authors { \
      names std { \
        { \
          name name { \
            last \"Waterston\", \
            initials \"R.\" \
          } \
        }, \
        { \
          name name { \
            last \"Martin\", \
            initials \"C.\" \
          } \
        }, \
        { \
          name name { \
            last \"Craxton\", \
            initials \"M.\" \
          } \
        }, \
        { \
          name name { \
            last \"Huynh\", \
            initials \"C.\" \
          } \
        }, \
        { \
          name name { \
            last \"Coulson\", \
            initials \"A.\" \
          } \
        }, \
        { \
          name name { \
            last \"Hillier\", \
            initials \"L.\" \
          } \
        }, \
        { \
          name name { \
            last \"Durbin\", \
            initials \"R.K.\" \
          } \
        }, \
        { \
          name name { \
            last \"Green\", \
            initials \"P.\" \
          } \
        }, \
        { \
          name name { \
            last \"Shownkeen\", \
            initials \"R.\" \
          } \
        }, \
        { \
          name name { \
            last \"Halloran\", \
            initials \"N.\" \
          } \
        }, \
        { \
          name name { \
            last \"Hawkins\", \
            initials \"T.\" \
          } \
        }, \
        { \
          name name { \
            last \"Wilson\", \
            initials \"R.\" \
          } \
        }, \
        { \
          name name { \
            last \"Berks\", \
            initials \"M.\" \
          } \
        }, \
        { \
          name name { \
            last \"Du\", \
            initials \"Z.\" \
          } \
        }, \
        { \
          name name { \
            last \"Thomas\", \
            initials \"K.\" \
          } \
        }, \
        { \
          name name { \
            last \"Thierry-Mieg\", \
            initials \"J.\" \
          } \
        }, \
        { \
          name name { \
            last \"Sulston\", \
            initials \"J.\" \
          } \
        } \
      } \
    }, \
    from journal { \
      title { iso-jta \"Nat. Genet.\" }, \
      imp { \
        date std { year 1992 }, \
        volume \"1\", \
        pages \"114-123\" \
      } \
    } \
  } \
}";

    // Deserialize the ASN.1 text into a CPub object.
    CPub pub;
    CNcbiIstrstream asn_text(TEST_PUB);
    asn_text >> MSerial_AsnText >> pub;

    CPubFixing fixer(true, true, true, nullptr);
    fixer.FixPub(pub);

    // Uncomment to dump the fixed publication:
    // cout << MSerial_AsnText << pub;

    // There are no assertions here yet; they are to be added in the future.
}
942