1 /*  $Id: unit_test_fix_pub.cpp 621236 2020-12-08 19:11:33Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Alexey Dobronadezhdin
27  *
28  * File Description:
29  *   Unit tests for CPubFixing.
30  *
31  * ===========================================================================
32  */
33 
34 #include <ncbi_pch.hpp>
35 
36 // This header must be included before all Boost.Test headers if there are any
37 #include <corelib/test_boost.hpp>
38 
39 #include <corelib/ncbi_message.hpp>
40 #include <corelib/ncbimisc.hpp>
41 
42 #include <objects/biblio/Author.hpp>
43 #include <objects/biblio/Auth_list.hpp>
44 #include <objects/biblio/Cit_art.hpp>
45 #include <objects/biblio/Cit_book.hpp>
46 #include <objects/biblio/Cit_jour.hpp>
47 #include <objects/biblio/Cit_proc.hpp>
48 #include <objects/biblio/Imprint.hpp>
49 #include <objects/biblio/Title.hpp>
50 #include <objects/biblio/ArticleIdSet.hpp>
51 #include <objects/biblio/ArticleId.hpp>
52 #include <objects/biblio/PubMedId.hpp>
53 #include <objects/general/Name_std.hpp>
54 #include <objects/general/Person_id.hpp>
55 #include <objects/general/Date.hpp>
56 #include <objects/general/Date_std.hpp>
57 #include <objects/medline/Medline_entry.hpp>
58 #include <objects/general/Dbtag.hpp>
59 
60 
61 #include <objects/pub/Pub.hpp>
62 #include <objects/pub/Pub_equiv.hpp>
63 
64 
65 #include "../fix_pub_aux.hpp"
66 #include <misc/fix_pub/fix_pub.hpp>
67 
68 #include <common/test_assert.h>  /* This header must go last */
69 
70 USING_NCBI_SCOPE;
71 USING_SCOPE(objects);
72 
// fix_pub::IsFromBook() is expected to answer "true" only after the article's
// "from" choice has actually been switched to the book variant.
BOOST_AUTO_TEST_CASE(Test_IsFromBook)
{
    CCit_art article;

    // "from" is not set at all
    BOOST_CHECK(!fix_pub::IsFromBook(article));

    // "from" exists, but no variant has been selected
    article.SetFrom();
    BOOST_CHECK(!fix_pub::IsFromBook(article));

    // "from" now holds a book
    article.SetFrom().SetBook();
    BOOST_CHECK(fix_pub::IsFromBook(article));
}
85 
// fix_pub::IsInpress() is exercised for every "from" variant (book,
// proceedings, journal): the expected answer is "true" only when the relevant
// imprint carries the in-press prepub status.
BOOST_AUTO_TEST_CASE(Test_IsInpress)
{
    const auto kInPress = CImprint::ePrepub_in_press;

    CCit_art article;
    BOOST_CHECK(!fix_pub::IsInpress(article));

    article.SetFrom();
    BOOST_CHECK(!fix_pub::IsInpress(article));

    // Book variant: empty book, then empty imprint, then in-press imprint
    auto& book = article.SetFrom().SetBook();
    BOOST_CHECK(!fix_pub::IsInpress(article));

    auto& book_imprint = book.SetImp();
    BOOST_CHECK(!fix_pub::IsInpress(article));

    book_imprint.SetPrepub(kInPress);
    BOOST_CHECK(fix_pub::IsInpress(article));

    // Proceedings variant (the references into the book are not used any more)
    auto& proceedings = article.SetFrom().SetProc();
    BOOST_CHECK(!fix_pub::IsInpress(article));

    proceedings.SetBook().SetImp().SetPrepub(kInPress);
    BOOST_CHECK(fix_pub::IsInpress(article));

    // Journal variant
    auto& journal = article.SetFrom().SetJournal();
    BOOST_CHECK(!fix_pub::IsInpress(article));

    journal.SetImp().SetPrepub(kInPress);
    BOOST_CHECK(fix_pub::IsInpress(article));
}
116 
// fix_pub::NeedToPropagateInJournal() is expected to return "false" only for
// a journal article that has a title plus volume, pages and a std date in its
// imprint; every other state checked here is expected to return "true".
BOOST_AUTO_TEST_CASE(Test_NeedToPropagateInJournal)
{
    CCit_art article;

    BOOST_CHECK(fix_pub::NeedToPropagateInJournal(article));

    article.SetFrom();
    BOOST_CHECK(fix_pub::NeedToPropagateInJournal(article));

    article.SetFrom().SetBook();
    BOOST_CHECK(fix_pub::NeedToPropagateInJournal(article));

    auto& journal = article.SetFrom().SetJournal();
    BOOST_CHECK(fix_pub::NeedToPropagateInJournal(article));

    // A title alone is not enough
    CRef<CTitle::C_E> journal_title(new CTitle::C_E);
    journal_title->SetName("journal");
    journal.SetTitle().Set().push_back(journal_title);
    BOOST_CHECK(fix_pub::NeedToPropagateInJournal(article));

    // Title + volume + pages + std date: nothing left to propagate
    auto& imprint = journal.SetImp();
    imprint.SetVolume("1");
    imprint.SetPages("2");
    imprint.SetDate().SetStd();
    BOOST_CHECK(!fix_pub::NeedToPropagateInJournal(article));

    // Dropping the title brings the "true" answer back
    journal.ResetTitle();
    BOOST_CHECK(fix_pub::NeedToPropagateInJournal(article));
}
147 
// fix_pub::PropagateInPress(): for each "from" variant a snapshot of the
// article is taken before the call and compared with the article afterwards.
// Where a change is expected, the snapshot is then given the in-press prepub
// status by hand and must become equal to the processed article.
BOOST_AUTO_TEST_CASE(Test_PropagateInPress)
{
    const auto kInPress = CImprint::ePrepub_in_press;

    CCit_art article;
    CCit_art snapshot;

    // Article without "from": expected to stay untouched
    fix_pub::PropagateInPress(true, article);
    BOOST_CHECK(snapshot.Equals(article));

    // Book
    article.SetFrom().SetBook();
    snapshot.Assign(article);
    fix_pub::PropagateInPress(true, article);
    BOOST_CHECK(!snapshot.Equals(article));

    snapshot.SetFrom().SetBook().SetImp().SetPrepub(kInPress);
    BOOST_CHECK(snapshot.Equals(article));

    // Journal
    article.SetFrom().SetJournal();
    snapshot.Assign(article);
    fix_pub::PropagateInPress(true, article);
    BOOST_CHECK(!snapshot.Equals(article));

    snapshot.SetFrom().SetJournal().SetImp().SetPrepub(kInPress);
    BOOST_CHECK(snapshot.Equals(article));

    // Proceedings with a book
    article.SetFrom().SetProc().SetBook();
    snapshot.Assign(article);
    fix_pub::PropagateInPress(true, article);
    BOOST_CHECK(!snapshot.Equals(article));

    snapshot.SetFrom().SetProc().SetBook().SetImp().SetPrepub(kInPress);
    BOOST_CHECK(snapshot.Equals(article));

    // After additionally setting the meeting part: no further change expected
    article.SetFrom().SetProc().SetMeet();
    snapshot.Assign(article);
    fix_pub::PropagateInPress(true, article);
    BOOST_CHECK(snapshot.Equals(article));

    // First argument is false: the journal article is expected to stay as is
    article.SetFrom().SetJournal();
    snapshot.Assign(article);
    fix_pub::PropagateInPress(false, article);
    BOOST_CHECK(snapshot.Equals(article));
}
196 
197 
// fix_pub::MergeNonPubmedPubIds(): ids are added one at a time to the source
// article and merged into the target. The PubMed id is expected to be left
// out, while the DOI and the "other" id are expected to show up in the target.
BOOST_AUTO_TEST_CASE(Test_MergeNonPubmedPubIds)
{
    static const TEntrezId PMID = ENTREZ_ID_CONST(2626);
    static const string DOI_ID = "2727";
    static const string TEST_DB = "Test DB";

    CCit_art expected_art;   // what the merge target should look like
    CCit_art merged_art;     // merge target
    CCit_art source_art;     // article the ids are taken from

    // Nothing to merge yet
    fix_pub::MergeNonPubmedPubIds(source_art, merged_art);
    BOOST_CHECK(expected_art.Equals(merged_art));

    // PMID will not be merged
    CRef<CArticleId> pubmed_id(new CArticleId);
    pubmed_id->SetPubmed().Set(PMID);
    source_art.SetIds().Set().push_back(pubmed_id);

    fix_pub::MergeNonPubmedPubIds(source_art, merged_art);
    BOOST_CHECK(expected_art.Equals(merged_art));

    // Doi ID should be merged
    CRef<CArticleId> doi_id(new CArticleId);
    doi_id->SetDoi().Set(DOI_ID);
    source_art.SetIds().Set().push_back(doi_id);

    fix_pub::MergeNonPubmedPubIds(source_art, merged_art);
    BOOST_CHECK(!expected_art.Equals(merged_art));

    expected_art.SetIds().Set().push_front(doi_id);
    BOOST_CHECK(expected_art.Equals(merged_art));

    // Other ID should be merged
    CRef<CArticleId> other_id(new CArticleId);
    other_id->SetOther().SetDb(TEST_DB);
    source_art.SetIds().Set().push_back(other_id);

    fix_pub::MergeNonPubmedPubIds(source_art, merged_art);
    BOOST_CHECK(!expected_art.Equals(merged_art));

    expected_art.SetIds().Set().push_front(other_id);
    BOOST_CHECK(expected_art.Equals(merged_art));
}
241 
242 
BOOST_AUTO_TEST_CASE(Test_MedlineToISO)243 BOOST_AUTO_TEST_CASE(Test_MedlineToISO)
244 {
245     CCit_art art,
246              expected_art;
247 
248     fix_pub::MedlineToISO(art);
249     BOOST_CHECK_EQUAL(expected_art.Equals(art), true);
250 
251     // ML list of authors
252     art.SetAuthors().SetNames().SetMl().push_back("Doe J");
253     art.SetAuthors().SetNames().SetMl().push_back("Author S");
254 
255     fix_pub::MedlineToISO(art);
256 
257     CRef<CAuthor> author(new CAuthor);
258     author->SetName().SetName().SetLast("Doe");
259     author->SetName().SetName().SetFirst("J");
260     author->SetName().SetName().SetInitials("J.");
261     expected_art.SetAuthors().SetNames().SetStd().push_back(author);
262 
263     author.Reset(new CAuthor);
264     author->SetName().SetName().SetLast("Author");
265     author->SetName().SetName().SetFirst("S");
266     author->SetName().SetName().SetInitials("S.");
267     expected_art.SetAuthors().SetNames().SetStd().push_back(author);
268 
269     BOOST_CHECK_EQUAL(expected_art.Equals(art), true);
270 
271 
272     // Std list of authors with ML format of authors' names
273     art.ResetAuthors();
274 
275     author.Reset(new CAuthor);
276     author->SetName().SetMl("Doe J");
277     art.SetAuthors().SetNames().SetStd().push_back(author);
278 
279     author.Reset(new CAuthor);
280     author->SetName().SetMl("Author S");
281     art.SetAuthors().SetNames().SetStd().push_back(author);
282 
283     fix_pub::MedlineToISO(art);
284 
285     BOOST_CHECK_EQUAL(expected_art.Equals(art), true);
286 
287     // Cit_art is from a journal
288     CRef<CTitle::C_E> title(new CTitle::C_E);
289     title->SetName("Nature");
290     art.SetFrom().SetJournal().SetTitle().Set().push_back(title);
291 
292     title.Reset(new CTitle::C_E);
293     title->SetIso_jta("Nature");
294     expected_art.SetFrom().SetJournal().SetTitle().Set().push_back(title);
295 
296     fix_pub::MedlineToISO(art);
297     //BOOST_CHECK_EQUAL(expected_art.Equals(art), true);
298 
299 
300     // MedlineToISO also removes the language if it is "Eng"
301     art.SetFrom().SetJournal().SetImp().SetLanguage("Eng");
302     fix_pub::MedlineToISO(art);
303 
304     //BOOST_CHECK_EQUAL(expected_art.Equals(art), true);
305 }
306 
307 
// fix_pub::SplitMedlineEntry(): a list holding one Medline pub is expected to
// turn into two pubs, the first of which is a bare PMID. The expected second
// pub (a cit-art) is built as well, but its comparison is disabled.
BOOST_AUTO_TEST_CASE(Test_SplitMedlineEntry)
{
    static const TEntrezId TEST_PMID = ENTREZ_ID_CONST(1);

    // Set medline
    CRef<CPub> medline_pub(new CPub);
    medline_pub->SetMedline().SetCit().SetAuthors().SetNames().SetMl().push_back("Doe J");

    CRef<CTitle::C_E> plain_title(new CTitle::C_E);
    plain_title->SetName("Nature");
    medline_pub->SetMedline().SetCit().SetFrom().SetJournal().SetTitle().Set().push_back(plain_title);
    medline_pub->SetMedline().SetPmid().Set(TEST_PMID);

    CPub_equiv::Tdata medlines;
    medlines.push_back(medline_pub);

    fix_pub::SplitMedlineEntry(medlines);

    // medlines should contain two items now
    BOOST_CHECK_EQUAL(medlines.size(), 2);
    if (medlines.size() != 2) {
        return;   // the element-wise checks below assume exactly two items
    }

    // first one is CPub->pmid
    auto it = medlines.begin();

    CRef<CPub> expected_pmid(new CPub);
    expected_pmid->SetPmid().Set(TEST_PMID);
    BOOST_CHECK((*it)->Equals(*expected_pmid));

    // second one is CPub->cit-art
    ++it;
    CRef<CPub> expected_article(new CPub);

    CRef<CTitle::C_E> iso_title(new CTitle::C_E);
    iso_title->SetIso_jta("Nature");
    expected_article->SetArticle().SetFrom().SetJournal().SetTitle().Set().push_back(iso_title);

    CRef<CAuthor> expected_author(new CAuthor);
    expected_author->SetName().SetName().SetLast("Doe");
    expected_author->SetName().SetName().SetFirst("J");
    expected_author->SetName().SetName().SetInitials("J.");
    expected_article->SetArticle().SetAuthors().SetNames().SetStd().push_back(expected_author);
    // Comparison currently disabled:
    //BOOST_CHECK((*it)->Equals(*expected_article));
}
352 
// fix_pub::MULooksLikeISSN(): table-driven check of strings that are expected
// to be accepted as ISSN-like and of strings that are expected to be rejected.
BOOST_AUTO_TEST_CASE(Test_MULooksLikeISSN)
{
    static const char* const kAccepted[] = {
        "1234-1234",
        "1234-123X"
    };

    static const char* const kRejected[] = {
        "X234-1234",
        "123-41234",
        "123341234"
    };

    for (const char* candidate : kAccepted) {
        BOOST_CHECK_MESSAGE(fix_pub::MULooksLikeISSN(candidate),
                            "expected to look like an ISSN: " << candidate);
    }

    for (const char* candidate : kRejected) {
        BOOST_CHECK_MESSAGE(!fix_pub::MULooksLikeISSN(candidate),
                            "expected not to look like an ISSN: " << candidate);
    }
}
362 
// fix_pub::MUIsJournalIndexed(): table-driven check of journal names that are
// expected to be reported as indexed and of names that are not.
BOOST_AUTO_TEST_CASE(Test_MUIsJournalIndexed)
{
    static const char* const kIndexed[] = {
        "Nature (Reviews Molecular Cell Biology)",
        "Molecular Cell",
        "Genome Biology."
    };

    static const char* const kNotIndexed[] = {
        "Journal", // Too many entries found
        "Fake journal",
        "Journal (which does not exist)"
    };

    for (const char* journal : kIndexed) {
        BOOST_CHECK_MESSAGE(fix_pub::MUIsJournalIndexed(journal),
                            "expected to be indexed: " << journal);
    }

    for (const char* journal : kNotIndexed) {
        BOOST_CHECK_MESSAGE(!fix_pub::MUIsJournalIndexed(journal),
                            "expected not to be indexed: " << journal);
    }
}
373 
// One expected diagnostic message. CheckPrintPubProblems() compares these
// fields with a logged message's GetCode(), GetSubCode(), GetSeverity() and
// GetText() respectively. Instances are written as brace-initialised table
// rows, so the order of the members matters.
struct STestErrorText
{
    fix_pub::EFixPubErrorCategory m_err_code;   // error category (message code)
    int m_err_subcode;                          // error sub-code within the category
    EDiagSev m_severity;                        // expected severity

    string m_text;                              // expected message text, compared verbatim
};
382 
CheckPrintPubProblems(const IMessageListener & log,const STestErrorText * expected)383 void CheckPrintPubProblems(const IMessageListener& log, const STestErrorText* expected)
384 {
385     size_t num_of_errors = log.Count();
386     for (size_t i = 0; i < num_of_errors; ++i) {
387         const IMessage& msg = log.GetMessage(i);
388 
389         BOOST_CHECK_EQUAL(msg.GetCode(), expected[i].m_err_code);
390         BOOST_CHECK_EQUAL(msg.GetSubCode(), expected[i].m_err_subcode);
391         BOOST_CHECK_EQUAL(msg.GetSeverity(), expected[i].m_severity);
392         BOOST_CHECK_EQUAL(msg.GetText(), expected[i].m_text);
393     }
394 }
395 
BOOST_AUTO_TEST_CASE(Test_PrintPub)396 BOOST_AUTO_TEST_CASE(Test_PrintPub)
397 {
398     CCit_art art;
399 
400     CMessageListener_Basic log;
401     fix_pub::PrintPub(art, false, false, 0, &log);
402 
403     static const STestErrorText expected_1[] =
404     {
405         { fix_pub::err_Print, fix_pub::err_Print_Failed, eDiag_Warning, "Authors NULL" },
406         { fix_pub::err_Reference, fix_pub::err_Reference_NoPmidJournalNotInPubMed, eDiag_Info, " |journal unknown|(0)|no volume number|no page number" }
407     };
408 
409     CheckPrintPubProblems(log, expected_1);
410 
411     log.Clear();
412     CRef<CAuthor> author(new CAuthor);
413     author->SetName().SetName().SetLast("Doe");
414     author->SetName().SetName().SetInitials("J.");
415     art.SetAuthors().SetNames().SetStd().push_back(author);
416 
417     fix_pub::PrintPub(art, false, false, 0, &log);
418 
419     static const STestErrorText expected_2[] =
420     {
421         { fix_pub::err_Reference, fix_pub::err_Reference_NoPmidJournalNotInPubMed, eDiag_Info, "Doe J.|journal unknown|(0)|no volume number|no page number" }
422     };
423 
424     CheckPrintPubProblems(log, expected_2);
425 
426     log.Clear();
427     art.SetAuthors().SetNames().SetStr().push_back("Doe J");
428     CRef<CTitle::C_E> title(new CTitle::C_E);
429     title->SetName("Molecular Cell");
430     art.SetFrom().SetJournal().SetTitle().Set().push_back(title);
431 
432     fix_pub::PrintPub(art, false, false, 0, &log);
433 
434     static const STestErrorText expected_3[] =
435     {
436         { fix_pub::err_Reference, fix_pub::err_Reference_PmidNotFound, eDiag_Warning, "Doe J |Molecular Cell|(0)|no volume number|no page number" }
437     };
438 
439     CheckPrintPubProblems(log, expected_3);
440 
441     log.Clear();
442     art.SetFrom().SetJournal().SetImp().SetDate().SetStd().SetYear(2010);
443     art.SetFrom().SetJournal().SetImp().SetVolume("1");
444     art.SetFrom().SetJournal().SetImp().SetPages("15");
445 
446     fix_pub::PrintPub(art, false, false, 0, &log);
447 
448     static const STestErrorText expected_4[] =
449     {
450         { fix_pub::err_Reference, fix_pub::err_Reference_PmidNotFound, eDiag_Warning, "Doe J |Molecular Cell|(2010)|1|15" }
451     };
452 
453     CheckPrintPubProblems(log, expected_4);
454 
455     log.Clear();
456     art.SetFrom().SetJournal().SetImp().SetPrepub(CImprint::ePrepub_in_press);
457     fix_pub::PrintPub(art, false, false, 0, &log);
458 
459     static const STestErrorText expected_5[] =
460     {
461         { fix_pub::err_Reference, fix_pub::err_Reference_OldInPress, eDiag_Warning, "encountered in-press article more than 2 years old: Doe J |Molecular Cell|(2010)|1|15" },
462         { fix_pub::err_Reference, fix_pub::err_Reference_PmidNotFoundInPress, eDiag_Warning, "Doe J |Molecular Cell|(2010)|1|15" }
463     };
464 
465     CheckPrintPubProblems(log, expected_5);
466 
467     log.Clear();
468 }
469 
// fix_pub::TenAuthorsCompare() on string-form author lists: identical lists,
// a new list with one extra name, two clearly different short lists, and
// finally lists longer than ten names, where the old article is expected to
// lose its authors and the new article is expected to end up with them.
BOOST_AUTO_TEST_CASE(Test_TenAuthorsCompare)
{
    // Appends the names to the article's string-form author list. The list is
    // fetched through the article for every insertion rather than cached.
    auto append_names = [](CCit_art& article, std::initializer_list<const char*> names) {
        for (const char* name : names) {
            article.SetAuthors().SetNames().SetStr().push_back(name);
        }
    };

    CCit_art art_new;
    CCit_art art_old;

    // Identical single-author lists
    append_names(art_new, { "Doe John" });
    art_old.Assign(art_new);
    BOOST_CHECK(fix_pub::TenAuthorsCompare(art_old, art_new));

    // One additional author in the new list
    append_names(art_new, { "First Author" });
    BOOST_CHECK(fix_pub::TenAuthorsCompare(art_old, art_new));

    // Lists that mostly differ
    append_names(art_new, { "Second Author", "Forth Author", "Fifth Author" });
    append_names(art_old, { "Sixth Author", "Seventh Author", "Eighth Author", "Ninth Author" });
    BOOST_CHECK(!fix_pub::TenAuthorsCompare(art_old, art_new));

    // make the list of authors > 10
    append_names(art_new, { "a b", "c d", "e f", "g h", "i j", "ii jj" });
    append_names(art_old, { "First Author", "a b", "c d", "e f", "g h", "i j", "ii jj" });

    CCit_art expected;
    expected.Assign(art_old);

    BOOST_CHECK(fix_pub::TenAuthorsCompare(art_old, art_new));
    BOOST_CHECK(!art_old.IsSetAuthors());
    BOOST_CHECK(art_new.GetAuthors().Equals(expected.GetAuthors()));
}
518 
// fix_pub::ExtractConsortiums(): with a single personal author the returned
// count is expected to be 1 and no consortium collected; after a consortium
// author is appended the count is expected to stay 1 while the consortium
// name is collected.
BOOST_AUTO_TEST_CASE(Test_ExtractConsortiums)
{
    CAuth_list_Base::C_Names::TStd authors;
    list<string> consortiums;

    // Personal author only
    CRef<CAuthor> person(new CAuthor);
    person->SetName().SetName().SetLast("Doe");
    person->SetName().SetName().SetInitials("J.");
    authors.push_back(person);

    size_t author_count = fix_pub::ExtractConsortiums(authors, consortiums);
    BOOST_CHECK_EQUAL(author_count, 1);
    BOOST_CHECK_EQUAL(consortiums.size(), 0);

    // Personal author followed by a consortium
    CRef<CAuthor> consortium(new CAuthor);
    consortium->SetName().SetConsortium("First consortium");
    authors.push_back(consortium);

    author_count = fix_pub::ExtractConsortiums(authors, consortiums);
    BOOST_CHECK_EQUAL(author_count, 1);
    BOOST_CHECK_EQUAL(consortiums.size(), 1);

    if (!consortiums.empty()) {
        BOOST_CHECK_EQUAL(consortiums.front(), "First consortium");
    }
}
546 
// fix_pub::GetFirstTenNames(): with one author the single last name is
// expected back; with twelve authors exactly ten names are expected, matching
// the last names of the first ten authors in order.
BOOST_AUTO_TEST_CASE(Test_GetFirstTenNames)
{
    static const size_t MAX_NUM_OF_AUTHORS = 10;

    CAuth_list_Base::C_Names::TStd authors;

    string last_name("Alsatname");
    string initials("A.");

    // Appends an author built from the current values of last_name/initials.
    auto append_current_author = [&]() {
        CRef<CAuthor> entry(new CAuthor);
        entry->SetName().SetName().SetLast(last_name);
        entry->SetName().SetName().SetInitials(initials);
        authors.push_back(entry);
    };

    // Single author
    append_current_author();

    list<CTempString> names;
    fix_pub::GetFirstTenNames(authors, names);

    BOOST_CHECK_EQUAL(names.size(), 1);
    if (!names.empty()) {
        BOOST_CHECK_EQUAL(names.front(), authors.front()->GetName().GetName().GetLast());
    }
    names.clear();

    // Eleven more authors; only the first letter of the name and of the
    // initials changes ('B', 'C', ...)
    for (size_t offset = 0; offset <= MAX_NUM_OF_AUTHORS; ++offset) {
        const char letter = static_cast<char>('B' + offset);
        last_name[0] = letter;
        initials[0] = letter;
        append_current_author();
    }

    fix_pub::GetFirstTenNames(authors, names);
    BOOST_CHECK_EQUAL(names.size(), MAX_NUM_OF_AUTHORS);
    if (names.size() != MAX_NUM_OF_AUTHORS) {
        return;   // the pairwise comparison below assumes exactly ten names
    }

    auto author_it = authors.begin();
    for (auto name_it = names.begin(); name_it != names.end(); ++name_it, ++author_it) {
        BOOST_CHECK_EQUAL(*name_it, (*author_it)->GetName().GetName().GetLast());
    }
}
593 
BOOST_AUTO_TEST_CASE(Test_TenAuthorsProcess)594 BOOST_AUTO_TEST_CASE(Test_TenAuthorsProcess)
595 {
596     // Complicated test checking a condition when PubMed data is obsolete (contains less authors, but should have more).
597     // This is a real publication with PMID=1302004
598     static const string GENBANK_AUTHORS[] = {
599         "Waterston", "R.",
600         "Martin", "C.",
601         "Craxton", "M.",
602         "Huynh", "C.",
603         "Coulson", "A.",
604         "Hillier", "L.",
605         "Durbin", "R.K.",
606         "Green", "P.",
607         "Shownkeen", "R.",
608         "Halloran", "N.",
609         "Hawkins", "T.",
610         "Wilson", "R.",
611         "Berks", "M.",
612         "Du", "Z.",
613         "Thomas", "K.",
614         "Thierry-Mieg", "J.",
615         "Sulston", "J."
616     };
617 
618     CCit_art art_new,
619         art_old;
620 
621     for (size_t i = 0; i < ArraySize(GENBANK_AUTHORS); i += 2) {
622         CRef<CAuthor> author(new CAuthor);
623         author->SetName().SetName().SetLast(GENBANK_AUTHORS[i]);
624         author->SetName().SetName().SetInitials(GENBANK_AUTHORS[i + 1]);
625         art_old.SetAuthors().SetNames().SetStd().push_back(author);
626     }
627 
628     static const string PUBMED_AUTHORS[] = {
629         "Waterston", "R.",
630         "Martin", "C.",
631         "Craxton", "M.",
632         "Huynh", "C.",
633         "Coulson", "A.",
634         "Hillier", "L.",
635         "Durbin", "R.K.",
636         "Green", "P.",
637         "Shownkeen", "R.",
638         "Halloran", "N.",
639         "et", "al"
640     };
641 
642     for (size_t i = 0; i < ArraySize(PUBMED_AUTHORS); i += 2) {
643         CRef<CAuthor> author(new CAuthor);
644         author->SetName().SetName().SetLast(PUBMED_AUTHORS[i]);
645         author->SetName().SetName().SetInitials(PUBMED_AUTHORS[i + 1]);
646         art_new.SetAuthors().SetNames().SetStd().push_back(author);
647     }
648 
649     CCit_art expected;
650     expected.Assign(art_old);
651 
652     BOOST_CHECK_EQUAL(fix_pub::TenAuthorsProcess(art_old, art_new, nullptr), true);
653     BOOST_CHECK_EQUAL(art_old.IsSetAuthors(), false);
654     BOOST_CHECK_EQUAL(art_new.GetAuthors().Equals(expected.GetAuthors()), true);
655 }
656 
// Smoke test for CPubFixing::FixPub(): a pub-equiv (PMID plus a fully
// populated journal article) is read from ASN.1 text and passed through
// FixPub(). The result is not inspected; the test only runs the
// deserialization and the fixing call.
BOOST_AUTO_TEST_CASE(Test_FixPub)
{
    static const char* TEST_PUB =
      "Pub ::= \
       equiv { \
         pmid 17659802, \
         article { \
           title { \
             name \"Genetic diversity and reassortments among Akabane virus field isolates.\" \
           }, \
           authors { \
             names std { \
               { \
                 name name { \
                   last \"Kobayashi\", \
                   initials \"T.\" \
                 } \
               }, \
               { \
                 name name { \
                   last \"Yanase\", \
                   initials \"T.\" \
                 } \
               }, \
               { \
                 name name { \
                 last \"Yamakawa\", \
                 initials \"M.\" \
                 } \
               }, \
               { \
                 name name { \
                   last \"Kato\", \
                   initials \"T.\" \
                 } \
               }, \
               { \
                 name name { \
                   last \"Yoshida\", \
                   initials \"K.\" \
                 } \
               }, \
               { \
                 name name { \
                   last \"Tsuda\", \
                   initials \"T.\" \
                 } \
               } \
             }, \
             affil str \"Division 1, Second Production Department, the Chemo - Sero - Therapeutic Research Institute, 1 - 6 - 1 Okubo, Kumamoto 860 - 8568, Japan.\" \
           }, \
           from journal { \
             title { \
               iso-jta \"Virus Res.\", \
               ml-jta \"Virus Res\", \
               issn \"0168-1702\", \
               name \"Virus research\" \
             }, \
             imp { \
               date std { \
                 year 2007, \
                 month 12 \
               }, \
               volume \"130\", \
               issue \"1-2\", \
               pages \"162-171\", \
               language \"ENG\", \
               pubstatus ppublish, \
               history { \
                 { \
                   pubstatus received, \
                   date std { \
                     year 2007, \
                     month 1, \
                     day 15 \
                   } \
                 }, \
                 { \
                   pubstatus revised, \
                   date std { \
                     year 2007, \
                     month 6, \
                     day 5 \
                   } \
                 }, \
                 { \
                   pubstatus accepted, \
                   date std { \
                     year 2007, \
                     month 6, \
                     day 11 \
                   } \
                 }, \
                 { \
                   pubstatus aheadofprint, \
                   date std { \
                     year 2007, \
                     month 7, \
                     day 30 \
                   } \
                 }, \
                 { \
                   pubstatus pubmed, \
                   date std { \
                     year 2007, \
                     month 7, \
                     day 31, \
                     hour 9, \
                     minute 0 \
                   } \
                 }, \
                 { \
                   pubstatus medline, \
                   date std { \
                     year 2007, \
                     month 7, \
                     day 31, \
                     hour 9, \
                     minute 0 \
                   } \
                 } \
               } \
             } \
           }, \
           ids { \
             pii \"S0168-1702(07)00221-3\", \
             doi \"10.1016/j.virusres.2007.06.007\", \
             pubmed 17659802 \
           } \
         } \
       }";

    // Deserialize the ASN.1 text into a CPub object
    CNcbiIstrstream asn_stream(TEST_PUB);
    CPub parsed_pub;
    asn_stream >> MSerial_AsnText >> parsed_pub;

    CPubFixing fixer(true, true, true, nullptr);
    fixer.FixPub(parsed_pub);

    // Debugging aid: dump the processed publication
    // cout << MSerial_AsnText << parsed_pub;

    // There are no checks of the result yet; they are to be added later.
}
802 
803 
// Smoke test for CPubFixing::FixPub() on a pub-equiv whose article lists
// seventeen authors (PMID 1302004). The publication is read from ASN.1 text
// and passed through FixPub(); the result is not inspected.
BOOST_AUTO_TEST_CASE(Test_FixPubPreserveOriginalListOfAuthors)
{
    static const char* TEST_PUB =
        "Pub ::= \
         equiv { \
           pmid 1302004, \
           article { \
             title { name \"A survey of expressed genes in Caenorhabditis elegans\" }, \
             authors { \
               names std { \
                 { \
                   name name { \
                     last \"Waterston\", \
                     initials \"R.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Martin\", \
                     initials \"C.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Craxton\", \
                     initials \"M.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Huynh\", \
                     initials \"C.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Coulson\", \
                     initials \"A.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Hillier\", \
                     initials \"L.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Durbin\", \
                     initials \"R.K.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Green\", \
                     initials \"P.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Shownkeen\", \
                     initials \"R.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Halloran\", \
                     initials \"N.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Hawkins\", \
                     initials \"T.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Wilson\", \
                     initials \"R.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Berks\", \
                     initials \"M.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Du\", \
                     initials \"Z.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Thomas\", \
                     initials \"K.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Thierry-Mieg\", \
                     initials \"J.\" \
                   } \
                 }, \
                 { \
                   name name { \
                     last \"Sulston\", \
                     initials \"J.\" \
                   } \
                 } \
               } \
             }, \
             from journal { \
               title { iso-jta \"Nat. Genet.\" }, \
               imp { \
                 date std { year 1992 }, \
                 volume \"1\", \
                 pages \"114-123\" \
               } \
             } \
           } \
         }";

    // Deserialize the ASN.1 text into a CPub object
    CNcbiIstrstream asn_stream(TEST_PUB);
    CPub parsed_pub;
    asn_stream >> MSerial_AsnText >> parsed_pub;

    CPubFixing fixer(true, true, true, nullptr);
    fixer.FixPub(parsed_pub);

    // Debugging aid: dump the processed publication
    // cout << MSerial_AsnText << parsed_pub;

    // There are no checks of the result yet; they are to be added later.
}
942