1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "CloningUtilTasks.h"
23 
24 #include <U2Core/AddDocumentTask.h>
25 #include <U2Core/AppContext.h>
26 #include <U2Core/BaseDocumentFormats.h>
27 #include <U2Core/Counter.h>
28 #include <U2Core/DNASequenceObject.h>
29 #include <U2Core/DNATranslation.h>
30 #include <U2Core/DocumentModel.h>
31 #include <U2Core/FormatUtils.h>
32 #include <U2Core/GObjectRelationRoles.h>
33 #include <U2Core/GObjectUtils.h>
34 #include <U2Core/IOAdapter.h>
35 #include <U2Core/L10n.h>
36 #include <U2Core/MultiTask.h>
37 #include <U2Core/ProjectModel.h>
38 #include <U2Core/SaveDocumentTask.h>
39 #include <U2Core/U1AnnotationUtils.h>
40 #include <U2Core/U2AlphabetUtils.h>
41 #include <U2Core/U2SafePoints.h>
42 #include <U2Core/U2SequenceUtils.h>
43 #include <U2Core/Version.h>
44 
45 #include <U2Gui/OpenViewTask.h>
46 
47 #include "FindEnzymesTask.h"
48 
49 namespace U2 {
50 
operator <(const GenomicPosition & left,const GenomicPosition & right)51 bool operator<(const GenomicPosition &left, const GenomicPosition &right) {
52     return left.coord < right.coord;
53 }
54 
DigestSequenceTask(U2SequenceObject * so,AnnotationTableObject * source,AnnotationTableObject * dest,const DigestSequenceTaskConfig & config)55 DigestSequenceTask::DigestSequenceTask(U2SequenceObject *so, AnnotationTableObject *source, AnnotationTableObject *dest, const DigestSequenceTaskConfig &config)
56     : Task("DigestSequenceTask", TaskFlags_FOSCOE | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled),
57       sourceObj(source), destObj(dest), dnaObj(so), cfg(config) {
58     GCOUNTER(cvar, "DigestSequenceIntoFragments");
59 
60     SAFE_POINT_EXT(sourceObj != nullptr, setError(L10N::nullPointerError("source object")), );
61     SAFE_POINT_EXT(destObj != nullptr, setError(L10N::nullPointerError("destination object")), );
62     SAFE_POINT_EXT(dnaObj != nullptr, setError(L10N::nullPointerError("sequence object")), );
63     isCircular = cfg.forceCircular;
64 }
65 
prepare()66 void DigestSequenceTask::prepare() {
67     seqRange = U2Region(0, dnaObj->getSequenceLength());
68 
69     if (cfg.searchForRestrictionSites) {
70         assert(sourceObj == destObj);
71         FindEnzymesTaskConfig feCfg;
72         feCfg.circular = isCircular;
73         feCfg.groupName = ANNOTATION_GROUP_ENZYME;
74         Task *t = new FindEnzymesToAnnotationsTask(sourceObj, dnaObj->getSequenceRef(), cfg.enzymeData, feCfg);
75         addSubTask(t);
76     }
77 }
78 
createFragment(int pos1,const DNAFragmentTerm & leftTerm,int pos2,const DNAFragmentTerm & rightTerm)79 SharedAnnotationData DigestSequenceTask::createFragment(int pos1, const DNAFragmentTerm &leftTerm, int pos2, const DNAFragmentTerm &rightTerm) {
80     SharedAnnotationData ad(new AnnotationData);
81     if (pos1 < pos2) {
82         U2Region reg(pos1, pos2 - pos1);
83         assert(reg.length > 0);
84 
85         ad->location->regions.append(reg);
86     } else {
87         U2Region reg1(pos1, seqRange.endPos() - pos1);
88         if (pos2 < 0) {
89             reg1.length += pos2;
90             pos2 = 0;
91         }
92         U2Region reg2(seqRange.startPos, pos2 - seqRange.startPos);
93         assert(reg1.length >= 0 && reg2.length >= 0);
94         assert(!reg1.isEmpty() || !reg2.isEmpty());
95 
96         if (!reg1.isEmpty()) {
97             ad->location->regions.append(reg1);
98         }
99         if (!reg2.isEmpty()) {
100             ad->location->regions.append(reg2);
101         }
102     }
103 
104     ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_TERM, leftTerm.enzymeId));
105     ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_TERM, rightTerm.enzymeId));
106 
107     ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_OVERHANG, leftTerm.overhang));
108     ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_OVERHANG, rightTerm.overhang));
109 
110     QString leftOverhangStrand = leftTerm.isDirect ? OVERHANG_STRAND_DIRECT : OVERHANG_STRAND_COMPL;
111     ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_STRAND, leftOverhangStrand));
112     QString rightOverhangStrand = rightTerm.isDirect ? OVERHANG_STRAND_DIRECT : OVERHANG_STRAND_COMPL;
113     ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_STRAND, rightOverhangStrand));
114 
115     QString leftOverhangType = leftTerm.enzymeId.isEmpty() || leftTerm.overhang.isEmpty() ? OVERHANG_TYPE_BLUNT : OVERHANG_TYPE_STICKY;
116     ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_TYPE, leftOverhangType));
117     QString rightOverhangType = rightTerm.enzymeId.isEmpty() || rightTerm.overhang.isEmpty() ? OVERHANG_TYPE_BLUNT : OVERHANG_TYPE_STICKY;
118     ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_TYPE, rightOverhangType));
119 
120     ad->qualifiers.append(U2Qualifier(QUALIFIER_SOURCE, dnaObj->getGObjectName()));
121 
122     U1AnnotationUtils::addDescriptionQualifier(ad, cfg.annDescription);
123 
124     return ad;
125 }
126 
report()127 Task::ReportResult DigestSequenceTask::report() {
128     checkForConservedAnnotations();
129 
130     if (hasError() || isCanceled()) {
131         return ReportResult_Finished;
132     }
133 
134     saveResults();
135 
136     return ReportResult_Finished;
137 }
138 
findCutSites()139 void DigestSequenceTask::findCutSites() {
140     foreach (const SEnzymeData &enzyme, cfg.enzymeData) {
141         if (enzyme->cutDirect == ENZYME_CUT_UNKNOWN || enzyme->cutComplement == ENZYME_CUT_UNKNOWN) {
142             setError(tr("Can't use restriction site %1 for digestion,  cleavage site is unknown ").arg(enzyme->id));
143             return;
144         }
145 
146         QList<Annotation *> anns;
147         foreach (Annotation *a, sourceObj->getAnnotations()) {
148             if (a->getName() == enzyme->id) {
149                 anns.append(a);
150             }
151         }
152 
153         if (anns.isEmpty()) {
154             stateInfo.setError(QString("Restriction site %1 is not found").arg(enzyme->id));
155             continue;
156         }
157 
158         foreach (Annotation *a, anns) {
159             const QVector<U2Region> &location = a->getRegions();
160             int cutPos = location.first().startPos;
161             cutSiteMap.insertMulti(GenomicPosition(cutPos, a->getStrand().isDirect()), enzyme);
162         }
163     }
164 }
165 
166 // TODO: add this function
167 /*
168 static void prepareLeftDnaTerm(TermData& leftTerm,
169                                const GenomicPosition& enzymePos,
170                                const EnzymeData& leftCutter)
171 {
172     int lcLen = leftCutter->seq.length();
173     bool lcStrandDirect = enzymePos.directStrand;
174     int lcDirectStrandCut = lcStrandDirect ? left->cutDirect : lcLen - leftCutter->cutDirect;
175     int lcComplementStrandCut = lcStrandDirect ? lcLen - leftCutter->cutComplement : leftCutter->cutComplement;
176     leftTerm.cutPos = enzymePos.coord + qMax(lcDirectStrandCut, lcComplementStrandCut);
177     leftTerm.overhangStart = enzymePos.coord + qMin(lcDirectStrandCut, lcComplementStrandCut);
178     leftTerm.isDirect = lcStrandDirect ? lcDirectStrandCut < lcComplementStrandCut :
179         lcDirectStrandCut > lcComplementStrandCut;
180 
181 }
182 */
183 
compareAnnotationsbyLength(const SharedAnnotationData & a1,const SharedAnnotationData & a2)184 bool compareAnnotationsbyLength(const SharedAnnotationData &a1, const SharedAnnotationData &a2) {
185     int length1 = 0;
186     foreach (const U2Region &reg, a1->getRegions()) {
187         length1 += reg.length;
188     }
189 
190     int length2 = 0;
191     foreach (const U2Region &reg, a2->getRegions()) {
192         length2 += reg.length;
193     }
194     return length1 > length2;
195 }
196 
run()197 void DigestSequenceTask::run() {
198     CHECK_OP(stateInfo, );
199 
200     findCutSites();
201 
202     CHECK(!cutSiteMap.isEmpty(), );
203 
204     QMap<GenomicPosition, SEnzymeData>::const_iterator prev = cutSiteMap.constBegin(), current = cutSiteMap.constBegin();
205     int count = 2;
206     qint64 seqLen = dnaObj->getSequenceLength();
207 
208     while ((++current) != cutSiteMap.constEnd()) {
209         int pos1 = prev.key().coord;
210         int pos2 = current.key().coord;
211         const SEnzymeData &enzyme1 = prev.value();
212         const SEnzymeData &enzyme2 = current.value();
213         int len1 = enzyme1->seq.length();
214         int len2 = enzyme2->seq.length();
215 
216         {
217             U2Region region1(pos1, len1);
218             U2Region region2(pos2, len2);
219 
220             if (region1.intersects(region2)) {
221                 setError(tr("Unable to digest into fragments: intersecting restriction sites %1 (%2..%3) and %4 (%5..%6)")
222                              .arg(enzyme1->id)
223                              .arg(region1.startPos)
224                              .arg(region1.endPos())
225                              .arg(enzyme2->id)
226                              .arg(region2.startPos)
227                              .arg(region2.endPos()));
228                 return;
229             }
230         }
231 
232         DNAFragmentTerm leftTerm;
233         bool leftStrandDirect = prev.key().directStrand;
234         int leftCutDirect = leftStrandDirect ? enzyme1->cutDirect : len1 - enzyme1->cutDirect;
235         int leftCutCompl = leftStrandDirect ? len1 - enzyme1->cutComplement : enzyme1->cutComplement;
236         int leftCutPos = correctPos(pos1 + qMax(leftCutDirect, leftCutCompl));
237         int leftOverhangStart = correctPos(pos1 + qMin(leftCutDirect, leftCutCompl));
238         leftTerm.overhang = getOverhang(U2Region(leftOverhangStart, leftCutPos - leftOverhangStart));
239         leftTerm.enzymeId = enzyme1->id.toLatin1();
240         leftTerm.isDirect = leftStrandDirect ? leftCutDirect < leftCutCompl : leftCutDirect > leftCutCompl;
241 
242         DNAFragmentTerm rightTerm;
243         bool rightStrandDirect = current.key().directStrand;
244         int rightCutDirect = rightStrandDirect ? enzyme2->cutDirect : len2 - enzyme2->cutDirect;
245         int rightCutCompl = rightStrandDirect ? len2 - enzyme2->cutComplement : enzyme2->cutComplement;
246         qint64 rightCutPos = correctPos(pos2 + qMin(rightCutDirect, rightCutCompl));
247         qint64 rightOverhangStart = correctPos(pos2 + qMax(rightCutDirect, rightCutCompl));
248         rightTerm.overhang = getOverhang(U2Region(rightCutPos, rightOverhangStart - rightCutPos));
249         rightTerm.enzymeId = enzyme2->id.toLatin1();
250         rightTerm.isDirect = rightStrandDirect ? rightCutDirect > rightCutCompl : rightCutDirect < rightCutCompl;
251         if (rightOverhangStart > seqLen) {
252             qint64 leftCutPosWithOverhang = rightOverhangStart - seqLen;
253             rightTerm.overhang += getOverhang(U2Region(0, leftCutPosWithOverhang));
254         }
255         SharedAnnotationData ad = createFragment(leftCutPos, leftTerm, rightCutPos, rightTerm);
256         results.append(ad);
257         ++count;
258         ++prev;
259     }
260 
261     QMap<GenomicPosition, SEnzymeData>::const_iterator first = cutSiteMap.constBegin();
262 
263     const SEnzymeData &firstCutter = first.value();
264     int fcLen = firstCutter->seq.length();
265     bool fcStrandDirect = first.key().directStrand;
266     int fcDirectStrandCut = fcStrandDirect ? firstCutter->cutDirect : fcLen - firstCutter->cutDirect;
267     int fcComplementStrandCut = fcStrandDirect ? fcLen - firstCutter->cutComplement : firstCutter->cutComplement;
268     int firstCutPos = correctPos(first.key().coord + qMin(fcDirectStrandCut, fcComplementStrandCut));
269     int rightOverhangStart = correctPos(first.key().coord + qMax(fcDirectStrandCut, fcComplementStrandCut));
270     bool rightOverhangIsDirect = fcStrandDirect ? fcDirectStrandCut > fcComplementStrandCut : fcDirectStrandCut < fcComplementStrandCut;
271     QByteArray firstRightOverhang = getOverhang(U2Region(firstCutPos, rightOverhangStart - firstCutPos));
272 
273     const SEnzymeData &lastCutter = prev.value();
274     int lcLen = lastCutter->seq.length();
275     bool lcStrandDirect = prev.key().directStrand;
276     int lcDirectStrandCut = lcStrandDirect ? lastCutter->cutDirect : lcLen - lastCutter->cutDirect;
277     int lcComplementStrandCut = lcStrandDirect ? lcLen - lastCutter->cutComplement : lastCutter->cutComplement;
278     int lastCutPos = correctPos(prev.key().coord + qMax(lcDirectStrandCut, lcComplementStrandCut));
279     int leftOverhangStart = correctPos(prev.key().coord + qMin(lcDirectStrandCut, lcComplementStrandCut));
280     bool leftOverhangIsDirect = lcStrandDirect ? lcDirectStrandCut < lcComplementStrandCut : lcDirectStrandCut > lcComplementStrandCut;
281 
282     if (lastCutPos > seqLen) {
283         // last restriction site is situated between sequence start and end
284         assert(isCircular);
285         int leftCutPos = lastCutPos - seqLen;
286         QByteArray leftOverhang = getOverhang(U2Region(leftOverhangStart, seqLen - leftOverhangStart)) + getOverhang(U2Region(0, leftCutPos));
287         QByteArray rightOverhang = first == prev ? leftOverhang : firstRightOverhang;
288         SharedAnnotationData ad1 = createFragment(leftCutPos, DNAFragmentTerm(lastCutter->id, leftOverhang, leftOverhangIsDirect), firstCutPos, DNAFragmentTerm(firstCutter->id, rightOverhang, rightOverhangIsDirect));
289         results.append(ad1);
290     } else {
291         QByteArray lastLeftOverhang = getOverhang(U2Region(leftOverhangStart, lastCutPos - leftOverhangStart));
292         if (isCircular) {
293             SharedAnnotationData ad = createFragment(lastCutPos, DNAFragmentTerm(lastCutter->id, lastLeftOverhang, leftOverhangIsDirect), firstCutPos, DNAFragmentTerm(firstCutter->id, firstRightOverhang, rightOverhangIsDirect));
294 
295             results.append(ad);
296         } else {
297             if (firstCutPos != 0) {
298                 SharedAnnotationData ad1 = createFragment(seqRange.startPos, DNAFragmentTerm(), firstCutPos, DNAFragmentTerm(firstCutter->id, firstRightOverhang, rightOverhangIsDirect));
299                 results.append(ad1);
300             }
301             if (lastCutPos != dnaObj->getSequenceLength()) {
302                 SharedAnnotationData ad2 = createFragment(lastCutPos, DNAFragmentTerm(lastCutter->id, lastLeftOverhang, leftOverhangIsDirect), seqRange.endPos(), DNAFragmentTerm());
303                 results.append(ad2);
304             }
305         }
306     }
307     std::sort(results.begin(), results.end(), compareAnnotationsbyLength);
308 
309     for (int fragmentCounter = 0; fragmentCounter < results.size(); fragmentCounter++) {
310         results[fragmentCounter]->name = QString("Fragment %1").arg(fragmentCounter + 1);
311     }
312 }
313 
saveResults()314 void DigestSequenceTask::saveResults() {
315     destObj->addAnnotations(results, ANNOTATION_GROUP_FRAGMENTS);
316 }
317 
generateReport() const318 QString DigestSequenceTask::generateReport() const {
319     QString res;
320 
321     if (hasError()) {
322         return res;
323     }
324 
325     QString topology = dnaObj->isCircular() ? tr("circular") : tr("linear");
326     res += tr("<h3><br>Digest into fragments %1 (%2)</h3>").arg(dnaObj->getDocument()->getName()).arg(topology);
327     res += tr("<br>Generated %1 fragments.").arg(results.count());
328     int counter = 1;
329     foreach (const SharedAnnotationData &sdata, results) {
330         const int startPos = sdata->location->regions.first().startPos + 1;
331         const int endPos = sdata->location->regions.last().endPos();
332         int len = 0;
333         foreach (const U2Region &r, sdata->location->regions) {
334             len += r.endPos() - r.startPos;
335         }
336         res += tr("<br><br>&nbsp;&nbsp;&nbsp;&nbsp;%1:&nbsp;&nbsp;&nbsp;&nbsp;From %3 (%2) To %5 (%4) - %6 bp ").arg(counter).arg(startPos).arg(sdata->findFirstQualifierValue(QUALIFIER_LEFT_TERM)).arg(endPos).arg(sdata->findFirstQualifierValue(QUALIFIER_RIGHT_TERM)).arg(len);
337         ++counter;
338     }
339 
340     return res;
341 }
342 
checkForConservedAnnotations()343 void DigestSequenceTask::checkForConservedAnnotations() {
344     QMap<QString, U2Region>::const_iterator it = cfg.conservedRegions.constBegin();
345     for (; it != cfg.conservedRegions.constEnd(); ++it) {
346         bool found = false;
347         U2Region annRegion = it.value();
348         foreach (const SharedAnnotationData &data, results) {
349             const U2Region resRegion = data->location->regions.first();
350             if (resRegion.contains(annRegion)) {
351                 found = true;
352                 break;
353             }
354         }
355         if (!found) {
356             QString locationStr = QString("%1..%2").arg(annRegion.startPos + 1).arg(annRegion.endPos());
357             setError(tr("Conserved annotation %1 (%2) is disrupted by the digestion. Try changing the restriction sites.")
358                          .arg(it.key())
359                          .arg(locationStr));
360             return;
361         }
362     }
363 }
364 
correctPos(const qint64 pos) const365 qint64 DigestSequenceTask::correctPos(const qint64 pos) const {
366     qint64 res = pos;
367     if (!isCircular) {
368         res = qBound<qint64>(0, pos, dnaObj->getSequenceLength());
369     }
370     return res;
371 }
372 
getOverhang(const U2Region & region) const373 QByteArray DigestSequenceTask::getOverhang(const U2Region &region) const {
374     QByteArray result;
375     if (region.startPos < 0 && isCircular) {
376         result = dnaObj->getSequenceData(U2Region(dnaObj->getSequenceLength() + region.startPos, qAbs(region.startPos)));
377         result += dnaObj->getSequenceData(U2Region(0, region.length + region.startPos));
378     } else {
379         result = dnaObj->getSequenceData(region);
380     }
381 
382     return result;
383 }
384 
385 //////////////////////////////////////////////////////////////////////////
386 
LigateFragmentsTask(const QList<DNAFragment> & fragments,const LigateFragmentsTaskConfig & config)387 LigateFragmentsTask::LigateFragmentsTask(const QList<DNAFragment> &fragments, const LigateFragmentsTaskConfig &config)
388     : Task("LigateFragmentsTask", TaskFlags_NR_FOSCOE), fragmentList(fragments), cfg(config),
389       resultDoc(nullptr), resultAlphabet(nullptr) {
390     GCOUNTER(cvar, "LigateFragments");
391 }
392 
processOverhangs(const DNAFragment & prevFragment,const DNAFragment & curFragment,QByteArray & overhangAddition)393 void LigateFragmentsTask::processOverhangs(const DNAFragment &prevFragment, const DNAFragment &curFragment, QByteArray &overhangAddition) {
394     const DNAFragmentTerm &prevTerm = prevFragment.getRightTerminus();
395     const DNAFragmentTerm &curTerm = curFragment.getLeftTerminus();
396 
397     if (prevTerm.type != curTerm.type) {
398         stateInfo.setError(tr("Fragments %1 and  %2 are inconsistent. Blunt and sticky ends incompatibility")
399                                .arg(prevFragment.getName())
400                                .arg(curFragment.getName()));
401         return;
402     }
403 
404     QByteArray prevOverhang = prevFragment.getRightTerminus().overhang;
405     QByteArray curOverhang = curFragment.getLeftTerminus().overhang;
406 
407     if (prevTerm.type == OVERHANG_TYPE_STICKY) {
408         if (!overhangsAreConsistent(prevFragment.getRightTerminus(), curFragment.getLeftTerminus())) {
409             stateInfo.setError(tr("Right overhang from %1 and left overhang from %2 are inconsistent.")
410                                    .arg(prevFragment.getName())
411                                    .arg(curFragment.getName()));
412             return;
413         } else {
414             overhangAddition += curOverhang;
415         }
416     } else if (prevTerm.type == OVERHANG_TYPE_BLUNT) {
417         overhangAddition += prevOverhang + curOverhang;
418     } else {
419         assert(0);
420     }
421 }
422 
overhangsAreConsistent(const DNAFragmentTerm & curTerm,const DNAFragmentTerm & prevTerm)423 bool LigateFragmentsTask::overhangsAreConsistent(const DNAFragmentTerm &curTerm, const DNAFragmentTerm &prevTerm) {
424     QByteArray curOverhang = curTerm.overhang;
425     QByteArray prevOverhang = prevTerm.overhang;
426 
427     bool curStrand = curTerm.isDirect;
428     bool prevStrand = prevTerm.isDirect;
429     if (curStrand == prevStrand) {
430         return false;
431     }
432 
433     return curOverhang == prevOverhang;
434 }
435 
prepare()436 void LigateFragmentsTask::prepare() {
437     QByteArray resultSeq;
438     QVector<U2Region> fragmentRegions;
439 
440     DNAFragment prevFragment;
441     assert(prevFragment.isEmpty());
442 
443     if (!cfg.makeCircular && cfg.checkOverhangs) {
444         const DNAFragment &first = fragmentList.first();
445         QByteArray leftOverhangAddition = first.getLeftTerminus().overhang;
446         resultSeq.append(leftOverhangAddition);
447     }
448 
449     foreach (const DNAFragment &dnaFragment, fragmentList) {
450         QVector<U2Region> location = dnaFragment.getFragmentRegions();
451         assert(location.size() > 0);
452 
453         // check alphabet consistency
454         const DNAAlphabet *fragmentAlphabet = dnaFragment.getAlphabet();
455         if (resultAlphabet == nullptr) {
456             resultAlphabet = fragmentAlphabet;
457         } else if (resultAlphabet != fragmentAlphabet) {
458             if (fragmentAlphabet == nullptr) {
459                 stateInfo.setError(tr("Unknown DNA alphabet in fragment %1 of %2")
460                                        .arg(dnaFragment.getName())
461                                        .arg(dnaFragment.getSequenceName()));
462                 return;
463             }
464             resultAlphabet = U2AlphabetUtils::deriveCommonAlphabet(resultAlphabet, fragmentAlphabet);
465         }
466 
467         // check if overhangs are compatible
468         QByteArray overhangAddition;
469         if (cfg.checkOverhangs) {
470             if (!prevFragment.isEmpty()) {
471                 processOverhangs(prevFragment, dnaFragment, overhangAddition);
472                 if (stateInfo.hasError()) {
473                     return;
474                 }
475             }
476             prevFragment = dnaFragment;
477         }
478 
479         // handle fragment annotations
480         int resultLen = resultSeq.length() + overhangAddition.length();
481         foreach (AnnotationTableObject *aObj, dnaFragment.getRelatedAnnotations()) {
482             QList<SharedAnnotationData> toSave = cloneAnnotationsInFragmentRegion(dnaFragment, aObj, resultLen);
483             annotations.append(toSave);
484         }
485 
486         if (cfg.annotateFragments) {
487             SharedAnnotationData a = createFragmentAnnotation(dnaFragment, resultLen);
488             annotations.append(a);
489         }
490 
491         resultSeq.append(overhangAddition);
492         resultSeq.append(dnaFragment.getSequence(stateInfo));
493         CHECK_OP(stateInfo, );
494     }
495 
496     if (cfg.checkOverhangs) {
497         if (cfg.makeCircular) {
498             const DNAFragment &first = fragmentList.first();
499             const DNAFragment &last = fragmentList.last();
500             QByteArray overhangAddition;
501             processOverhangs(last, first, overhangAddition);
502             if (stateInfo.hasError()) {
503                 return;
504             }
505             resultSeq.append(overhangAddition);
506         } else {
507             const DNAFragment &last = fragmentList.last();
508             QByteArray rightOverhangAddition = last.getRightTerminus().overhang;
509             resultSeq.append(rightOverhangAddition);
510         }
511     }
512 
513     // create comment
514     SharedAnnotationData sourceAnnot = createSourceAnnotation(resultSeq.length());
515     annotations.append(sourceAnnot);
516 
517     createDocument(resultSeq, annotations);
518 
519     if (!cfg.addDocToProject) {
520         return;
521     }
522 
523     QList<Task *> tasks;
524     tasks.append(new AddDocumentTask(resultDoc));
525 
526     if (cfg.openView) {
527         tasks.append(new OpenViewTask(resultDoc));
528     }
529     if (cfg.saveDoc) {
530         tasks.append(new SaveDocumentTask(resultDoc));
531     }
532 
533     Task *multiTask = new MultiTask(tr("Add constructed molecule"), tasks);
534     addSubTask(multiTask);
535 }
536 
createSourceAnnotation(int regLen)537 SharedAnnotationData LigateFragmentsTask::createSourceAnnotation(int regLen) {
538     Version v = Version::appVersion();
539     SharedAnnotationData d(new AnnotationData);
540     d->name = "source";
541     d->location->regions << U2Region(0, regLen);
542     d->qualifiers.append(U2Qualifier("comment", QString("Molecule is created with Unipro UGENE v%1.%2").arg(v.major).arg(v.minor)));
543     return d;
544 }
545 
createFragmentAnnotation(const DNAFragment & fragment,int startPos)546 SharedAnnotationData LigateFragmentsTask::createFragmentAnnotation(const DNAFragment &fragment, int startPos) {
547     SharedAnnotationData d(new AnnotationData);
548     d->name = QString("%1 %2").arg(fragment.getSequenceName()).arg(fragment.getName());
549     d->location->regions << U2Region(startPos, fragment.getLength());
550     d->qualifiers.append(U2Qualifier("source_doc", fragment.getSequenceDocName()));
551 
552     return d;
553 }
554 
cloneAnnotationsInRegion(const U2Region & fragmentRegion,AnnotationTableObject * source,int globalOffset)555 QList<SharedAnnotationData> LigateFragmentsTask::cloneAnnotationsInRegion(const U2Region &fragmentRegion, AnnotationTableObject *source, int globalOffset) {
556     QList<SharedAnnotationData> results;
557     // TODO: allow to cut annotations
558     // TODO: consider optimizing the code below using AnnotationTableObject::getAnnotationsByRegion()
559     foreach (Annotation *a, source->getAnnotations()) {
560         bool ok = true;
561         const QVector<U2Region> &location = a->getRegions();
562         foreach (const U2Region &region, location) {
563             if (!fragmentRegion.contains(region) || fragmentRegion == region) {
564                 ok = false;
565                 break;
566             }
567         }
568         if (ok) {
569             int newPos = globalOffset + location.first().startPos - fragmentRegion.startPos;
570             SharedAnnotationData cloned(new AnnotationData(*a->getData()));
571             QVector<U2Region> newLocation;
572             foreach (const U2Region &region, a->getRegions()) {
573                 U2Region newRegion(region);
574                 newRegion.startPos = newPos;
575                 newLocation.append(newRegion);
576             }
577             cloned->location->regions = newLocation;
578             results.append(cloned);
579         }
580     }
581 
582     return results;
583 }
584 
fragmentContainsRegion(const DNAFragment & fragment,const U2Region region)585 static bool fragmentContainsRegion(const DNAFragment &fragment, const U2Region region) {
586     QVector<U2Region> fragmentRegions = fragment.getFragmentRegions();
587 
588     bool result = false;
589     foreach (const U2Region &fR, fragmentRegions) {
590         if (fR.contains(region)) {
591             result = true;
592             break;
593         }
594     }
595 
596     return result;
597 }
598 
getRelativeStartPos(const DNAFragment & fragment,const U2Region region)599 static int getRelativeStartPos(const DNAFragment &fragment, const U2Region region) {
600     QVector<U2Region> fragmentRegions = fragment.getFragmentRegions();
601 
602     int offset = 0;
603     foreach (const U2Region &fR, fragmentRegions) {
604         if (fR.contains(region)) {
605             return offset + region.startPos - fR.startPos;
606         }
607         offset += fR.length;
608     }
609 
610     // the fragment doesn't contain the region
611     return -1;
612 }
613 
cloneAnnotationsInFragmentRegion(const DNAFragment & fragment,AnnotationTableObject * source,int globalOffset)614 QList<SharedAnnotationData> LigateFragmentsTask::cloneAnnotationsInFragmentRegion(const DNAFragment &fragment,
615                                                                                   AnnotationTableObject *source,
616                                                                                   int globalOffset) {
617     QList<SharedAnnotationData> results;
618 
619     // TODO: allow to remove annotations
620 
621     foreach (Annotation *a, source->getAnnotations()) {
622         QVector<U2Region> location = a->getRegions();
623         if (a->getName().startsWith("Fragment")) {
624             continue;
625         }
626 
627         bool ok = true;
628         foreach (const U2Region &r, location) {
629             // sneaky annotations shall not pass!
630             if (!fragmentContainsRegion(fragment, r)) {
631                 ok = false;
632                 break;
633             }
634         }
635 
636         if (ok) {
637             SharedAnnotationData cloned(new AnnotationData(*a->getData()));
638             QVector<U2Region> newLocation;
639             foreach (const U2Region &region, location) {
640                 int startPos = getRelativeStartPos(fragment, region);
641                 if (fragment.isInverted()) {
642                     startPos = fragment.getLength() - startPos - region.length;
643                     U2Strand strand = cloned->getStrand();
644                     if (strand.isDirect()) {
645                         cloned->setStrand(U2Strand::Complementary);
646                     } else {
647                         cloned->setStrand(U2Strand::Direct);
648                     }
649                 }
650                 assert(startPos != -1);
651                 int newPos = globalOffset + startPos;
652                 U2Region newRegion(region);
653                 newRegion.startPos = newPos;
654                 newLocation.append(newRegion);
655             }
656 
657             cloned->location->regions = newLocation;
658             results.append(cloned);
659         }
660     }
661 
662     return results;
663 }
664 
createDocument(const QByteArray & seq,const QList<SharedAnnotationData> & annotations)665 void LigateFragmentsTask::createDocument(const QByteArray &seq, const QList<SharedAnnotationData> &annotations) {
666     DocumentFormat *df = AppContext::getDocumentFormatRegistry()->getFormatById(BaseDocumentFormats::PLAIN_GENBANK);
667     IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(BaseIOAdapters::LOCAL_FILE);
668     QList<GObject *> objects;
669 
670     QString seqName = cfg.seqName.isEmpty() ? cfg.docUrl.baseFileName() : cfg.seqName;
671     DNASequence dna(seqName, seq, resultAlphabet);
672     dna.circular = cfg.makeCircular;
673 
674     // set Genbank header
675     DNALocusInfo loi;
676     loi.name = seqName;
677     loi.topology = cfg.makeCircular ? "circular" : "linear";
678     loi.molecule = "DNA";
679     loi.division = "SYN";
680     QDate date = QDate::currentDate();
681     loi.date = QString("%1-%2-%3").arg(date.toString("dd")).arg(FormatUtils::getShortMonthName(date.month())).arg(date.toString("yyyy"));
682 
683     dna.info.insert(DNAInfo::LOCUS, qVariantFromValue<DNALocusInfo>(loi));
684 
685     resultDoc = df->createNewLoadedDocument(iof, cfg.docUrl, stateInfo);
686     CHECK_OP(stateInfo, );
687 
688     U2EntityRef seqRef = U2SequenceUtils::import(stateInfo, resultDoc->getDbiRef(), dna);
689     CHECK_OP_EXT(stateInfo, delete resultDoc; resultDoc = nullptr, );
690 
691     U2SequenceObject *dnaObj = new U2SequenceObject(seqName, seqRef);
692     resultDoc->addObject(dnaObj);
693 
694     AnnotationTableObject *aObj = new AnnotationTableObject(QString("%1 annotations").arg(seqName), resultDoc->getDbiRef());
695     aObj->addAnnotations(annotations);
696     resultDoc->addObject(aObj);
697 
698     aObj->addObjectRelation(dnaObj, ObjectRole_Sequence);
699 }
700 
701 }  // namespace U2
702