1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "CloningUtilTasks.h"
23
24 #include <U2Core/AddDocumentTask.h>
25 #include <U2Core/AppContext.h>
26 #include <U2Core/BaseDocumentFormats.h>
27 #include <U2Core/Counter.h>
28 #include <U2Core/DNASequenceObject.h>
29 #include <U2Core/DNATranslation.h>
30 #include <U2Core/DocumentModel.h>
31 #include <U2Core/FormatUtils.h>
32 #include <U2Core/GObjectRelationRoles.h>
33 #include <U2Core/GObjectUtils.h>
34 #include <U2Core/IOAdapter.h>
35 #include <U2Core/L10n.h>
36 #include <U2Core/MultiTask.h>
37 #include <U2Core/ProjectModel.h>
38 #include <U2Core/SaveDocumentTask.h>
39 #include <U2Core/U1AnnotationUtils.h>
40 #include <U2Core/U2AlphabetUtils.h>
41 #include <U2Core/U2SafePoints.h>
42 #include <U2Core/U2SequenceUtils.h>
43 #include <U2Core/Version.h>
44
45 #include <U2Gui/OpenViewTask.h>
46
47 #include "FindEnzymesTask.h"
48
49 namespace U2 {
50
operator <(const GenomicPosition & left,const GenomicPosition & right)51 bool operator<(const GenomicPosition &left, const GenomicPosition &right) {
52 return left.coord < right.coord;
53 }
54
DigestSequenceTask(U2SequenceObject * so,AnnotationTableObject * source,AnnotationTableObject * dest,const DigestSequenceTaskConfig & config)55 DigestSequenceTask::DigestSequenceTask(U2SequenceObject *so, AnnotationTableObject *source, AnnotationTableObject *dest, const DigestSequenceTaskConfig &config)
56 : Task("DigestSequenceTask", TaskFlags_FOSCOE | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled),
57 sourceObj(source), destObj(dest), dnaObj(so), cfg(config) {
58 GCOUNTER(cvar, "DigestSequenceIntoFragments");
59
60 SAFE_POINT_EXT(sourceObj != nullptr, setError(L10N::nullPointerError("source object")), );
61 SAFE_POINT_EXT(destObj != nullptr, setError(L10N::nullPointerError("destination object")), );
62 SAFE_POINT_EXT(dnaObj != nullptr, setError(L10N::nullPointerError("sequence object")), );
63 isCircular = cfg.forceCircular;
64 }
65
prepare()66 void DigestSequenceTask::prepare() {
67 seqRange = U2Region(0, dnaObj->getSequenceLength());
68
69 if (cfg.searchForRestrictionSites) {
70 assert(sourceObj == destObj);
71 FindEnzymesTaskConfig feCfg;
72 feCfg.circular = isCircular;
73 feCfg.groupName = ANNOTATION_GROUP_ENZYME;
74 Task *t = new FindEnzymesToAnnotationsTask(sourceObj, dnaObj->getSequenceRef(), cfg.enzymeData, feCfg);
75 addSubTask(t);
76 }
77 }
78
createFragment(int pos1,const DNAFragmentTerm & leftTerm,int pos2,const DNAFragmentTerm & rightTerm)79 SharedAnnotationData DigestSequenceTask::createFragment(int pos1, const DNAFragmentTerm &leftTerm, int pos2, const DNAFragmentTerm &rightTerm) {
80 SharedAnnotationData ad(new AnnotationData);
81 if (pos1 < pos2) {
82 U2Region reg(pos1, pos2 - pos1);
83 assert(reg.length > 0);
84
85 ad->location->regions.append(reg);
86 } else {
87 U2Region reg1(pos1, seqRange.endPos() - pos1);
88 if (pos2 < 0) {
89 reg1.length += pos2;
90 pos2 = 0;
91 }
92 U2Region reg2(seqRange.startPos, pos2 - seqRange.startPos);
93 assert(reg1.length >= 0 && reg2.length >= 0);
94 assert(!reg1.isEmpty() || !reg2.isEmpty());
95
96 if (!reg1.isEmpty()) {
97 ad->location->regions.append(reg1);
98 }
99 if (!reg2.isEmpty()) {
100 ad->location->regions.append(reg2);
101 }
102 }
103
104 ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_TERM, leftTerm.enzymeId));
105 ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_TERM, rightTerm.enzymeId));
106
107 ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_OVERHANG, leftTerm.overhang));
108 ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_OVERHANG, rightTerm.overhang));
109
110 QString leftOverhangStrand = leftTerm.isDirect ? OVERHANG_STRAND_DIRECT : OVERHANG_STRAND_COMPL;
111 ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_STRAND, leftOverhangStrand));
112 QString rightOverhangStrand = rightTerm.isDirect ? OVERHANG_STRAND_DIRECT : OVERHANG_STRAND_COMPL;
113 ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_STRAND, rightOverhangStrand));
114
115 QString leftOverhangType = leftTerm.enzymeId.isEmpty() || leftTerm.overhang.isEmpty() ? OVERHANG_TYPE_BLUNT : OVERHANG_TYPE_STICKY;
116 ad->qualifiers.append(U2Qualifier(QUALIFIER_LEFT_TYPE, leftOverhangType));
117 QString rightOverhangType = rightTerm.enzymeId.isEmpty() || rightTerm.overhang.isEmpty() ? OVERHANG_TYPE_BLUNT : OVERHANG_TYPE_STICKY;
118 ad->qualifiers.append(U2Qualifier(QUALIFIER_RIGHT_TYPE, rightOverhangType));
119
120 ad->qualifiers.append(U2Qualifier(QUALIFIER_SOURCE, dnaObj->getGObjectName()));
121
122 U1AnnotationUtils::addDescriptionQualifier(ad, cfg.annDescription);
123
124 return ad;
125 }
126
report()127 Task::ReportResult DigestSequenceTask::report() {
128 checkForConservedAnnotations();
129
130 if (hasError() || isCanceled()) {
131 return ReportResult_Finished;
132 }
133
134 saveResults();
135
136 return ReportResult_Finished;
137 }
138
findCutSites()139 void DigestSequenceTask::findCutSites() {
140 foreach (const SEnzymeData &enzyme, cfg.enzymeData) {
141 if (enzyme->cutDirect == ENZYME_CUT_UNKNOWN || enzyme->cutComplement == ENZYME_CUT_UNKNOWN) {
142 setError(tr("Can't use restriction site %1 for digestion, cleavage site is unknown ").arg(enzyme->id));
143 return;
144 }
145
146 QList<Annotation *> anns;
147 foreach (Annotation *a, sourceObj->getAnnotations()) {
148 if (a->getName() == enzyme->id) {
149 anns.append(a);
150 }
151 }
152
153 if (anns.isEmpty()) {
154 stateInfo.setError(QString("Restriction site %1 is not found").arg(enzyme->id));
155 continue;
156 }
157
158 foreach (Annotation *a, anns) {
159 const QVector<U2Region> &location = a->getRegions();
160 int cutPos = location.first().startPos;
161 cutSiteMap.insertMulti(GenomicPosition(cutPos, a->getStrand().isDirect()), enzyme);
162 }
163 }
164 }
165
166 // TODO: add this function
167 /*
168 static void prepareLeftDnaTerm(TermData& leftTerm,
169 const GenomicPosition& enzymePos,
170 const EnzymeData& leftCutter)
171 {
172 int lcLen = leftCutter->seq.length();
173 bool lcStrandDirect = enzymePos.directStrand;
174 int lcDirectStrandCut = lcStrandDirect ? left->cutDirect : lcLen - leftCutter->cutDirect;
175 int lcComplementStrandCut = lcStrandDirect ? lcLen - leftCutter->cutComplement : leftCutter->cutComplement;
176 leftTerm.cutPos = enzymePos.coord + qMax(lcDirectStrandCut, lcComplementStrandCut);
177 leftTerm.overhangStart = enzymePos.coord + qMin(lcDirectStrandCut, lcComplementStrandCut);
178 leftTerm.isDirect = lcStrandDirect ? lcDirectStrandCut < lcComplementStrandCut :
179 lcDirectStrandCut > lcComplementStrandCut;
180
181 }
182 */
183
compareAnnotationsbyLength(const SharedAnnotationData & a1,const SharedAnnotationData & a2)184 bool compareAnnotationsbyLength(const SharedAnnotationData &a1, const SharedAnnotationData &a2) {
185 int length1 = 0;
186 foreach (const U2Region ®, a1->getRegions()) {
187 length1 += reg.length;
188 }
189
190 int length2 = 0;
191 foreach (const U2Region ®, a2->getRegions()) {
192 length2 += reg.length;
193 }
194 return length1 > length2;
195 }
196
run()197 void DigestSequenceTask::run() {
198 CHECK_OP(stateInfo, );
199
200 findCutSites();
201
202 CHECK(!cutSiteMap.isEmpty(), );
203
204 QMap<GenomicPosition, SEnzymeData>::const_iterator prev = cutSiteMap.constBegin(), current = cutSiteMap.constBegin();
205 int count = 2;
206 qint64 seqLen = dnaObj->getSequenceLength();
207
208 while ((++current) != cutSiteMap.constEnd()) {
209 int pos1 = prev.key().coord;
210 int pos2 = current.key().coord;
211 const SEnzymeData &enzyme1 = prev.value();
212 const SEnzymeData &enzyme2 = current.value();
213 int len1 = enzyme1->seq.length();
214 int len2 = enzyme2->seq.length();
215
216 {
217 U2Region region1(pos1, len1);
218 U2Region region2(pos2, len2);
219
220 if (region1.intersects(region2)) {
221 setError(tr("Unable to digest into fragments: intersecting restriction sites %1 (%2..%3) and %4 (%5..%6)")
222 .arg(enzyme1->id)
223 .arg(region1.startPos)
224 .arg(region1.endPos())
225 .arg(enzyme2->id)
226 .arg(region2.startPos)
227 .arg(region2.endPos()));
228 return;
229 }
230 }
231
232 DNAFragmentTerm leftTerm;
233 bool leftStrandDirect = prev.key().directStrand;
234 int leftCutDirect = leftStrandDirect ? enzyme1->cutDirect : len1 - enzyme1->cutDirect;
235 int leftCutCompl = leftStrandDirect ? len1 - enzyme1->cutComplement : enzyme1->cutComplement;
236 int leftCutPos = correctPos(pos1 + qMax(leftCutDirect, leftCutCompl));
237 int leftOverhangStart = correctPos(pos1 + qMin(leftCutDirect, leftCutCompl));
238 leftTerm.overhang = getOverhang(U2Region(leftOverhangStart, leftCutPos - leftOverhangStart));
239 leftTerm.enzymeId = enzyme1->id.toLatin1();
240 leftTerm.isDirect = leftStrandDirect ? leftCutDirect < leftCutCompl : leftCutDirect > leftCutCompl;
241
242 DNAFragmentTerm rightTerm;
243 bool rightStrandDirect = current.key().directStrand;
244 int rightCutDirect = rightStrandDirect ? enzyme2->cutDirect : len2 - enzyme2->cutDirect;
245 int rightCutCompl = rightStrandDirect ? len2 - enzyme2->cutComplement : enzyme2->cutComplement;
246 qint64 rightCutPos = correctPos(pos2 + qMin(rightCutDirect, rightCutCompl));
247 qint64 rightOverhangStart = correctPos(pos2 + qMax(rightCutDirect, rightCutCompl));
248 rightTerm.overhang = getOverhang(U2Region(rightCutPos, rightOverhangStart - rightCutPos));
249 rightTerm.enzymeId = enzyme2->id.toLatin1();
250 rightTerm.isDirect = rightStrandDirect ? rightCutDirect > rightCutCompl : rightCutDirect < rightCutCompl;
251 if (rightOverhangStart > seqLen) {
252 qint64 leftCutPosWithOverhang = rightOverhangStart - seqLen;
253 rightTerm.overhang += getOverhang(U2Region(0, leftCutPosWithOverhang));
254 }
255 SharedAnnotationData ad = createFragment(leftCutPos, leftTerm, rightCutPos, rightTerm);
256 results.append(ad);
257 ++count;
258 ++prev;
259 }
260
261 QMap<GenomicPosition, SEnzymeData>::const_iterator first = cutSiteMap.constBegin();
262
263 const SEnzymeData &firstCutter = first.value();
264 int fcLen = firstCutter->seq.length();
265 bool fcStrandDirect = first.key().directStrand;
266 int fcDirectStrandCut = fcStrandDirect ? firstCutter->cutDirect : fcLen - firstCutter->cutDirect;
267 int fcComplementStrandCut = fcStrandDirect ? fcLen - firstCutter->cutComplement : firstCutter->cutComplement;
268 int firstCutPos = correctPos(first.key().coord + qMin(fcDirectStrandCut, fcComplementStrandCut));
269 int rightOverhangStart = correctPos(first.key().coord + qMax(fcDirectStrandCut, fcComplementStrandCut));
270 bool rightOverhangIsDirect = fcStrandDirect ? fcDirectStrandCut > fcComplementStrandCut : fcDirectStrandCut < fcComplementStrandCut;
271 QByteArray firstRightOverhang = getOverhang(U2Region(firstCutPos, rightOverhangStart - firstCutPos));
272
273 const SEnzymeData &lastCutter = prev.value();
274 int lcLen = lastCutter->seq.length();
275 bool lcStrandDirect = prev.key().directStrand;
276 int lcDirectStrandCut = lcStrandDirect ? lastCutter->cutDirect : lcLen - lastCutter->cutDirect;
277 int lcComplementStrandCut = lcStrandDirect ? lcLen - lastCutter->cutComplement : lastCutter->cutComplement;
278 int lastCutPos = correctPos(prev.key().coord + qMax(lcDirectStrandCut, lcComplementStrandCut));
279 int leftOverhangStart = correctPos(prev.key().coord + qMin(lcDirectStrandCut, lcComplementStrandCut));
280 bool leftOverhangIsDirect = lcStrandDirect ? lcDirectStrandCut < lcComplementStrandCut : lcDirectStrandCut > lcComplementStrandCut;
281
282 if (lastCutPos > seqLen) {
283 // last restriction site is situated between sequence start and end
284 assert(isCircular);
285 int leftCutPos = lastCutPos - seqLen;
286 QByteArray leftOverhang = getOverhang(U2Region(leftOverhangStart, seqLen - leftOverhangStart)) + getOverhang(U2Region(0, leftCutPos));
287 QByteArray rightOverhang = first == prev ? leftOverhang : firstRightOverhang;
288 SharedAnnotationData ad1 = createFragment(leftCutPos, DNAFragmentTerm(lastCutter->id, leftOverhang, leftOverhangIsDirect), firstCutPos, DNAFragmentTerm(firstCutter->id, rightOverhang, rightOverhangIsDirect));
289 results.append(ad1);
290 } else {
291 QByteArray lastLeftOverhang = getOverhang(U2Region(leftOverhangStart, lastCutPos - leftOverhangStart));
292 if (isCircular) {
293 SharedAnnotationData ad = createFragment(lastCutPos, DNAFragmentTerm(lastCutter->id, lastLeftOverhang, leftOverhangIsDirect), firstCutPos, DNAFragmentTerm(firstCutter->id, firstRightOverhang, rightOverhangIsDirect));
294
295 results.append(ad);
296 } else {
297 if (firstCutPos != 0) {
298 SharedAnnotationData ad1 = createFragment(seqRange.startPos, DNAFragmentTerm(), firstCutPos, DNAFragmentTerm(firstCutter->id, firstRightOverhang, rightOverhangIsDirect));
299 results.append(ad1);
300 }
301 if (lastCutPos != dnaObj->getSequenceLength()) {
302 SharedAnnotationData ad2 = createFragment(lastCutPos, DNAFragmentTerm(lastCutter->id, lastLeftOverhang, leftOverhangIsDirect), seqRange.endPos(), DNAFragmentTerm());
303 results.append(ad2);
304 }
305 }
306 }
307 std::sort(results.begin(), results.end(), compareAnnotationsbyLength);
308
309 for (int fragmentCounter = 0; fragmentCounter < results.size(); fragmentCounter++) {
310 results[fragmentCounter]->name = QString("Fragment %1").arg(fragmentCounter + 1);
311 }
312 }
313
saveResults()314 void DigestSequenceTask::saveResults() {
315 destObj->addAnnotations(results, ANNOTATION_GROUP_FRAGMENTS);
316 }
317
generateReport() const318 QString DigestSequenceTask::generateReport() const {
319 QString res;
320
321 if (hasError()) {
322 return res;
323 }
324
325 QString topology = dnaObj->isCircular() ? tr("circular") : tr("linear");
326 res += tr("<h3><br>Digest into fragments %1 (%2)</h3>").arg(dnaObj->getDocument()->getName()).arg(topology);
327 res += tr("<br>Generated %1 fragments.").arg(results.count());
328 int counter = 1;
329 foreach (const SharedAnnotationData &sdata, results) {
330 const int startPos = sdata->location->regions.first().startPos + 1;
331 const int endPos = sdata->location->regions.last().endPos();
332 int len = 0;
333 foreach (const U2Region &r, sdata->location->regions) {
334 len += r.endPos() - r.startPos;
335 }
336 res += tr("<br><br> %1: From %3 (%2) To %5 (%4) - %6 bp ").arg(counter).arg(startPos).arg(sdata->findFirstQualifierValue(QUALIFIER_LEFT_TERM)).arg(endPos).arg(sdata->findFirstQualifierValue(QUALIFIER_RIGHT_TERM)).arg(len);
337 ++counter;
338 }
339
340 return res;
341 }
342
checkForConservedAnnotations()343 void DigestSequenceTask::checkForConservedAnnotations() {
344 QMap<QString, U2Region>::const_iterator it = cfg.conservedRegions.constBegin();
345 for (; it != cfg.conservedRegions.constEnd(); ++it) {
346 bool found = false;
347 U2Region annRegion = it.value();
348 foreach (const SharedAnnotationData &data, results) {
349 const U2Region resRegion = data->location->regions.first();
350 if (resRegion.contains(annRegion)) {
351 found = true;
352 break;
353 }
354 }
355 if (!found) {
356 QString locationStr = QString("%1..%2").arg(annRegion.startPos + 1).arg(annRegion.endPos());
357 setError(tr("Conserved annotation %1 (%2) is disrupted by the digestion. Try changing the restriction sites.")
358 .arg(it.key())
359 .arg(locationStr));
360 return;
361 }
362 }
363 }
364
correctPos(const qint64 pos) const365 qint64 DigestSequenceTask::correctPos(const qint64 pos) const {
366 qint64 res = pos;
367 if (!isCircular) {
368 res = qBound<qint64>(0, pos, dnaObj->getSequenceLength());
369 }
370 return res;
371 }
372
getOverhang(const U2Region & region) const373 QByteArray DigestSequenceTask::getOverhang(const U2Region ®ion) const {
374 QByteArray result;
375 if (region.startPos < 0 && isCircular) {
376 result = dnaObj->getSequenceData(U2Region(dnaObj->getSequenceLength() + region.startPos, qAbs(region.startPos)));
377 result += dnaObj->getSequenceData(U2Region(0, region.length + region.startPos));
378 } else {
379 result = dnaObj->getSequenceData(region);
380 }
381
382 return result;
383 }
384
385 //////////////////////////////////////////////////////////////////////////
386
LigateFragmentsTask(const QList<DNAFragment> & fragments,const LigateFragmentsTaskConfig & config)387 LigateFragmentsTask::LigateFragmentsTask(const QList<DNAFragment> &fragments, const LigateFragmentsTaskConfig &config)
388 : Task("LigateFragmentsTask", TaskFlags_NR_FOSCOE), fragmentList(fragments), cfg(config),
389 resultDoc(nullptr), resultAlphabet(nullptr) {
390 GCOUNTER(cvar, "LigateFragments");
391 }
392
processOverhangs(const DNAFragment & prevFragment,const DNAFragment & curFragment,QByteArray & overhangAddition)393 void LigateFragmentsTask::processOverhangs(const DNAFragment &prevFragment, const DNAFragment &curFragment, QByteArray &overhangAddition) {
394 const DNAFragmentTerm &prevTerm = prevFragment.getRightTerminus();
395 const DNAFragmentTerm &curTerm = curFragment.getLeftTerminus();
396
397 if (prevTerm.type != curTerm.type) {
398 stateInfo.setError(tr("Fragments %1 and %2 are inconsistent. Blunt and sticky ends incompatibility")
399 .arg(prevFragment.getName())
400 .arg(curFragment.getName()));
401 return;
402 }
403
404 QByteArray prevOverhang = prevFragment.getRightTerminus().overhang;
405 QByteArray curOverhang = curFragment.getLeftTerminus().overhang;
406
407 if (prevTerm.type == OVERHANG_TYPE_STICKY) {
408 if (!overhangsAreConsistent(prevFragment.getRightTerminus(), curFragment.getLeftTerminus())) {
409 stateInfo.setError(tr("Right overhang from %1 and left overhang from %2 are inconsistent.")
410 .arg(prevFragment.getName())
411 .arg(curFragment.getName()));
412 return;
413 } else {
414 overhangAddition += curOverhang;
415 }
416 } else if (prevTerm.type == OVERHANG_TYPE_BLUNT) {
417 overhangAddition += prevOverhang + curOverhang;
418 } else {
419 assert(0);
420 }
421 }
422
overhangsAreConsistent(const DNAFragmentTerm & curTerm,const DNAFragmentTerm & prevTerm)423 bool LigateFragmentsTask::overhangsAreConsistent(const DNAFragmentTerm &curTerm, const DNAFragmentTerm &prevTerm) {
424 QByteArray curOverhang = curTerm.overhang;
425 QByteArray prevOverhang = prevTerm.overhang;
426
427 bool curStrand = curTerm.isDirect;
428 bool prevStrand = prevTerm.isDirect;
429 if (curStrand == prevStrand) {
430 return false;
431 }
432
433 return curOverhang == prevOverhang;
434 }
435
prepare()436 void LigateFragmentsTask::prepare() {
437 QByteArray resultSeq;
438 QVector<U2Region> fragmentRegions;
439
440 DNAFragment prevFragment;
441 assert(prevFragment.isEmpty());
442
443 if (!cfg.makeCircular && cfg.checkOverhangs) {
444 const DNAFragment &first = fragmentList.first();
445 QByteArray leftOverhangAddition = first.getLeftTerminus().overhang;
446 resultSeq.append(leftOverhangAddition);
447 }
448
449 foreach (const DNAFragment &dnaFragment, fragmentList) {
450 QVector<U2Region> location = dnaFragment.getFragmentRegions();
451 assert(location.size() > 0);
452
453 // check alphabet consistency
454 const DNAAlphabet *fragmentAlphabet = dnaFragment.getAlphabet();
455 if (resultAlphabet == nullptr) {
456 resultAlphabet = fragmentAlphabet;
457 } else if (resultAlphabet != fragmentAlphabet) {
458 if (fragmentAlphabet == nullptr) {
459 stateInfo.setError(tr("Unknown DNA alphabet in fragment %1 of %2")
460 .arg(dnaFragment.getName())
461 .arg(dnaFragment.getSequenceName()));
462 return;
463 }
464 resultAlphabet = U2AlphabetUtils::deriveCommonAlphabet(resultAlphabet, fragmentAlphabet);
465 }
466
467 // check if overhangs are compatible
468 QByteArray overhangAddition;
469 if (cfg.checkOverhangs) {
470 if (!prevFragment.isEmpty()) {
471 processOverhangs(prevFragment, dnaFragment, overhangAddition);
472 if (stateInfo.hasError()) {
473 return;
474 }
475 }
476 prevFragment = dnaFragment;
477 }
478
479 // handle fragment annotations
480 int resultLen = resultSeq.length() + overhangAddition.length();
481 foreach (AnnotationTableObject *aObj, dnaFragment.getRelatedAnnotations()) {
482 QList<SharedAnnotationData> toSave = cloneAnnotationsInFragmentRegion(dnaFragment, aObj, resultLen);
483 annotations.append(toSave);
484 }
485
486 if (cfg.annotateFragments) {
487 SharedAnnotationData a = createFragmentAnnotation(dnaFragment, resultLen);
488 annotations.append(a);
489 }
490
491 resultSeq.append(overhangAddition);
492 resultSeq.append(dnaFragment.getSequence(stateInfo));
493 CHECK_OP(stateInfo, );
494 }
495
496 if (cfg.checkOverhangs) {
497 if (cfg.makeCircular) {
498 const DNAFragment &first = fragmentList.first();
499 const DNAFragment &last = fragmentList.last();
500 QByteArray overhangAddition;
501 processOverhangs(last, first, overhangAddition);
502 if (stateInfo.hasError()) {
503 return;
504 }
505 resultSeq.append(overhangAddition);
506 } else {
507 const DNAFragment &last = fragmentList.last();
508 QByteArray rightOverhangAddition = last.getRightTerminus().overhang;
509 resultSeq.append(rightOverhangAddition);
510 }
511 }
512
513 // create comment
514 SharedAnnotationData sourceAnnot = createSourceAnnotation(resultSeq.length());
515 annotations.append(sourceAnnot);
516
517 createDocument(resultSeq, annotations);
518
519 if (!cfg.addDocToProject) {
520 return;
521 }
522
523 QList<Task *> tasks;
524 tasks.append(new AddDocumentTask(resultDoc));
525
526 if (cfg.openView) {
527 tasks.append(new OpenViewTask(resultDoc));
528 }
529 if (cfg.saveDoc) {
530 tasks.append(new SaveDocumentTask(resultDoc));
531 }
532
533 Task *multiTask = new MultiTask(tr("Add constructed molecule"), tasks);
534 addSubTask(multiTask);
535 }
536
createSourceAnnotation(int regLen)537 SharedAnnotationData LigateFragmentsTask::createSourceAnnotation(int regLen) {
538 Version v = Version::appVersion();
539 SharedAnnotationData d(new AnnotationData);
540 d->name = "source";
541 d->location->regions << U2Region(0, regLen);
542 d->qualifiers.append(U2Qualifier("comment", QString("Molecule is created with Unipro UGENE v%1.%2").arg(v.major).arg(v.minor)));
543 return d;
544 }
545
createFragmentAnnotation(const DNAFragment & fragment,int startPos)546 SharedAnnotationData LigateFragmentsTask::createFragmentAnnotation(const DNAFragment &fragment, int startPos) {
547 SharedAnnotationData d(new AnnotationData);
548 d->name = QString("%1 %2").arg(fragment.getSequenceName()).arg(fragment.getName());
549 d->location->regions << U2Region(startPos, fragment.getLength());
550 d->qualifiers.append(U2Qualifier("source_doc", fragment.getSequenceDocName()));
551
552 return d;
553 }
554
cloneAnnotationsInRegion(const U2Region & fragmentRegion,AnnotationTableObject * source,int globalOffset)555 QList<SharedAnnotationData> LigateFragmentsTask::cloneAnnotationsInRegion(const U2Region &fragmentRegion, AnnotationTableObject *source, int globalOffset) {
556 QList<SharedAnnotationData> results;
557 // TODO: allow to cut annotations
558 // TODO: consider optimizing the code below using AnnotationTableObject::getAnnotationsByRegion()
559 foreach (Annotation *a, source->getAnnotations()) {
560 bool ok = true;
561 const QVector<U2Region> &location = a->getRegions();
562 foreach (const U2Region ®ion, location) {
563 if (!fragmentRegion.contains(region) || fragmentRegion == region) {
564 ok = false;
565 break;
566 }
567 }
568 if (ok) {
569 int newPos = globalOffset + location.first().startPos - fragmentRegion.startPos;
570 SharedAnnotationData cloned(new AnnotationData(*a->getData()));
571 QVector<U2Region> newLocation;
572 foreach (const U2Region ®ion, a->getRegions()) {
573 U2Region newRegion(region);
574 newRegion.startPos = newPos;
575 newLocation.append(newRegion);
576 }
577 cloned->location->regions = newLocation;
578 results.append(cloned);
579 }
580 }
581
582 return results;
583 }
584
fragmentContainsRegion(const DNAFragment & fragment,const U2Region region)585 static bool fragmentContainsRegion(const DNAFragment &fragment, const U2Region region) {
586 QVector<U2Region> fragmentRegions = fragment.getFragmentRegions();
587
588 bool result = false;
589 foreach (const U2Region &fR, fragmentRegions) {
590 if (fR.contains(region)) {
591 result = true;
592 break;
593 }
594 }
595
596 return result;
597 }
598
getRelativeStartPos(const DNAFragment & fragment,const U2Region region)599 static int getRelativeStartPos(const DNAFragment &fragment, const U2Region region) {
600 QVector<U2Region> fragmentRegions = fragment.getFragmentRegions();
601
602 int offset = 0;
603 foreach (const U2Region &fR, fragmentRegions) {
604 if (fR.contains(region)) {
605 return offset + region.startPos - fR.startPos;
606 }
607 offset += fR.length;
608 }
609
610 // the fragment doesn't contain the region
611 return -1;
612 }
613
cloneAnnotationsInFragmentRegion(const DNAFragment & fragment,AnnotationTableObject * source,int globalOffset)614 QList<SharedAnnotationData> LigateFragmentsTask::cloneAnnotationsInFragmentRegion(const DNAFragment &fragment,
615 AnnotationTableObject *source,
616 int globalOffset) {
617 QList<SharedAnnotationData> results;
618
619 // TODO: allow to remove annotations
620
621 foreach (Annotation *a, source->getAnnotations()) {
622 QVector<U2Region> location = a->getRegions();
623 if (a->getName().startsWith("Fragment")) {
624 continue;
625 }
626
627 bool ok = true;
628 foreach (const U2Region &r, location) {
629 // sneaky annotations shall not pass!
630 if (!fragmentContainsRegion(fragment, r)) {
631 ok = false;
632 break;
633 }
634 }
635
636 if (ok) {
637 SharedAnnotationData cloned(new AnnotationData(*a->getData()));
638 QVector<U2Region> newLocation;
639 foreach (const U2Region ®ion, location) {
640 int startPos = getRelativeStartPos(fragment, region);
641 if (fragment.isInverted()) {
642 startPos = fragment.getLength() - startPos - region.length;
643 U2Strand strand = cloned->getStrand();
644 if (strand.isDirect()) {
645 cloned->setStrand(U2Strand::Complementary);
646 } else {
647 cloned->setStrand(U2Strand::Direct);
648 }
649 }
650 assert(startPos != -1);
651 int newPos = globalOffset + startPos;
652 U2Region newRegion(region);
653 newRegion.startPos = newPos;
654 newLocation.append(newRegion);
655 }
656
657 cloned->location->regions = newLocation;
658 results.append(cloned);
659 }
660 }
661
662 return results;
663 }
664
createDocument(const QByteArray & seq,const QList<SharedAnnotationData> & annotations)665 void LigateFragmentsTask::createDocument(const QByteArray &seq, const QList<SharedAnnotationData> &annotations) {
666 DocumentFormat *df = AppContext::getDocumentFormatRegistry()->getFormatById(BaseDocumentFormats::PLAIN_GENBANK);
667 IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(BaseIOAdapters::LOCAL_FILE);
668 QList<GObject *> objects;
669
670 QString seqName = cfg.seqName.isEmpty() ? cfg.docUrl.baseFileName() : cfg.seqName;
671 DNASequence dna(seqName, seq, resultAlphabet);
672 dna.circular = cfg.makeCircular;
673
674 // set Genbank header
675 DNALocusInfo loi;
676 loi.name = seqName;
677 loi.topology = cfg.makeCircular ? "circular" : "linear";
678 loi.molecule = "DNA";
679 loi.division = "SYN";
680 QDate date = QDate::currentDate();
681 loi.date = QString("%1-%2-%3").arg(date.toString("dd")).arg(FormatUtils::getShortMonthName(date.month())).arg(date.toString("yyyy"));
682
683 dna.info.insert(DNAInfo::LOCUS, qVariantFromValue<DNALocusInfo>(loi));
684
685 resultDoc = df->createNewLoadedDocument(iof, cfg.docUrl, stateInfo);
686 CHECK_OP(stateInfo, );
687
688 U2EntityRef seqRef = U2SequenceUtils::import(stateInfo, resultDoc->getDbiRef(), dna);
689 CHECK_OP_EXT(stateInfo, delete resultDoc; resultDoc = nullptr, );
690
691 U2SequenceObject *dnaObj = new U2SequenceObject(seqName, seqRef);
692 resultDoc->addObject(dnaObj);
693
694 AnnotationTableObject *aObj = new AnnotationTableObject(QString("%1 annotations").arg(seqName), resultDoc->getDbiRef());
695 aObj->addAnnotations(annotations);
696 resultDoc->addObject(aObj);
697
698 aObj->addObjectRelation(dnaObj, ObjectRole_Sequence);
699 }
700
701 } // namespace U2
702