1 /********************************************************************** 2 parsmart.cpp - SMARTS parser. 3 4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. 5 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison 6 7 This file is part of the Open Babel project. 8 For more information, see <http://openbabel.org/> 9 10 This program is free software; you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation version 2 of the License. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 ***********************************************************************/ 19 #include <openbabel/babelconfig.h> 20 21 #include <ctype.h> 22 #include <iomanip> 23 #include <cstring> 24 25 #include <openbabel/mol.h> 26 #include <openbabel/atom.h> 27 #include <openbabel/bond.h> 28 #include <openbabel/parsmart.h> 29 #include <openbabel/stereo/stereo.h> 30 #include <openbabel/stereo/tetrahedral.h> 31 32 using namespace std; 33 34 namespace OpenBabel 35 { 36 /*! \class OBSmartsPattern parsmart.h <openbabel/parsmart.h> 37 38 Substructure search is an incredibly useful tool in the context of a 39 small molecule programming library. Having an efficient substructure 40 search engine reduces the amount of hard code needed for molecule 41 perception, as well as increases the flexibility of certain 42 operations. For instance, atom typing can be easily performed based on 43 hard coded rules of element type and bond orders (or 44 hybridization). Alternatively, atom typing can also be done by 45 matching a set of substructure rules read at run time. In the latter 46 case customization based on application (such as changing the pH) 47 becomes a facile operation. Fortunately for Open Babel and its users, 48 Roger Sayle donated a SMARTS parser which became the basis for SMARTS 49 matching in Open Babel. 50 51 For more information on the SMARTS support in Open Babel, see the wiki page: 52 http://openbabel.org/wiki/SMARTS 53 54 The SMARTS matcher, or OBSmartsPattern, is a separate object which can 55 match patterns in the OBMol class. The following code demonstrates how 56 to use the OBSmartsPattern class: 57 \code 58 OBMol mol; 59 ... 60 OBSmartsPattern sp; 61 sp.Init("CC"); 62 sp.Match(mol); 63 vector<vector<int> > maplist; 64 maplist = sp.GetMapList(); 65 //or maplist = sp.GetUMapList(); 66 //print out the results 67 vector<vector<int> >::iterator i; 68 vector<int>::iterator j; 69 for (i = maplist.begin();i != maplist.end();++i) 70 { 71 for (j = i->begin();j != i->end();++j) 72 cout << j << ' `; 73 cout << endl; 74 } 75 \endcode 76 77 The preceding code reads in a molecule, initializes a SMARTS pattern 78 of two single-bonded carbons, and locates all instances of the 79 pattern in the molecule. Note that calling the Match() function 80 does not return the results of the substructure match. The results 81 from a match are stored in the OBSmartsPattern, and a call to 82 GetMapList() or GetUMapList() must be made to extract the 83 results. The function GetMapList() returns all matches of a 84 particular pattern while GetUMapList() returns only the unique 85 matches. For instance, the pattern [OD1]~C~[OD1] describes a 86 carboxylate group. This pattern will match both atom number 87 permutations of the carboxylate, and if GetMapList() is called, both 88 matches will be returned. If GetUMapList() is called only unique 89 matches of the pattern will be returned. A unique match is defined as 90 one which does not cover the identical atoms that a previous match 91 has covered. 92 93 */ 94 95 #define ATOMPOOL 1 96 #define BONDPOOL 1 97 98 #define AE_ANDHI 1 99 #define AE_ANDLO 2 100 #define AE_OR 3 101 #define AE_RECUR 4 102 #define AE_NOT 5 103 #define AE_TRUE 6 104 #define AE_FALSE 7 105 #define AE_AROMATIC 8 106 #define AE_ALIPHATIC 9 107 #define AE_CYCLIC 10 108 #define AE_ACYCLIC 11 109 #define AE_MASS 12 110 #define AE_ELEM 13 111 #define AE_AROMELEM 14 112 #define AE_ALIPHELEM 15 113 #define AE_HCOUNT 16 114 #define AE_CHARGE 17 115 #define AE_CONNECT 18 116 #define AE_DEGREE 19 117 #define AE_IMPLICIT 20 118 #define AE_RINGS 21 119 #define AE_SIZE 22 120 #define AE_VALENCE 23 121 #define AE_CHIRAL 24 122 #define AE_HYB 25 123 #define AE_RINGCONNECT 26 124 125 #define AL_CLOCKWISE 1 126 #define AL_ANTICLOCKWISE 2 127 #define AL_UNSPECIFIED 0 128 129 /* Each BondExpr node is identified by an integer type field 130 selected from the list of BE_ codes below. BE_ANDHI, 131 BE_ANDLO and BE_OR are binary nodes of type BondExpr.bin, 132 BE_NOT is unary node of type BondExpr.mon, and the remaining 133 code are all leaf nodes. */ 134 135 #define BE_ANDHI 1 136 #define BE_ANDLO 2 137 #define BE_OR 3 138 #define BE_NOT 4 139 #define BE_ANY 5 140 #define BE_DEFAULT 6 141 #define BE_SINGLE 7 142 #define BE_DOUBLE 8 143 #define BE_TRIPLE 9 144 #define BE_QUAD 10 145 #define BE_AROM 11 146 #define BE_RING 12 147 #define BE_UP 13 148 #define BE_DOWN 14 149 #define BE_UPUNSPEC 15 150 #define BE_DOWNUNSPEC 16 151 152 153 static int CreateAtom(Pattern*,AtomExpr*,int,int vb=0); 154 155 const int SmartsImplicitRef = -9999; // Used as a placeholder when recording atom nbrs for chiral atoms 156 157 158 /*=============================*/ 159 /* Standard Utility Routines */ 160 /*=============================*/ 161 FatalAllocationError(const char * ptr)162 static void FatalAllocationError( const char *ptr ) 163 { 164 stringstream errorMsg; 165 errorMsg << "Error: Unable to allocate" << ptr << endl; 166 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); 167 } 168 169 /*================================*/ 170 /* Atom Expression Manipulation */ 171 /*================================*/ 172 173 static void FreePattern( Pattern* ); 174 static Pattern *CopyPattern( Pattern* ); 175 CopyAtomExpr(AtomExpr * expr)176 static AtomExpr *CopyAtomExpr( AtomExpr *expr ) 177 { 178 AtomExpr *result; 179 180 result = new AtomExpr; 181 result->type = expr->type; 182 switch( expr->type ) 183 { 184 case AE_ANDHI: 185 case AE_ANDLO: 186 case AE_OR: 187 result->bin.lft = CopyAtomExpr(expr->bin.lft); 188 result->bin.rgt = CopyAtomExpr(expr->bin.rgt); 189 break; 190 191 case AE_NOT: 192 result->mon.arg = CopyAtomExpr(expr->mon.arg); 193 break; 194 195 case AE_RECUR: 196 result->recur.recur = CopyPattern((Pattern*)expr->recur.recur); 197 break; 198 199 default: 200 result->leaf.value = expr->leaf.value; 201 break; 202 } 203 return result; 204 } 205 FreeAtomExpr(AtomExpr * expr)206 static void FreeAtomExpr( AtomExpr *expr ) 207 { 208 if( expr ) 209 { 210 switch( expr->type ) 211 { 212 case AE_ANDHI: 213 case AE_ANDLO: 214 case AE_OR: 215 FreeAtomExpr(expr->bin.lft); 216 FreeAtomExpr(expr->bin.rgt); 217 break; 218 219 case AE_NOT: 220 FreeAtomExpr(expr->mon.arg); 221 break; 222 223 case AE_RECUR: 224 FreePattern((Pattern*)expr->recur.recur); 225 break; 226 } 227 228 delete expr; 229 } 230 } 231 BuildAtomPred(int type)232 static AtomExpr *BuildAtomPred( int type ) 233 { 234 AtomExpr *result; 235 236 result = new AtomExpr; 237 result->leaf.type = type; 238 result->leaf.value = 0; 239 return result; 240 } 241 BuildAtomLeaf(int type,int val)242 static AtomExpr *BuildAtomLeaf( int type, int val ) 243 { 244 AtomExpr *result; 245 246 result = new AtomExpr; 247 result->leaf.type = type; 248 result->leaf.value = val; 249 return result; 250 } 251 BuildAtomNot(AtomExpr * expr)252 static AtomExpr *BuildAtomNot( AtomExpr *expr ) 253 { 254 AtomExpr *result; 255 256 result = new AtomExpr; 257 result->mon.type = AE_NOT; 258 result->mon.arg = expr; 259 return result; 260 } 261 BuildAtomBin(int op,AtomExpr * lft,AtomExpr * rgt)262 static AtomExpr *BuildAtomBin( int op, AtomExpr *lft, AtomExpr *rgt ) 263 { 264 AtomExpr *result; 265 266 result = new AtomExpr; 267 result->bin.type = op; 268 result->bin.lft = lft; 269 result->bin.rgt = rgt; 270 return result; 271 } 272 BuildAtomRecurs(Pattern * pat)273 static AtomExpr *BuildAtomRecurs( Pattern *pat ) 274 { 275 AtomExpr *result; 276 277 result = new AtomExpr; 278 result->recur.type = AE_RECUR; 279 result->recur.recur = (void*)pat; 280 return result; 281 } 282 GenerateElement(int elem)283 static AtomExpr *GenerateElement( int elem ) 284 { 285 return BuildAtomLeaf(AE_ELEM,elem); 286 } 287 GenerateAromElem(int elem,int flag)288 static AtomExpr *GenerateAromElem( int elem, int flag ) 289 { 290 return flag ? BuildAtomLeaf(AE_AROMELEM,elem) 291 : BuildAtomLeaf(AE_ALIPHELEM,elem); 292 } 293 294 /*================================*/ 295 /* Bond Expression Manipulation */ 296 /*================================*/ 297 CopyBondExpr(BondExpr * expr)298 static BondExpr *CopyBondExpr( BondExpr *expr ) 299 { 300 BondExpr *result; 301 302 result = new BondExpr; 303 result->type = expr->type; 304 switch( expr->type ) 305 { 306 case BE_ANDHI: 307 case BE_ANDLO: 308 case BE_OR: 309 result->bin.lft = CopyBondExpr(expr->bin.lft); 310 result->bin.rgt = CopyBondExpr(expr->bin.rgt); 311 break; 312 313 case BE_NOT: 314 result->mon.arg = CopyBondExpr(expr->mon.arg); 315 break; 316 } 317 return result; 318 } 319 320 /** 321 * Check if two BondExpr objects are the same. This is used for ring closures 322 * to identify invalid SMARTS like: 323 * 324 * C-1CCCCC#1 325 * C=1CCCCC:1 326 * 327 * However, the SMARTS below are valid and the bond expression next to the the 328 * second closure digit is used. 329 * 330 * C1CCCCC#1 331 * C1CCCCC=1 332 */ EquivalentBondExpr(BondExpr * expr1,BondExpr * expr2)333 static bool EquivalentBondExpr( BondExpr *expr1, BondExpr *expr2 ) 334 { 335 if (expr1 == nullptr && expr2 == nullptr) 336 return true; 337 if (expr1 == nullptr && expr2 != nullptr) 338 return false; 339 if (expr1 != nullptr && expr2 == nullptr) 340 return false; 341 342 if (expr1->type != expr2->type) 343 return false; 344 345 switch( expr1->type ) 346 { 347 case BE_ANDHI: 348 case BE_ANDLO: 349 case BE_OR: 350 return EquivalentBondExpr(expr1->bin.lft, expr2->bin.lft) && 351 EquivalentBondExpr(expr1->bin.rgt, expr2->bin.rgt); 352 353 case BE_NOT: 354 return EquivalentBondExpr(expr1->mon.arg, expr2->mon.arg); 355 } 356 return true; 357 } 358 FreeBondExpr(BondExpr * expr)359 static void FreeBondExpr( BondExpr *expr ) 360 { 361 if( expr ) 362 { 363 switch( expr->type ) 364 { 365 case BE_ANDHI: 366 case BE_ANDLO: 367 case BE_OR: 368 FreeBondExpr(expr->bin.lft); 369 FreeBondExpr(expr->bin.rgt); 370 break; 371 372 case BE_NOT: 373 FreeBondExpr(expr->mon.arg); 374 break; 375 } 376 377 delete expr; 378 } 379 } 380 BuildBondLeaf(int type)381 static BondExpr *BuildBondLeaf( int type ) 382 { 383 BondExpr *result; 384 385 result = new BondExpr; 386 result->type = type; 387 return result; 388 } 389 BuildBondNot(BondExpr * expr)390 static BondExpr *BuildBondNot( BondExpr *expr ) 391 { 392 BondExpr *result; 393 394 result = new BondExpr; 395 result->mon.type = BE_NOT; 396 result->mon.arg = expr; 397 return result; 398 } 399 BuildBondBin(int op,BondExpr * lft,BondExpr * rgt)400 static BondExpr *BuildBondBin( int op, BondExpr *lft, BondExpr *rgt ) 401 { 402 BondExpr *result; 403 404 result = new BondExpr; 405 result->bin.type = op; 406 result->bin.lft = lft; 407 result->bin.rgt = rgt; 408 return result; 409 } 410 GenerateDefaultBond(void)411 static BondExpr *GenerateDefaultBond( void ) 412 { 413 return BuildBondLeaf(BE_DEFAULT); 414 } 415 416 /*===============================*/ 417 /* SMARTS Pattern Manipulation */ 418 /*===============================*/ 419 AllocPattern(void)420 static Pattern *AllocPattern( void ) 421 { 422 Pattern *ptr; 423 424 ptr = new Pattern; 425 if( !ptr ) { 426 FatalAllocationError("pattern"); 427 return nullptr; 428 } 429 430 ptr->atom = nullptr; 431 ptr->aalloc = 0; 432 ptr->acount = 0; 433 434 ptr->bond = nullptr; 435 ptr->balloc = 0; 436 ptr->bcount = 0; 437 438 ptr->parts = 1; 439 440 ptr->hasExplicitH=false; 441 return ptr; 442 } 443 CreateAtom(Pattern * pat,AtomExpr * expr,int part,int vb)444 static int CreateAtom( Pattern *pat, AtomExpr *expr, int part,int vb) 445 { 446 int index,size; 447 448 if (!pat) 449 return -1; // should never happen 450 451 if( pat->acount == pat->aalloc ) 452 { 453 pat->aalloc += ATOMPOOL; 454 size = (int)(pat->aalloc*sizeof(AtomSpec)); 455 if( pat->atom ) 456 { 457 AtomSpec *tmp = new AtomSpec[pat->aalloc]; 458 copy(pat->atom, pat->atom + pat->aalloc - ATOMPOOL, tmp); 459 delete [] pat->atom; 460 pat->atom = tmp; 461 } 462 else 463 pat->atom = new AtomSpec[pat->aalloc]; 464 if( !pat->atom ) 465 FatalAllocationError("atom pool"); 466 } 467 468 index = pat->acount++; 469 pat->atom[index].part = part; 470 pat->atom[index].expr = expr; 471 pat->atom[index].vb = vb; //std::vector binding 472 473 return index; 474 } 475 CreateBond(Pattern * pat,BondExpr * expr,int src,int dst)476 static int CreateBond( Pattern *pat, BondExpr *expr, int src, int dst ) 477 { 478 int index,size; 479 480 if (!pat) 481 return -1; // should never happen 482 483 if( pat->bcount == pat->balloc ) 484 { 485 pat->balloc += BONDPOOL; 486 size = (int)(pat->balloc*sizeof(BondSpec)); 487 if( pat->bond ) 488 { 489 BondSpec *tmp = new BondSpec[pat->balloc]; 490 copy(pat->bond, pat->bond + pat->balloc - BONDPOOL, tmp); 491 delete [] pat->bond; 492 pat->bond = tmp; 493 } 494 else 495 pat->bond = new BondSpec[pat->balloc]; 496 if( !pat->bond ) 497 FatalAllocationError("bond pool"); 498 } 499 500 index = pat->bcount++; 501 pat->bond[index].expr = expr; 502 pat->bond[index].src = src; 503 pat->bond[index].dst = dst; 504 return(index); 505 } 506 CopyPattern(Pattern * pat)507 static Pattern *CopyPattern( Pattern *pat ) 508 { 509 Pattern *result; 510 AtomExpr *aexpr; 511 BondExpr *bexpr; 512 int i; 513 514 result = AllocPattern(); 515 result->parts = pat->parts; 516 for( i=0; i<pat->acount; i++ ) 517 { 518 aexpr = CopyAtomExpr(pat->atom[i].expr); 519 CreateAtom(result,aexpr,pat->atom[i].part); 520 } 521 522 for( i=0; i<pat->bcount; i++ ) 523 { 524 bexpr = CopyBondExpr(pat->bond[i].expr); 525 CreateBond(result,bexpr,pat->bond[i].src,pat->bond[i].dst); 526 } 527 528 return result; 529 } 530 FreePattern(Pattern * pat)531 static void FreePattern( Pattern *pat ) 532 { 533 int i; 534 535 if( pat ) 536 { 537 if( pat->aalloc ) 538 { 539 for( i=0; i<pat->acount; i++ ) 540 FreeAtomExpr(pat->atom[i].expr); 541 if (pat->atom != nullptr) { 542 //free(pat->atom); 543 delete [] pat->atom; 544 pat->atom = nullptr; 545 } 546 } 547 548 if( pat->balloc ) 549 { 550 for( i=0; i<pat->bcount; i++ ) 551 FreeBondExpr(pat->bond[i].expr); 552 if (pat->bond != nullptr) { 553 //free(pat->bond); 554 delete [] pat->bond; 555 pat->bond = nullptr; 556 } 557 } 558 delete pat; 559 pat = nullptr; 560 } 561 } 562 563 /*=========================*/ 564 /* SMARTS Syntax Parsing */ 565 /*=========================*/ 566 567 SMARTSError(Pattern * pat)568 Pattern *OBSmartsPattern::SMARTSError( Pattern *pat ) 569 { 570 stringstream errorMsg; 571 errorMsg << "SMARTS Error:\n" << MainPtr << endl; 572 errorMsg << setw(LexPtr-MainPtr+1) << '^' << endl; 573 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError, onceOnly); 574 575 FreePattern(pat); 576 return nullptr; 577 } 578 ParseSimpleAtomPrimitive(void)579 AtomExpr *OBSmartsPattern::ParseSimpleAtomPrimitive( void ) 580 { 581 switch( *LexPtr++ ) 582 { 583 case '*': 584 return BuildAtomPred(AE_TRUE); 585 case 'A': 586 return BuildAtomPred(AE_ALIPHATIC); 587 case 'B': 588 if( *LexPtr == 'r' ) 589 { 590 LexPtr++; 591 return GenerateElement(35); 592 } 593 return GenerateElement(5); 594 case 'C': 595 if( *LexPtr == 'l' ) 596 { 597 LexPtr++; 598 return GenerateElement(17); 599 } 600 return GenerateAromElem(6,false); 601 case 'F': 602 return GenerateElement( 9); 603 case 'I': 604 return GenerateElement(53); 605 case 'N': 606 return GenerateAromElem(7,false); 607 case 'O': 608 return GenerateAromElem(8,false); 609 case 'P': 610 return GenerateAromElem(15, false); 611 case 'S': 612 return GenerateAromElem(16,false); 613 case 'a': 614 if( *LexPtr == 's' ) 615 { 616 LexPtr++; 617 return GenerateAromElem(33, true); 618 } 619 return BuildAtomPred(AE_AROMATIC); 620 case 'c': 621 return GenerateAromElem( 6,true); 622 case 'n': 623 return GenerateAromElem( 7,true); 624 case 'o': 625 return GenerateAromElem( 8,true); 626 case 'p': 627 return GenerateAromElem(15,true); 628 case 's': 629 if( *LexPtr == 'e' ) 630 { 631 LexPtr++; 632 return GenerateAromElem(34, true); 633 } 634 return GenerateAromElem(16,true); 635 } 636 LexPtr--; 637 return nullptr; 638 } 639 ParseComplexAtomPrimitive(void)640 AtomExpr *OBSmartsPattern::ParseComplexAtomPrimitive( void ) 641 { 642 Pattern *pat; 643 int index; 644 645 switch( *LexPtr++ ) 646 { 647 case('#'): 648 if( !isdigit(*LexPtr) ) 649 return nullptr; 650 651 index = 0; 652 while( isdigit(*LexPtr) ) 653 index = index*10 + ((*LexPtr++)-'0'); 654 if( index > 255 ) 655 { 656 LexPtr--; 657 return nullptr; 658 } 659 return( GenerateElement(index) ); 660 661 case('$'): 662 if( *LexPtr != '(' ) 663 return nullptr; 664 LexPtr++; 665 pat = ParseSMARTSPattern(); 666 667 if( !pat ) 668 return nullptr; 669 if( *LexPtr != ')' ) 670 { 671 FreePattern(pat); 672 return nullptr; 673 } 674 LexPtr++; 675 return( BuildAtomRecurs(pat) ); 676 677 case('*'): 678 return BuildAtomPred(AE_TRUE); 679 680 case('+'): 681 if( isdigit(*LexPtr) ) 682 { 683 index = 0; 684 while( isdigit(*LexPtr) ) 685 index = index*10 + ((*LexPtr++)-'0'); 686 } 687 else 688 { 689 index = 1; 690 while( *LexPtr == '+' ) 691 { 692 LexPtr++; 693 index++; 694 } 695 } 696 return BuildAtomLeaf(AE_CHARGE,index); 697 698 case('-'): 699 if( isdigit(*LexPtr) ) 700 { 701 index = 0; 702 while( isdigit(*LexPtr) ) 703 index = index*10 + ((*LexPtr++)-'0'); 704 } 705 else 706 { 707 index = 1; 708 while( *LexPtr == '-' ) 709 { 710 LexPtr++; 711 index++; 712 } 713 } 714 return BuildAtomLeaf(AE_CHARGE,-index); 715 716 case '@': 717 if (*LexPtr == '?') 718 { 719 LexPtr++; 720 return BuildAtomLeaf(AE_CHIRAL,AL_UNSPECIFIED); // unspecified 721 } 722 else if (*LexPtr != '@') 723 return BuildAtomLeaf(AE_CHIRAL,AL_ANTICLOCKWISE); 724 else 725 { 726 LexPtr++; 727 return BuildAtomLeaf(AE_CHIRAL,AL_CLOCKWISE); 728 } 729 730 case '^': 731 if (isdigit(*LexPtr)) 732 { 733 index = 0; 734 while( isdigit(*LexPtr) ) 735 index = index*10 + ((*LexPtr++)-'0'); 736 return BuildAtomLeaf(AE_HYB,index); 737 } 738 else 739 return BuildAtomLeaf(AE_HYB,1); 740 741 case('0'): case('1'): case('2'): case('3'): case('4'): 742 case('5'): case('6'): case('7'): case('8'): case('9'): 743 index = LexPtr[-1]-'0'; 744 while( isdigit(*LexPtr) ) 745 index = index*10 + ((*LexPtr++)-'0'); 746 return BuildAtomLeaf(AE_MASS,index); 747 748 case('A'): 749 switch( *LexPtr++ ) 750 { 751 case('c'): return GenerateElement(89); 752 case('g'): return GenerateElement(47); 753 case('l'): return GenerateElement(13); 754 case('m'): return GenerateElement(95); 755 case('r'): return GenerateElement(18); 756 case('s'): return GenerateElement(33); 757 case('t'): return GenerateElement(85); 758 case('u'): return GenerateElement(79); 759 } 760 LexPtr--; 761 return BuildAtomPred(AE_ALIPHATIC); 762 763 case('B'): 764 switch( *LexPtr++ ) 765 { 766 case('a'): return GenerateElement(56); 767 case('e'): return GenerateElement( 4); 768 case('i'): return GenerateElement(83); 769 case('k'): return GenerateElement(97); 770 case('r'): return GenerateElement(35); 771 } 772 LexPtr--; 773 return GenerateElement(5); 774 775 case('C'): 776 switch( *LexPtr++ ) 777 { 778 case('a'): return GenerateElement(20); 779 case('d'): return GenerateElement(48); 780 case('e'): return GenerateElement(58); 781 case('f'): return GenerateElement(98); 782 case('l'): return GenerateElement(17); 783 case('m'): return GenerateElement(96); 784 case('o'): return GenerateElement(27); 785 case('r'): return GenerateElement(24); 786 case('s'): return GenerateElement(55); 787 case('u'): return GenerateElement(29); 788 } 789 LexPtr--; 790 return GenerateAromElem(6,false); 791 792 case('D'): 793 if( *LexPtr == 'y' ) 794 { 795 LexPtr++; 796 return GenerateElement(66); 797 } 798 else if( isdigit(*LexPtr) ) 799 { 800 index = 0; 801 while( isdigit(*LexPtr) ) 802 index = index*10 + ((*LexPtr++)-'0'); 803 return BuildAtomLeaf(AE_DEGREE,index); 804 } 805 return BuildAtomLeaf(AE_DEGREE,1); 806 807 case('E'): 808 if( *LexPtr == 'r' ) 809 { 810 LexPtr++; 811 return GenerateElement(68); 812 } 813 else if( *LexPtr == 's' ) 814 { 815 LexPtr++; 816 return GenerateElement(99); 817 } 818 else if( *LexPtr == 'u' ) 819 { 820 LexPtr++; 821 return GenerateElement(63); 822 } 823 break; 824 825 case('F'): 826 if( *LexPtr == 'e' ) 827 { 828 LexPtr++; 829 return GenerateElement(26); 830 } 831 else if( *LexPtr == 'm' ) 832 { 833 LexPtr++; 834 return GenerateElement(100); 835 } 836 else if( *LexPtr == 'r' ) 837 { 838 LexPtr++; 839 return GenerateElement(87); 840 } 841 return GenerateElement(9); 842 843 case('G'): 844 if( *LexPtr == 'a' ) 845 { 846 LexPtr++; 847 return( GenerateElement(31) ); 848 } 849 else if( *LexPtr == 'd' ) 850 { 851 LexPtr++; 852 return( GenerateElement(64) ); 853 } 854 else if( *LexPtr == 'e' ) 855 { 856 LexPtr++; 857 return( GenerateElement(32) ); 858 } 859 break; 860 861 case('H'): 862 if( *LexPtr == 'e' ) 863 { 864 LexPtr++; 865 return( GenerateElement( 2) ); 866 } 867 else if( *LexPtr == 'f' ) 868 { 869 LexPtr++; 870 return( GenerateElement(72) ); 871 } 872 else if( *LexPtr == 'g' ) 873 { 874 LexPtr++; 875 return( GenerateElement(80) ); 876 } 877 else if( *LexPtr == 'o' ) 878 { 879 LexPtr++; 880 return( GenerateElement(67) ); 881 } 882 else if( isdigit(*LexPtr) ) 883 { 884 index = 0; 885 while( isdigit(*LexPtr) ) 886 index = index*10 + ((*LexPtr++)-'0'); 887 return BuildAtomLeaf(AE_HCOUNT,index); 888 } 889 return BuildAtomLeaf(AE_HCOUNT,1); 890 891 case('I'): 892 if( *LexPtr == 'n' ) 893 { 894 LexPtr++; 895 return( GenerateElement(49) ); 896 } 897 else if( *LexPtr == 'r' ) 898 { 899 LexPtr++; 900 return( GenerateElement(77) ); 901 } 902 return( GenerateElement(53) ); 903 904 case('K'): 905 if( *LexPtr == 'r' ) 906 { 907 LexPtr++; 908 return( GenerateElement(36) ); 909 } 910 return( GenerateElement(19) ); 911 912 case('L'): 913 if( *LexPtr == 'a' ) 914 { 915 LexPtr++; 916 return( GenerateElement( 57) ); 917 } 918 else if( *LexPtr == 'i' ) 919 { 920 LexPtr++; 921 return( GenerateElement( 3) ); 922 } 923 else if( *LexPtr == 'r' ) 924 { 925 LexPtr++; 926 return( GenerateElement(103) ); 927 } 928 else if( *LexPtr == 'u' ) 929 { 930 LexPtr++; 931 return( GenerateElement( 71) ); 932 } 933 break; 934 935 case('M'): 936 if( *LexPtr == 'd' ) 937 { 938 LexPtr++; 939 return( GenerateElement(101) ); 940 } 941 else if( *LexPtr == 'g' ) 942 { 943 LexPtr++; 944 return( GenerateElement( 12) ); 945 } 946 else if( *LexPtr == 'n' ) 947 { 948 LexPtr++; 949 return( GenerateElement( 25) ); 950 } 951 else if( *LexPtr == 'o' ) 952 { 953 LexPtr++; 954 return( GenerateElement( 42) ); 955 } 956 break; 957 958 case('N'): 959 switch( *LexPtr++ ) 960 { 961 case('a'): return( GenerateElement( 11) ); 962 case('b'): return( GenerateElement( 41) ); 963 case('d'): return( GenerateElement( 60) ); 964 case('e'): return( GenerateElement( 10) ); 965 case('i'): return( GenerateElement( 28) ); 966 case('o'): return( GenerateElement(102) ); 967 case('p'): return( GenerateElement( 93) ); 968 } 969 LexPtr--; 970 return( GenerateAromElem(7,false) ); 971 972 case('O'): 973 if( *LexPtr == 's' ) 974 { 975 LexPtr++; 976 return( GenerateElement(76) ); 977 } 978 return( GenerateAromElem(8,false) ); 979 980 case('P'): 981 switch( *LexPtr++ ) 982 { 983 case('a'): return( GenerateElement(91) ); 984 case('b'): return( GenerateElement(82) ); 985 case('d'): return( GenerateElement(46) ); 986 case('m'): return( GenerateElement(61) ); 987 case('o'): return( GenerateElement(84) ); 988 case('r'): return( GenerateElement(59) ); 989 case('t'): return( GenerateElement(78) ); 990 case('u'): return( GenerateElement(94) ); 991 } 992 LexPtr--; 993 return( GenerateElement(15) ); 994 995 case('R'): 996 switch( *LexPtr++ ) 997 { 998 case('a'): return( GenerateElement(88) ); 999 case('b'): return( GenerateElement(37) ); 1000 case('e'): return( GenerateElement(75) ); 1001 case('h'): return( GenerateElement(45) ); 1002 case('n'): return( GenerateElement(86) ); 1003 case('u'): return( GenerateElement(44) ); 1004 } 1005 LexPtr--; 1006 if( isdigit(*LexPtr) ) 1007 { 1008 index = 0; 1009 while( isdigit(*LexPtr) ) 1010 index = index*10 + ((*LexPtr++)-'0'); 1011 if( index == 0 ) 1012 return BuildAtomPred(AE_ACYCLIC); 1013 return BuildAtomLeaf(AE_RINGS,index); 1014 } 1015 return BuildAtomPred(AE_CYCLIC); 1016 1017 case('S'): 1018 switch( *LexPtr++ ) 1019 { 1020 case('b'): return( GenerateElement(51) ); 1021 case('c'): return( GenerateElement(21) ); 1022 case('e'): return( GenerateElement(34) ); 1023 case('i'): return( GenerateElement(14) ); 1024 case('m'): return( GenerateElement(62) ); 1025 case('n'): return( GenerateElement(50) ); 1026 case('r'): return( GenerateElement(38) ); 1027 } 1028 LexPtr--; 1029 return( GenerateAromElem(16,false) ); 1030 1031 case('T'): 1032 switch( *LexPtr++ ) 1033 { 1034 case('a'): return( GenerateElement(73) ); 1035 case('b'): return( GenerateElement(65) ); 1036 case('c'): return( GenerateElement(43) ); 1037 case('e'): return( GenerateElement(52) ); 1038 case('h'): return( GenerateElement(90) ); 1039 case('i'): return( GenerateElement(22) ); 1040 case('l'): return( GenerateElement(81) ); 1041 case('m'): return( GenerateElement(69) ); 1042 } 1043 LexPtr--; 1044 break; 1045 1046 case('U'): return( GenerateElement(92) ); 1047 case('V'): return( GenerateElement(23) ); 1048 case('W'): return( GenerateElement(74) ); 1049 1050 case('X'): 1051 if( *LexPtr == 'e' ) 1052 { 1053 LexPtr++; 1054 return( GenerateElement(54) ); 1055 } 1056 else if( isdigit(*LexPtr) ) 1057 { 1058 index = 0; 1059 while( isdigit(*LexPtr) ) 1060 index = index*10 + ((*LexPtr++)-'0'); 1061 if (index == 0) // default to 1 (if no number present) 1062 index = 1; 1063 return BuildAtomLeaf(AE_CONNECT,index); 1064 } 1065 return BuildAtomLeaf(AE_CONNECT,1); 1066 1067 case('Y'): 1068 if( *LexPtr == 'b' ) 1069 { 1070 LexPtr++; 1071 return( GenerateElement(70) ); 1072 } 1073 return( GenerateElement(39) ); 1074 1075 case('Z'): 1076 if( *LexPtr == 'n' ) 1077 { 1078 LexPtr++; 1079 return GenerateElement(30); 1080 } 1081 else if( *LexPtr == 'r' ) 1082 { 1083 LexPtr++; 1084 return GenerateElement(40); 1085 } 1086 break; 1087 1088 case('a'): 1089 if( *LexPtr == 's' ) 1090 { 1091 LexPtr++; 1092 return GenerateAromElem(33,true); 1093 } 1094 return BuildAtomPred(AE_AROMATIC); 1095 1096 case('c'): 1097 return GenerateAromElem(6,true); 1098 1099 case('h'): 1100 if( isdigit(*LexPtr) ) 1101 { 1102 index = 0; 1103 while( isdigit(*LexPtr) ) 1104 index = index*10 + ((*LexPtr++)-'0'); 1105 } 1106 else 1107 index = 1; 1108 return BuildAtomLeaf(AE_IMPLICIT,index); 1109 1110 case('n'): return GenerateAromElem(7,true); 1111 case('o'): return GenerateAromElem(8,true); 1112 case('p'): return GenerateAromElem(15,true); 1113 1114 case('r'): 1115 if( isdigit(*LexPtr) ) 1116 { 1117 index = 0; 1118 while( isdigit(*LexPtr) ) 1119 index = index*10 + ((*LexPtr++)-'0'); 1120 if( index == 0 ) 1121 return BuildAtomPred(AE_ACYCLIC); 1122 return BuildAtomLeaf(AE_SIZE,index); 1123 } 1124 return BuildAtomPred(AE_CYCLIC); 1125 1126 case('s'): 1127 if( *LexPtr == 'e' ) 1128 { 1129 LexPtr++; 1130 return GenerateAromElem(34,true); 1131 } 1132 return GenerateAromElem(16,true); 1133 1134 case('v'): 1135 if( isdigit(*LexPtr) ) 1136 { 1137 index = 0; 1138 while( isdigit(*LexPtr) ) 1139 index = index*10 + ((*LexPtr++)-'0'); 1140 return BuildAtomLeaf(AE_VALENCE,index); 1141 } 1142 return BuildAtomLeaf(AE_VALENCE,1); 1143 1144 case('x'): 1145 if( isdigit(*LexPtr) ) 1146 { 1147 index = 0; 1148 while( isdigit(*LexPtr) ) 1149 index = index*10 + ((*LexPtr++)-'0'); 1150 return BuildAtomLeaf(AE_RINGCONNECT,index); 1151 } 1152 return BuildAtomPred(AE_CYCLIC); 1153 } 1154 LexPtr--; 1155 return nullptr; 1156 } 1157 ParseAtomExpr(int level)1158 AtomExpr *OBSmartsPattern::ParseAtomExpr( int level ) 1159 { 1160 AtomExpr *expr1 = nullptr; 1161 AtomExpr *expr2 = nullptr; 1162 char *prev; 1163 1164 switch( level ) 1165 { 1166 case(0): /* Low Precedence Conjunction */ 1167 if( !(expr1=ParseAtomExpr(1)) ) 1168 return nullptr; 1169 1170 while( *LexPtr == ';' ) 1171 { 1172 LexPtr++; 1173 if( !(expr2=ParseAtomExpr(1)) ) 1174 { 1175 FreeAtomExpr(expr1); 1176 return nullptr; 1177 } 1178 expr1 = BuildAtomBin(AE_ANDLO,expr1,expr2); 1179 } 1180 return expr1; 1181 1182 case(1): /* Disjunction */ 1183 if( !(expr1=ParseAtomExpr(2)) ) 1184 return nullptr; 1185 1186 while( *LexPtr == ',' ) 1187 { 1188 LexPtr++; 1189 if( !(expr2=ParseAtomExpr(2)) ) 1190 { 1191 FreeAtomExpr(expr1); 1192 return nullptr; 1193 } 1194 expr1 = BuildAtomBin(AE_OR,expr1,expr2); 1195 } 1196 return( expr1 ); 1197 1198 case(2): /* High Precedence Conjunction */ 1199 if( !(expr1=ParseAtomExpr(3)) ) 1200 return nullptr; 1201 1202 while( (*LexPtr!=']') && (*LexPtr!=';') && 1203 (*LexPtr!=',') && *LexPtr ) 1204 { 1205 if( *LexPtr=='&' ) 1206 LexPtr++; 1207 prev = LexPtr; 1208 if( !(expr2=ParseAtomExpr(3)) ) 1209 { 1210 if( prev != LexPtr ) 1211 { 1212 FreeAtomExpr(expr1); 1213 return nullptr; 1214 } 1215 else 1216 return( expr1 ); 1217 } 1218 expr1 = BuildAtomBin(AE_ANDHI,expr1,expr2); 1219 } 1220 return( expr1 ); 1221 1222 case(3): /* Negation or Primitive */ 1223 if( *LexPtr == '!' ) 1224 { 1225 LexPtr++; 1226 if( !(expr1=ParseAtomExpr(3)) ) 1227 return nullptr; 1228 return( BuildAtomNot(expr1) ); 1229 } 1230 return( ParseComplexAtomPrimitive() ); 1231 } 1232 return nullptr; 1233 } 1234 ParseBondPrimitive(void)1235 BondExpr *OBSmartsPattern::ParseBondPrimitive( void ) 1236 { 1237 char bsym = *LexPtr++; 1238 1239 switch(bsym) 1240 { 1241 case '-': return BuildBondLeaf(BE_SINGLE); 1242 case '=': return BuildBondLeaf(BE_DOUBLE); 1243 case '#': return BuildBondLeaf(BE_TRIPLE); 1244 case '$': return BuildBondLeaf(BE_QUAD); 1245 case ':': return BuildBondLeaf(BE_AROM); 1246 case '@': return BuildBondLeaf(BE_RING); 1247 case '~': return BuildBondLeaf(BE_ANY); 1248 1249 // return BuildBondLeaf(*LexPtr == '?' ? BE_UPUNSPEC : BE_UP); 1250 case '/': return BuildBondLeaf(BE_SINGLE); 1251 // return BuildBondLeaf(*LexPtr == '?' ? BE_DOWNUNSPEC : BE_DOWN); 1252 case '\\': return BuildBondLeaf(BE_SINGLE); 1253 } 1254 LexPtr--; 1255 return nullptr; 1256 } 1257 ParseBondExpr(int level)1258 BondExpr *OBSmartsPattern::ParseBondExpr( int level ) 1259 { 1260 BondExpr *expr1 = nullptr; 1261 BondExpr *expr2 = nullptr; 1262 char *prev; 1263 1264 switch( level ) 1265 { 1266 case(0): /* Low Precedence Conjunction */ 1267 if( !(expr1=ParseBondExpr(1)) ) 1268 return nullptr; 1269 1270 while( *LexPtr == ';' ) 1271 { 1272 LexPtr++; 1273 if( !(expr2=ParseBondExpr(1)) ) 1274 { 1275 FreeBondExpr(expr1); 1276 return nullptr; 1277 } 1278 expr1 = BuildBondBin(BE_ANDLO,expr1,expr2); 1279 } 1280 return expr1; 1281 1282 case(1): /* Disjunction */ 1283 if( !(expr1=ParseBondExpr(2)) ) 1284 return nullptr; 1285 1286 while( *LexPtr == ',' ) 1287 { 1288 LexPtr++; 1289 if( !(expr2=ParseBondExpr(2)) ) 1290 { 1291 FreeBondExpr(expr1); 1292 return nullptr; 1293 } 1294 expr1 = BuildBondBin(BE_OR,expr1,expr2); 1295 } 1296 return expr1; 1297 1298 case(2): /* High Precedence Conjunction */ 1299 if( !(expr1=ParseBondExpr(3)) ) 1300 return nullptr; 1301 1302 while( (*LexPtr!=']') && (*LexPtr!=';') && 1303 (*LexPtr!=',') && *LexPtr ) 1304 { 1305 if( *LexPtr == '&' ) 1306 LexPtr++; 1307 prev = LexPtr; 1308 if( !(expr2=ParseBondExpr(3)) ) 1309 { 1310 if( prev != LexPtr ) 1311 { 1312 FreeBondExpr(expr1); 1313 return nullptr; 1314 } 1315 else 1316 return expr1; 1317 } 1318 expr1 = BuildBondBin(BE_ANDHI,expr1,expr2); 1319 } 1320 return expr1; 1321 1322 case(3): /* Negation or Primitive */ 1323 if( *LexPtr == '!' ) 1324 { 1325 LexPtr++; 1326 if( !(expr1=ParseBondExpr(3)) ) 1327 return nullptr; 1328 return BuildBondNot(expr1); 1329 } 1330 return ParseBondPrimitive(); 1331 } 1332 return nullptr; 1333 } 1334 GetVectorBinding()1335 int OBSmartsPattern::GetVectorBinding() 1336 { 1337 int vb=0; 1338 1339 LexPtr++; //skip colon 1340 if(isdigit(*LexPtr)) 1341 { 1342 vb = 0; 1343 while( isdigit(*LexPtr) ) 1344 vb = vb*10 + ((*LexPtr++)-'0'); 1345 } 1346 1347 return(vb); 1348 } 1349 ParseSMARTSError(Pattern * pat,BondExpr * expr)1350 Pattern *OBSmartsPattern::ParseSMARTSError( Pattern *pat, BondExpr *expr ) 1351 { 1352 if( expr ) 1353 FreeBondExpr(expr); 1354 return SMARTSError(pat); 1355 } 1356 SMARTSParser(Pattern * pat,ParseState * stat,int prev,int part)1357 Pattern *OBSmartsPattern::SMARTSParser( Pattern *pat, ParseState *stat, 1358 int prev, int part ) 1359 { 1360 int vb = 0; 1361 AtomExpr *aexpr; 1362 BondExpr *bexpr; 1363 int index; 1364 1365 bexpr = nullptr; 1366 1367 while( *LexPtr ) 1368 { 1369 switch( *LexPtr++ ) 1370 { 1371 case('.'): 1372 return ParseSMARTSError(pat,bexpr); 1373 1374 case('-'): case('='): case('#'): case('$'): 1375 case(':'): case('~'): case('@'): 1376 case('/'): case('\\'): case('!'): 1377 LexPtr--; 1378 if( (prev==-1) || bexpr ) 1379 return ParseSMARTSError(pat,bexpr); 1380 if( !(bexpr=ParseBondExpr(0)) ) 1381 return ParseSMARTSError(pat,bexpr); 1382 break; 1383 1384 case('('): 1385 if( bexpr ) 1386 { 1387 LexPtr--; 1388 return ParseSMARTSError(pat,bexpr); 1389 } 1390 if( prev == -1 ) 1391 { 1392 index = pat->acount; 1393 pat = SMARTSParser(pat,stat,-1,part); 1394 if( !pat ) 1395 return nullptr; 1396 if( index == pat->acount ) 1397 return ParseSMARTSError(pat,bexpr); 1398 prev = index; 1399 } 1400 else 1401 { 1402 pat = SMARTSParser(pat,stat,prev,part); 1403 if( !pat ) 1404 return nullptr; 1405 } 1406 1407 if( *LexPtr != ')' ) 1408 return ParseSMARTSError(pat,bexpr); 1409 LexPtr++; 1410 break; 1411 1412 case(')'): LexPtr--; 1413 if( (prev==-1) || bexpr ) 1414 return ParseSMARTSError(pat,bexpr); 1415 return pat; 1416 1417 case('%'): if( prev == -1 ) 1418 { 1419 LexPtr--; 1420 return ParseSMARTSError(pat,bexpr); 1421 } 1422 1423 if( isdigit(LexPtr[0]) && isdigit(LexPtr[1]) ) 1424 { 1425 index = 10*(LexPtr[0]-'0') + (LexPtr[1]-'0'); 1426 LexPtr += 2; 1427 } 1428 else 1429 return ParseSMARTSError(pat,bexpr); 1430 1431 if( stat->closure[index] == -1 ) 1432 { 1433 stat->closord[index] = bexpr; 1434 stat->closure[index] = prev; 1435 } 1436 else if( stat->closure[index] != prev ) 1437 { 1438 if( !bexpr ) { 1439 if (!stat->closord[index]) { 1440 bexpr = GenerateDefaultBond(); 1441 FreeBondExpr(stat->closord[index]); 1442 } else 1443 bexpr = stat->closord[index]; 1444 } else if (stat->closord[index] && !EquivalentBondExpr(bexpr, stat->closord[index])) 1445 return ParseSMARTSError(pat,bexpr); 1446 1447 CreateBond(pat,bexpr,prev,stat->closure[index]); 1448 stat->closure[index] = -1; 1449 bexpr = nullptr; 1450 } 1451 else 1452 return ParseSMARTSError(pat,bexpr); 1453 break; 1454 1455 case('0'): case('1'): case('2'): 1456 case('3'): case('4'): case('5'): 1457 case('6'): case('7'): case('8'): 1458 case('9'): LexPtr--; 1459 if( prev == -1 ) 1460 return ParseSMARTSError(pat,bexpr); 1461 index = (*LexPtr++)-'0'; 1462 1463 if( stat->closure[index] == -1 ) 1464 { // Ring opening 1465 stat->closord[index] = bexpr; 1466 stat->closure[index] = prev; 1467 pat->atom[prev].nbrs.push_back(-index); // Store the BC idx as a -ve 1468 bexpr = nullptr; 1469 } 1470 else if( stat->closure[index] != prev ) 1471 { // Ring closure 1472 if( !bexpr ) { 1473 if (!stat->closord[index]) { 1474 bexpr = GenerateDefaultBond(); 1475 FreeBondExpr(stat->closord[index]); 1476 } else 1477 bexpr = stat->closord[index]; 1478 } else if (stat->closord[index] && !EquivalentBondExpr(bexpr, stat->closord[index])) 1479 return ParseSMARTSError(pat,bexpr); 1480 1481 CreateBond(pat,bexpr,prev,stat->closure[index]); 1482 pat->atom[prev].nbrs.push_back(stat->closure[index]); 1483 for (unsigned int nbr_idx=0; nbr_idx < pat->atom[stat->closure[index]].nbrs.size(); ++nbr_idx) { 1484 if (pat->atom[stat->closure[index]].nbrs[nbr_idx] == -index) 1485 pat->atom[stat->closure[index]].nbrs[nbr_idx] = prev; 1486 } 1487 stat->closure[index] = -1; 1488 bexpr = nullptr; 1489 } 1490 else 1491 return ParseSMARTSError(pat,bexpr); 1492 break; 1493 1494 case('['): 1495 // shortcut for '[H]' primitive (PR#1463791) 1496 if (*LexPtr == 'H' && *(LexPtr+1) == ']') 1497 { 1498 aexpr = GenerateElement(1); 1499 LexPtr++; // skip the 'H' 1500 pat->hasExplicitH = true; 1501 } 1502 else 1503 aexpr = ParseAtomExpr(0); 1504 vb = (*LexPtr == ':') ? GetVectorBinding():0; 1505 if( !aexpr || (*LexPtr!=']') ) 1506 return ParseSMARTSError(pat,bexpr); 1507 index = CreateAtom(pat,aexpr,part,vb); 1508 if( prev != -1 ) 1509 { 1510 if( !bexpr ) 1511 bexpr = GenerateDefaultBond(); 1512 CreateBond(pat,bexpr,prev,index); 1513 pat->atom[index].nbrs.push_back(prev); 1514 pat->atom[prev].nbrs.push_back(index); 1515 bexpr = nullptr; 1516 } 1517 if(*(LexPtr-1) == 'H' && *(LexPtr-2) == '@' ) { // i.e. [C@H] or [C@@H] 1518 pat->atom[index].nbrs.push_back(SmartsImplicitRef); 1519 } 1520 prev = index; 1521 LexPtr++; 1522 break; 1523 1524 default: 1525 LexPtr--; 1526 aexpr = ParseSimpleAtomPrimitive(); 1527 if( !aexpr ) 1528 return ParseSMARTSError(pat,bexpr); 1529 index = CreateAtom(pat,aexpr,part); 1530 if( prev != -1 ) 1531 { 1532 if( !bexpr ) 1533 bexpr = GenerateDefaultBond(); 1534 CreateBond(pat,bexpr,prev,index); 1535 pat->atom[index].nbrs.push_back(prev); 1536 pat->atom[prev].nbrs.push_back(index); 1537 bexpr = nullptr; 1538 } 1539 prev = index; 1540 } 1541 } 1542 1543 if( (prev==-1) || bexpr ) 1544 return ParseSMARTSError(pat,bexpr); 1545 1546 return pat; 1547 } 1548 MarkGrowBonds(Pattern * pat)1549 static void MarkGrowBonds(Pattern *pat) 1550 { 1551 int i; 1552 OBBitVec bv; 1553 1554 for (i = 0;i < pat->bcount;++i) 1555 { 1556 pat->bond[i].grow = (bv[pat->bond[i].src] && bv[pat->bond[i].dst])? 1557 false:true; 1558 1559 bv.SetBitOn(pat->bond[i].src); 1560 bv.SetBitOn(pat->bond[i].dst); 1561 } 1562 } 1563 GetChiralFlag(AtomExpr * expr)1564 static int GetChiralFlag(AtomExpr *expr) 1565 { 1566 int tmp1,tmp2; 1567 1568 switch (expr->type) 1569 { 1570 case AE_CHIRAL: 1571 return expr->leaf.value; 1572 1573 case AE_ANDHI: 1574 case AE_ANDLO: 1575 tmp1 = GetChiralFlag(expr->bin.lft); 1576 tmp2 = GetChiralFlag(expr->bin.rgt); 1577 if (tmp1 == 0) return tmp2; 1578 if (tmp2 == 0) return tmp1; 1579 if (tmp1 == tmp2) return tmp1; 1580 break; 1581 1582 case AE_OR: 1583 tmp1 = GetChiralFlag(expr->bin.lft); 1584 tmp2 = GetChiralFlag(expr->bin.rgt); 1585 if (tmp1 == 0 || tmp2 == 0) return 0; 1586 if (tmp1 == tmp2) return tmp1; 1587 break; 1588 1589 case AE_NOT: 1590 // Treat [!@] as [@@], and [!@@] as [@] 1591 tmp1 = GetChiralFlag(expr->mon.arg); 1592 if (tmp1 == AL_ANTICLOCKWISE) return AL_CLOCKWISE; 1593 if (tmp1 == AL_CLOCKWISE) return AL_ANTICLOCKWISE; 1594 break; 1595 } 1596 1597 return 0; 1598 } 1599 ParseSMARTSPart(Pattern * result,int part)1600 Pattern *OBSmartsPattern::ParseSMARTSPart( Pattern *result, int part ) 1601 { 1602 ParseState stat; 1603 int i,flag; 1604 1605 for( i=0; i<100; i++ ) 1606 stat.closure[i] = -1; 1607 1608 result = SMARTSParser(result,&stat,-1,part); 1609 1610 flag = false; 1611 for( i=0; i<100; i++ ) 1612 if( stat.closure[i] != -1 ) 1613 { 1614 FreeBondExpr(stat.closord[i]); 1615 flag = true; 1616 } 1617 1618 if( result ) 1619 { 1620 if( flag ) 1621 return(SMARTSError(result)); 1622 else 1623 { 1624 MarkGrowBonds(result); 1625 result->ischiral = false; 1626 for (i = 0;i < result->acount;++i) 1627 { 1628 result->atom[i].chiral_flag = GetChiralFlag(result->atom[i].expr); 1629 if (result->atom[i].chiral_flag) 1630 result->ischiral = true; 1631 } 1632 return(result); 1633 } 1634 } 1635 else 1636 return nullptr; 1637 } 1638 1639 ParseSMARTSPattern(void)1640 Pattern *OBSmartsPattern::ParseSMARTSPattern( void ) 1641 { 1642 Pattern *result; 1643 result = AllocPattern(); 1644 1645 while( *LexPtr == '(' ) 1646 { 1647 if (!result) 1648 return nullptr; // ensure we don't get a null dereference 1649 1650 LexPtr++; 1651 result = ParseSMARTSPart(result,result->parts); 1652 if( !result ) 1653 return nullptr; 1654 result->parts++; 1655 1656 if( *LexPtr != ')' ) 1657 return SMARTSError(result); 1658 LexPtr++; 1659 1660 if( !*LexPtr || (*LexPtr==')') ) 1661 return result; 1662 1663 if( *LexPtr != '.' ) 1664 return SMARTSError(result); 1665 1666 // Here's where we'd handle fragments 1667 // cerr << " conjunction " << LexPtr[0] << endl; 1668 LexPtr++; 1669 } 1670 1671 return ParseSMARTSPart(result,0); 1672 } 1673 ParseSMARTSString(char * ptr)1674 Pattern *OBSmartsPattern::ParseSMARTSString( char *ptr ) 1675 { 1676 Pattern *result; 1677 1678 if( !ptr || !*ptr ) 1679 return nullptr; 1680 1681 LexPtr = MainPtr = ptr; 1682 result = ParseSMARTSPattern(); 1683 if( result && *LexPtr ) 1684 return SMARTSError(result); 1685 return result; 1686 } 1687 ParseSMARTSRecord(char * ptr)1688 Pattern *OBSmartsPattern::ParseSMARTSRecord( char *ptr ) 1689 { 1690 char *src; 1691 1692 src = ptr; 1693 while( *src && !isspace(*src) ) 1694 src++; 1695 1696 if( isspace(*src) ) 1697 { 1698 *src++ = '\0'; 1699 while( isspace(*src) ) 1700 src++; 1701 } 1702 1703 return ParseSMARTSString(ptr); 1704 } 1705 1706 //********************************** 1707 //********Pattern Matching********** 1708 //********************************** 1709 Init(const char * buffer)1710 bool OBSmartsPattern::Init(const char *buffer) 1711 { 1712 if (_buffer != nullptr) 1713 delete[] _buffer; 1714 _buffer = new char[strlen(buffer) + 1]; 1715 strcpy(_buffer,buffer); 1716 1717 if (_pat != nullptr) 1718 FreePattern(_pat); 1719 _pat = ParseSMARTSRecord(_buffer); 1720 _str = _buffer; 1721 1722 return _pat != nullptr; 1723 } 1724 Init(const std::string & s)1725 bool OBSmartsPattern::Init(const std::string &s) 1726 { 1727 if (_buffer != nullptr) 1728 delete[] _buffer; 1729 _buffer = new char[s.length() + 1]; 1730 strcpy(_buffer, s.c_str()); 1731 1732 if (_pat != nullptr) 1733 FreePattern(_pat); 1734 _pat = ParseSMARTSRecord(_buffer); 1735 _str = s; 1736 1737 return _pat != nullptr; 1738 } 1739 ~OBSmartsPattern()1740 OBSmartsPattern::~OBSmartsPattern() 1741 { 1742 if (_pat) 1743 FreePattern(_pat); 1744 if(_buffer) 1745 delete [] _buffer; 1746 } 1747 Match(OBMol & mol,bool single)1748 bool OBSmartsPattern::Match(OBMol &mol,bool single) 1749 { 1750 OBSmartsMatcher matcher; 1751 if (_pat == nullptr) 1752 return false; 1753 if(_pat->hasExplicitH) //The SMARTS pattern contains [H] 1754 { 1755 //Do matching on a copy of mol with explicit hydrogens 1756 OBMol tmol = mol; 1757 tmol.AddHydrogens(false,false); 1758 return(matcher.match(tmol,_pat,_mlist,single)); 1759 } 1760 return(matcher.match(mol,_pat,_mlist,single)); 1761 } 1762 HasMatch(OBMol & mol) const1763 bool OBSmartsPattern::HasMatch(OBMol &mol) const 1764 { 1765 //a convenience function 1766 std::vector<std::vector<int> > dummy; 1767 return Match(mol, dummy, Single); 1768 } 1769 Match(OBMol & mol,std::vector<std::vector<int>> & mlist,MatchType mtype) const1770 bool OBSmartsPattern::Match(OBMol &mol, std::vector<std::vector<int> > & mlist, 1771 MatchType mtype /*=All*/) const 1772 { 1773 OBSmartsMatcher matcher; 1774 mlist.clear(); 1775 if (_pat == nullptr) 1776 return false; 1777 if(_pat->hasExplicitH) //The SMARTS pattern contains [H] 1778 { 1779 //Do matching on a copy of mol with explicit hydrogens 1780 OBMol tmol = mol; 1781 tmol.AddHydrogens(false,false); 1782 if(!matcher.match(tmol,_pat,mlist,mtype == Single)) 1783 return false; 1784 } 1785 else if(!matcher.match(mol,_pat,mlist,mtype == Single)) 1786 return false; 1787 1788 if((mtype == AllUnique) && mlist.size() > 1) 1789 { 1790 //uniquify 1791 bool ok; 1792 OBBitVec bv; 1793 std::vector<OBBitVec> vbv; 1794 std::vector<std::vector<int> > ulist; 1795 std::vector<std::vector<int> >::iterator i; 1796 std::vector<OBBitVec>::iterator j; 1797 1798 for (i = mlist.begin();i != mlist.end();++i) 1799 { 1800 ok = true; 1801 bv.Clear(); 1802 bv.FromVecInt(*i); 1803 for (j = vbv.begin();j != vbv.end() && ok;++j) 1804 if ((*j) == bv) 1805 ok = false; 1806 1807 if (ok) 1808 { 1809 ulist.push_back(*i); 1810 vbv.push_back(bv); 1811 } 1812 } 1813 1814 mlist = ulist; 1815 } 1816 return true; 1817 } 1818 1819 RestrictedMatch(OBMol & mol,std::vector<std::pair<int,int>> & pr,bool single)1820 bool OBSmartsPattern::RestrictedMatch(OBMol &mol, 1821 std::vector<std::pair<int,int> > &pr, 1822 bool single) 1823 { 1824 bool ok; 1825 std::vector<std::vector<int> > mlist; 1826 std::vector<std::vector<int> >::iterator i; 1827 std::vector<std::pair<int,int> >::iterator j; 1828 1829 OBSmartsMatcher matcher; 1830 matcher.match(mol,_pat,mlist); 1831 _mlist.clear(); 1832 if (mlist.empty()) 1833 return(false); 1834 1835 for (i = mlist.begin();i != mlist.end();++i) 1836 { 1837 ok = true; 1838 for (j = pr.begin();j != pr.end() && ok;++j) 1839 if ((*i)[j->first] != j->second) 1840 ok = false; 1841 1842 if (ok) 1843 _mlist.push_back(*i); 1844 if (single && !_mlist.empty()) 1845 return(true); 1846 } 1847 1848 return((_mlist.empty()) ? false:true); 1849 } 1850 RestrictedMatch(OBMol & mol,OBBitVec & vres,bool single)1851 bool OBSmartsPattern::RestrictedMatch(OBMol &mol,OBBitVec &vres, bool single) 1852 { 1853 bool ok; 1854 std::vector<int>::iterator j; 1855 std::vector<std::vector<int> > mlist; 1856 std::vector<std::vector<int> >::iterator i; 1857 1858 OBSmartsMatcher matcher; 1859 matcher.match(mol,_pat,mlist); 1860 1861 _mlist.clear(); 1862 if (mlist.empty()) 1863 return(false); 1864 1865 for (i = mlist.begin();i != mlist.end();++i) 1866 { 1867 ok = true; 1868 for (j = i->begin();j != i->end();++j) 1869 if (!vres[*j]) 1870 { 1871 ok = false; 1872 break; 1873 } 1874 if (!ok) 1875 continue; 1876 1877 _mlist.push_back(*i); 1878 if (single && !_mlist.empty()) 1879 return(true); 1880 } 1881 1882 return((_mlist.empty()) ? false:true); 1883 } 1884 SetupAtomMatchTable(std::vector<std::vector<bool>> & ttab,const Pattern * pat,OBMol & mol)1885 void OBSmartsMatcher::SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab, 1886 const Pattern *pat, OBMol &mol) 1887 { 1888 int i; 1889 1890 ttab.resize(pat->acount); 1891 for (i = 0;i < pat->acount;++i) 1892 ttab[i].resize(mol.NumAtoms()+1); 1893 1894 OBAtom *atom; 1895 std::vector<OBAtom*>::iterator j; 1896 for (i = 0;i < pat->acount;++i) 1897 for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j)) 1898 if (EvalAtomExpr(pat->atom[0].expr,atom)) 1899 ttab[i][atom->GetIdx()] = true; 1900 } 1901 FastSingleMatch(OBMol & mol,const Pattern * pat,std::vector<std::vector<int>> & mlist)1902 void OBSmartsMatcher::FastSingleMatch(OBMol &mol, const Pattern *pat, 1903 std::vector<std::vector<int> > &mlist) 1904 { 1905 OBAtom *atom,*a1,*nbr; 1906 std::vector<OBAtom*>::iterator i; 1907 1908 OBBitVec bv(mol.NumAtoms()+1); 1909 std::vector<int> map; 1910 map.resize(pat->acount); 1911 std::vector<std::vector<OBBond*>::iterator> vi; 1912 std::vector<bool> vif; 1913 1914 if (pat->bcount) 1915 { 1916 vif.resize(pat->bcount); 1917 vi.resize(pat->bcount); 1918 } 1919 1920 int bcount; 1921 for (atom = mol.BeginAtom(i);atom;atom=mol.NextAtom(i)) 1922 if (EvalAtomExpr(pat->atom[0].expr,atom)) 1923 { 1924 map[0] = atom->GetIdx(); 1925 if (pat->bcount) 1926 vif[0] = false; 1927 bv.Clear(); 1928 bv.SetBitOn(atom->GetIdx()); 1929 1930 for (bcount=0;bcount >=0;) 1931 { 1932 //***entire pattern matched*** 1933 if (bcount == pat->bcount) //save full match here 1934 { 1935 mlist.push_back(map); 1936 bcount--; 1937 return; //found a single match 1938 } 1939 1940 //***match the next bond*** 1941 if (!pat->bond[bcount].grow) //just check bond here 1942 { 1943 if ( !vif[bcount] ) 1944 { 1945 OBBond *bond = mol.GetBond(map[pat->bond[bcount].src], 1946 map[pat->bond[bcount].dst]); 1947 if (bond && EvalBondExpr(pat->bond[bcount].expr,bond)) 1948 { 1949 vif[bcount++] = true; 1950 if (bcount < pat->bcount) 1951 vif[bcount] = false; 1952 } 1953 else 1954 bcount--; 1955 } 1956 else //bond must have already been visited - backtrack 1957 bcount--; 1958 } 1959 else //need to map atom and check bond 1960 { 1961 a1 = mol.GetAtom(map[pat->bond[bcount].src]); 1962 1963 if (!vif[bcount]) //figure out which nbr atom we are mapping 1964 { 1965 nbr = a1->BeginNbrAtom(vi[bcount]); 1966 } 1967 else 1968 { 1969 bv.SetBitOff(map[pat->bond[bcount].dst]); 1970 nbr = a1->NextNbrAtom(vi[bcount]); 1971 } 1972 1973 for (;nbr;nbr=a1->NextNbrAtom(vi[bcount])) 1974 if (!bv[nbr->GetIdx()]) 1975 if (EvalAtomExpr(pat->atom[pat->bond[bcount].dst].expr,nbr) 1976 && EvalBondExpr(pat->bond[bcount].expr,(OBBond *)*(vi[bcount]))) 1977 { 1978 bv.SetBitOn(nbr->GetIdx()); 1979 map[pat->bond[bcount].dst] = nbr->GetIdx(); 1980 vif[bcount] = true; 1981 bcount++; 1982 if (bcount < pat->bcount) 1983 vif[bcount] = false; 1984 break; 1985 } 1986 1987 if (!nbr)//no match - time to backtrack 1988 bcount--; 1989 } 1990 } 1991 } 1992 } 1993 1994 match(OBMol & mol,const Pattern * pat,std::vector<std::vector<int>> & mlist,bool single)1995 bool OBSmartsMatcher::match(OBMol &mol, const Pattern *pat, 1996 std::vector<std::vector<int> > &mlist,bool single) 1997 { 1998 mlist.clear(); 1999 if (!pat || pat->acount == 0) 2000 return(false);//shouldn't ever happen 2001 2002 if (single && !pat->ischiral) { 2003 // perform a fast single match (only works for non-chiral SMARTS) 2004 FastSingleMatch(mol,pat,mlist); 2005 } else { 2006 // perform normal match (chirality ignored and checked below) 2007 OBSSMatch ssm(mol,pat); 2008 ssm.Match(mlist); 2009 } 2010 2011 if (pat->ischiral) { 2012 std::vector<std::vector<int> >::iterator m; 2013 std::vector<std::vector<int> > tmpmlist; 2014 2015 tmpmlist.clear(); 2016 // iterate over the atom mappings 2017 for (m = mlist.begin();m != mlist.end();++m) { 2018 2019 bool allStereoCentersMatch = true; 2020 2021 // for each pattern atom 2022 for (int j = 0; j < pat->acount; ++j) { 2023 // skip non-chiral pattern atoms 2024 if (!pat->atom[j].chiral_flag) 2025 continue; 2026 // ignore @? in smarts, parse like any other smarts 2027 if (pat->atom[j].chiral_flag == AL_UNSPECIFIED) 2028 continue; 2029 2030 // use the mapping the get the chiral atom in the molecule being queried 2031 OBAtom *center = mol.GetAtom((*m)[j]); 2032 2033 // get the OBTetrahedralStereo::Config from the molecule 2034 OBStereoFacade stereo(&mol); 2035 OBTetrahedralStereo *ts = stereo.GetTetrahedralStereo(center->GetId()); 2036 if (!ts || !ts->GetConfig().specified) { 2037 // no stereochemistry specified in molecule for the atom 2038 // corresponding to the chiral pattern atom using the current 2039 // mapping --> no match 2040 allStereoCentersMatch = false; 2041 break; 2042 } 2043 2044 std::vector<int> nbrs = pat->atom[j].nbrs; 2045 2046 if (nbrs.size() != 4) { // 3 nbrs currently not supported. Other values are errors. 2047 //stringstream ss; 2048 //ss << "Ignoring stereochemistry. There are " << nbrs.size() << " connections to this atom instead of 4. Title: " << mol.GetTitle(); 2049 //obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); 2050 continue; 2051 } 2052 2053 // construct a OBTetrahedralStereo::Config using the smarts pattern 2054 OBTetrahedralStereo::Config smartsConfig; 2055 smartsConfig.center = center->GetId(); 2056 if (nbrs.at(0) == SmartsImplicitRef) 2057 smartsConfig.from = OBStereo::ImplicitRef; 2058 else 2059 smartsConfig.from = mol.GetAtom( (*m)[nbrs.at(0)] )->GetId(); 2060 OBStereo::Ref firstref; 2061 if (nbrs.at(1) == SmartsImplicitRef) 2062 firstref = OBStereo::ImplicitRef; 2063 else 2064 firstref = mol.GetAtom( (*m)[nbrs.at(1)] )->GetId(); 2065 OBAtom *ra2 = mol.GetAtom( (*m)[nbrs.at(2)] ); 2066 OBAtom *ra3 = mol.GetAtom( (*m)[nbrs.at(3)] ); 2067 smartsConfig.refs = OBStereo::MakeRefs(firstref, ra2->GetId(), ra3->GetId()); 2068 2069 smartsConfig.view = OBStereo::ViewFrom; 2070 switch (pat->atom[j].chiral_flag) { 2071 case AL_CLOCKWISE: 2072 smartsConfig.winding = OBStereo::Clockwise; 2073 break; 2074 case AL_ANTICLOCKWISE: 2075 smartsConfig.winding = OBStereo::AntiClockwise; 2076 break; 2077 default: 2078 smartsConfig.specified = false; 2079 } 2080 2081 // cout << "smarts config = " << smartsConfig << endl; 2082 // cout << "molecule config = " << ts->GetConfig() << endl; 2083 // cout << "match = " << (ts->GetConfig() == smartsConfig) << endl; 2084 2085 // and save the match if the two configurations are the same 2086 if (ts->GetConfig() != smartsConfig) 2087 allStereoCentersMatch = false; 2088 2089 // don't waste time checking more stereocenters using this mapping if one didn't match 2090 if (!allStereoCentersMatch) 2091 break; 2092 } 2093 2094 // if all the atoms in the molecule match the stereochemistry specified 2095 // in the smarts pattern, save this mapping as a match 2096 if (allStereoCentersMatch) 2097 tmpmlist.push_back(*m); 2098 } 2099 2100 mlist = tmpmlist; 2101 } 2102 2103 return(!mlist.empty()); 2104 } 2105 EvalAtomExpr(AtomExpr * expr,OBAtom * atom)2106 bool OBSmartsMatcher::EvalAtomExpr(AtomExpr *expr,OBAtom *atom) 2107 { 2108 for (;;) 2109 switch (expr->type) 2110 { 2111 case AE_TRUE: 2112 return true; 2113 case AE_FALSE: 2114 return false; 2115 case AE_AROMATIC: 2116 return atom->IsAromatic(); 2117 case AE_ALIPHATIC: 2118 return !atom->IsAromatic(); 2119 case AE_CYCLIC: 2120 return atom->IsInRing(); 2121 case AE_ACYCLIC: 2122 return !atom->IsInRing(); 2123 2124 case AE_MASS: 2125 return expr->leaf.value == atom->GetIsotope(); 2126 case AE_ELEM: 2127 return expr->leaf.value == (int)atom->GetAtomicNum(); 2128 case AE_AROMELEM: 2129 return expr->leaf.value == (int)atom->GetAtomicNum() && 2130 atom->IsAromatic(); 2131 case AE_ALIPHELEM: 2132 return expr->leaf.value == (int)atom->GetAtomicNum() && 2133 !atom->IsAromatic(); 2134 case AE_HCOUNT: 2135 return expr->leaf.value == ((int)atom->ExplicitHydrogenCount() + 2136 (int)atom->GetImplicitHCount()); 2137 case AE_CHARGE: 2138 return expr->leaf.value == atom->GetFormalCharge(); 2139 case AE_CONNECT: 2140 return expr->leaf.value == (int)atom->GetTotalDegree(); 2141 case AE_DEGREE: 2142 return expr->leaf.value == (int)atom->GetExplicitDegree(); 2143 case AE_IMPLICIT: 2144 return expr->leaf.value == (int)atom->GetImplicitHCount(); 2145 case AE_RINGS: 2146 return expr->leaf.value == (int)atom->MemberOfRingCount(); 2147 case AE_SIZE: 2148 return atom->IsInRingSize(expr->leaf.value); 2149 case AE_VALENCE: 2150 return expr->leaf.value == (int)atom->GetTotalValence(); 2151 case AE_CHIRAL: 2152 // always return true (i.e. accept the match) and check later 2153 return true; 2154 case AE_HYB: 2155 return expr->leaf.value == (int)atom->GetHyb(); 2156 case AE_RINGCONNECT: 2157 return expr->leaf.value == (int)atom->CountRingBonds(); 2158 2159 case AE_NOT: 2160 return !EvalAtomExpr(expr->mon.arg,atom); 2161 case AE_ANDHI: /* Same as AE_ANDLO */ 2162 case AE_ANDLO: 2163 if (!EvalAtomExpr(expr->bin.lft,atom)) 2164 return false; 2165 expr = expr->bin.rgt; 2166 break; 2167 case AE_OR: 2168 if (EvalAtomExpr(expr->bin.lft,atom)) 2169 return true; 2170 expr = expr->bin.rgt; 2171 break; 2172 2173 case AE_RECUR: 2174 { 2175 //see if pattern has been matched 2176 std::vector<std::pair<const Pattern*,std::vector<bool> > >::iterator i; 2177 for (i = RSCACHE.begin();i != RSCACHE.end();++i) 2178 if (i->first == (Pattern*)expr->recur.recur) 2179 return(i->second[atom->GetIdx()]); 2180 2181 //perceive and match pattern 2182 std::vector<std::vector<int> >::iterator j; 2183 std::vector<bool> vb(((OBMol*) atom->GetParent())->NumAtoms()+1); 2184 std::vector<std::vector<int> > mlist; 2185 if (match( *((OBMol *) atom->GetParent()), 2186 (Pattern*)expr->recur.recur,mlist)) 2187 for (j = mlist.begin();j != mlist.end();++j) 2188 vb[(*j)[0]] = true; 2189 2190 RSCACHE.push_back(std::pair<const Pattern*, 2191 std::vector<bool> > ((const Pattern*)expr->recur.recur, 2192 vb)); 2193 2194 return(vb[atom->GetIdx()]); 2195 } 2196 2197 default: 2198 return false; 2199 } 2200 } 2201 EvalBondExpr(BondExpr * expr,OBBond * bond)2202 bool OBSmartsMatcher::EvalBondExpr(BondExpr *expr,OBBond *bond) 2203 { 2204 for (;;) 2205 switch( expr->type ) 2206 { 2207 case BE_ANDHI: 2208 case BE_ANDLO: 2209 if (!EvalBondExpr(expr->bin.lft,bond)) 2210 return false; 2211 expr = expr->bin.rgt; 2212 break; 2213 2214 case BE_OR: 2215 if (EvalBondExpr(expr->bin.lft,bond)) 2216 return true; 2217 expr = expr->bin.rgt; 2218 break; 2219 2220 case BE_NOT: 2221 return !EvalBondExpr(expr->mon.arg,bond); 2222 2223 case BE_ANY: 2224 return true; 2225 case BE_DEFAULT: 2226 return bond->GetBondOrder()==1 || bond->IsAromatic(); 2227 case BE_SINGLE: 2228 return bond->GetBondOrder()==1 && !bond->IsAromatic(); 2229 case BE_DOUBLE: 2230 return bond->GetBondOrder()==2 && !bond->IsAromatic(); 2231 case BE_TRIPLE: 2232 return bond->GetBondOrder() == 3; 2233 case BE_QUAD: 2234 return bond->GetBondOrder() == 4; 2235 case BE_AROM: 2236 return bond->IsAromatic(); 2237 case BE_RING: 2238 return bond->IsInRing(); 2239 //case BE_UP: 2240 // return bond->IsUp(); 2241 //case BE_DOWN: 2242 // return bond->IsDown(); 2243 //case BE_UPUNSPEC: // up or unspecified (i.e., not down) 2244 // return !bond->IsDown(); 2245 //case BE_DOWNUNSPEC: // down or unspecified (i.e., not up) 2246 // return !bond->IsUp(); 2247 default: 2248 return false; 2249 } 2250 } 2251 GetUMapList()2252 std::vector<std::vector<int> > &OBSmartsPattern::GetUMapList() 2253 { 2254 if (_mlist.empty() || _mlist.size() == 1) 2255 return(_mlist); 2256 2257 bool ok; 2258 OBBitVec bv; 2259 std::vector<OBBitVec> vbv; 2260 std::vector<std::vector<int> > mlist; 2261 std::vector<std::vector<int> >::iterator i; 2262 std::vector<OBBitVec>::iterator j; 2263 2264 for (i = _mlist.begin();i != _mlist.end();++i) 2265 { 2266 ok = true; 2267 bv.Clear(); 2268 bv.FromVecInt(*i); 2269 for (j = vbv.begin();j != vbv.end() && ok;++j) 2270 if ((*j) == bv) 2271 ok = false; 2272 2273 if (ok) 2274 { 2275 mlist.push_back(*i); 2276 vbv.push_back(bv); 2277 } 2278 } 2279 2280 _mlist = mlist; 2281 return(_mlist); 2282 } 2283 WriteMapList(ostream & ofs)2284 void OBSmartsPattern::WriteMapList(ostream &ofs) 2285 { 2286 std::vector<std::vector<int> >::iterator i; 2287 std::vector<int>::iterator j; 2288 2289 for ( i = _mlist.begin() ; i != _mlist.end() ; ++i ) 2290 { 2291 for (j = (*i).begin();j != (*i).end();++j) 2292 ofs << *j << ' ' << ends; 2293 ofs << endl; 2294 } 2295 } 2296 2297 //******************************************************************* 2298 // The OBSSMatch class performs exhaustive matching using recursion 2299 // Explicit stack handling is used to find just a single match in 2300 // match() 2301 //******************************************************************* 2302 OBSSMatch(OBMol & mol,const Pattern * pat)2303 OBSSMatch::OBSSMatch(OBMol &mol, const Pattern *pat) 2304 { 2305 _mol = &mol; 2306 _pat = pat; 2307 _map.resize(pat->acount); 2308 2309 if (!mol.Empty()) 2310 { 2311 _uatoms = new bool [mol.NumAtoms()+1]; 2312 memset((char*)_uatoms,'\0',sizeof(bool)*(mol.NumAtoms()+1)); 2313 } 2314 else 2315 _uatoms = nullptr; 2316 } 2317 ~OBSSMatch()2318 OBSSMatch::~OBSSMatch() 2319 { 2320 if (_uatoms) 2321 delete [] _uatoms; 2322 } 2323 Match(std::vector<std::vector<int>> & mlist,int bidx)2324 void OBSSMatch::Match(std::vector<std::vector<int> > &mlist,int bidx) 2325 { 2326 OBSmartsMatcher matcher; 2327 if (bidx == -1) 2328 { 2329 OBAtom *atom; 2330 std::vector<OBAtom*>::iterator i; 2331 for (atom = _mol->BeginAtom(i);atom;atom = _mol->NextAtom(i)) 2332 if (matcher.EvalAtomExpr(_pat->atom[0].expr,atom)) 2333 { 2334 _map[0] = atom->GetIdx(); 2335 _uatoms[atom->GetIdx()] = true; 2336 Match(mlist,0); 2337 _map[0] = 0; 2338 _uatoms[atom->GetIdx()] = false; 2339 } 2340 return; 2341 } 2342 2343 if (bidx == _pat->bcount) //save full match here 2344 { 2345 mlist.push_back(_map); 2346 return; 2347 } 2348 2349 if (_pat->bond[bidx].grow) //match the next bond 2350 { 2351 int src = _pat->bond[bidx].src; 2352 int dst = _pat->bond[bidx].dst; 2353 2354 if (_map[src] <= 0 || _map[src] > (signed)_mol->NumAtoms()) 2355 return; 2356 2357 AtomExpr *aexpr = _pat->atom[dst].expr; 2358 BondExpr *bexpr = _pat->bond[bidx].expr; 2359 OBAtom *atom,*nbr; 2360 std::vector<OBBond*>::iterator i; 2361 2362 atom = _mol->GetAtom(_map[src]); 2363 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) 2364 if (!_uatoms[nbr->GetIdx()] && matcher.EvalAtomExpr(aexpr,nbr) && 2365 matcher.EvalBondExpr(bexpr,((OBBond*) *i))) 2366 { 2367 _map[dst] = nbr->GetIdx(); 2368 _uatoms[nbr->GetIdx()] = true; 2369 Match(mlist,bidx+1); 2370 _uatoms[nbr->GetIdx()] = false; 2371 _map[dst] = 0; 2372 } 2373 } 2374 else //just check bond here 2375 { 2376 OBBond *bond = _mol->GetBond(_map[_pat->bond[bidx].src], 2377 _map[_pat->bond[bidx].dst]); 2378 if (bond && matcher.EvalBondExpr(_pat->bond[bidx].expr,bond)) 2379 Match(mlist,bidx+1); 2380 } 2381 } 2382 GetExprOrder(BondExpr * expr)2383 static int GetExprOrder(BondExpr *expr) 2384 { 2385 int tmp1,tmp2; 2386 2387 switch( expr->type ) 2388 { 2389 case BE_SINGLE: return 1; 2390 case BE_DOUBLE: return 2; 2391 case BE_TRIPLE: return 3; 2392 case BE_QUAD: return 4; 2393 case BE_AROM: return 5; 2394 2395 case BE_UP: 2396 case BE_DOWN: 2397 case BE_UPUNSPEC: 2398 case BE_DOWNUNSPEC: 2399 return 1; 2400 2401 case BE_ANDHI: 2402 case BE_ANDLO: 2403 tmp1 = GetExprOrder(expr->bin.lft); 2404 tmp2 = GetExprOrder(expr->bin.rgt); 2405 if (tmp1 == 0) return tmp2; 2406 if (tmp2 == 0) return tmp1; 2407 if (tmp1 == tmp2) return tmp1; 2408 break; 2409 2410 case BE_OR: 2411 tmp1 = GetExprOrder(expr->bin.lft); 2412 if (tmp1 == 0) return 0; 2413 tmp2 = GetExprOrder(expr->bin.rgt); 2414 if (tmp2 == 0) return 0; 2415 if (tmp1 == tmp2) return tmp1; 2416 break; 2417 } 2418 2419 return 0; 2420 } 2421 GetExprCharge(AtomExpr * expr)2422 static int GetExprCharge(AtomExpr *expr) 2423 { 2424 int tmp1,tmp2; 2425 2426 switch( expr->type ) 2427 { 2428 case AE_CHARGE: 2429 return expr->leaf.value; 2430 2431 case AE_ANDHI: 2432 case AE_ANDLO: 2433 tmp1 = GetExprCharge(expr->bin.lft); 2434 tmp2 = GetExprCharge(expr->bin.rgt); 2435 if (tmp1 == 0) return tmp2; 2436 if (tmp2 == 0) return tmp1; 2437 if (tmp1 == tmp2) return tmp1; 2438 break; 2439 2440 case AE_OR: 2441 tmp1 = GetExprCharge(expr->bin.lft); 2442 if (tmp1 == 0) return 0; 2443 tmp2 = GetExprCharge(expr->bin.rgt); 2444 if (tmp2 == 0) return 0; 2445 if (tmp1 == tmp2) return tmp1; 2446 break; 2447 } 2448 2449 return 0; 2450 } 2451 GetCharge(int idx)2452 int OBSmartsPattern::GetCharge(int idx) 2453 { 2454 return GetExprCharge(_pat->atom[idx].expr); 2455 } 2456 GetExprAtomicNum(AtomExpr * expr)2457 static int GetExprAtomicNum(AtomExpr *expr) 2458 { 2459 int tmp1,tmp2; 2460 2461 switch( expr->type ) 2462 { 2463 case AE_ELEM: 2464 case AE_AROMELEM: 2465 case AE_ALIPHELEM: 2466 return expr->leaf.value; 2467 2468 case AE_ANDHI: 2469 case AE_ANDLO: 2470 tmp1 = GetExprAtomicNum(expr->bin.lft); 2471 tmp2 = GetExprAtomicNum(expr->bin.rgt); 2472 if (tmp1 == 0) return tmp2; 2473 if (tmp2 == 0) return tmp1; 2474 if (tmp1 == tmp2) return tmp1; 2475 break; 2476 2477 case AE_OR: 2478 tmp1 = GetExprAtomicNum(expr->bin.lft); 2479 if (tmp1 == 0) return 0; 2480 tmp2 = GetExprAtomicNum(expr->bin.rgt); 2481 if (tmp2 == 0) return 0; 2482 if (tmp1 == tmp2) return tmp1; 2483 break; 2484 } 2485 2486 return 0; 2487 } 2488 GetAtomicNum(int idx)2489 int OBSmartsPattern::GetAtomicNum(int idx) 2490 { 2491 return GetExprAtomicNum(_pat->atom[idx].expr); 2492 } 2493 GetBond(int & src,int & dst,int & ord,int idx)2494 void OBSmartsPattern::GetBond(int &src,int &dst,int &ord,int idx) 2495 { 2496 src = _pat->bond[idx].src; 2497 dst = _pat->bond[idx].dst; 2498 ord = GetExprOrder(_pat->bond[idx].expr); 2499 } 2500 SmartsLexReplace(std::string & s,std::vector<std::pair<std::string,std::string>> & vlex)2501 void SmartsLexReplace(std::string &s,std::vector<std::pair<std::string,std::string> > &vlex) 2502 { 2503 size_t j,pos; 2504 std::string token,repstr; 2505 std::vector<std::pair<std::string,std::string> >::iterator i; 2506 2507 for (pos = 0,pos = s.find("$",pos);pos < s.size();pos = s.find("$",pos)) 2508 //for (pos = 0,pos = s.find("$",pos);pos != std::string::npos;pos = s.find("$",pos)) 2509 { 2510 pos++; 2511 for (j = pos;j < s.size();++j) 2512 if (!isalpha(s[j]) && !isdigit(s[j]) && s[j] != '_') 2513 break; 2514 if (pos == j) 2515 continue; 2516 2517 token = s.substr(pos,j-pos); 2518 for (i = vlex.begin();i != vlex.end();++i) 2519 if (token == i->first) 2520 { 2521 repstr = "(" + i->second + ")"; 2522 s.replace(pos,j-pos,repstr); 2523 j = 0; 2524 } 2525 pos = j; 2526 } 2527 } 2528 2529 } // end namespace OpenBabel 2530 2531 //! \file parsmart.cpp 2532 //! \brief Implementation of Daylight SMARTS parser. 2533