# Natural Language Toolkit: CFG visualization
#
# Copyright (C) 2001-2019 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

"""
Visualization tools for CFGs.
"""

# Idea for a nice demo:
#   - 3 panes: grammar, treelet, working area
#     - grammar is a list of productions
#     - when you select a production, the treelet that it licenses appears
#       in the treelet area
#     - the working area has the text on the bottom, and S at top.  When
#       you select a production, it shows (ghosted) the locations where
#       that production's treelet could be attached to either the text
#       or the tree rooted at S.
#     - the user can drag the treelet onto one of those (or click on them?)
#     - the user can delete pieces of the tree from the working area
#       (right click?)
#     - connecting top to bottom? drag one NP onto another?
#
# +-------------------------------------------------------------+
# | S -> NP VP   |                 S                            |
# |[NP -> Det N ]|                / \                           |
# |     ...      |              NP  VP                          |
# | N -> 'dog'   |                                              |
# | N -> 'cat'   |                                              |
# |     ...      |                                              |
# +--------------+                                              |
# |      NP      |                      Det     N               |
# |     /  \     |                       |      |               |
# |   Det   N    |  the    cat    saw   the    dog              |
# |              |                                              |
# +--------------+----------------------------------------------+
#
# Operations:
#   - connect a new treelet -- drag or click shadow
#   - delete a treelet -- right click
#     - if only connected to top, delete everything below
#     - if only connected to bottom, delete everything above
#   - connect top & bottom -- drag a leaf to a root or a root to a leaf
#   - disconnect top & bottom -- right click
#     - if connected to top & bottom, then disconnect

import re

from six import string_types
from six.moves.tkinter import (
    Button,
    Canvas,
    Entry,
    Frame,
    IntVar,
    Label,
    Scrollbar,
    Text,
    Tk,
    Toplevel,
)

from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals
from nltk.tree import Tree
from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
from nltk.draw.util import (
    CanvasFrame,
    ColorizedList,
    ShowText,
    SymbolWidget,
    TextWidget,
)

######################################################################
# Production List
######################################################################


class ProductionList(ColorizedList):
    """
    A ``ColorizedList`` whose items are grammar productions.  Each
    production is rendered as its left-hand side, an arrow symbol, and
    its right-hand side, with nonterminals and terminals tagged
    separately so they can be colored differently.
    """

    ARROW = SymbolWidget.SYMBOLS['rightarrow']

    def _init_colortags(self, textwidget, options):
        """
        Configure the ``terminal``, ``arrow`` and ``nonterminal`` tags
        on ``textwidget``.  ``options`` is accepted but not used here.
        """
        tag_styles = (
            ('terminal', {'foreground': '#006000'}),
            ('arrow', {'font': 'symbol', 'underline': '0'}),
            (
                'nonterminal',
                {'foreground': 'blue', 'font': ('helvetica', -12, 'bold')},
            ),
        )
        for tag, style in tag_styles:
            textwidget.tag_config(tag, **style)

    def _item_repr(self, item):
        """
        Return a list of ``(text, tag)`` pairs describing how the
        production ``item`` should be displayed.
        """
        # The LHS is followed by a tab so that the arrows line up.
        pieces = [('%s\t' % item.lhs(), 'nonterminal'), (self.ARROW, 'arrow')]
        for symbol in item.rhs():
            if isinstance(symbol, Nonterminal):
                pieces.append((' %s' % symbol.symbol(), 'nonterminal'))
            else:
                # Terminals are shown quoted (via repr).
                pieces.append((' %r' % symbol, 'terminal'))
        return pieces

######################################################################
# CFG Editor
######################################################################

_CFGEditor_HELP = """

The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions. The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
are specified in the main text editing box.

Every non-blank line specifies a single production. Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.

Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consists of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.

Terminals must be surrounded by single quotes (') or double
quotes(\"). For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.

To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals. Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol. When you move your cursor to a
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.

Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.

Use the buttons at the bottom of the window when you are done editing
the CFG:
  - Ok: apply the new CFG, and exit the editor.
  - Apply: apply the new CFG, and do not exit the editor.
  - Reset: revert to the original CFG, and do not exit the editor.
  - Cancel: revert to the original CFG, and exit the editor.

"""


class CFGEditor(object):
    """
    A dialog window for creating and editing context free grammars.
    ``CFGEditor`` imposes the following restrictions:

    - All nonterminals must be strings consisting of word
      characters.
    - All terminals must be strings consisting of word characters
      and space characters.
    """

    # Regular expressions used by _analyze_line and _mark_error.
    # Precompile them, so we can process the text faster.  All
    # patterns are raw strings so that ``\s`` and ``\w`` reach the
    # regexp engine instead of being treated as (invalid) string
    # escapes.
    ARROW = SymbolWidget.SYMBOLS['rightarrow']
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + r"))")
    _ARROW_RE = re.compile(r"\s*(->|(" + ARROW + r"))\s*")
    _PRODUCTION_RE = re.compile(
        r"(^\s*\w+\s*)"  # LHS
        + r"(->|("  # arrow
        + ARROW
        + r"))\s*"
        + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$"  # RHS
    )
    _TOKEN_RE = re.compile(r"\w+|->|'[\w ]+'|\"[\w ]+\"|(" + ARROW + r")")
    _BOLD = ('helvetica', -12, 'bold')

    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        """
        Create the editor window as a ``Toplevel`` child of ``parent``.

        :param parent: The Tk widget that owns the editor window.
        :param cfg: The grammar to edit.  If None, an empty grammar
            with start symbol ``S`` is used.
        :param set_cfg_callback: If not None, a function that is called
            with a ``CFG`` whenever the grammar is applied or reset.
        """
        self._parent = parent
        if cfg is not None:
            self._cfg = cfg
        else:
            self._cfg = CFG(Nonterminal('S'), [])
        self._set_cfg_callback = set_cfg_callback

        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        """Build the row holding the start-symbol entry and its labels."""
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side='right')
        Label(frame, text='Start Symbol:').pack(side='right')
        Label(frame, text='Productions:').pack(side='left')
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        """Build the Ok / Apply / Reset / Cancel / Help button row."""
        frame = self._buttonframe = Frame(self._top)
        # (label, command, side) -- packed in this order.
        specs = (
            ('Ok', self._ok, 'left'),
            ('Apply', self._apply, 'left'),
            ('Reset', self._reset, 'left'),
            ('Cancel', self._cancel, 'left'),
            ('Help', self._help, 'right'),
        )
        for text, command, side in specs:
            Button(
                frame, text=text, command=command, underline=0, takefocus=0
            ).pack(side=side)

    def _init_bindings(self):
        """Set the window title and the keyboard shortcuts."""
        self._top.title('CFG Editor')
        # NOTE: <Control-x> and <Control-c> are intentionally left
        # unbound (they were commented out in the original code,
        # presumably so as not to shadow cut/copy in the text widget).
        shortcuts = (
            (
                self._cancel,
                (
                    '<Control-q>',
                    '<Alt-q>',
                    '<Control-d>',
                    '<Alt-x>',
                    '<Escape>',
                    '<Alt-c>',
                ),
            ),
            (self._ok, ('<Control-o>', '<Alt-o>')),
            (self._apply, ('<Control-a>', '<Alt-a>')),
            (self._reset, ('<Control-r>', '<Alt-r>')),
            (self._help, ('<Control-h>', '<Alt-h>', '<F1>')),
        )
        for handler, sequences in shortcuts:
            for sequence in sequences:
                self._top.bind(sequence, handler)

    def _init_prodframe(self):
        """
        Build the production-editing text widget (with scrollbar),
        install its event bindings, and fill it with the productions
        of the grammar being edited.
        """
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(
            self._prodframe, background='#e0e0e0', exportselection=1
        )
        self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient='vertical')
        self._textwidget.config(yscrollcommand=self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side='right', fill='y')
        self._textwidget.pack(expand=1, fill='both', side='left')

        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.
        self._textwidget.tag_config('terminal', foreground='#006000')
        self._textwidget.tag_config('arrow', font='symbol')
        self._textwidget.tag_config('error', background='red')

        # Keep track of what line they're on.  We use that to remember
        # to re-analyze a line whenever they leave it.
        self._linenum = 0

        # Expand "->" to an arrow.
        self._top.bind('>', self._replace_arrows)

        # Re-colorize lines when appropriate.
        self._top.bind('<<Paste>>', self._analyze)
        self._top.bind('<KeyPress>', self._check_analyze)
        self._top.bind('<ButtonPress>', self._check_analyze)

        # Tab cycles focus.  The handler must return 'break';
        # otherwise Tk goes on to run the Text widget's default <Tab>
        # binding as well.
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()
            return 'break'

        self._textwidget.bind('<Tab>', cycle)

        # Insert one line per group of productions, then colorize.
        for lhs, rhss in self._grouped_productions():
            alternatives = ' |'.join(self._format_rhs(rhs) for rhs in rhss)
            self._textwidget.insert('end', '%s ->%s\n' % (lhs, alternatives))

        self._analyze()

    def _grouped_productions(self):
        """
        Return the grammar's productions as a list of
        ``(lhs, [rhs, ...])`` pairs, in which adjacent productions
        that share a left-hand side are merged into one entry.
        Productions with an empty right-hand side are never merged
        with their neighbours.
        """
        groups = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
        # Walk backwards so that deleting entry i never disturbs the
        # entries that are still to be visited.
        for i in range(len(groups) - 1, 0, -1):
            lhs, rhss = groups[i]
            prev_lhs, prev_rhss = groups[i - 1]
            if lhs == prev_lhs and () not in rhss and () not in prev_rhss:
                prev_rhss.extend(rhss)
                del groups[i]
        return groups

    @staticmethod
    def _format_rhs(rhs):
        """
        Return the text for one right-hand side: each element preceded
        by a space, with terminals shown quoted (via repr).
        """
        return ''.join(
            (' %s' if isinstance(elt, Nonterminal) else ' %r') % (elt,)
            for elt in rhs
        )

    def _clear_tags(self, linenum):
        """
        Remove all tags (except ``arrow`` and ``sel``) from the given
        line of the text widget used for editing the productions.
        """
        start = '%d.0' % linenum
        end = '%d.end' % linenum
        for tag in self._textwidget.tag_names():
            if tag not in ('arrow', 'sel'):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index('insert').split('.')[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any ``'->'`` text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        # Pass 1: turn every "->" into a tab followed by a tagged arrow.
        arrow = '1.0'
        while True:
            arrow = self._textwidget.search('->', arrow, 'end+1char')
            if arrow == '':
                break
            self._textwidget.delete(arrow, arrow + '+2char')
            self._textwidget.insert(arrow, self.ARROW, 'arrow')
            self._textwidget.insert(arrow, '\t')

        # Pass 2: make sure every arrow character carries the 'arrow'
        # tag.  The scan starts at the very first character (so an
        # arrow at index 1.0 is not missed) and steps past each hit.
        position = '1.0'
        while True:
            position = self._textwidget.search(self.ARROW, position, 'end+1char')
            if position == '':
                break
            after = position + '+1char'
            self._textwidget.tag_add('arrow', position, after)
            position = after

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        token = match.group()
        # What type of token is it?
        if token[0] in "'\"":
            tag = 'terminal'
        elif token in ('->', self.ARROW):
            tag = 'arrow'
        else:
            # If it's a nonterminal, then set up new bindings, so we
            # can highlight all instances of that nonterminal when we
            # put the mouse over it.
            tag = 'nonterminal_' + token
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = '%d.%d' % (linenum, match.start())
        end = '%d.%d' % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground='blue'):
        """
        Configure the per-nonterminal tag ``tag`` (bold, colored
        ``foreground``) and, if matching-nonterminal highlighting is
        enabled, make the mouse highlight every use of the tag.
        """
        self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return

        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='#80ff80')

        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='')

        self._textwidget.tag_bind(tag, '<Enter>', enter)
        self._textwidget.tag_bind(tag, '<Leave>', leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line's text string.
        line = self._textwidget.get('%d.0' % linenum, '%d.end' % linenum)

        if CFGEditor._PRODUCTION_RE.match(line):
            # It's valid; use _TOKEN_RE to tokenize the production,
            # and colorize each token.
            for match in CFGEditor._TOKEN_RE.finditer(line):
                self._analyze_token(match, linenum)
        elif line.strip() != '':
            # It's invalid; show the user where the error is.
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        whole_line = ('%d.0' % linenum, '%d.end' % linenum)
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start, end = whole_line
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = '%d.0' % linenum
            end = '%d.%d' % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = '%d.%d' % (linenum, arrowmatch.end())
            end = '%d.end' % linenum

        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, '==', end):
            start, end = whole_line
        self._textwidget.tag_add('error', start, end)

    def _analyze(self, *e):
        """
        Replace ``->`` with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index('end').split('.')[0])
        for linenum in range(1, numlines + 1):  # line numbers start at 1.
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.  Any exception raised by
        ``_read_cfg_production`` for a malformed line propagates to
        the caller.
        """
        productions = []

        # Get the text and normalize it: arrows back to "->", tabs to
        # spaces.  These are literal substitutions, so plain
        # str.replace is used rather than a regexp.
        text = self._textwidget.get('1.0', 'end')
        text = text.replace(self.ARROW, '->').replace('\t', ' ')

        # Convert each non-blank line to one or more CFG productions.
        for line in text.split('\n'):
            line = line.strip()
            if line == '':
                continue
            productions += _read_cfg_production(line)

        return productions

    def _destroy(self, *e):
        """Destroy the editor window (safe to call more than once)."""
        if self._top is None:
            return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        """Apply the edited grammar, then close the editor."""
        self._apply()
        self._destroy()

    def _apply(self, *e):
        """
        Build a ``CFG`` from the start-symbol entry and the text
        buffer, and pass it to the callback (if there is one).
        """
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = CFG(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)

    def _reset(self, *e):
        """
        Replace the buffer contents with the original grammar's
        productions (one per line), and pass the original grammar to
        the callback (if there is one).
        """
        self._textwidget.delete('1.0', 'end')
        for production in self._cfg.productions():
            self._textwidget.insert('end', '%s\n' % production)
        self._analyze()
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)

    def _cancel(self, *e):
        """Revert to the original grammar, then close the editor."""
        try:
            self._reset()
        except Exception:
            # Reverting is best-effort: the window must still close
            # even if the reset fails.  (KeyboardInterrupt and
            # SystemExit are no longer swallowed.)
            pass
        self._destroy()

    def _help(self, *e):
        """Show the editor's help text in a separate window."""
        title = 'Help: CFG Editor'
        text = _CFGEditor_HELP.strip()
        # The default font's not very legible; try using 'fixed' instead.
        try:
            ShowText(self._parent, title, text, width=75, font='fixed')
        except Exception:
            # Fall back to the default font if 'fixed' can't be used.
            ShowText(self._parent, title, text, width=75)


######################################################################
# New Demo (built tree based on cfg)
######################################################################


class CFGDemo(object):
    """
    An (unfinished) demo window with a production list and a treelet
    canvas on the left, and a workspace canvas holding a tree built
    from the grammar's start symbol and the words of a text.
    """

    def __init__(self, grammar, text):
        """
        :param grammar: The grammar whose productions are listed.
        :param text: A sequence of words, shown as the leaves of the
            workspace tree.
        """
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title('Context Free Grammar Demo')

        # Base font size
        self._size = IntVar(self._top)
        self._size.set(12)  # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side='left', fill='y', expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    # //////////////////////////////////////////////////
    # Initialization
    # //////////////////////////////////////////////////

    def _init_bindings(self, top):
        top.bind('<Control-q>', self.destroy)

    def _init_menubar(self, parent):
        pass

    def _init_buttons(self, parent):
        pass

    def _init_grammar(self, parent):
        """Create the production list and hook up its callbacks."""
        self._prodlist = ProductionList(parent, self._grammar, width=20)
        self._prodlist.pack(side='top', fill='both', expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback('select', self._selectprod_cb)
        self._prodlist.add_callback('move', self._selectprod_cb)

    def _init_treelet(self, parent):
        """Create the (initially empty) canvas that shows the treelet."""
        self._treelet_canvas = Canvas(parent, background='white')
        self._treelet_canvas.pack(side='bottom', fill='x')
        self._treelet = None

    def _init_workspace(self, parent):
        """Create the workspace canvas and draw the initial tree."""
        self._workspace = CanvasFrame(parent, background='white')
        self._workspace.pack(side='right', fill='both', expand=1)
        self._tree = None
        self.reset_workspace()

    def _fonts(self):
        """Return ``(node_font, leaf_font)`` for the current base size."""
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize + 4), 'bold')
        leaf_font = ('helvetica', -(fontsize + 2))
        return node_font, leaf_font

    # //////////////////////////////////////////////////
    # Workspace
    # //////////////////////////////////////////////////

    def reset_workspace(self):
        """
        Replace the workspace tree with a fresh one: the start symbol
        as the root, and one leaf per word of the text.
        """
        c = self._workspace.canvas()
        node_font, leaf_font = self._fonts()

        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = [
            TextWidget(c, word, font=leaf_font, draggable=1) for word in self._text
        ]

        # Put it all together into one tree
        self._tree = TreeSegmentWidget(c, rootnode, leaves, color='white')

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves:
            leaf.move(0, 100)

    def workspace_markprod(self, production):
        pass

    def _markproduction(self, prod, tree=None):
        """
        Scan the subtrees of ``tree`` (by default, the workspace tree)
        for consecutive runs that match the right-hand side of
        ``prod``, and print the index of each match.
        """
        if tree is None:
            tree = self._tree
        rhs = prod.rhs()
        subtrees = tree.subtrees()
        for i in range(len(subtrees) - len(rhs)):
            if tree['color', i] == 'white':
                # FIXME: the original code had a bare reference to
                # self._markproduction here, which did nothing.  Is a
                # recursive call (or anything at all) needed?
                pass

            for j, node in enumerate(rhs):
                widget = subtrees[i + j]
                if (
                    isinstance(node, Nonterminal)
                    and isinstance(widget, TreeSegmentWidget)
                    # symbol is a method; it must be called, or the
                    # comparison with the label text is always False.
                    and node.symbol() == widget.label().text()
                ):
                    pass  # matching nonterminal
                elif (
                    isinstance(node, string_types)
                    and isinstance(widget, TextWidget)
                    and node == widget.text()
                ):
                    pass  # matching terminal
                else:
                    break
            else:
                # Everything matched!
                print('MATCH AT', i)

    # //////////////////////////////////////////////////
    # Grammar
    # //////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        """
        Callback for the production list: highlight ``production`` and
        draw the treelet it licenses in the treelet canvas.
        """
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None:
            self._treelet.destroy()

        # Convert the production to a tree.  Tree takes a label and a
        # list of children; each nonterminal on the RHS becomes a
        # childless subtree, and each terminal becomes a leaf.
        children = [
            Tree(elt.symbol(), []) if isinstance(elt, Nonterminal) else elt
            for elt in production.rhs()
        ]
        tree = Tree(production.lhs().symbol(), children)

        # Draw the tree in the treelet area.
        node_font, leaf_font = self._fonts()
        self._treelet = tree_to_treesegment(
            canvas, tree, node_font=node_font, leaf_font=leaf_font
        )
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)

    def destroy(self, *args):
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        self._top.mainloop(*args, **kwargs)


def demo2():
    """Run ``CFGDemo`` on a small example grammar and sentence."""
    from nltk import Nonterminal, Production, CFG

    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),
        # Lexical Productions
        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )
    grammar = CFG(S, productions)

    text = 'I saw a man in the park'.split()
    d = CFGDemo(grammar, text)
    d.mainloop()


######################################################################
# Old Demo
######################################################################


def demo():
    """
    Open a ``CFGEditor`` on a small example grammar.  Each grammar
    that the editor passes to its callback is printed.
    """
    from nltk import CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """
    )

    def cb(grammar):
        print(grammar)

    top = Tk()
    CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()


def demo3():
    """
    Show a ``ProductionList`` for a small set of productions, with two
    of them marked.  Pressing ``q`` closes the window.
    """
    from nltk import Production

    (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
        'S, VP, NP, PP, P, N, Name, V, Det'
    )

    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),
        # Lexical Productions
        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )

    t = Tk()

    def destroy(e, t=t):
        t.destroy()

    t.bind('q', destroy)
    p = ProductionList(t, productions)
    p.pack(expand=1, fill='both')
    p.add_callback('select', p.markonly)
    p.add_callback('move', p.markonly)
    p.focus()
    p.mark(productions[2])
    p.mark(productions[8])

    # Enter the event loop, as demo() and demo2() do; without it the
    # function returns immediately and the window is never serviced.
    t.mainloop()


if __name__ == '__main__':
    demo()