# This file is part of python-ly, https://pypi.python.org/pypi/python-ly
#
# Copyright (c) 2008 - 2015 by Wilbert Berendsen
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# See http://www.gnu.org/licenses/ for more information.

r"""
This module is built on top of slexer and can parse LilyPond input and other
formats.

The base functionality is delegated to modules with an underscore in this package.
The modules describing parsing modes (filetypes) are the files without underscore.

Currently available are modes for lilypond, latex, html, texinfo, scheme, and docbook.

The 'underscored' modules should not be imported in application code. What is
needed from them is available here, in the ly.lex namespace.

If you add new files for parsing other file types, you should add them in _mode.py.
The _token.py module contains base Token types and Token mixin classes.

The State, Parser, FallthroughParser and Fridge classes from slexer are all
slightly extended here.

Usage::

    >>> import ly.lex
    >>> txt = r"\relative c' { c d e f-^ g }"
    >>> s = ly.lex.state("lilypond")
    >>> for t in s.tokens(txt):
    ...     print(t, t.__class__.__name__)
    \relative Command
      Space
    c Name
    ' Unparsed
      Space
    { SequentialStart
      Space
    c Note
      Space
    d Note
      Space
    e Note
      Space
    f Note
    - Direction
    ^ ScriptAbbreviation
      Space
    g Note
      Space
    } SequentialEnd

A State() is used to parse text. The text is given to the tokens() method, that
returns an iterator iterating over Token instances as they are found. Each token
has a 'pos' and an 'end' attribute describing its position in the original
string.

While iterating over the tokens(), the State maintains information about what
kind of text is parsed. (So don't iterate over more than one call to tokens() of
the same State object at the same time.)

Use ly.lex.state("name") to get a state for a specific mode to start parsing with.
If you don't know the type of text, you can use ly.lex.guessState(text), where
text is the text you want to parse. A quick heuristic is then used to determine
the type of the text.

See the documentation of the slexer module for more information.

"""

from __future__ import unicode_literals

import re

from .. import slexer
from ._token import *
from ._mode import extensions, modes, guessMode


# Public names of the ly.lex namespace: the classes and helpers defined below,
# the names imported from _mode, and the base token types from _token.
__all__ = [
    'State',
    'Parser', 'FallthroughParser',
    'Fridge',
    'extensions', 'modes', 'guessMode',
    'state', 'guessState',
    'Token',
    'Unparsed',
    'Space',
    'Newline',
    'Comment',
    'LineComment',
    'BlockComment',
    'BlockCommentStart',
    'BlockCommentEnd',
    'String',
    'StringStart',
    'StringEnd',
    'Character',
    'Numeric',
    'Error',
    'MatchStart',
    'MatchEnd',
    'Indent',
    'Dedent',
]


class Parser(slexer.Parser):
    """slexer.Parser extended with an argument counter and a mode attribute.

    Class-level defaults:

    * ``mode`` is None; State.mode() skips parsers whose mode is falsy.
    * ``argcount`` is 0; State.endArgument() only decrements positive counts.
    * ``default`` is the Unparsed token type.
    * ``re_flags`` combines re.MULTILINE and re.UNICODE.

    """
    mode = None
    default = Unparsed
    argcount = 0
    re_flags = re.UNICODE | re.MULTILINE

    def __init__(self, argcount=None):
        """Store argcount on the instance, unless it is None.

        When argcount is None the class-level default stays in effect.

        """
        if argcount is None:
            return
        self.argcount = argcount

    def freeze(self):
        """Return a one-element tuple holding the current argcount."""
        frozen = (self.argcount,)
        return frozen


class FallthroughParser(Parser, slexer.FallthroughParser):
    """Combines the Parser defined here with slexer.FallthroughParser."""


class State(slexer.State):
    """slexer.State extended with argument counting and mode lookup."""

    def endArgument(self):
        """Count one finished argument on the current parser.

        As long as self.depth() is above 1, the current parser is inspected:
        a parser whose argcount is exactly 1 is left and the next current
        parser is inspected in turn; for any other parser a positive argcount
        is decremented by one and the method is done.

        """
        while self.depth() > 1:
            current = self.parser()
            if current.argcount != 1:
                if current.argcount > 0:
                    current.argcount -= 1
                break
            # This was the last expected argument: drop the parser and let the
            # loop inspect the parser that is now current.
            self.leave()

    def mode(self):
        """Return the mode of the innermost parser that has a truthy mode.

        The parsers in self.state are examined last to first. None is
        returned when no parser has a truthy mode attribute.

        """
        innermost_first = self.state[::-1]
        return next((p.mode for p in innermost_first if p.mode), None)


class Fridge(slexer.Fridge):
    """slexer.Fridge that uses the State class of this module by default."""

    def __init__(self, stateClass=State):
        """Hand stateClass over to the slexer.Fridge constructor."""
        super(Fridge, self).__init__(stateClass)


def state(mode):
    """Return a new State for the given mode name.

    The mode is looked up in the modes mapping; the object found there is
    called without arguments and the result is passed to State().

    """
    parser_class = modes[mode]
    return State(parser_class())


def guessState(text):
    """Return a new State for the mode that guessMode() picks for text."""
    guessed = guessMode(text)
    initial_parser = modes[guessed]()
    return State(initial_parser)