#!/usr/bin/env python

# Helpers that build random regular-expression fragments. The tree
# generators below recurse through generateRE(), and generateChar() reads the
# module-level `alphabet`; both are defined further down in this file.

from optparse import OptionParser
from random import *
import string
import sys


def chooseLeafWidth(nChildren):
    """Return a random partition of nChildren into positive parts.

    Between 1 and 5 distinct cut points are chosen in [1, nChildren - 1]
    (fewer when nChildren is small), so for nChildren >= 2 the result is
    never the degenerate single-part partition [nChildren]. For
    nChildren == 1 no cut point exists and the result is [1].
    """
    width = min(randint(1, 5), nChildren - 1)
    cuts = sorted(sample(range(1, nChildren), width))
    bounds = [0] + cuts + [nChildren]
    # The widths are the gaps between consecutive boundaries.
    return [hi - lo for lo, hi in zip(bounds, bounds[1:]) if hi != lo]


def generateConcat(nChildren, atTopIgnored):
    """Return the concatenation of sub-expressions over a random partition.

    The second argument is accepted for signature compatibility with the
    other generators and is not used; children are always generated with
    atTop=False.
    """
    parts = [generateRE(w, atTop=False) for w in chooseLeafWidth(nChildren)]
    # str.join replaces string.join(), which no longer exists on Python 3.
    return "".join(p for p in parts if p != '')


def makeGroup(s):
    """Parenthesise s in either a capturing or a non-capturing group."""
    if randint(0, 1) == 0:
        return "(" + s + ")"
    return "(?:" + s + ")"


def generateAlt(nChildren, atTop):
    """Return an alternation of sub-expressions over a random partition.

    Nested alternations are suppressed in the direct children. A single
    surviving alternative is returned bare; otherwise the alternation is
    wrapped with makeGroup().
    """
    alts = [generateRE(w, [generateAlt], atTop)
            for w in chooseLeafWidth(nChildren)]
    alts = [a for a in alts if a != '']
    joined = "|".join(alts)
    if len(alts) == 1:
        return joined
    return makeGroup(joined)


def generateQuant(nChildren, atTopIgnored):
    """Return a sub-expression followed by a random quantifier.

    Repeat bounds are drawn from an exponential distribution (rate 0.2),
    with the upper bound never below the lower bound. The direct child is
    never itself a quantified expression.
    """
    lo = int(round(expovariate(0.2)))
    hi = lo + int(round(expovariate(0.2)))
    q = choice(["*", "?", "+", "{%d}" % lo, "{%d,}" % lo, "{%d,%d}" % (lo, hi)])
    r = generateRE(nChildren, [generateQuant], atTop=False)
    # The quantifier is appended directly when r is a single character, or
    # when r neither starts with '(' nor ends with ')'; otherwise r is
    # wrapped in a group first.
    if (len(r) == 1) or (r[0] != '(' and r[-1] != ")"):
        return r + q
    return makeGroup(r) + q


def generateChar(nChildren, atTop=False):
    """Return one random character drawn from the module-level alphabet."""
    return chr(choice(alphabet))


def generateNocaseChar(nChildren, atTop=False):
    """Return an uppercase alphabet char or a caseless class such as [Aa]."""
    c = generateChar(nChildren, atTop)
    if random() < 0.5:
        return c.upper()
    return '[' + c.upper() + c.lower() + ']'


def generateDot(nChildren, atTop=False):
    """Return the match-anything metacharacter."""
    return "."
def generateBoundary(nChildren, atTop=False):
    """Return a grouped word-boundary assertion, \\b or \\B."""
    # \b, \B in parens so that we can repeat them and still be accepted by
    # libpcre
    return makeGroup('\\' + choice('bB'))


def generateCharClass(nChildren, atTop=False):
    """Return a random bracketed character class.

    With probability 0.2 the class is negated and holds 1-4 characters;
    otherwise it holds 2-4 characters.
    """
    if random() < 0.2:
        prefix = "^"
        nChars = randint(1, 4)
    else:
        prefix = ""
        nChars = randint(2, 4)

    # range() instead of xrange(): nChars is at most 4 and range exists on
    # both Python 2 and Python 3.
    members = [generateChar(1) for _ in range(nChars)]
    return "[" + prefix + "".join(members) + "]"


def generateOptionsFlags(nChildren, atTop=False):
    """Return an inline option group of the form (?<on>-<off>).

    Both the enabled and the disabled flag sets are independent random
    non-empty samples of "smix".
    """
    allflags = "smix"
    pos_flags = sample(allflags, randint(1, len(allflags)))
    neg_flags = sample(allflags, randint(1, len(allflags)))
    return '(?' + ''.join(pos_flags) + '-' + ''.join(neg_flags) + ')'


def generateLogicalId(nChildren, atTop=False):
    """Return a random expression ID, as a string, for a logical combination."""
    # NOTE(review): randint() is inclusive at both ends, so this can return
    # options.count itself, one past the last plain expression ID printed by
    # the main loop. Confirm whether that is intended.
    return str(randint(0, options.count))


def makeLogicalGroup(s):
    """Wrap a logical sub-combination in parentheses."""
    return "(" + s + ")"


def generateLogicalNot(nChildren, atTop):
    """Return the negation of a sub-combination (never a direct double NOT)."""
    r = generateCombination(nChildren, [generateLogicalNot], atTop=False)
    return "!" + makeLogicalGroup(r)


def generateLogicalAnd(nChildren, atTop):
    """Return an AND of sub-combinations over a random partition.

    A single surviving operand is returned bare; otherwise the conjunction
    is parenthesised.
    """
    operands = [generateCombination(w, [generateLogicalAnd], atTop=False)
                for w in chooseLeafWidth(nChildren)]
    operands = [r for r in operands if r != '']
    # str.join replaces string.join(), which no longer exists on Python 3.
    joined = "&".join(operands)
    if len(operands) == 1:
        return joined
    return makeLogicalGroup(joined)


def generateLogicalOr(nChildren, atTop):
    """Return an OR of sub-combinations over a random partition.

    A single surviving operand is returned bare; otherwise the disjunction
    is parenthesised.
    """
    operands = [generateCombination(w, [generateLogicalOr], atTop=False)
                for w in chooseLeafWidth(nChildren)]
    operands = [r for r in operands if r != '']
    joined = "|".join(operands)
    if len(operands) == 1:
        return joined
    return makeLogicalGroup(joined)


# Relative weights of the interior (tree) regex generators.
weightsTree = [
    (generateConcat, 10),
    (generateAlt, 3),
    (generateQuant, 2),
    ]

# Relative weights of the leaf regex generators.
weightsLeaf = [
    (generateChar, 30),
    (generateCharClass, 5),
    (generateDot, 5),
    (generateNocaseChar, 2),
    (generateBoundary, 1),
    (generateOptionsFlags, 1)
    ]

# Relative weights of the interior logical-combination generators.
weightsLogicalTree = [
    (generateLogicalNot, 1),
    (generateLogicalAnd, 5),
    (generateLogicalOr, 5),
    ]
# Relative weights of the leaf logical-combination generators.
weightsLogicalLeaf = [
    (generateLogicalId, 1),
    ]


def genChoices(weighted):
    """Expand (item, weight) pairs into a flat list for use with choice().

    Each item appears `weight` times, so a uniform choice() over the result
    selects items in proportion to their weights.
    """
    expanded = []
    for (item, weight) in weighted:
        expanded.extend([item] * weight)
    return expanded


choicesTree = genChoices(weightsTree)
choicesLeaf = genChoices(weightsLeaf)
choicesLogicalTree = genChoices(weightsLogicalTree)
choicesLogicalLeaf = genChoices(weightsLogicalLeaf)

# Anchoring templates applied to top-level expressions, with relative weights.
weightsAnchor = [
    ("\\A%s\\Z", 1),
    ("\\A%s\\z", 1),
    ("\\A%s", 4),
    ("%s\\Z", 2),
    ("%s\\z", 2),
    ("^%s$", 1),
    ("^%s", 4),
    ("%s$", 2),
    ("%s", 25)
    ]
choicesAnchor = genChoices(weightsAnchor)


def generateRE(nChildren, suppressList=(), atTop=False):
    """Return a random regular expression built from nChildren nodes.

    nChildren   -- node budget; a budget of 1 yields a single leaf.
    suppressList -- tree generators that must not be chosen for this node.
                    Defaults to an immutable empty tuple rather than a
                    shared mutable list.
    atTop       -- when true, a random anchoring template is applied to the
                    result and the flag is forwarded to the chosen generator.
    """
    if atTop:
        anchorSubstituteString = choice(choicesAnchor)
    else:
        anchorSubstituteString = "%s"

    # This node consumes one unit of the budget.
    nChildren -= 1
    if nChildren == 0:
        res = choice(choicesLeaf)(nChildren, atTop)
    else:
        allowed = [ch for ch in choicesTree if ch not in suppressList]
        res = choice(allowed)(nChildren, atTop)

    return anchorSubstituteString % res


def generateCombination(nChildren, suppressList=(), atTop=False):
    """Return a random logical combination built from nChildren nodes.

    Mirrors generateRE() but draws from the logical generator tables and
    applies no anchoring.
    """
    nChildren -= 1
    if nChildren == 0:
        return choice(choicesLogicalLeaf)(nChildren, atTop)
    allowed = [ch for ch in choicesLogicalTree if ch not in suppressList]
    return choice(allowed)(nChildren, atTop)


def generateRandomOptions():
    """Return a random subset of expression flags as a string.

    Each candidate flag is independently kept or dropped; the candidate set
    is smaller in hybrid mode, and 'Q' may be appended in logical mode.
    """
    if options.hybrid:
        allflags = "smiH8W"
    else:
        # Maintain an ordering for consistency.
        allflags = "smiHV8WLP"
    flags = "".join([choice(['', f]) for f in allflags])
    if options.logical:
        flags += choice(['', 'Q'])
    return flags


def generateRandomExtParam(depth, extparam):
    """Return a random "{key=value,...}" extended-parameter suffix.

    Returns "" when extparam is false or when no parameter happens to be
    selected. Length and offset values are bounded by depth (max_offset by
    3 * depth); the distance value is between 1 and 3.
    """
    if not extparam:
        return ""
    params = []
    if choice((False, True)):
        params.append("min_length=%u" % randint(1, depth))
    if choice((False, True)):
        params.append("min_offset=%u" % randint(1, depth))
    if choice((False, True)):
        params.append("max_offset=%u" % randint(1, depth*3))
    if choice((False, True)):
        dist = randint(1, 3)
        # Edit distance and Hamming distance are never emitted together.
        if choice((False, True)):
            params.append("edit_distance=%u" % dist)
        else:
            params.append("hamming_distance=%u" % dist)
    if not params:
        return ""
    return "{" + ",".join(params) + "}"


parser = OptionParser()
parser.add_option("-d", "--depth",
                  action="store", type="int", dest="depth", default=200,
                  help="Depth of generation (akin to maximum length)")
parser.add_option("-c", "--count",
                  action="store", type="int", dest="count", default=1000,
                  help="Number of expressions to generate")
parser.add_option("-a", "--alphabet",
                  action="store", type="int", dest="alphabet", default=26,
                  help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
parser.add_option("-i", "--nocase",
                  action="store_true", dest="nocase",
                  help="Use a caseless alphabet for character generation")
parser.add_option("-x", "--extparam",
                  action="store_true", dest="extparam",
                  help="Generate random extended parameters")
parser.add_option("-l", "--logical",
                  action="store_true", dest="logical",
                  help="Generate logical combination expressions")
parser.add_option("-H", "--hybrid",
                  action="store_true", dest="hybrid",
                  help="Generate random flags for hybrid mode")

(options, args) = parser.parse_args()
if len(args) != 0:
    parser.error("incorrect number of arguments")

# Build the alphabet as a real list: on Python 3 range() is not a list and
# cannot be extended in place.
alphabet = list(range(ord('a'), ord('a') + options.alphabet))
if options.nocase:
    alphabet.extend(range(ord('A'), ord('A') + options.alphabet))

# print(...) with one parenthesised argument behaves identically as a
# Python 2 statement and as a Python 3 function call.
for i in range(0, options.count):
    print("%08d:/%s/%s%s" % (i, generateRE(randint(1, options.depth), atTop=True), generateRandomOptions(), generateRandomExtParam(options.depth, options.extparam)))

if options.logical:
    # Combination expressions are numbered directly after the plain ones.
    for i in range(options.count, options.count + 3000):
        print("%08d:/%s/C" % (i, generateCombination(randint(1, options.depth), atTop=True)))