1// Copyright 2017 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package format 6 7import ( 8 "reflect" 9 "unicode/utf8" 10) 11 12// A Parser parses a format string. The result from the parse are set in the 13// struct fields. 14type Parser struct { 15 Verb rune 16 17 WidthPresent bool 18 PrecPresent bool 19 Minus bool 20 Plus bool 21 Sharp bool 22 Space bool 23 Zero bool 24 25 // For the formats %+v %#v, we set the plusV/sharpV flags 26 // and clear the plus/sharp flags since %+v and %#v are in effect 27 // different, flagless formats set at the top level. 28 PlusV bool 29 SharpV bool 30 31 HasIndex bool 32 33 Width int 34 Prec int // precision 35 36 // retain arguments across calls. 37 Args []interface{} 38 // retain current argument number across calls 39 ArgNum int 40 41 // reordered records whether the format string used argument reordering. 42 Reordered bool 43 // goodArgNum records whether the most recent reordering directive was valid. 44 goodArgNum bool 45 46 // position info 47 format string 48 startPos int 49 endPos int 50 Status Status 51} 52 53// Reset initializes a parser to scan format strings for the given args. 54func (p *Parser) Reset(args []interface{}) { 55 p.Args = args 56 p.ArgNum = 0 57 p.startPos = 0 58 p.Reordered = false 59} 60 61// Text returns the part of the format string that was parsed by the last call 62// to Scan. It returns the original substitution clause if the current scan 63// parsed a substitution. 64func (p *Parser) Text() string { return p.format[p.startPos:p.endPos] } 65 66// SetFormat sets a new format string to parse. It does not reset the argument 67// count. 68func (p *Parser) SetFormat(format string) { 69 p.format = format 70 p.startPos = 0 71 p.endPos = 0 72} 73 74// Status indicates the result type of a call to Scan. 75type Status int 76 77const ( 78 StatusText Status = iota 79 StatusSubstitution 80 StatusBadWidthSubstitution 81 StatusBadPrecSubstitution 82 StatusNoVerb 83 StatusBadArgNum 84 StatusMissingArg 85) 86 87// ClearFlags reset the parser to default behavior. 88func (p *Parser) ClearFlags() { 89 p.WidthPresent = false 90 p.PrecPresent = false 91 p.Minus = false 92 p.Plus = false 93 p.Sharp = false 94 p.Space = false 95 p.Zero = false 96 97 p.PlusV = false 98 p.SharpV = false 99 100 p.HasIndex = false 101} 102 103// Scan scans the next part of the format string and sets the status to 104// indicate whether it scanned a string literal, substitution or error. 105func (p *Parser) Scan() bool { 106 p.Status = StatusText 107 format := p.format 108 end := len(format) 109 if p.endPos >= end { 110 return false 111 } 112 afterIndex := false // previous item in format was an index like [3]. 113 114 p.startPos = p.endPos 115 p.goodArgNum = true 116 i := p.startPos 117 for i < end && format[i] != '%' { 118 i++ 119 } 120 if i > p.startPos { 121 p.endPos = i 122 return true 123 } 124 // Process one verb 125 i++ 126 127 p.Status = StatusSubstitution 128 129 // Do we have flags? 130 p.ClearFlags() 131 132simpleFormat: 133 for ; i < end; i++ { 134 c := p.format[i] 135 switch c { 136 case '#': 137 p.Sharp = true 138 case '0': 139 p.Zero = !p.Minus // Only allow zero padding to the left. 140 case '+': 141 p.Plus = true 142 case '-': 143 p.Minus = true 144 p.Zero = false // Do not pad with zeros to the right. 145 case ' ': 146 p.Space = true 147 default: 148 // Fast path for common case of ascii lower case simple verbs 149 // without precision or width or argument indices. 150 if 'a' <= c && c <= 'z' && p.ArgNum < len(p.Args) { 151 if c == 'v' { 152 // Go syntax 153 p.SharpV = p.Sharp 154 p.Sharp = false 155 // Struct-field syntax 156 p.PlusV = p.Plus 157 p.Plus = false 158 } 159 p.Verb = rune(c) 160 p.ArgNum++ 161 p.endPos = i + 1 162 return true 163 } 164 // Format is more complex than simple flags and a verb or is malformed. 165 break simpleFormat 166 } 167 } 168 169 // Do we have an explicit argument index? 170 i, afterIndex = p.updateArgNumber(format, i) 171 172 // Do we have width? 173 if i < end && format[i] == '*' { 174 i++ 175 p.Width, p.WidthPresent = p.intFromArg() 176 177 if !p.WidthPresent { 178 p.Status = StatusBadWidthSubstitution 179 } 180 181 // We have a negative width, so take its value and ensure 182 // that the minus flag is set 183 if p.Width < 0 { 184 p.Width = -p.Width 185 p.Minus = true 186 p.Zero = false // Do not pad with zeros to the right. 187 } 188 afterIndex = false 189 } else { 190 p.Width, p.WidthPresent, i = parsenum(format, i, end) 191 if afterIndex && p.WidthPresent { // "%[3]2d" 192 p.goodArgNum = false 193 } 194 } 195 196 // Do we have precision? 197 if i+1 < end && format[i] == '.' { 198 i++ 199 if afterIndex { // "%[3].2d" 200 p.goodArgNum = false 201 } 202 i, afterIndex = p.updateArgNumber(format, i) 203 if i < end && format[i] == '*' { 204 i++ 205 p.Prec, p.PrecPresent = p.intFromArg() 206 // Negative precision arguments don't make sense 207 if p.Prec < 0 { 208 p.Prec = 0 209 p.PrecPresent = false 210 } 211 if !p.PrecPresent { 212 p.Status = StatusBadPrecSubstitution 213 } 214 afterIndex = false 215 } else { 216 p.Prec, p.PrecPresent, i = parsenum(format, i, end) 217 if !p.PrecPresent { 218 p.Prec = 0 219 p.PrecPresent = true 220 } 221 } 222 } 223 224 if !afterIndex { 225 i, afterIndex = p.updateArgNumber(format, i) 226 } 227 p.HasIndex = afterIndex 228 229 if i >= end { 230 p.endPos = i 231 p.Status = StatusNoVerb 232 return true 233 } 234 235 verb, w := utf8.DecodeRuneInString(format[i:]) 236 p.endPos = i + w 237 p.Verb = verb 238 239 switch { 240 case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec. 241 p.startPos = p.endPos - 1 242 p.Status = StatusText 243 case !p.goodArgNum: 244 p.Status = StatusBadArgNum 245 case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb. 246 p.Status = StatusMissingArg 247 case verb == 'v': 248 // Go syntax 249 p.SharpV = p.Sharp 250 p.Sharp = false 251 // Struct-field syntax 252 p.PlusV = p.Plus 253 p.Plus = false 254 fallthrough 255 default: 256 p.ArgNum++ 257 } 258 return true 259} 260 261// intFromArg gets the ArgNumth element of Args. On return, isInt reports 262// whether the argument has integer type. 263func (p *Parser) intFromArg() (num int, isInt bool) { 264 if p.ArgNum < len(p.Args) { 265 arg := p.Args[p.ArgNum] 266 num, isInt = arg.(int) // Almost always OK. 267 if !isInt { 268 // Work harder. 269 switch v := reflect.ValueOf(arg); v.Kind() { 270 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 271 n := v.Int() 272 if int64(int(n)) == n { 273 num = int(n) 274 isInt = true 275 } 276 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: 277 n := v.Uint() 278 if int64(n) >= 0 && uint64(int(n)) == n { 279 num = int(n) 280 isInt = true 281 } 282 default: 283 // Already 0, false. 284 } 285 } 286 p.ArgNum++ 287 if tooLarge(num) { 288 num = 0 289 isInt = false 290 } 291 } 292 return 293} 294 295// parseArgNumber returns the value of the bracketed number, minus 1 296// (explicit argument numbers are one-indexed but we want zero-indexed). 297// The opening bracket is known to be present at format[0]. 298// The returned values are the index, the number of bytes to consume 299// up to the closing paren, if present, and whether the number parsed 300// ok. The bytes to consume will be 1 if no closing paren is present. 301func parseArgNumber(format string) (index int, wid int, ok bool) { 302 // There must be at least 3 bytes: [n]. 303 if len(format) < 3 { 304 return 0, 1, false 305 } 306 307 // Find closing bracket. 308 for i := 1; i < len(format); i++ { 309 if format[i] == ']' { 310 width, ok, newi := parsenum(format, 1, i) 311 if !ok || newi != i { 312 return 0, i + 1, false 313 } 314 return width - 1, i + 1, true // arg numbers are one-indexed and skip paren. 315 } 316 } 317 return 0, 1, false 318} 319 320// updateArgNumber returns the next argument to evaluate, which is either the value of the passed-in 321// argNum or the value of the bracketed integer that begins format[i:]. It also returns 322// the new value of i, that is, the index of the next byte of the format to process. 323func (p *Parser) updateArgNumber(format string, i int) (newi int, found bool) { 324 if len(format) <= i || format[i] != '[' { 325 return i, false 326 } 327 p.Reordered = true 328 index, wid, ok := parseArgNumber(format[i:]) 329 if ok && 0 <= index && index < len(p.Args) { 330 p.ArgNum = index 331 return i + wid, true 332 } 333 p.goodArgNum = false 334 return i + wid, ok 335} 336 337// tooLarge reports whether the magnitude of the integer is 338// too large to be used as a formatting width or precision. 339func tooLarge(x int) bool { 340 const max int = 1e6 341 return x > max || x < -max 342} 343 344// parsenum converts ASCII to integer. num is 0 (and isnum is false) if no number present. 345func parsenum(s string, start, end int) (num int, isnum bool, newi int) { 346 if start >= end { 347 return 0, false, end 348 } 349 for newi = start; newi < end && '0' <= s[newi] && s[newi] <= '9'; newi++ { 350 if tooLarge(num) { 351 return 0, false, end // Overflow; crazy long number most likely. 352 } 353 num = num*10 + int(s[newi]-'0') 354 isnum = true 355 } 356 return 357} 358