1// Copyright 2016 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:generate go run gen.go 6//go:generate asmfmt -w acc_amd64.s 7 8// asmfmt is https://github.com/klauspost/asmfmt 9 10// Package vector provides a rasterizer for 2-D vector graphics. 11package vector // import "golang.org/x/image/vector" 12 13// The rasterizer's design follows 14// https://medium.com/@raphlinus/inside-the-fastest-font-renderer-in-the-world-75ae5270c445 15// 16// Proof of concept code is in 17// https://github.com/google/font-go 18// 19// See also: 20// http://nothings.org/gamedev/rasterize/ 21// http://projects.tuxee.net/cl-vectors/section-the-cl-aa-algorithm 22// https://people.gnome.org/~mathieu/libart/internals.html#INTERNALS-SCANLINE 23 24import ( 25 "image" 26 "image/color" 27 "image/draw" 28 "math" 29) 30 31// floatingPointMathThreshold is the width or height above which the rasterizer 32// chooses to used floating point math instead of fixed point math. 33// 34// Both implementations of line segmentation rasterization (see raster_fixed.go 35// and raster_floating.go) implement the same algorithm (in ideal, infinite 36// precision math) but they perform differently in practice. The fixed point 37// math version is roughtly 1.25x faster (on GOARCH=amd64) on the benchmarks, 38// but at sufficiently large scales, the computations will overflow and hence 39// show rendering artifacts. The floating point math version has more 40// consistent quality over larger scales, but it is significantly slower. 41// 42// This constant determines when to use the faster implementation and when to 43// use the better quality implementation. 44// 45// The rationale for this particular value is that TestRasterizePolygon in 46// vector_test.go checks the rendering quality of polygon edges at various 47// angles, inscribed in a circle of diameter 512. It may be that a higher value 48// would still produce acceptable quality, but 512 seems to work. 49const floatingPointMathThreshold = 512 50 51func lerp(t, px, py, qx, qy float32) (x, y float32) { 52 return px + t*(qx-px), py + t*(qy-py) 53} 54 55func clamp(i, width int32) uint { 56 if i < 0 { 57 return 0 58 } 59 if i < width { 60 return uint(i) 61 } 62 return uint(width) 63} 64 65// NewRasterizer returns a new Rasterizer whose rendered mask image is bounded 66// by the given width and height. 67func NewRasterizer(w, h int) *Rasterizer { 68 z := &Rasterizer{} 69 z.Reset(w, h) 70 return z 71} 72 73// Raster is a 2-D vector graphics rasterizer. 74// 75// The zero value is usable, in that it is a Rasterizer whose rendered mask 76// image has zero width and zero height. Call Reset to change its bounds. 77type Rasterizer struct { 78 // bufXxx are buffers of float32 or uint32 values, holding either the 79 // individual or cumulative area values. 80 // 81 // We don't actually need both values at any given time, and to conserve 82 // memory, the integration of the individual to the cumulative could modify 83 // the buffer in place. In other words, we could use a single buffer, say 84 // of type []uint32, and add some math.Float32bits and math.Float32frombits 85 // calls to satisfy the compiler's type checking. As of Go 1.7, though, 86 // there is a performance penalty between: 87 // bufF32[i] += x 88 // and 89 // bufU32[i] = math.Float32bits(x + math.Float32frombits(bufU32[i])) 90 // 91 // See golang.org/issue/17220 for some discussion. 92 bufF32 []float32 93 bufU32 []uint32 94 95 useFloatingPointMath bool 96 97 size image.Point 98 firstX float32 99 firstY float32 100 penX float32 101 penY float32 102 103 // DrawOp is the operator used for the Draw method. 104 // 105 // The zero value is draw.Over. 106 DrawOp draw.Op 107 108 // TODO: an exported field equivalent to the mask point in the 109 // draw.DrawMask function in the stdlib image/draw package? 110} 111 112// Reset resets a Rasterizer as if it was just returned by NewRasterizer. 113// 114// This includes setting z.DrawOp to draw.Over. 115func (z *Rasterizer) Reset(w, h int) { 116 z.size = image.Point{w, h} 117 z.firstX = 0 118 z.firstY = 0 119 z.penX = 0 120 z.penY = 0 121 z.DrawOp = draw.Over 122 123 z.setUseFloatingPointMath(w > floatingPointMathThreshold || h > floatingPointMathThreshold) 124} 125 126func (z *Rasterizer) setUseFloatingPointMath(b bool) { 127 z.useFloatingPointMath = b 128 129 // Make z.bufF32 or z.bufU32 large enough to hold width * height samples. 130 if z.useFloatingPointMath { 131 if n := z.size.X * z.size.Y; n > cap(z.bufF32) { 132 z.bufF32 = make([]float32, n) 133 } else { 134 z.bufF32 = z.bufF32[:n] 135 for i := range z.bufF32 { 136 z.bufF32[i] = 0 137 } 138 } 139 } else { 140 if n := z.size.X * z.size.Y; n > cap(z.bufU32) { 141 z.bufU32 = make([]uint32, n) 142 } else { 143 z.bufU32 = z.bufU32[:n] 144 for i := range z.bufU32 { 145 z.bufU32[i] = 0 146 } 147 } 148 } 149} 150 151// Size returns the width and height passed to NewRasterizer or Reset. 152func (z *Rasterizer) Size() image.Point { 153 return z.size 154} 155 156// Bounds returns the rectangle from (0, 0) to the width and height passed to 157// NewRasterizer or Reset. 158func (z *Rasterizer) Bounds() image.Rectangle { 159 return image.Rectangle{Max: z.size} 160} 161 162// Pen returns the location of the path-drawing pen: the last argument to the 163// most recent XxxTo call. 164func (z *Rasterizer) Pen() (x, y float32) { 165 return z.penX, z.penY 166} 167 168// ClosePath closes the current path. 169func (z *Rasterizer) ClosePath() { 170 z.LineTo(z.firstX, z.firstY) 171} 172 173// MoveTo starts a new path and moves the pen to (ax, ay). 174// 175// The coordinates are allowed to be out of the Rasterizer's bounds. 176func (z *Rasterizer) MoveTo(ax, ay float32) { 177 z.firstX = ax 178 z.firstY = ay 179 z.penX = ax 180 z.penY = ay 181} 182 183// LineTo adds a line segment, from the pen to (bx, by), and moves the pen to 184// (bx, by). 185// 186// The coordinates are allowed to be out of the Rasterizer's bounds. 187func (z *Rasterizer) LineTo(bx, by float32) { 188 if z.useFloatingPointMath { 189 z.floatingLineTo(bx, by) 190 } else { 191 z.fixedLineTo(bx, by) 192 } 193} 194 195// QuadTo adds a quadratic Bézier segment, from the pen via (bx, by) to (cx, 196// cy), and moves the pen to (cx, cy). 197// 198// The coordinates are allowed to be out of the Rasterizer's bounds. 199func (z *Rasterizer) QuadTo(bx, by, cx, cy float32) { 200 ax, ay := z.penX, z.penY 201 devsq := devSquared(ax, ay, bx, by, cx, cy) 202 if devsq >= 0.333 { 203 const tol = 3 204 n := 1 + int(math.Sqrt(math.Sqrt(tol*float64(devsq)))) 205 t, nInv := float32(0), 1/float32(n) 206 for i := 0; i < n-1; i++ { 207 t += nInv 208 abx, aby := lerp(t, ax, ay, bx, by) 209 bcx, bcy := lerp(t, bx, by, cx, cy) 210 z.LineTo(lerp(t, abx, aby, bcx, bcy)) 211 } 212 } 213 z.LineTo(cx, cy) 214} 215 216// CubeTo adds a cubic Bézier segment, from the pen via (bx, by) and (cx, cy) 217// to (dx, dy), and moves the pen to (dx, dy). 218// 219// The coordinates are allowed to be out of the Rasterizer's bounds. 220func (z *Rasterizer) CubeTo(bx, by, cx, cy, dx, dy float32) { 221 ax, ay := z.penX, z.penY 222 devsq := devSquared(ax, ay, bx, by, dx, dy) 223 if devsqAlt := devSquared(ax, ay, cx, cy, dx, dy); devsq < devsqAlt { 224 devsq = devsqAlt 225 } 226 if devsq >= 0.333 { 227 const tol = 3 228 n := 1 + int(math.Sqrt(math.Sqrt(tol*float64(devsq)))) 229 t, nInv := float32(0), 1/float32(n) 230 for i := 0; i < n-1; i++ { 231 t += nInv 232 abx, aby := lerp(t, ax, ay, bx, by) 233 bcx, bcy := lerp(t, bx, by, cx, cy) 234 cdx, cdy := lerp(t, cx, cy, dx, dy) 235 abcx, abcy := lerp(t, abx, aby, bcx, bcy) 236 bcdx, bcdy := lerp(t, bcx, bcy, cdx, cdy) 237 z.LineTo(lerp(t, abcx, abcy, bcdx, bcdy)) 238 } 239 } 240 z.LineTo(dx, dy) 241} 242 243// devSquared returns a measure of how curvy the sequence (ax, ay) to (bx, by) 244// to (cx, cy) is. It determines how many line segments will approximate a 245// Bézier curve segment. 246// 247// http://lists.nongnu.org/archive/html/freetype-devel/2016-08/msg00080.html 248// gives the rationale for this evenly spaced heuristic instead of a recursive 249// de Casteljau approach: 250// 251// The reason for the subdivision by n is that I expect the "flatness" 252// computation to be semi-expensive (it's done once rather than on each 253// potential subdivision) and also because you'll often get fewer subdivisions. 254// Taking a circular arc as a simplifying assumption (ie a spherical cow), 255// where I get n, a recursive approach would get 2^⌈lg n⌉, which, if I haven't 256// made any horrible mistakes, is expected to be 33% more in the limit. 257func devSquared(ax, ay, bx, by, cx, cy float32) float32 { 258 devx := ax - 2*bx + cx 259 devy := ay - 2*by + cy 260 return devx*devx + devy*devy 261} 262 263// Draw implements the Drawer interface from the standard library's image/draw 264// package. 265// 266// The vector paths previously added via the XxxTo calls become the mask for 267// drawing src onto dst. 268func (z *Rasterizer) Draw(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) { 269 // TODO: adjust r and sp (and mp?) if src.Bounds() doesn't contain 270 // r.Add(sp.Sub(r.Min)). 271 272 if src, ok := src.(*image.Uniform); ok { 273 srcR, srcG, srcB, srcA := src.RGBA() 274 switch dst := dst.(type) { 275 case *image.Alpha: 276 // Fast path for glyph rendering. 277 if srcA == 0xffff { 278 if z.DrawOp == draw.Over { 279 z.rasterizeDstAlphaSrcOpaqueOpOver(dst, r) 280 } else { 281 z.rasterizeDstAlphaSrcOpaqueOpSrc(dst, r) 282 } 283 return 284 } 285 case *image.RGBA: 286 if z.DrawOp == draw.Over { 287 z.rasterizeDstRGBASrcUniformOpOver(dst, r, srcR, srcG, srcB, srcA) 288 } else { 289 z.rasterizeDstRGBASrcUniformOpSrc(dst, r, srcR, srcG, srcB, srcA) 290 } 291 return 292 } 293 } 294 295 if z.DrawOp == draw.Over { 296 z.rasterizeOpOver(dst, r, src, sp) 297 } else { 298 z.rasterizeOpSrc(dst, r, src, sp) 299 } 300} 301 302func (z *Rasterizer) accumulateMask() { 303 if z.useFloatingPointMath { 304 if n := z.size.X * z.size.Y; n > cap(z.bufU32) { 305 z.bufU32 = make([]uint32, n) 306 } else { 307 z.bufU32 = z.bufU32[:n] 308 } 309 if haveAccumulateSIMD { 310 floatingAccumulateMaskSIMD(z.bufU32, z.bufF32) 311 } else { 312 floatingAccumulateMask(z.bufU32, z.bufF32) 313 } 314 } else { 315 if haveAccumulateSIMD { 316 fixedAccumulateMaskSIMD(z.bufU32) 317 } else { 318 fixedAccumulateMask(z.bufU32) 319 } 320 } 321} 322 323func (z *Rasterizer) rasterizeDstAlphaSrcOpaqueOpOver(dst *image.Alpha, r image.Rectangle) { 324 // TODO: non-zero vs even-odd winding? 325 if r == dst.Bounds() && r == z.Bounds() { 326 // We bypass the z.accumulateMask step and convert straight from 327 // z.bufF32 or z.bufU32 to dst.Pix. 328 if z.useFloatingPointMath { 329 if haveAccumulateSIMD { 330 floatingAccumulateOpOverSIMD(dst.Pix, z.bufF32) 331 } else { 332 floatingAccumulateOpOver(dst.Pix, z.bufF32) 333 } 334 } else { 335 if haveAccumulateSIMD { 336 fixedAccumulateOpOverSIMD(dst.Pix, z.bufU32) 337 } else { 338 fixedAccumulateOpOver(dst.Pix, z.bufU32) 339 } 340 } 341 return 342 } 343 344 z.accumulateMask() 345 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] 346 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { 347 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { 348 ma := z.bufU32[y*z.size.X+x] 349 i := y*dst.Stride + x 350 351 // This formula is like rasterizeOpOver's, simplified for the 352 // concrete dst type and opaque src assumption. 353 a := 0xffff - ma 354 pix[i] = uint8((uint32(pix[i])*0x101*a/0xffff + ma) >> 8) 355 } 356 } 357} 358 359func (z *Rasterizer) rasterizeDstAlphaSrcOpaqueOpSrc(dst *image.Alpha, r image.Rectangle) { 360 // TODO: non-zero vs even-odd winding? 361 if r == dst.Bounds() && r == z.Bounds() { 362 // We bypass the z.accumulateMask step and convert straight from 363 // z.bufF32 or z.bufU32 to dst.Pix. 364 if z.useFloatingPointMath { 365 if haveAccumulateSIMD { 366 floatingAccumulateOpSrcSIMD(dst.Pix, z.bufF32) 367 } else { 368 floatingAccumulateOpSrc(dst.Pix, z.bufF32) 369 } 370 } else { 371 if haveAccumulateSIMD { 372 fixedAccumulateOpSrcSIMD(dst.Pix, z.bufU32) 373 } else { 374 fixedAccumulateOpSrc(dst.Pix, z.bufU32) 375 } 376 } 377 return 378 } 379 380 z.accumulateMask() 381 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] 382 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { 383 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { 384 ma := z.bufU32[y*z.size.X+x] 385 386 // This formula is like rasterizeOpSrc's, simplified for the 387 // concrete dst type and opaque src assumption. 388 pix[y*dst.Stride+x] = uint8(ma >> 8) 389 } 390 } 391} 392 393func (z *Rasterizer) rasterizeDstRGBASrcUniformOpOver(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) { 394 z.accumulateMask() 395 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] 396 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { 397 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { 398 ma := z.bufU32[y*z.size.X+x] 399 400 // This formula is like rasterizeOpOver's, simplified for the 401 // concrete dst type and uniform src assumption. 402 a := 0xffff - (sa * ma / 0xffff) 403 i := y*dst.Stride + 4*x 404 pix[i+0] = uint8(((uint32(pix[i+0])*0x101*a + sr*ma) / 0xffff) >> 8) 405 pix[i+1] = uint8(((uint32(pix[i+1])*0x101*a + sg*ma) / 0xffff) >> 8) 406 pix[i+2] = uint8(((uint32(pix[i+2])*0x101*a + sb*ma) / 0xffff) >> 8) 407 pix[i+3] = uint8(((uint32(pix[i+3])*0x101*a + sa*ma) / 0xffff) >> 8) 408 } 409 } 410} 411 412func (z *Rasterizer) rasterizeDstRGBASrcUniformOpSrc(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) { 413 z.accumulateMask() 414 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] 415 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { 416 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { 417 ma := z.bufU32[y*z.size.X+x] 418 419 // This formula is like rasterizeOpSrc's, simplified for the 420 // concrete dst type and uniform src assumption. 421 i := y*dst.Stride + 4*x 422 pix[i+0] = uint8((sr * ma / 0xffff) >> 8) 423 pix[i+1] = uint8((sg * ma / 0xffff) >> 8) 424 pix[i+2] = uint8((sb * ma / 0xffff) >> 8) 425 pix[i+3] = uint8((sa * ma / 0xffff) >> 8) 426 } 427 } 428} 429 430func (z *Rasterizer) rasterizeOpOver(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) { 431 z.accumulateMask() 432 out := color.RGBA64{} 433 outc := color.Color(&out) 434 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { 435 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { 436 sr, sg, sb, sa := src.At(sp.X+x, sp.Y+y).RGBA() 437 ma := z.bufU32[y*z.size.X+x] 438 439 // This algorithm comes from the standard library's image/draw 440 // package. 441 dr, dg, db, da := dst.At(r.Min.X+x, r.Min.Y+y).RGBA() 442 a := 0xffff - (sa * ma / 0xffff) 443 out.R = uint16((dr*a + sr*ma) / 0xffff) 444 out.G = uint16((dg*a + sg*ma) / 0xffff) 445 out.B = uint16((db*a + sb*ma) / 0xffff) 446 out.A = uint16((da*a + sa*ma) / 0xffff) 447 448 dst.Set(r.Min.X+x, r.Min.Y+y, outc) 449 } 450 } 451} 452 453func (z *Rasterizer) rasterizeOpSrc(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) { 454 z.accumulateMask() 455 out := color.RGBA64{} 456 outc := color.Color(&out) 457 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { 458 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { 459 sr, sg, sb, sa := src.At(sp.X+x, sp.Y+y).RGBA() 460 ma := z.bufU32[y*z.size.X+x] 461 462 // This algorithm comes from the standard library's image/draw 463 // package. 464 out.R = uint16(sr * ma / 0xffff) 465 out.G = uint16(sg * ma / 0xffff) 466 out.B = uint16(sb * ma / 0xffff) 467 out.A = uint16(sa * ma / 0xffff) 468 469 dst.Set(r.Min.X+x, r.Min.Y+y, outc) 470 } 471 } 472} 473