1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package draw provides image composition functions.
6//
7// See "The Go image/draw package" for an introduction to this package:
8// https://golang.org/doc/articles/image_draw.html
9package draw
10
11import (
12	"image"
13	"image/color"
14	"image/internal/imageutil"
15)
16
// m is the maximum color value returned by color.Color.RGBA: each of the
// four channels it returns lies in the range [0, 0xffff].
const m = 1<<16 - 1
19
// Image is an image.Image with a Set method to change a single pixel.
type Image interface {
	image.Image
	// Set changes the pixel at (x, y) to the color c.
	Set(x, y int, c color.Color)
}
25
// Quantizer produces a palette for an image.
type Quantizer interface {
	// Quantize appends up to cap(p) - len(p) colors to p and returns the
	// updated palette suitable for converting m to a paletted image.
	//
	// Here m is the image to be quantized; it is unrelated to the
	// package-level constant of the same name.
	Quantize(p color.Palette, m image.Image) color.Palette
}
32
// Op is a Porter-Duff compositing operator.
type Op int

const (
	// Over specifies "(src in mask) over dst". It is the zero value of Op.
	Over Op = iota
	// Src specifies "src in mask".
	Src
)
42
43// Draw implements the Drawer interface by calling the Draw function with this
44// Op.
45func (op Op) Draw(dst Image, r image.Rectangle, src image.Image, sp image.Point) {
46	DrawMask(dst, r, src, sp, nil, image.Point{}, op)
47}
48
// Drawer contains the Draw method. In this file it is implemented by Op and
// by the value stored in FloydSteinberg.
type Drawer interface {
	// Draw aligns r.Min in dst with sp in src and then replaces the
	// rectangle r in dst with the result of drawing src on dst.
	Draw(dst Image, r image.Rectangle, src image.Image, sp image.Point)
}
55
// FloydSteinberg is a Drawer that is the Src Op with Floyd-Steinberg error
// diffusion.
var FloydSteinberg Drawer = floydSteinberg{}

// floydSteinberg is the field-less type whose Draw method backs the
// FloydSteinberg variable.
type floydSteinberg struct{}
61
62func (floydSteinberg) Draw(dst Image, r image.Rectangle, src image.Image, sp image.Point) {
63	clip(dst, &r, src, &sp, nil, nil)
64	if r.Empty() {
65		return
66	}
67	drawPaletted(dst, r, src, sp, true)
68}
69
70// clip clips r against each image's bounds (after translating into the
71// destination image's coordinate space) and shifts the points sp and mp by
72// the same amount as the change in r.Min.
73func clip(dst Image, r *image.Rectangle, src image.Image, sp *image.Point, mask image.Image, mp *image.Point) {
74	orig := r.Min
75	*r = r.Intersect(dst.Bounds())
76	*r = r.Intersect(src.Bounds().Add(orig.Sub(*sp)))
77	if mask != nil {
78		*r = r.Intersect(mask.Bounds().Add(orig.Sub(*mp)))
79	}
80	dx := r.Min.X - orig.X
81	dy := r.Min.Y - orig.Y
82	if dx == 0 && dy == 0 {
83		return
84	}
85	sp.X += dx
86	sp.Y += dy
87	if mp != nil {
88		mp.X += dx
89		mp.Y += dy
90	}
91}
92
// processBackward reports whether a draw from src at sp into the rectangle r
// of dst has to visit pixels in reverse order (bottom-up, right-to-left).
// That is the case only when dst and src are the same image, the source and
// destination rectangles overlap, and the source rectangle starts above the
// destination rectangle, or on the same row but to its left.
func processBackward(dst image.Image, r image.Rectangle, src image.Image, sp image.Point) bool {
	if dst != src {
		return false
	}
	srcRect := r.Add(sp.Sub(r.Min))
	if !r.Overlaps(srcRect) {
		return false
	}
	if sp.Y != r.Min.Y {
		return sp.Y < r.Min.Y
	}
	return sp.X < r.Min.X
}
98
99// Draw calls DrawMask with a nil mask.
100func Draw(dst Image, r image.Rectangle, src image.Image, sp image.Point, op Op) {
101	DrawMask(dst, r, src, sp, nil, image.Point{}, op)
102}
103
// DrawMask aligns r.Min in dst with sp in src and mp in mask and then replaces the rectangle r
// in dst with the result of a Porter-Duff composition. A nil mask is treated as opaque.
//
// r is first clipped against the bounds of dst, src and mask (sp and mp are
// shifted to match); if nothing is left, DrawMask returns without touching
// dst. Destinations of type *image.RGBA and *image.Paletted are handled by
// specialized routines for the source/mask combinations listed below; every
// other combination goes through a generic per-pixel loop built on At and Set.
func DrawMask(dst Image, r image.Rectangle, src image.Image, sp image.Point, mask image.Image, mp image.Point, op Op) {
	clip(dst, &r, src, &sp, mask, &mp)
	if r.Empty() {
		return
	}

	// Fast paths for special cases. If none of them apply, then we fall back to a general but slow implementation.
	switch dst0 := dst.(type) {
	case *image.RGBA:
		if op == Over {
			if mask == nil {
				switch src0 := src.(type) {
				case *image.Uniform:
					sr, sg, sb, sa := src0.RGBA()
					// A fully opaque uniform source hides dst completely,
					// so Over degenerates to a plain fill.
					if sa == 0xffff {
						drawFillSrc(dst0, r, sr, sg, sb, sa)
					} else {
						drawFillOver(dst0, r, sr, sg, sb, sa)
					}
					return
				case *image.RGBA:
					drawCopyOver(dst0, r, src0, sp)
					return
				case *image.NRGBA:
					drawNRGBAOver(dst0, r, src0, sp)
					return
				case *image.YCbCr:
					// An image.YCbCr is always fully opaque, and so if the
					// mask is nil (i.e. fully opaque) then the op is
					// effectively always Src. Similarly for image.Gray and
					// image.CMYK.
					//
					// If DrawYCbCr reports false, control leaves this switch
					// and reaches the drawRGBA call below.
					if imageutil.DrawYCbCr(dst0, r, src0, sp) {
						return
					}
				case *image.Gray:
					drawGray(dst0, r, src0, sp)
					return
				case *image.CMYK:
					drawCMYK(dst0, r, src0, sp)
					return
				}
			} else if mask0, ok := mask.(*image.Alpha); ok {
				// Uniform color through an alpha mask.
				switch src0 := src.(type) {
				case *image.Uniform:
					drawGlyphOver(dst0, r, src0, mask0, mp)
					return
				}
			}
		} else {
			// Any op other than Over is handled as Src here.
			if mask == nil {
				switch src0 := src.(type) {
				case *image.Uniform:
					sr, sg, sb, sa := src0.RGBA()
					drawFillSrc(dst0, r, sr, sg, sb, sa)
					return
				case *image.RGBA:
					drawCopySrc(dst0, r, src0, sp)
					return
				case *image.NRGBA:
					drawNRGBASrc(dst0, r, src0, sp)
					return
				case *image.YCbCr:
					// As above: on false, fall out to drawRGBA below.
					if imageutil.DrawYCbCr(dst0, r, src0, sp) {
						return
					}
				case *image.Gray:
					drawGray(dst0, r, src0, sp)
					return
				case *image.CMYK:
					drawCMYK(dst0, r, src0, sp)
					return
				}
			}
		}
		// No specialized routine applied: use the generic *image.RGBA
		// destination path, which still writes dst0.Pix directly.
		drawRGBA(dst0, r, src, sp, mask, mp, op)
		return
	case *image.Paletted:
		if op == Src && mask == nil {
			if src0, ok := src.(*image.Uniform); ok {
				// Uniform fill: look up the palette index for the color
				// once, write it across the first row, then replicate that
				// row into the remaining rows with copy.
				colorIndex := uint8(dst0.Palette.Index(src0.C))
				i0 := dst0.PixOffset(r.Min.X, r.Min.Y)
				i1 := i0 + r.Dx()
				for i := i0; i < i1; i++ {
					dst0.Pix[i] = colorIndex
				}
				firstRow := dst0.Pix[i0:i1]
				for y := r.Min.Y + 1; y < r.Max.Y; y++ {
					i0 += dst0.Stride
					i1 += dst0.Stride
					copy(dst0.Pix[i0:i1], firstRow)
				}
				return
			} else if !processBackward(dst, r, src, sp) {
				// drawPaletted only walks forward, so it is used only when
				// a forward walk is safe; otherwise the general loop below
				// handles the draw.
				drawPaletted(dst0, r, src, sp, false)
				return
			}
		}
	}

	// General case: iterate forward by default, or backward (bottom-up,
	// right-to-left) when processBackward says the draw requires it.
	x0, x1, dx := r.Min.X, r.Max.X, 1
	y0, y1, dy := r.Min.Y, r.Max.Y, 1
	if processBackward(dst, r, src, sp) {
		x0, x1, dx = x1-1, x0-1, -1
		y0, y1, dy = y1-1, y0-1, -1
	}

	var out color.RGBA64
	// sy/my and sx/mx track the source and mask coordinates that correspond
	// to the destination pixel (x, y).
	sy := sp.Y + y0 - r.Min.Y
	my := mp.Y + y0 - r.Min.Y
	for y := y0; y != y1; y, sy, my = y+dy, sy+dy, my+dy {
		sx := sp.X + x0 - r.Min.X
		mx := mp.X + x0 - r.Min.X
		for x := x0; x != x1; x, sx, mx = x+dx, sx+dx, mx+dx {
			// ma is the mask alpha for this pixel; a nil mask is opaque.
			ma := uint32(m)
			if mask != nil {
				_, _, _, ma = mask.At(mx, my).RGBA()
			}
			switch {
			case ma == 0:
				if op == Over {
					// No-op.
				} else {
					dst.Set(x, y, color.Transparent)
				}
			case ma == m && op == Src:
				// Fully opaque mask with Src: copy the source color as is.
				dst.Set(x, y, src.At(sx, sy))
			default:
				sr, sg, sb, sa := src.At(sx, sy).RGBA()
				if op == Over {
					dr, dg, db, da := dst.At(x, y).RGBA()
					// a is the share of dst that remains visible:
					// m minus the source alpha scaled by the mask alpha.
					a := m - (sa * ma / m)
					out.R = uint16((dr*a + sr*ma) / m)
					out.G = uint16((dg*a + sg*ma) / m)
					out.B = uint16((db*a + sb*ma) / m)
					out.A = uint16((da*a + sa*ma) / m)
				} else {
					out.R = uint16(sr * ma / m)
					out.G = uint16(sg * ma / m)
					out.B = uint16(sb * ma / m)
					out.A = uint16(sa * ma / m)
				}
				// The third argument is &out instead of out (and out is
				// declared outside of the inner loop) to avoid the implicit
				// conversion to color.Color here allocating memory in the
				// inner loop if sizeof(color.RGBA64) > sizeof(uintptr).
				dst.Set(x, y, &out)
			}
		}
	}
}
256
257func drawFillOver(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) {
258	// The 0x101 is here for the same reason as in drawRGBA.
259	a := (m - sa) * 0x101
260	i0 := dst.PixOffset(r.Min.X, r.Min.Y)
261	i1 := i0 + r.Dx()*4
262	for y := r.Min.Y; y != r.Max.Y; y++ {
263		for i := i0; i < i1; i += 4 {
264			dr := &dst.Pix[i+0]
265			dg := &dst.Pix[i+1]
266			db := &dst.Pix[i+2]
267			da := &dst.Pix[i+3]
268
269			*dr = uint8((uint32(*dr)*a/m + sr) >> 8)
270			*dg = uint8((uint32(*dg)*a/m + sg) >> 8)
271			*db = uint8((uint32(*db)*a/m + sb) >> 8)
272			*da = uint8((uint32(*da)*a/m + sa) >> 8)
273		}
274		i0 += dst.Stride
275		i1 += dst.Stride
276	}
277}
278
// drawFillSrc overwrites every pixel of dst inside r with the uniform 16-bit
// color (sr, sg, sb, sa), keeping only the high 8 bits of each channel.
func drawFillSrc(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) {
	r8, g8, b8, a8 := uint8(sr>>8), uint8(sg>>8), uint8(sb>>8), uint8(sa>>8)

	// copy is faster than a hand-written loop but needs a slice to copy
	// from, so only the first row is filled pixel by pixel; it then serves
	// as the template for every remaining row.
	start := dst.PixOffset(r.Min.X, r.Min.Y)
	end := start + 4*r.Dx()
	for off := start; off < end; off += 4 {
		dst.Pix[off+0] = r8
		dst.Pix[off+1] = g8
		dst.Pix[off+2] = b8
		dst.Pix[off+3] = a8
	}

	template := dst.Pix[start:end]
	for y := r.Min.Y + 1; y < r.Max.Y; y++ {
		start += dst.Stride
		end += dst.Stride
		copy(dst.Pix[start:end], template)
	}
}
302
303func drawCopyOver(dst *image.RGBA, r image.Rectangle, src *image.RGBA, sp image.Point) {
304	dx, dy := r.Dx(), r.Dy()
305	d0 := dst.PixOffset(r.Min.X, r.Min.Y)
306	s0 := src.PixOffset(sp.X, sp.Y)
307	var (
308		ddelta, sdelta int
309		i0, i1, idelta int
310	)
311	if r.Min.Y < sp.Y || r.Min.Y == sp.Y && r.Min.X <= sp.X {
312		ddelta = dst.Stride
313		sdelta = src.Stride
314		i0, i1, idelta = 0, dx*4, +4
315	} else {
316		// If the source start point is higher than the destination start point, or equal height but to the left,
317		// then we compose the rows in right-to-left, bottom-up order instead of left-to-right, top-down.
318		d0 += (dy - 1) * dst.Stride
319		s0 += (dy - 1) * src.Stride
320		ddelta = -dst.Stride
321		sdelta = -src.Stride
322		i0, i1, idelta = (dx-1)*4, -4, -4
323	}
324	for ; dy > 0; dy-- {
325		dpix := dst.Pix[d0:]
326		spix := src.Pix[s0:]
327		for i := i0; i != i1; i += idelta {
328			s := spix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
329			sr := uint32(s[0]) * 0x101
330			sg := uint32(s[1]) * 0x101
331			sb := uint32(s[2]) * 0x101
332			sa := uint32(s[3]) * 0x101
333
334			// The 0x101 is here for the same reason as in drawRGBA.
335			a := (m - sa) * 0x101
336
337			d := dpix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
338			d[0] = uint8((uint32(d[0])*a/m + sr) >> 8)
339			d[1] = uint8((uint32(d[1])*a/m + sg) >> 8)
340			d[2] = uint8((uint32(d[2])*a/m + sb) >> 8)
341			d[3] = uint8((uint32(d[3])*a/m + sa) >> 8)
342		}
343		d0 += ddelta
344		s0 += sdelta
345	}
346}
347
// drawCopySrc copies the r-sized rectangle of src that starts at sp into the
// rectangle r of dst, one row at a time.
func drawCopySrc(dst *image.RGBA, r image.Rectangle, src *image.RGBA, sp image.Point) {
	rowLen := 4 * r.Dx()
	rows := r.Dy()
	dOff := dst.PixOffset(r.Min.X, r.Min.Y)
	sOff := src.PixOffset(sp.X, sp.Y)

	dStep, sStep := dst.Stride, src.Stride
	if r.Min.Y > sp.Y {
		// The source starts above the destination, so rows are copied
		// bottom-up rather than top-down. Unlike drawCopyOver there is no
		// need to look at the x coordinates: the built-in copy copes with
		// overlapping slices within a row.
		dOff += (rows - 1) * dStep
		sOff += (rows - 1) * sStep
		dStep, sStep = -dStep, -sStep
	}

	for i := 0; i < rows; i++ {
		copy(dst.Pix[dOff:dOff+rowLen], src.Pix[sOff:sOff+rowLen])
		dOff += dStep
		sOff += sStep
	}
}
373
374func drawNRGBAOver(dst *image.RGBA, r image.Rectangle, src *image.NRGBA, sp image.Point) {
375	i0 := (r.Min.X - dst.Rect.Min.X) * 4
376	i1 := (r.Max.X - dst.Rect.Min.X) * 4
377	si0 := (sp.X - src.Rect.Min.X) * 4
378	yMax := r.Max.Y - dst.Rect.Min.Y
379
380	y := r.Min.Y - dst.Rect.Min.Y
381	sy := sp.Y - src.Rect.Min.Y
382	for ; y != yMax; y, sy = y+1, sy+1 {
383		dpix := dst.Pix[y*dst.Stride:]
384		spix := src.Pix[sy*src.Stride:]
385
386		for i, si := i0, si0; i < i1; i, si = i+4, si+4 {
387			// Convert from non-premultiplied color to pre-multiplied color.
388			s := spix[si : si+4 : si+4] // Small cap improves performance, see https://golang.org/issue/27857
389			sa := uint32(s[3]) * 0x101
390			sr := uint32(s[0]) * sa / 0xff
391			sg := uint32(s[1]) * sa / 0xff
392			sb := uint32(s[2]) * sa / 0xff
393
394			d := dpix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
395			dr := uint32(d[0])
396			dg := uint32(d[1])
397			db := uint32(d[2])
398			da := uint32(d[3])
399
400			// The 0x101 is here for the same reason as in drawRGBA.
401			a := (m - sa) * 0x101
402
403			d[0] = uint8((dr*a/m + sr) >> 8)
404			d[1] = uint8((dg*a/m + sg) >> 8)
405			d[2] = uint8((db*a/m + sb) >> 8)
406			d[3] = uint8((da*a/m + sa) >> 8)
407		}
408	}
409}
410
// drawNRGBASrc replaces the rectangle r of dst with the pixels of src that
// start at sp, converting each from non-premultiplied to premultiplied
// color on the way.
func drawNRGBASrc(dst *image.RGBA, r image.Rectangle, src *image.NRGBA, sp image.Point) {
	// Byte offsets within a row for the destination span and source start.
	dBeg := 4 * (r.Min.X - dst.Rect.Min.X)
	dEnd := 4 * (r.Max.X - dst.Rect.Min.X)
	sBeg := 4 * (sp.X - src.Rect.Min.X)

	// Row indices relative to each image's own origin.
	dRowIdx := r.Min.Y - dst.Rect.Min.Y
	dRowEnd := r.Max.Y - dst.Rect.Min.Y
	sRowIdx := sp.Y - src.Rect.Min.Y

	for dRowIdx != dRowEnd {
		dRow := dst.Pix[dRowIdx*dst.Stride:]
		sRow := src.Pix[sRowIdx*src.Stride:]

		si := sBeg
		for di := dBeg; di < dEnd; di += 4 {
			// The three-index slices cap the capacity, which improves
			// performance; see https://golang.org/issue/27857
			s := sRow[si : si+4 : si+4]

			// Premultiply in 16 bits, then keep the high byte of each channel.
			alpha := uint32(s[3]) * 0x101
			d := dRow[di : di+4 : di+4]
			d[0] = uint8((uint32(s[0]) * alpha / 0xff) >> 8)
			d[1] = uint8((uint32(s[1]) * alpha / 0xff) >> 8)
			d[2] = uint8((uint32(s[2]) * alpha / 0xff) >> 8)
			d[3] = uint8(alpha >> 8)

			si += 4
		}
		dRowIdx++
		sRowIdx++
	}
}
439
// drawGray replaces the rectangle r of dst with the pixels of src that start
// at sp. Each gray value is written to the red, green and blue channels, and
// alpha is set to fully opaque.
func drawGray(dst *image.RGBA, r image.Rectangle, src *image.Gray, sp image.Point) {
	// Destination offsets are in bytes (4 per pixel); the source uses one
	// byte per pixel.
	dBeg := 4 * (r.Min.X - dst.Rect.Min.X)
	dEnd := 4 * (r.Max.X - dst.Rect.Min.X)
	sBeg := sp.X - src.Rect.Min.X

	// Row indices relative to each image's own origin.
	dRowIdx := r.Min.Y - dst.Rect.Min.Y
	dRowEnd := r.Max.Y - dst.Rect.Min.Y
	sRowIdx := sp.Y - src.Rect.Min.Y

	for dRowIdx != dRowEnd {
		dRow := dst.Pix[dRowIdx*dst.Stride:]
		sRow := src.Pix[sRowIdx*src.Stride:]

		si := sBeg
		for di := dBeg; di < dEnd; di += 4 {
			gray := sRow[si]
			// The three-index slice caps the capacity, which improves
			// performance; see https://golang.org/issue/27857
			d := dRow[di : di+4 : di+4]
			d[0], d[1], d[2], d[3] = gray, gray, gray, 255
			si++
		}
		dRowIdx++
		sRowIdx++
	}
}
462
// drawCMYK replaces the rectangle r of dst with the pixels of src that start
// at sp, converting each with color.CMYKToRGB and setting alpha to fully
// opaque.
func drawCMYK(dst *image.RGBA, r image.Rectangle, src *image.CMYK, sp image.Point) {
	// Both images use 4 bytes per pixel.
	dBeg := 4 * (r.Min.X - dst.Rect.Min.X)
	dEnd := 4 * (r.Max.X - dst.Rect.Min.X)
	sBeg := 4 * (sp.X - src.Rect.Min.X)

	// Row indices relative to each image's own origin.
	dRowIdx := r.Min.Y - dst.Rect.Min.Y
	dRowEnd := r.Max.Y - dst.Rect.Min.Y
	sRowIdx := sp.Y - src.Rect.Min.Y

	for dRowIdx != dRowEnd {
		dRow := dst.Pix[dRowIdx*dst.Stride:]
		sRow := src.Pix[sRowIdx*src.Stride:]

		si := sBeg
		for di := dBeg; di < dEnd; di += 4 {
			// The three-index slices cap the capacity, which improves
			// performance; see https://golang.org/issue/27857
			cmyk := sRow[si : si+4 : si+4]
			d := dRow[di : di+4 : di+4]
			d[0], d[1], d[2] = color.CMYKToRGB(cmyk[0], cmyk[1], cmyk[2], cmyk[3])
			d[3] = 255
			si += 4
		}
		dRowIdx++
		sRowIdx++
	}
}
483
484func drawGlyphOver(dst *image.RGBA, r image.Rectangle, src *image.Uniform, mask *image.Alpha, mp image.Point) {
485	i0 := dst.PixOffset(r.Min.X, r.Min.Y)
486	i1 := i0 + r.Dx()*4
487	mi0 := mask.PixOffset(mp.X, mp.Y)
488	sr, sg, sb, sa := src.RGBA()
489	for y, my := r.Min.Y, mp.Y; y != r.Max.Y; y, my = y+1, my+1 {
490		for i, mi := i0, mi0; i < i1; i, mi = i+4, mi+1 {
491			ma := uint32(mask.Pix[mi])
492			if ma == 0 {
493				continue
494			}
495			ma |= ma << 8
496
497			// The 0x101 is here for the same reason as in drawRGBA.
498			a := (m - (sa * ma / m)) * 0x101
499
500			d := dst.Pix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
501			d[0] = uint8((uint32(d[0])*a + sr*ma) / m >> 8)
502			d[1] = uint8((uint32(d[1])*a + sg*ma) / m >> 8)
503			d[2] = uint8((uint32(d[2])*a + sb*ma) / m >> 8)
504			d[3] = uint8((uint32(d[3])*a + sa*ma) / m >> 8)
505		}
506		i0 += dst.Stride
507		i1 += dst.Stride
508		mi0 += mask.Stride
509	}
510}
511
512func drawRGBA(dst *image.RGBA, r image.Rectangle, src image.Image, sp image.Point, mask image.Image, mp image.Point, op Op) {
513	x0, x1, dx := r.Min.X, r.Max.X, 1
514	y0, y1, dy := r.Min.Y, r.Max.Y, 1
515	if image.Image(dst) == src && r.Overlaps(r.Add(sp.Sub(r.Min))) {
516		if sp.Y < r.Min.Y || sp.Y == r.Min.Y && sp.X < r.Min.X {
517			x0, x1, dx = x1-1, x0-1, -1
518			y0, y1, dy = y1-1, y0-1, -1
519		}
520	}
521
522	sy := sp.Y + y0 - r.Min.Y
523	my := mp.Y + y0 - r.Min.Y
524	sx0 := sp.X + x0 - r.Min.X
525	mx0 := mp.X + x0 - r.Min.X
526	sx1 := sx0 + (x1 - x0)
527	i0 := dst.PixOffset(x0, y0)
528	di := dx * 4
529	for y := y0; y != y1; y, sy, my = y+dy, sy+dy, my+dy {
530		for i, sx, mx := i0, sx0, mx0; sx != sx1; i, sx, mx = i+di, sx+dx, mx+dx {
531			ma := uint32(m)
532			if mask != nil {
533				_, _, _, ma = mask.At(mx, my).RGBA()
534			}
535			sr, sg, sb, sa := src.At(sx, sy).RGBA()
536			d := dst.Pix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
537			if op == Over {
538				dr := uint32(d[0])
539				dg := uint32(d[1])
540				db := uint32(d[2])
541				da := uint32(d[3])
542
543				// dr, dg, db and da are all 8-bit color at the moment, ranging in [0,255].
544				// We work in 16-bit color, and so would normally do:
545				// dr |= dr << 8
546				// and similarly for dg, db and da, but instead we multiply a
547				// (which is a 16-bit color, ranging in [0,65535]) by 0x101.
548				// This yields the same result, but is fewer arithmetic operations.
549				a := (m - (sa * ma / m)) * 0x101
550
551				d[0] = uint8((dr*a + sr*ma) / m >> 8)
552				d[1] = uint8((dg*a + sg*ma) / m >> 8)
553				d[2] = uint8((db*a + sb*ma) / m >> 8)
554				d[3] = uint8((da*a + sa*ma) / m >> 8)
555
556			} else {
557				d[0] = uint8(sr * ma / m >> 8)
558				d[1] = uint8(sg * ma / m >> 8)
559				d[2] = uint8(sb * ma / m >> 8)
560				d[3] = uint8(sa * ma / m >> 8)
561			}
562		}
563		i0 += dy * dst.Stride
564	}
565}
566
// clamp limits i to the interval [0, 0xffff]: values below the interval
// become 0, values above it become 0xffff, and everything else is returned
// unchanged.
func clamp(i int32) int32 {
	switch {
	case i < 0:
		return 0
	case i > 0xffff:
		return 0xffff
	default:
		return i
	}
}
577
// sqDiff returns (x-y)*(x-y) shifted right by 2, so that the sum of four
// such values cannot overflow a uint32.
//
// x and y are both assumed to be in the range [0, 0xffff].
func sqDiff(x, y int32) uint32 {
	// No absolute value is taken on purpose: converting a negative
	// difference to uint32 wraps around, and squaring the wrapped value in
	// unsigned arithmetic (well defined by the language spec) still gives
	// the true square. See sqDiff in the image/color package for details.
	delta := uint32(x - y)
	squared := delta * delta
	return squared >> 2
}
589
// drawPaletted writes the r-sized rectangle of src that starts at sp into
// the rectangle r of dst, optionally applying Floyd-Steinberg error
// diffusion.
//
// When dst is an *image.Paletted, each pixel is matched against the palette
// here and its index is stored directly in the pixel buffer. For any other
// dst, the (possibly error-adjusted) color is passed to dst.Set, and the
// value read back with dst.At is what the quantization error is measured
// against.
//
// Pixels are always visited top-down and left-to-right; see the TODO below
// about overlapping dst and src.
func drawPaletted(dst Image, r image.Rectangle, src image.Image, sp image.Point, floydSteinberg bool) {
	// TODO(nigeltao): handle the case where the dst and src overlap.
	// Does it even make sense to try and do Floyd-Steinberg whilst
	// walking the image backward (right-to-left bottom-to-top)?

	// If dst is an *image.Paletted, we have a fast path for dst.Set and
	// dst.At. The dst.Set equivalent is a batch version of the algorithm
	// used by color.Palette's Index method in image/color/color.go, plus
	// optional Floyd-Steinberg error diffusion.
	//
	// palette stays nil for every other dst type, which is what selects the
	// dst.Set / dst.At branch in the loop below.
	palette, pix, stride := [][4]int32(nil), []byte(nil), 0
	if p, ok := dst.(*image.Paletted); ok {
		// Cache every palette entry as 16-bit R, G, B, A values so that the
		// inner loop does not call RGBA per candidate.
		palette = make([][4]int32, len(p.Palette))
		for i, col := range p.Palette {
			r, g, b, a := col.RGBA()
			palette[i][0] = int32(r)
			palette[i][1] = int32(g)
			palette[i][2] = int32(b)
			palette[i][3] = int32(a)
		}
		// pix starts at r.Min, so pix[y*stride+x] addresses the pixel at
		// (r.Min.X+x, r.Min.Y+y).
		pix, stride = p.Pix[p.PixOffset(r.Min.X, r.Min.Y):], p.Stride
	}

	// quantErrorCurr and quantErrorNext are the Floyd-Steinberg quantization
	// errors that have been propagated to the pixels in the current and next
	// rows. The +2 simplifies calculation near the edges.
	//
	// The entry for column x lives at index x+1. The stored values are sums
	// of errors times their weights (7, 3, 5, 1); they are divided by 16
	// when they are consumed.
	var quantErrorCurr, quantErrorNext [][4]int32
	if floydSteinberg {
		quantErrorCurr = make([][4]int32, r.Dx()+2)
		quantErrorNext = make([][4]int32, r.Dx()+2)
	}
	pxRGBA := func(x, y int) (r, g, b, a uint32) { return src.At(x, y).RGBA() }
	// Fast paths for special cases to avoid excessive use of the color.Color
	// interface which escapes to the heap but need to be discovered for
	// each pixel on r. See also https://golang.org/issues/15759.
	switch src0 := src.(type) {
	case *image.RGBA:
		pxRGBA = func(x, y int) (r, g, b, a uint32) { return src0.RGBAAt(x, y).RGBA() }
	case *image.NRGBA:
		pxRGBA = func(x, y int) (r, g, b, a uint32) { return src0.NRGBAAt(x, y).RGBA() }
	case *image.YCbCr:
		pxRGBA = func(x, y int) (r, g, b, a uint32) { return src0.YCbCrAt(x, y).RGBA() }
	}

	// Loop over each source pixel. x and y are offsets from r.Min (and from
	// sp in the source), not absolute coordinates.
	out := color.RGBA64{A: 0xffff}
	for y := 0; y != r.Dy(); y++ {
		for x := 0; x != r.Dx(); x++ {
			// er, eg and eb are the pixel's R,G,B values plus the
			// optional Floyd-Steinberg error.
			sr, sg, sb, sa := pxRGBA(sp.X+x, sp.Y+y)
			er, eg, eb, ea := int32(sr), int32(sg), int32(sb), int32(sa)
			if floydSteinberg {
				er = clamp(er + quantErrorCurr[x+1][0]/16)
				eg = clamp(eg + quantErrorCurr[x+1][1]/16)
				eb = clamp(eb + quantErrorCurr[x+1][2]/16)
				ea = clamp(ea + quantErrorCurr[x+1][3]/16)
			}

			if palette != nil {
				// Find the closest palette color in Euclidean R,G,B,A space:
				// the one that minimizes sum-squared-difference.
				// TODO(nigeltao): consider smarter algorithms.
				bestIndex, bestSum := 0, uint32(1<<32-1)
				for index, p := range palette {
					sum := sqDiff(er, p[0]) + sqDiff(eg, p[1]) + sqDiff(eb, p[2]) + sqDiff(ea, p[3])
					if sum < bestSum {
						bestIndex, bestSum = index, sum
						// An exact match cannot be improved on.
						if sum == 0 {
							break
						}
					}
				}
				pix[y*stride+x] = byte(bestIndex)

				if !floydSteinberg {
					continue
				}
				// From here on er, eg, eb and ea hold the quantization
				// error: the wanted color minus the chosen palette color.
				er -= palette[bestIndex][0]
				eg -= palette[bestIndex][1]
				eb -= palette[bestIndex][2]
				ea -= palette[bestIndex][3]

			} else {
				out.R = uint16(er)
				out.G = uint16(eg)
				out.B = uint16(eb)
				out.A = uint16(ea)
				// The third argument is &out instead of out (and out is
				// declared outside of the inner loop) to avoid the implicit
				// conversion to color.Color here allocating memory in the
				// inner loop if sizeof(color.RGBA64) > sizeof(uintptr).
				dst.Set(r.Min.X+x, r.Min.Y+y, &out)

				if !floydSteinberg {
					continue
				}
				// Read back what dst actually stored; the difference from
				// the wanted color is the quantization error.
				sr, sg, sb, sa = dst.At(r.Min.X+x, r.Min.Y+y).RGBA()
				er -= int32(sr)
				eg -= int32(sg)
				eb -= int32(sb)
				ea -= int32(sa)
			}

			// Propagate the Floyd-Steinberg quantization error: 3/16 to the
			// pixel below-left, 5/16 to the pixel below, 1/16 to the pixel
			// below-right and 7/16 to the pixel to the right.
			quantErrorNext[x+0][0] += er * 3
			quantErrorNext[x+0][1] += eg * 3
			quantErrorNext[x+0][2] += eb * 3
			quantErrorNext[x+0][3] += ea * 3
			quantErrorNext[x+1][0] += er * 5
			quantErrorNext[x+1][1] += eg * 5
			quantErrorNext[x+1][2] += eb * 5
			quantErrorNext[x+1][3] += ea * 5
			quantErrorNext[x+2][0] += er * 1
			quantErrorNext[x+2][1] += eg * 1
			quantErrorNext[x+2][2] += eb * 1
			quantErrorNext[x+2][3] += ea * 1
			quantErrorCurr[x+2][0] += er * 7
			quantErrorCurr[x+2][1] += eg * 7
			quantErrorCurr[x+2][2] += eb * 7
			quantErrorCurr[x+2][3] += ea * 7
		}

		// Recycle the quantization error buffers: the next row's errors
		// become the current ones, and the old current buffer is zeroed for
		// reuse as the new next row.
		if floydSteinberg {
			quantErrorCurr, quantErrorNext = quantErrorNext, quantErrorCurr
			for i := range quantErrorNext {
				quantErrorNext[i] = [4]int32{}
			}
		}
	}
}
721