1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#define NOSPLIT 4
6#define RODATA 8
7
// func castagnoliSSE42(crc uint32, p []byte) uint32
//
// Folds every byte of p into crc with the hardware CRC32 instruction and
// stores the updated checksum in the result slot. The running value is
// bit-inverted once on entry and once more before it is returned.
//
// Register roles:
//	AX  running CRC value
//	SI  read cursor into p
//	CX  number of bytes still to consume
//	BX  scratch, holds the low three address bits during the alignment test
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
	MOVL crc+0(FP), AX
	MOVL p+4(FP), SI
	MOVL p_len+8(FP), CX

	NOTL AX

	// Inputs shorter than 8 bytes skip the alignment and quadword phases
	// entirely and are handled by the trailing byte loop.
	CMPQ CX, $8
	JL   tailtest

	// Phase 1: consume single bytes until SI is a multiple of 8. At least
	// 8 bytes remain on entry and at most 7 are consumed here, so CX
	// cannot drop below zero.
	JMP  aligntest

alignstep:
	CRC32B (SI), AX
	INCQ   SI
	DECQ   CX

aligntest:
	MOVQ SI, BX
	ANDQ $7, BX
	JNZ  alignstep

	// Phase 2: SI is 8-byte aligned; consume one quadword per iteration
	// while at least 8 bytes remain.
	JMP  quadtest

quadstep:
	CRC32Q (SI), AX
	ADDQ   $8, SI
	SUBQ   $8, CX

quadtest:
	CMPQ CX, $8
	JGE  quadstep

	// Phase 3: fewer than 8 bytes are left; finish them one at a time.
	JMP  tailtest

tailstep:
	CRC32B (SI), AX
	INCQ   SI
	DECQ   CX

tailtest:
	TESTQ CX, CX
	JNZ   tailstep

	// Undo the entry inversion and hand the checksum back to the caller.
	NOTL AX
	MOVL AX, ret+16(FP)
	RET
55
// func haveSSE42() bool
//
// Executes CPUID with AX = 1 and returns bit 20 of the resulting CX value
// as a bool (the SSE4.2 feature flag, going by the function name).
TEXT ·haveSSE42(SB), NOSPLIT, $0
	MOVL $1, AX         // select CPUID leaf 1
	CPUID
	ANDL $0x100000, CX  // keep only bit 20
	SHRL $20, CX        // move it down to bit 0, leaving CX = 0 or 1
	MOVB CX, ret+0(FP)
	RET
65
66