/*
 * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2019, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "runtime/stubRoutines.hpp"
29 #include "runtime/vm_version.hpp"
30
31 // Implementation of the platform-specific part of StubRoutines - for
32 // a description of how to extend it, see the stubRoutines.hpp file.
33
34
35 #define __ masm->
36
37 // CRC constant compute functions
fold_byte(juint w,juint reverse_poly)38 static juint fold_byte(juint w, juint reverse_poly) {
39 for (int i = 0; i < 8; i++) {
40 int poly_if_odd = (-(w & 1)) & reverse_poly;
41 w = (w >> 1) ^ poly_if_odd;
42 }
43 return w;
44 }
45
fold_word(juint w,juint reverse_poly)46 static juint fold_word(juint w, juint reverse_poly) {
47 for (int i = 0; i < 32; i++) {
48 int poly_if_odd = (-(w & 1)) & reverse_poly;
49 w = (w >> 1) ^ poly_if_odd;
50 }
51 return w;
52 }
53
numberOfLeadingZeros(julong p)54 static julong numberOfLeadingZeros(julong p) {
55 julong l = 1ull << 63;
56 for (int i = 0; i < 64; ++i) {
57 if (p & l) return i;
58 l >>= 1;
59 }
60 return 64;
61 }
62
compute_inverse_poly(julong long_poly)63 static julong compute_inverse_poly(julong long_poly) {
64 // 2^64 / p
65 julong mod = 0, div = 0;
66 int d = numberOfLeadingZeros(long_poly);
67 int s = d + 1;
68 do {
69 mod ^= (long_poly << s);
70 div |= (1L << s);
71 s = d - numberOfLeadingZeros(mod);
72 } while (s >= 0);
73 return div;
74 }
75
76 #ifndef VM_LITTLE_ENDIAN
reverse_bytes(juint & w)77 static void reverse_bytes(juint &w) {
78 w = ((w >> 24) & 0xFF) | (((w >> 16) & 0xFF) << 8) | (((w >> 8) & 0xFF) << 16) | ((w & 0xFF) << 24);
79 }
80 #endif
81
82 // Constants to fold n words as needed by macroAssembler.
generate_crc_constants(juint reverse_poly)83 address StubRoutines::generate_crc_constants(juint reverse_poly) {
84 // Layout of constant table:
85 // <= Power7 Little Endian: 4 tables for byte folding
86 // <= Power7 Big Endian: 1 table for single byte folding + 4 tables for multi-byte folding
87 // >= Power8: 1 table for single byte folding + constants for fast vector implementation
88 const bool use_vector = VM_Version::has_vpmsumb();
89 const int vector_size = 16 * (CRC32_UNROLL_FACTOR2 + CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
90
91 const int size = use_vector ? CRC32_TABLE_SIZE + vector_size : (4 BIG_ENDIAN_ONLY(+1)) * CRC32_TABLE_SIZE;
92 const address consts = (address)malloc(size);
93 if (consts == NULL) {
94 vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "CRC constants: no enough space");
95 }
96 juint* ptr = (juint*)consts;
97
98 // Simple table used for single byte folding
99 LITTLE_ENDIAN_ONLY(if (use_vector)) {
100 for (int i = 0; i < 256; ++i) {
101 ptr[i] = fold_byte(i, reverse_poly);
102 }
103 }
104
105 if (!use_vector) {
106 BIG_ENDIAN_ONLY(ptr = (juint*)(consts + CRC32_TABLE_SIZE);)
107 // <= Power7: 4 tables
108 for (int i = 0; i < 256; ++i) {
109 juint a = fold_byte(i, reverse_poly),
110 b = fold_byte(a, reverse_poly),
111 c = fold_byte(b, reverse_poly),
112 d = fold_byte(c, reverse_poly);
113 #ifndef VM_LITTLE_ENDIAN
114 reverse_bytes(a);
115 reverse_bytes(b);
116 reverse_bytes(c);
117 reverse_bytes(d);
118 #endif
119 ptr[i ] = a;
120 ptr[i + 256] = b;
121 ptr[i + 2* 256] = c;
122 ptr[i + 3* 256] = d;
123 }
124 #if 0
125 for (int i = 0; i < 4; ++i) {
126 tty->print_cr("table %d:", i);
127 for (int j = 0; j < 32; ++j) {
128 for (int k = 0; k < 8; ++k) {
129 tty->print("%08x ", ptr[i*256 + j*8 + k]);
130 }
131 tty->cr();
132 }
133 }
134 #endif
135 return consts;
136 }
137
138 // >= Power8: vector constants
139 juint* ptr1 = (juint*)(consts + CRC32_TABLE_SIZE);
140 guarantee(((intptr_t)ptr1 & 0xF) == 0, "16-byte alignment needed");
141
142 // Generate constants for outer loop
143 juint v0, v1, v2, v3 = 1;
144 for (int i = 0; i < CRC32_UNROLL_FACTOR2 - 1; ++i) {
145 v0 = fold_word(v3, reverse_poly);
146 v1 = fold_word(v0, reverse_poly);
147 v2 = fold_word(v1, reverse_poly);
148 v3 = fold_word(v2, reverse_poly);
149 #ifdef VM_LITTLE_ENDIAN
150 ptr1[4*i ] = v3;
151 ptr1[4*i+1] = v2;
152 ptr1[4*i+2] = v3;
153 ptr1[4*i+3] = v2;
154 #else
155 ptr1[4*i ] = v2;
156 ptr1[4*i+1] = v3;
157 ptr1[4*i+2] = v2;
158 ptr1[4*i+3] = v3;
159 #endif
160 }
161
162 // Generate constants for inner loop
163 juint* ptr2 = ptr1 + 4 * (CRC32_UNROLL_FACTOR2 - 1);
164 v3 = 1; // Restart from scratch.
165 for (int i = 0; i < CRC32_UNROLL_FACTOR; ++i) {
166 v0 = fold_word(v3, reverse_poly);
167 v1 = fold_word(v0, reverse_poly);
168 v2 = fold_word(v1, reverse_poly);
169 v3 = fold_word(v2, reverse_poly);
170 if (i % CRC32_UNROLL_FACTOR2 == 0) {
171 int idx = CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2 - 1 - i / CRC32_UNROLL_FACTOR2;
172 for (int j = 0; j < 4; ++j) {
173 #ifdef VM_LITTLE_ENDIAN
174 ptr2[4*idx ] = v3;
175 ptr2[4*idx+1] = v2;
176 ptr2[4*idx+2] = v1;
177 ptr2[4*idx+3] = v0;
178 #else
179 ptr2[4*idx ] = v0;
180 ptr2[4*idx+1] = v1;
181 ptr2[4*idx+2] = v2;
182 ptr2[4*idx+3] = v3;
183 #endif
184 }
185 }
186 }
187
188 // Constants to reduce 64 to 32 bit as needed by macroAssembler.
189 juint* ptr3 = ptr2 + 4 * (CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
190 julong* c = (julong*)ptr3;
191 julong long_poly = (((julong)reverse_poly) << 1) | 1;
192 julong inverse_long_poly = compute_inverse_poly(long_poly);
193 #ifdef VM_LITTLE_ENDIAN
194 c[0] = inverse_long_poly;
195 c[1] = long_poly;
196 #else
197 c[0] = long_poly;
198 c[1] = inverse_long_poly;
199 #endif
200
201 #ifdef ASSERT
202 if (reverse_poly == REVERSE_CRC32_POLY) {
203 assert(INVERSE_REVERSE_CRC32_POLY == inverse_long_poly, "sanity");
204 } else if (reverse_poly == REVERSE_CRC32C_POLY) {
205 assert(INVERSE_REVERSE_CRC32C_POLY == inverse_long_poly, "sanity");
206 }
207 #endif
208
209 //printf("inv poly: 0x%016llx\n", (long long unsigned int)inverse_long_poly);
210
211 return consts;
212 }
213