1 /*
2  * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
3  * Copyright (c) 2012, 2019, SAP SE. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.
9  *
10  * This code is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * version 2 for more details (a copy is included in the LICENSE file that
14  * accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License version
17  * 2 along with this work; if not, write to the Free Software Foundation,
18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21  * or visit www.oracle.com if you need additional information or have any
22  * questions.
23  *
24  */
25 
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "runtime/stubRoutines.hpp"
29 #include "runtime/vm_version.hpp"
30 
31 // Implementation of the platform-specific part of StubRoutines - for
32 // a description of how to extend it, see the stubRoutines.hpp file.
33 
34 
35 #define __ masm->
36 
37 // CRC constant compute functions
fold_byte(juint w,juint reverse_poly)38 static juint fold_byte(juint w, juint reverse_poly) {
39   for (int i = 0; i < 8; i++) {
40     int poly_if_odd = (-(w & 1)) & reverse_poly;
41     w = (w >> 1) ^ poly_if_odd;
42   }
43   return w;
44 }
45 
fold_word(juint w,juint reverse_poly)46 static juint fold_word(juint w, juint reverse_poly) {
47   for (int i = 0; i < 32; i++) {
48     int poly_if_odd = (-(w & 1)) & reverse_poly;
49     w = (w >> 1) ^ poly_if_odd;
50   }
51   return w;
52 }
53 
numberOfLeadingZeros(julong p)54 static julong numberOfLeadingZeros(julong p) {
55   julong l = 1ull << 63;
56   for (int i = 0; i < 64; ++i) {
57     if (p & l) return i;
58     l >>= 1;
59   }
60   return 64;
61 }
62 
compute_inverse_poly(julong long_poly)63 static julong compute_inverse_poly(julong long_poly) {
64   // 2^64 / p
65   julong mod = 0, div = 0;
66   int d = numberOfLeadingZeros(long_poly);
67   int s = d + 1;
68   do {
69     mod ^= (long_poly << s);
70     div |= (1L << s);
71     s = d - numberOfLeadingZeros(mod);
72   } while (s >= 0);
73   return div;
74 }
75 
76 #ifndef VM_LITTLE_ENDIAN
reverse_bytes(juint & w)77 static void reverse_bytes(juint &w) {
78   w = ((w >> 24) & 0xFF) | (((w >> 16) & 0xFF) << 8) | (((w >> 8) & 0xFF) << 16) | ((w & 0xFF) << 24);
79 }
80 #endif
81 
82 // Constants to fold n words as needed by macroAssembler.
generate_crc_constants(juint reverse_poly)83 address StubRoutines::generate_crc_constants(juint reverse_poly) {
84   // Layout of constant table:
85   // <= Power7 Little Endian: 4 tables for byte folding
86   // <= Power7 Big Endian: 1 table for single byte folding + 4 tables for multi-byte folding
87   // >= Power8: 1 table for single byte folding + constants for fast vector implementation
88   const bool use_vector = VM_Version::has_vpmsumb();
89   const int vector_size = 16 * (CRC32_UNROLL_FACTOR2 + CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
90 
91   const int size = use_vector ? CRC32_TABLE_SIZE + vector_size : (4 BIG_ENDIAN_ONLY(+1)) * CRC32_TABLE_SIZE;
92   const address consts = (address)malloc(size);
93   if (consts == NULL) {
94     vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "CRC constants: no enough space");
95   }
96   juint* ptr = (juint*)consts;
97 
98   // Simple table used for single byte folding
99   LITTLE_ENDIAN_ONLY(if (use_vector)) {
100     for (int i = 0; i < 256; ++i) {
101       ptr[i] = fold_byte(i, reverse_poly);
102     }
103   }
104 
105   if (!use_vector) {
106     BIG_ENDIAN_ONLY(ptr = (juint*)(consts + CRC32_TABLE_SIZE);)
107     // <= Power7: 4 tables
108     for (int i = 0; i < 256; ++i) {
109       juint a = fold_byte(i, reverse_poly),
110             b = fold_byte(a, reverse_poly),
111             c = fold_byte(b, reverse_poly),
112             d = fold_byte(c, reverse_poly);
113 #ifndef VM_LITTLE_ENDIAN
114       reverse_bytes(a);
115       reverse_bytes(b);
116       reverse_bytes(c);
117       reverse_bytes(d);
118 #endif
119       ptr[i         ] = a;
120       ptr[i +    256] = b;
121       ptr[i + 2* 256] = c;
122       ptr[i + 3* 256] = d;
123     }
124 #if 0
125     for (int i = 0; i < 4; ++i) {
126       tty->print_cr("table %d:", i);
127       for (int j = 0; j < 32; ++j) {
128         for (int k = 0; k < 8; ++k) {
129           tty->print("%08x ", ptr[i*256 + j*8 + k]);
130         }
131         tty->cr();
132       }
133     }
134 #endif
135     return consts;
136   }
137 
138   // >= Power8: vector constants
139   juint* ptr1 = (juint*)(consts + CRC32_TABLE_SIZE);
140   guarantee(((intptr_t)ptr1 & 0xF) == 0, "16-byte alignment needed");
141 
142   // Generate constants for outer loop
143   juint v0, v1, v2, v3 = 1;
144   for (int i = 0; i < CRC32_UNROLL_FACTOR2 - 1; ++i) {
145     v0 = fold_word(v3, reverse_poly);
146     v1 = fold_word(v0, reverse_poly);
147     v2 = fold_word(v1, reverse_poly);
148     v3 = fold_word(v2, reverse_poly);
149 #ifdef VM_LITTLE_ENDIAN
150     ptr1[4*i  ] = v3;
151     ptr1[4*i+1] = v2;
152     ptr1[4*i+2] = v3;
153     ptr1[4*i+3] = v2;
154 #else
155     ptr1[4*i  ] = v2;
156     ptr1[4*i+1] = v3;
157     ptr1[4*i+2] = v2;
158     ptr1[4*i+3] = v3;
159 #endif
160   }
161 
162   // Generate constants for inner loop
163   juint* ptr2 = ptr1 + 4 * (CRC32_UNROLL_FACTOR2 - 1);
164   v3 = 1; // Restart from scratch.
165   for (int i = 0; i < CRC32_UNROLL_FACTOR; ++i) {
166     v0 = fold_word(v3, reverse_poly);
167     v1 = fold_word(v0, reverse_poly);
168     v2 = fold_word(v1, reverse_poly);
169     v3 = fold_word(v2, reverse_poly);
170     if (i % CRC32_UNROLL_FACTOR2 == 0) {
171       int idx = CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2 - 1 - i / CRC32_UNROLL_FACTOR2;
172       for (int j = 0; j < 4; ++j) {
173 #ifdef VM_LITTLE_ENDIAN
174         ptr2[4*idx  ] = v3;
175         ptr2[4*idx+1] = v2;
176         ptr2[4*idx+2] = v1;
177         ptr2[4*idx+3] = v0;
178 #else
179         ptr2[4*idx  ] = v0;
180         ptr2[4*idx+1] = v1;
181         ptr2[4*idx+2] = v2;
182         ptr2[4*idx+3] = v3;
183 #endif
184       }
185     }
186   }
187 
188   // Constants to reduce 64 to 32 bit as needed by macroAssembler.
189   juint* ptr3 = ptr2 + 4 * (CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
190   julong* c = (julong*)ptr3;
191   julong long_poly = (((julong)reverse_poly) << 1) | 1;
192   julong inverse_long_poly = compute_inverse_poly(long_poly);
193 #ifdef VM_LITTLE_ENDIAN
194   c[0] = inverse_long_poly;
195   c[1] = long_poly;
196 #else
197   c[0] = long_poly;
198   c[1] = inverse_long_poly;
199 #endif
200 
201 #ifdef ASSERT
202   if (reverse_poly == REVERSE_CRC32_POLY) {
203     assert(INVERSE_REVERSE_CRC32_POLY == inverse_long_poly, "sanity");
204   } else if (reverse_poly == REVERSE_CRC32C_POLY) {
205     assert(INVERSE_REVERSE_CRC32C_POLY == inverse_long_poly, "sanity");
206   }
207 #endif
208 
209   //printf("inv poly: 0x%016llx\n", (long long unsigned int)inverse_long_poly);
210 
211   return consts;
212 }
213