1 /*
2 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26
27 #include "asm/macroAssembler.inline.hpp"
28 #include "gc/shared/memset_with_concurrent_readers.hpp"
29 #include "runtime/prefetch.inline.hpp"
30 #include "utilities/align.hpp"
31 #include "utilities/debug.hpp"
32 #include "utilities/globalDefinitions.hpp"
33 #include "utilities/macros.hpp"
34
// An implementation of memset, for use when there may be concurrent
// readers of the region being stored into.
//
// We can't use the standard library memset if it is implemented using
// block initializing stores. Doing so can result in concurrent readers
// seeing spurious zeros.
//
// We can't use the obvious C/C++ for-loop, because the compiler may
// recognize the idiomatic loop and optimize it into a call to the
// standard library memset; we've seen exactly this happen with, for
// example, Solaris Studio 12.3. Hence the use of inline assembly
// code, hiding loops from the compiler's optimizer.
//
// We never use the standard library memset, not even in the cases
// where it would be safe to do so. We could conservatively detect those
// cases by checking for the presence of block initializing stores
// (VM_Version::has_blk_init()), but the implementation provided here
// should be sufficient.
52
fill_subword(void * start,void * end,int value)53 inline void fill_subword(void* start, void* end, int value) {
54 STATIC_ASSERT(BytesPerWord == 8);
55 assert(pointer_delta(end, start, 1) < (size_t)BytesPerWord, "precondition");
56 // Dispatch on (end - start).
57 void* pc;
58 __asm__ volatile(
59 // offset := (7 - (end - start)) + 3
60 // 3 instructions from rdpc to DISPATCH
61 " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
62 " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
63 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
64 " rd %%pc, %[pc]\n\t" // dispatch on scaled offset
65 " jmpl %[pc]+%[offset], %%g0\n\t"
66 " nop\n\t"
67 // DISPATCH: no direct reference, but without it the store block may be elided.
68 "1:\n\t"
69 " stb %[value], [%[end]-7]\n\t" // end[-7] = value
70 " stb %[value], [%[end]-6]\n\t"
71 " stb %[value], [%[end]-5]\n\t"
72 " stb %[value], [%[end]-4]\n\t"
73 " stb %[value], [%[end]-3]\n\t"
74 " stb %[value], [%[end]-2]\n\t"
75 " stb %[value], [%[end]-1]\n\t" // end[-1] = value
76 : /* only temporaries/overwritten outputs */
77 [pc] "=&r" (pc), // temp
78 [offset] "+&r" (start)
79 : [end] "r" (end),
80 [value] "r" (value)
81 : "memory");
82 }
83
memset_with_concurrent_readers(void * to,int value,size_t size)84 void memset_with_concurrent_readers(void* to, int value, size_t size) {
85 Prefetch::write(to, 0);
86 void* end = static_cast<char*>(to) + size;
87 if (size >= (size_t)BytesPerWord) {
88 // Fill any partial word prefix.
89 uintx* aligned_to = static_cast<uintx*>(align_up(to, BytesPerWord));
90 fill_subword(to, aligned_to, value);
91
92 // Compute fill word.
93 STATIC_ASSERT(BitsPerByte == 8);
94 STATIC_ASSERT(BitsPerWord == 64);
95 uintx xvalue = value & 0xff;
96 xvalue |= (xvalue << 8);
97 xvalue |= (xvalue << 16);
98 xvalue |= (xvalue << 32);
99
100 uintx* aligned_end = static_cast<uintx*>(align_down(end, BytesPerWord));
101 assert(aligned_to <= aligned_end, "invariant");
102
103 // for ( ; aligned_to < aligned_end; ++aligned_to) {
104 // *aligned_to = xvalue;
105 // }
106 uintptr_t temp;
107 __asm__ volatile(
108 // Unroll loop x8.
109 " sub %[aend], %[ato], %[temp]\n\t"
110 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
111 " ba %%xcc, 2f\n\t" // goto TEST always
112 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
113 // LOOP:
114 "1:\n\t" // unrolled x8 store loop top
115 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to
116 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
117 " stx %[xvalue], [%[ato]-56]\n\t"
118 " stx %[xvalue], [%[ato]-48]\n\t"
119 " stx %[xvalue], [%[ato]-40]\n\t"
120 " stx %[xvalue], [%[ato]-32]\n\t"
121 " stx %[xvalue], [%[ato]-24]\n\t"
122 " stx %[xvalue], [%[ato]-16]\n\t"
123 " stx %[xvalue], [%[ato]-8]\n\t"
124 // TEST:
125 "2:\n\t"
126 " bgu,a %%xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
127 " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
128 // Fill remaining < 8 full words.
129 // Dispatch on (aligned_end - aligned_to).
130 // offset := (7 - (aligned_end - aligned_to)) + 3
131 // 3 instructions from rdpc to DISPATCH
132 " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
133 " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
134 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
135 " rd %%pc, %[temp]\n\t" // dispatch on scaled offset
136 " jmpl %[temp]+%[ato], %%g0\n\t"
137 " nop\n\t"
138 // DISPATCH: no direct reference, but without it the store block may be elided.
139 "3:\n\t"
140 " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
141 " stx %[xvalue], [%[aend]-48]\n\t"
142 " stx %[xvalue], [%[aend]-40]\n\t"
143 " stx %[xvalue], [%[aend]-32]\n\t"
144 " stx %[xvalue], [%[aend]-24]\n\t"
145 " stx %[xvalue], [%[aend]-16]\n\t"
146 " stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue
147 : /* only temporaries/overwritten outputs */
148 [temp] "=&r" (temp),
149 [ato] "+&r" (aligned_to)
150 : [aend] "r" (aligned_end),
151 [xvalue] "r" (xvalue)
152 : "cc", "memory");
153 to = aligned_end; // setup for suffix
154 }
155 // Fill any partial word suffix. Also the prefix if size < BytesPerWord.
156 fill_subword(to, end, value);
157 }
158