/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/type.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

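// Per-compilation state for the ZGC C2 barrier set. Collects the load
// barrier stubs generated during code emission and, for each node that
// keeps its barrier, the set of registers that are live across the stub.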
class ZBarrierSetC2State : public ResourceObj {
private:
  GrowableArray<ZLoadBarrierStubC2*>* _stubs;
  Node_Array                          _live;

public:
  ZBarrierSetC2State(Arena* arena) :
    _stubs(new (arena) GrowableArray<ZLoadBarrierStubC2*>(arena, 8, 0, NULL)),
    _live(arena) {}

  GrowableArray<ZLoadBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return NULL;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() == ZLoadBarrierElided) {
      // Don't need liveness data for nodes without barriers
      return NULL;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == NULL) {
      live = new (Compile::current()->comp_arena()->Amalloc_D(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, barrier_data);
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) :
    _node(node),
    _ref_addr(ref_addr),
    _ref(ref),
    _tmp(tmp),
    _barrier_data(barrier_data),
    _entry(),
    _continuation() {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

Register ZLoadBarrierStubC2::tmp() const {
  return _tmp;
}

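// Selects the runtime slow-path entry point for this stub based on the
// barrier's strength (strong/weak/phantom) and keep-alive semantics,
// encoded as decorators.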
address ZLoadBarrierStubC2::slow_path() const {
  DecoratorSet decorators = DECORATORS_NONE;
  if (_barrier_data & ZLoadBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

RegMask& ZLoadBarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

Label* ZLoadBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any label will do, as it will only act as
  // a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* ZLoadBarrierStubC2::continuation() {
  return &_continuation;
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

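// Performed late in the compilation, after register allocation, when the
// final CFG and register assignments are known: first elide barriers made
// redundant by dominating accesses, then record register liveness at the
// remaining barrier stubs.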
void ZBarrierSetC2::late_barrier_analysis() const {
  analyze_dominating_barriers();
  compute_liveness_at_stubs();
}

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
  }

  masm.flush();
}

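// Estimates the combined size of all barrier stubs by emitting each of
// them into the scratch buffer and summing up the resulting sizes.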
int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->output()->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
    size += cb.insts_size();
  }

  return size;
}

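// Attaches barrier data to an oop access during parsing. The data travels
// with the ideal node through matching to the resulting MachNode, where
// the late barrier analysis reads it back.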
static void set_barrier_data(C2Access& access) {
  if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    if (access.decorators() & ON_WEAK_OOP_REF) {
      access.set_barrier_data(ZLoadBarrierWeak);
    } else {
      access.set_barrier_data(ZLoadBarrierStrong);
    }
  }
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, ArrayCopyPhase phase) const {
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL;  // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;        // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;            // size upper
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** range_fields = TypeTuple::fields(0);
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

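// Instance clones copy oop fields, so they are routed through a runtime
// call that can apply the necessary load barriers while copying. Clones
// of primitive arrays carry no oops and use the generic expansion.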
void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  if (ac->is_clone_array()) {
    // Clone primitive array
    BarrierSetC2::clone_at_expansion(phase, ac);
    return;
  }

  // Clone instance
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem = ac->in(TypeFunc::Memory);
  Node* const dst = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(ac->is_clone_inst(), "Sanity check");
  assert(size->bottom_type()->is_long(), "Should be long");

  // The native clone we are calling here expects the instance size in words
  // Add header/offset size to payload size to get instance size.
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(false) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           ZBarrierSetRuntime::clone_addr(),
                                           "ZBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src,
                                           dst,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

// == Dominating barrier elision ==

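// A load barrier may be elided when a dominating access has already left a
// healed (good) oop at the same address: either an earlier strongly-barriered
// load of that address, or an earlier store to it. This only holds if no
// safepoint can occur in between, since objects may be relocated at
// safepoints, invalidating the previously healed oop.
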
static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();
  Block_List worklist;
  Node_List mem_ops;
  Node_List barrier_loads;

  // Step 1 - Find accesses, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & ZLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
        if ((mach->barrier_data() & (ZLoadBarrierStrong | ZLoadBarrierNoKeepalive)) ==
            ZLoadBarrierStrong) {
          mem_ops.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if ((mach->barrier_data() & ZLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
        // Fall through
      case Op_StoreP:
        mem_ops.push(mach);
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses for each load
  for (uint i = 0; i < barrier_loads.size(); i++) {
    MachNode* const load = barrier_loads.at(i)->as_Mach();
    const TypePtr* load_adr_type = NULL;
    intptr_t load_offset = 0;
    const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type);
    Block* const load_block = cfg->get_block_for_node(load);
    const uint load_index = block_index(load_block, load);

    for (uint j = 0; j < mem_ops.size(); j++) {
      MachNode* mem = mem_ops.at(j)->as_Mach();
      const TypePtr* mem_adr_type = NULL;
      intptr_t mem_offset = 0;
      const Node* mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type);
      Block* mem_block = cfg->get_block_for_node(mem);
      uint mem_index = block_index(mem_block, mem);

      if (load_obj == NodeSentinel || mem_obj == NodeSentinel ||
          load_obj == NULL || mem_obj == NULL ||
          load_offset < 0 || mem_offset < 0) {
        continue;
      }

      if (mem_obj != load_obj || mem_offset != load_offset) {
        // Not the same addresses, not a candidate
        continue;
      }

      if (load_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      } else if (mem_block->dominates(load_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited;
        stack.push(load_block);
        bool safepoint_found = block_has_safepoint(load_block);
        while (!safepoint_found && stack.size() > 0) {
          Block* block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      }
    }
  }
}

// == Reduced spilling optimization ==

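// Computes, for each barrier stub, the set of registers that are live at
// the stub, using a backward dataflow pass over the CFG. The stub then
// only needs to save and restore the live registers around its runtime
// call, which reduces spilling.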
void ZBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  ZBarrierSetAssembler* const bs = ZBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != NULL) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}