/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/type.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

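// Per-compilation barrier state for ZGC's C2 barriers: collects the load
// barrier stubs to be emitted after the main code, and records the registers
// that are live at each barrier site so the stubs only need to preserve
// registers that are actually live.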
class ZBarrierSetC2State : public ResourceObj {
private:
  GrowableArray<ZLoadBarrierStubC2*>* _stubs;
  Node_Array                          _live;

public:
  ZBarrierSetC2State(Arena* arena) :
    _stubs(new (arena) GrowableArray<ZLoadBarrierStubC2*>(arena, 8,  0, NULL)),
    _live(arena) {}

  GrowableArray<ZLoadBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return NULL;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() == ZLoadBarrierElided) {
      // Don't need liveness data for nodes without barriers
      return NULL;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == NULL) {
      live = new (Compile::current()->comp_arena()->Amalloc_D(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, barrier_data);
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) :
    _node(node),
    _ref_addr(ref_addr),
    _ref(ref),
    _tmp(tmp),
    _barrier_data(barrier_data),
    _entry(),
    _continuation() {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

Register ZLoadBarrierStubC2::tmp() const {
  return _tmp;
}

address ZLoadBarrierStubC2::slow_path() const {
  DecoratorSet decorators = DECORATORS_NONE;
  if (_barrier_data & ZLoadBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

RegMask& ZLoadBarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

Label* ZLoadBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any label will do, as it will only act as
  // a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* ZLoadBarrierStubC2::continuation() {
  return &_continuation;
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

void ZBarrierSetC2::late_barrier_analysis() const {
  analyze_dominating_barriers();
  compute_liveness_at_stubs();
}

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
  }

  masm.flush();
}

int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

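  // Emit each stub into the scratch buffer and sum up the instruction sizes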
  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->output()->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
    size += cb.insts_size();
  }

  return size;
}

static void set_barrier_data(C2Access& access) {
  if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    if (access.decorators() & ON_WEAK_OOP_REF) {
      access.set_barrier_data(ZLoadBarrierWeak);
    } else {
      access.set_barrier_data(ZLoadBarrierStrong);
    }
  }
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, ArrayCopyPhase phase) const {
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL;  // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;        // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;            // size upper
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** range_fields = TypeTuple::fields(0);
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  if (ac->is_clone_array()) {
    // Clone primitive array
    BarrierSetC2::clone_at_expansion(phase, ac);
    return;
  }

  // Clone instance
  Node* const ctrl       = ac->in(TypeFunc::Control);
  Node* const mem        = ac->in(TypeFunc::Memory);
  Node* const dst        = ac->in(ArrayCopyNode::Dest);
  Node* const size       = ac->in(ArrayCopyNode::Length);

  assert(ac->is_clone_inst(), "Sanity check");
  assert(size->bottom_type()->is_long(), "Should be long");

  // The native clone we are calling here expects the instance size in words
  // Add header/offset size to payload size to get instance size.
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(false) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           ZBarrierSetRuntime::clone_addr(),
                                           "ZBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src,
                                           dst,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

// == Dominating barrier elision ==

static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

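// A strong load barrier can be elided if a dominating access to the same
// address has already been checked by a barrier (or stored a value), and no
// safepoint can be reached on any path between the two accesses, since a
// safepoint is where the GC may change phase and invalidate the loaded oop.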
void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();
  Block_List worklist;
  Node_List mem_ops;
  Node_List barrier_loads;

  // Step 1 - Find accesses, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & ZLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
        if ((mach->barrier_data() & (ZLoadBarrierStrong | ZLoadBarrierNoKeepalive)) ==
            ZLoadBarrierStrong) {
          mem_ops.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if ((mach->barrier_data() & ZLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
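        // Fall through - these atomic accesses also write to memory, so like
        // Op_StoreP they are tracked as potential dominating accesses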
      case Op_StoreP:
        mem_ops.push(mach);
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses for each load
  for (uint i = 0; i < barrier_loads.size(); i++) {
    MachNode* const load = barrier_loads.at(i)->as_Mach();
    const TypePtr* load_adr_type = NULL;
    intptr_t load_offset = 0;
    const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type);
    Block* const load_block = cfg->get_block_for_node(load);
    const uint load_index = block_index(load_block, load);

    for (uint j = 0; j < mem_ops.size(); j++) {
      MachNode* mem = mem_ops.at(j)->as_Mach();
      const TypePtr* mem_adr_type = NULL;
      intptr_t mem_offset = 0;
      const Node* mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type);
      Block* mem_block = cfg->get_block_for_node(mem);
      uint mem_index = block_index(mem_block, mem);

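      // Only compare accesses where both the base and the offset are known
      // and the offset is non-negative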
      if (load_obj == NodeSentinel || mem_obj == NodeSentinel ||
          load_obj == NULL || mem_obj == NULL ||
          load_offset < 0 || mem_offset < 0) {
        continue;
      }

      if (mem_obj != load_obj || mem_offset != load_offset) {
        // Not the same addresses, not a candidate
        continue;
      }

      if (load_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      } else if (mem_block->dominates(load_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited;
        stack.push(load_block);
        bool safepoint_found = block_has_safepoint(load_block);
        while (!safepoint_found && stack.size() > 0) {
          Block* block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      }
    }
  }
}

// == Reduced spilling optimization ==

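// Compute liveness at each load barrier stub using a backwards, iterative
// dataflow pass over the CFG. The register masks recorded here let the stubs
// save and restore only the registers that are actually live across the
// slow-path call, reducing spilling.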
void ZBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  ZBarrierSetAssembler* const bs = ZBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != NULL) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}