/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/type.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

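// Per-compilation barrier set state. It keeps track of the load barrier stubs
// that need to be emitted after the main code, and holds a lazily allocated
// live register mask for each barriered MachNode, used to limit how many
// registers have to be preserved around the barrier slow path.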
class ZBarrierSetC2State : public ResourceObj {
private:
  GrowableArray<ZLoadBarrierStubC2*>* _stubs;
  Node_Array                          _live;

public:
  ZBarrierSetC2State(Arena* arena) :
    _stubs(new (arena) GrowableArray<ZLoadBarrierStubC2*>(arena, 8, 0, NULL)),
    _live(arena) {}

  GrowableArray<ZLoadBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return NULL;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() != ZLoadBarrierStrong &&
        mach->barrier_data() != ZLoadBarrierWeak) {
      // Don't need liveness data for nodes without barriers
      return NULL;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == NULL) {
      live = new (Compile::current()->comp_arena()->Amalloc_D(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, weak);
  if (!Compile::current()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) :
    _node(node),
    _ref_addr(ref_addr),
    _ref(ref),
    _tmp(tmp),
    _weak(weak),
    _entry(),
    _continuation() {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

Register ZLoadBarrierStubC2::tmp() const {
  return _tmp;
}

address ZLoadBarrierStubC2::slow_path() const {
  const DecoratorSet decorators = _weak ? ON_WEAK_OOP_REF : ON_STRONG_OOP_REF;
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

RegMask& ZLoadBarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

Label* ZLoadBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any label will do, as it will only act as
  // a placeholder, so we return the _continuation label.
  return Compile::current()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* ZLoadBarrierStubC2::continuation() {
  return &_continuation;
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

void ZBarrierSetC2::late_barrier_analysis() const {
  analyze_dominating_barriers();
  compute_liveness_at_stubs();
}

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(Compile::MAX_inst_size) && cb.blob() == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
  }

  masm.flush();
}

int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->scratch_buffer_blob();
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
    size += cb.insts_size();
  }

  return size;
}

static void set_barrier_data(C2Access& access) {
  if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    if (access.decorators() & ON_WEAK_OOP_REF) {
      access.set_barrier_data(ZLoadBarrierWeak);
    } else {
      access.set_barrier_data(ZLoadBarrierStrong);
    }
  }
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, ArrayCopyPhase phase) const {
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL;  // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;        // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;            // size upper
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** range_fields = TypeTuple::fields(0);
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

// Node n is pointing to the start of oop payload - return base pointer
static Node* get_base_for_arraycopy_clone(PhaseMacroExpand* phase, Node* n) {
  // This would normally be handled by optimizations, but the type system
  // checks get confused when it thinks it already has a base pointer.
  const int base_offset = BarrierSetC2::arraycopy_payload_base_offset(false);

  if (n->is_AddP() &&
      n->in(AddPNode::Offset)->is_Con() &&
      n->in(AddPNode::Offset)->get_long() == base_offset) {
    assert(n->in(AddPNode::Base) == n->in(AddPNode::Address), "Sanity check");
    return n->in(AddPNode::Base);
  } else {
    return phase->basic_plus_adr(n, phase->longcon(-base_offset));
  }
}

void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  if (ac->is_clone_array()) {
    // Clone primitive array
    BarrierSetC2::clone_at_expansion(phase, ac);
    return;
  }

  // Clone instance
  assert(ac->is_clone_inst(), "Sanity check");

  Node* const ctrl       = ac->in(TypeFunc::Control);
  Node* const mem        = ac->in(TypeFunc::Memory);
  Node* const dst        = ac->in(ArrayCopyNode::Dest);
  Node* const src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* const dst_offset = ac->in(ArrayCopyNode::DestPos);
  Node* const size       = ac->in(ArrayCopyNode::Length);

  assert(src_offset == NULL, "Should be null");
  assert(dst_offset == NULL, "Should be null");
  assert(size->bottom_type()->is_long(), "Should be long");

  // The src and dst point to the object payload rather than the object base
  Node* const src_base = get_base_for_arraycopy_clone(phase, src);
  Node* const dst_base = get_base_for_arraycopy_clone(phase, dst);

  // The size must also be increased to match the instance size
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(false) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

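  // Expand the instance clone into a leaf call to the ZGC clone runtime stub,
  // which performs the copy on the GC's terms, so that oop fields in the
  // destination are handled by the GC rather than copied as raw bits.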
  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           ZBarrierSetRuntime::clone_addr(),
                                           "ZBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src_base,
                                           dst_base,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

// == Dominating barrier elision ==
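//
// A load that is dominated by another access to the same address, with no
// safepoint in between, does not need a load barrier: the dominating access
// already ensured the oop at that address is a good reference, and without an
// intervening safepoint the GC cannot have invalidated it. Such loads are
// tagged with ZLoadBarrierElided below.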

static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();
  Block_List worklist;
  Node_List mem_ops;
  Node_List barrier_loads;

  // Step 1 - Find accesses, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if (mach->barrier_data() == ZLoadBarrierStrong) {
          barrier_loads.push(mach);
        }
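        // Fall through - a barriered access is also a memory operation that
        // can dominate (and thereby elide the barrier of) a later load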
      case Op_StoreP:
        mem_ops.push(mach);
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses for each load
  for (uint i = 0; i < barrier_loads.size(); i++) {
    MachNode* const load = barrier_loads.at(i)->as_Mach();
    const TypePtr* load_adr_type = NULL;
    intptr_t load_offset = 0;
    const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type);
    Block* const load_block = cfg->get_block_for_node(load);
    const uint load_index = block_index(load_block, load);

    for (uint j = 0; j < mem_ops.size(); j++) {
      MachNode* mem = mem_ops.at(j)->as_Mach();
      const TypePtr* mem_adr_type = NULL;
      intptr_t mem_offset = 0;
      const Node* mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type);
      Block* mem_block = cfg->get_block_for_node(mem);
      uint mem_index = block_index(mem_block, mem);

      if (load_obj == NodeSentinel || mem_obj == NodeSentinel ||
          load_obj == NULL || mem_obj == NULL ||
          load_offset < 0 || mem_offset < 0) {
        continue;
      }

      if (mem_obj != load_obj || mem_offset != load_offset) {
        // Not the same addresses, not a candidate
        continue;
      }

      if (load_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      } else if (mem_block->dominates(load_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited(Thread::current()->resource_area());
        stack.push(load_block);
        bool safepoint_found = block_has_safepoint(load_block);
        while (!safepoint_found && stack.size() > 0) {
          Block* block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      }
    }
  }
}

// == Reduced spilling optimization ==
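//
// Compute the set of registers that are live at each load barrier stub, so the
// stub only has to preserve those registers around its slow-path call instead
// of conservatively spilling everything. This is a backward dataflow pass over
// the CFG: the live-out set of a block is the union of the live-in sets of its
// successors, and each block is then walked bottom-up, removing definitions
// and adding uses, until the per-block sets reach a fixpoint.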

void ZBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks());
  ZBarrierSetAssembler* const bs = ZBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != NULL) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}