1 /*
2 * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "classfile/vmSymbols.hpp"
27 #include "jfr/jfrEvents.hpp"
28 #include "jfr/support/jfrThreadId.hpp"
29 #include "memory/allocation.inline.hpp"
30 #include "memory/resourceArea.hpp"
31 #include "oops/markOop.hpp"
32 #include "oops/oop.inline.hpp"
33 #include "runtime/atomic.hpp"
34 #include "runtime/handles.inline.hpp"
35 #include "runtime/interfaceSupport.inline.hpp"
36 #include "runtime/mutexLocker.hpp"
37 #include "runtime/objectMonitor.hpp"
38 #include "runtime/objectMonitor.inline.hpp"
39 #include "runtime/orderAccess.hpp"
40 #include "runtime/osThread.hpp"
41 #include "runtime/safepointMechanism.inline.hpp"
42 #include "runtime/sharedRuntime.hpp"
43 #include "runtime/stubRoutines.hpp"
44 #include "runtime/thread.inline.hpp"
45 #include "services/threadService.hpp"
46 #include "utilities/dtrace.hpp"
47 #include "utilities/macros.hpp"
48 #include "utilities/preserveException.hpp"
49 #if INCLUDE_JFR
50 #include "jfr/support/jfrFlush.hpp"
51 #endif
52
53 #ifdef DTRACE_ENABLED
54
55 // Only bother with this argument setup if dtrace is available
56 // TODO-FIXME: probes should not fire when caller is _blocked. assert() accordingly.
57
58
59 #define DTRACE_MONITOR_PROBE_COMMON(obj, thread) \
60 char* bytes = NULL; \
61 int len = 0; \
62 jlong jtid = SharedRuntime::get_java_tid(thread); \
63 Symbol* klassname = ((oop)obj)->klass()->name(); \
64 if (klassname != NULL) { \
65 bytes = (char*)klassname->bytes(); \
66 len = klassname->utf8_length(); \
67 }
68
69 #define DTRACE_MONITOR_WAIT_PROBE(monitor, obj, thread, millis) \
70 { \
71 if (DTraceMonitorProbes) { \
72 DTRACE_MONITOR_PROBE_COMMON(obj, thread); \
73 HOTSPOT_MONITOR_WAIT(jtid, \
74 (monitor), bytes, len, (millis)); \
75 } \
76 }
77
78 #define HOTSPOT_MONITOR_contended__enter HOTSPOT_MONITOR_CONTENDED_ENTER
79 #define HOTSPOT_MONITOR_contended__entered HOTSPOT_MONITOR_CONTENDED_ENTERED
80 #define HOTSPOT_MONITOR_contended__exit HOTSPOT_MONITOR_CONTENDED_EXIT
81 #define HOTSPOT_MONITOR_notify HOTSPOT_MONITOR_NOTIFY
82 #define HOTSPOT_MONITOR_notifyAll HOTSPOT_MONITOR_NOTIFYALL
83
84 #define DTRACE_MONITOR_PROBE(probe, monitor, obj, thread) \
85 { \
86 if (DTraceMonitorProbes) { \
87 DTRACE_MONITOR_PROBE_COMMON(obj, thread); \
88 HOTSPOT_MONITOR_##probe(jtid, \
89 (uintptr_t)(monitor), bytes, len); \
90 } \
91 }
92
93 #else // ndef DTRACE_ENABLED
94
95 #define DTRACE_MONITOR_WAIT_PROBE(obj, thread, millis, mon) {;}
96 #define DTRACE_MONITOR_PROBE(probe, obj, thread, mon) {;}
97
98 #endif // ndef DTRACE_ENABLED
99
100 // Tunables ...
101 // The knob* variables are effectively final. Once set they should
102 // never be modified thereafter. Consider using __read_mostly with GCC.
103
104 int ObjectMonitor::Knob_SpinLimit = 5000; // derived by an external tool -
105
106 static int Knob_Bonus = 100; // spin success bonus
107 static int Knob_BonusB = 100; // spin success bonus
108 static int Knob_Penalty = 200; // spin failure penalty
109 static int Knob_Poverty = 1000;
110 static int Knob_FixedSpin = 0;
111 static int Knob_PreSpin = 10; // 20-100 likely better
112
113 DEBUG_ONLY(static volatile bool InitDone = false;)
114
115 // -----------------------------------------------------------------------------
116 // Theory of operations -- Monitors lists, thread residency, etc:
117 //
118 // * A thread acquires ownership of a monitor by successfully
119 // CAS()ing the _owner field from null to non-null.
120 //
121 // * Invariant: A thread appears on at most one monitor list --
122 // cxq, EntryList or WaitSet -- at any one time.
123 //
124 // * Contending threads "push" themselves onto the cxq with CAS
125 // and then spin/park.
126 //
127 // * After a contending thread eventually acquires the lock it must
128 // dequeue itself from either the EntryList or the cxq.
129 //
130 // * The exiting thread identifies and unparks an "heir presumptive"
131 // tentative successor thread on the EntryList. Critically, the
132 // exiting thread doesn't unlink the successor thread from the EntryList.
133 // After having been unparked, the wakee will recontend for ownership of
134 // the monitor. The successor (wakee) will either acquire the lock or
135 // re-park itself.
136 //
137 // Succession is provided for by a policy of competitive handoff.
138 // The exiting thread does _not_ grant or pass ownership to the
139 // successor thread. (This is also referred to as "handoff succession".)
140 // Instead the exiting thread releases ownership and possibly wakes
141 // a successor, so the successor can (re)compete for ownership of the lock.
142 // If the EntryList is empty but the cxq is populated the exiting
143 // thread will drain the cxq into the EntryList. It does so by
144 // detaching the cxq (installing null with CAS) and folding
145 // the threads from the cxq into the EntryList. The EntryList is
146 // doubly linked, while the cxq is singly linked because of the
147 // CAS-based "push" used to enqueue recently arrived threads (RATs).
148 //
149 // * Concurrency invariants:
150 //
151 // -- only the monitor owner may access or mutate the EntryList.
152 // The mutex property of the monitor itself protects the EntryList
153 // from concurrent interference.
154 // -- Only the monitor owner may detach the cxq.
155 //
156 // * The monitor entry list operations avoid locks, but strictly speaking
157 // they're not lock-free. Enter is lock-free, exit is not.
158 // For a description of 'Methods and apparatus providing non-blocking access
159 // to a resource,' see U.S. Pat. No. 7844973.
160 //
161 // * The cxq can have multiple concurrent "pushers" but only one concurrent
162 // detaching thread. This mechanism is immune to ABA corruption.
163 // More precisely, the CAS-based "push" onto cxq is ABA-oblivious.
164 //
165 // * Taken together, the cxq and the EntryList constitute or form a
166 // single logical queue of threads stalled trying to acquire the lock.
167 // We use two distinct lists to improve the odds of a constant-time
168 // dequeue operation after acquisition (in the ::enter() epilogue) and
169 // to reduce heat on the list ends. (c.f. Michael Scott's "2Q" algorithm).
170 // A key desideratum is to minimize queue & monitor metadata manipulation
171 // that occurs while holding the monitor lock -- that is, we want to
172 // minimize monitor lock hold times. Note that even a small amount of
173 // fixed spinning will greatly reduce the # of enqueue-dequeue operations
174 // on EntryList|cxq. That is, spinning relieves contention on the "inner"
175 // locks and monitor metadata.
176 //
177 // Cxq points to the set of Recently Arrived Threads attempting entry.
178 // Because we push threads onto _cxq with CAS, the RATs must take the form of
179 // a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when
180 // the unlocking thread notices that EntryList is null but _cxq is != null.
181 //
182 // The EntryList is ordered by the prevailing queue discipline and
183 // can be organized in any convenient fashion, such as a doubly-linked list or
184 // a circular doubly-linked list. Critically, we want insert and delete operations
185 // to operate in constant-time. If we need a priority queue then something akin
186 // to Solaris' sleepq would work nicely. Viz.,
187 // http://agg.eng/ws/on10_nightly/source/usr/src/uts/common/os/sleepq.c.
188 // Queue discipline is enforced at ::exit() time, when the unlocking thread
189 // drains the cxq into the EntryList, and orders or reorders the threads on the
190 // EntryList accordingly.
191 //
192 // Barring "lock barging", this mechanism provides fair cyclic ordering,
193 // somewhat similar to an elevator-scan.
194 //
195 // * The monitor synchronization subsystem avoids the use of native
196 // synchronization primitives except for the narrow platform-specific
197 // park-unpark abstraction. See the comments in os_solaris.cpp regarding
198 // the semantics of park-unpark. Put another way, this monitor implementation
199 // depends only on atomic operations and park-unpark. The monitor subsystem
200 // manages all RUNNING->BLOCKED and BLOCKED->READY transitions while the
201 // underlying OS manages the READY<->RUN transitions.
202 //
203 // * Waiting threads reside on the WaitSet list -- wait() puts
204 // the caller onto the WaitSet.
205 //
206 // * notify() or notifyAll() simply transfers threads from the WaitSet to
207 // either the EntryList or cxq. Subsequent exit() operations will
208 // unpark the notifyee. Unparking a notifee in notify() is inefficient -
209 // it's likely the notifyee would simply impale itself on the lock held
210 // by the notifier.
211 //
212 // * An interesting alternative is to encode cxq as (List,LockByte) where
213 // the LockByte is non-zero iff the monitor is owned. _owner is simply an auxiliary
214 // variable, like _recursions, in the scheme. The threads or Events that form
215 // the list would have to be aligned on 256-byte boundaries. A thread would
216 // try to acquire the lock or enqueue itself with CAS, but exiting threads
217 // could use a 1-0 protocol and simply STB to set the LockByte to 0.
218 // Note that this is *not* word-tearing, but it does presume that full-word
219 // CAS operations are coherent when intermixed with STB operations. That's true
220 // on most common processors.
221 //
222 // * See also http://blogs.sun.com/dave
223
224
225 void* ObjectMonitor::operator new (size_t size) throw() {
226 return AllocateHeap(size, mtInternal);
227 }
228 void* ObjectMonitor::operator new[] (size_t size) throw() {
229 return operator new (size);
230 }
231 void ObjectMonitor::operator delete(void* p) {
232 FreeHeap(p);
233 }
234 void ObjectMonitor::operator delete[] (void *p) {
235 operator delete(p);
236 }
237
238 // -----------------------------------------------------------------------------
239 // Enter support
240
241 void ObjectMonitor::enter(TRAPS) {
242 // The following code is ordered to check the most common cases first
243 // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
244 Thread * const Self = THREAD;
245
246 void * cur = Atomic::cmpxchg(Self, &_owner, (void*)NULL);
247 if (cur == NULL) {
248 // Either ASSERT _recursions == 0 or explicitly set _recursions = 0.
249 assert(_recursions == 0, "invariant");
250 assert(_owner == Self, "invariant");
251 return;
252 }
253
254 if (cur == Self) {
255 // TODO-FIXME: check for integer overflow! BUGID 6557169.
256 _recursions++;
257 return;
258 }
259
260 if (Self->is_lock_owned ((address)cur)) {
261 assert(_recursions == 0, "internal state error");
262 _recursions = 1;
263 // Commute owner from a thread-specific on-stack BasicLockObject address to
264 // a full-fledged "Thread *".
265 _owner = Self;
266 return;
267 }
268
269 // We've encountered genuine contention.
270 assert(Self->_Stalled == 0, "invariant");
271 Self->_Stalled = intptr_t(this);
272
273 // Try one round of spinning *before* enqueueing Self
274 // and before going through the awkward and expensive state
275 // transitions. The following spin is strictly optional ...
276 // Note that if we acquire the monitor from an initial spin
277 // we forgo posting JVMTI events and firing DTRACE probes.
278 if (TrySpin(Self) > 0) {
279 assert(_owner == Self, "invariant");
280 assert(_recursions == 0, "invariant");
281 assert(((oop)(object()))->mark() == markOopDesc::encode(this), "invariant");
282 Self->_Stalled = 0;
283 return;
284 }
285
286 assert(_owner != Self, "invariant");
287 assert(_succ != Self, "invariant");
288 assert(Self->is_Java_thread(), "invariant");
289 JavaThread * jt = (JavaThread *) Self;
290 assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
291 assert(jt->thread_state() != _thread_blocked, "invariant");
292 assert(this->object() != NULL, "invariant");
293 assert(_count >= 0, "invariant");
294
295 // Prevent deflation at STW-time. See deflate_idle_monitors() and is_busy().
296 // Ensure the object-monitor relationship remains stable while there's contention.
297 Atomic::inc(&_count);
298
299 JFR_ONLY(JfrConditionalFlushWithStacktrace<EventJavaMonitorEnter> flush(jt);)
300 EventJavaMonitorEnter event;
301 if (event.should_commit()) {
302 event.set_monitorClass(((oop)this->object())->klass());
303 event.set_address((uintptr_t)(this->object_addr()));
304 }
305
306 { // Change java thread status to indicate blocked on monitor enter.
307 JavaThreadBlockedOnMonitorEnterState jtbmes(jt, this);
308
309 Self->set_current_pending_monitor(this);
310
311 DTRACE_MONITOR_PROBE(contended__enter, this, object(), jt);
312 if (JvmtiExport::should_post_monitor_contended_enter()) {
313 JvmtiExport::post_monitor_contended_enter(jt, this);
314
315 // The current thread does not yet own the monitor and does not
316 // yet appear on any queues that would cause it to be made the successor.
317 // This means that the JVMTI_EVENT_MONITOR_CONTENDED_ENTER event
318 // handler cannot accidentally consume an unpark() meant for the
319 // ParkEvent associated with this ObjectMonitor.
320 }
321
322 OSThreadContendState osts(Self->osthread());
323 ThreadBlockInVM tbivm(jt);
324
325 // TODO-FIXME: change the following for(;;) loop to straight-line code.
326 for (;;) {
327 jt->set_suspend_equivalent();
328 // cleared by handle_special_suspend_equivalent_condition()
329 // or java_suspend_self()
330
331 EnterI(THREAD);
332
333 if (!ExitSuspendEquivalent(jt)) break;
334
335 // We have acquired the contended monitor, but while we were
336 // waiting another thread suspended us. We don't want to enter
337 // the monitor while suspended because that would surprise the
338 // thread that suspended us.
339 //
340 _recursions = 0;
341 _succ = NULL;
342 exit(false, Self);
343
344 jt->java_suspend_self();
345 }
346 Self->set_current_pending_monitor(NULL);
347
348 // We cleared the pending monitor info since we've just gotten past
349 // the enter-check-for-suspend dance and we now own the monitor free
350 // and clear, i.e., it is no longer pending. The ThreadBlockInVM
351 // destructor can go to a safepoint at the end of this block. If we
352 // do a thread dump during that safepoint, then this thread will show
353 // as having "-locked" the monitor, but the OS and java.lang.Thread
354 // states will still report that the thread is blocked trying to
355 // acquire it.
356 }
357
358 Atomic::dec(&_count);
359 assert(_count >= 0, "invariant");
360 Self->_Stalled = 0;
361
362 // Must either set _recursions = 0 or ASSERT _recursions == 0.
363 assert(_recursions == 0, "invariant");
364 assert(_owner == Self, "invariant");
365 assert(_succ != Self, "invariant");
366 assert(((oop)(object()))->mark() == markOopDesc::encode(this), "invariant");
367
368 // The thread -- now the owner -- is back in vm mode.
369 // Report the glorious news via TI,DTrace and jvmstat.
370 // The probe effect is non-trivial. All the reportage occurs
371 // while we hold the monitor, increasing the length of the critical
372 // section. Amdahl's parallel speedup law comes vividly into play.
373 //
374 // Another option might be to aggregate the events (thread local or
375 // per-monitor aggregation) and defer reporting until a more opportune
376 // time -- such as next time some thread encounters contention but has
377 // yet to acquire the lock. While that thread is spinning we
378 // could increment JVMStat counters, etc.
379
380 DTRACE_MONITOR_PROBE(contended__entered, this, object(), jt);
381 if (JvmtiExport::should_post_monitor_contended_entered()) {
382 JvmtiExport::post_monitor_contended_entered(jt, this);
383
384 // The current thread already owns the monitor and is not going to
385 // call park() for the remainder of the monitor enter protocol. So
386 // it doesn't matter if the JVMTI_EVENT_MONITOR_CONTENDED_ENTERED
387 // event handler consumed an unpark() issued by the thread that
388 // just exited the monitor.
389 }
390 if (event.should_commit()) {
391 event.set_previousOwner((uintptr_t)_previous_owner_tid);
392 event.commit();
393 }
394 OM_PERFDATA_OP(ContendedLockAttempts, inc());
395 }
396
397 // Caveat: TryLock() is not necessarily serializing if it returns failure.
398 // Callers must compensate as needed.
399
400 int ObjectMonitor::TryLock(Thread * Self) {
401 void * own = _owner;
402 if (own != NULL) return 0;
403 if (Atomic::replace_if_null(Self, &_owner)) {
404 // Either guarantee _recursions == 0 or set _recursions = 0.
405 assert(_recursions == 0, "invariant");
406 assert(_owner == Self, "invariant");
407 return 1;
408 }
409 // The lock had been free momentarily, but we lost the race to the lock.
410 // Interference -- the CAS failed.
411 // We can either return -1 or retry.
412 // Retry doesn't make as much sense because the lock was just acquired.
413 return -1;
414 }
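// Illustrative sketch (hypothetical, std::atomic model -- not HotSpot code):
// the test-and-test-and-set shape TryLock() uses. The plain load avoids a
// needless CAS (and cache-line upgrade) when the lock is visibly held.
#if 0
#include <atomic>

static bool tatas_try_lock(std::atomic<void*>& owner, void* self) {
  if (owner.load(std::memory_order_relaxed) != nullptr) {
    return false;                                         // test: looks held, skip the CAS
  }
  void* expected = nullptr;
  return owner.compare_exchange_strong(expected, self);   // test-and-set
}
#endif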
415
416 #define MAX_RECHECK_INTERVAL 1000
417
418 void ObjectMonitor::EnterI(TRAPS) {
419 Thread * const Self = THREAD;
420 assert(Self->is_Java_thread(), "invariant");
421 assert(((JavaThread *) Self)->thread_state() == _thread_blocked, "invariant");
422
423 // Try the lock - TATAS
424 if (TryLock (Self) > 0) {
425 assert(_succ != Self, "invariant");
426 assert(_owner == Self, "invariant");
427 assert(_Responsible != Self, "invariant");
428 return;
429 }
430
431 assert(InitDone, "Unexpectedly not initialized");
432
433 // We try one round of spinning *before* enqueueing Self.
434 //
435 // If the _owner is ready but OFFPROC we could use a YieldTo()
436 // operation to donate the remainder of this thread's quantum
437 // to the owner. This has subtle but beneficial affinity
438 // effects.
439
440 if (TrySpin(Self) > 0) {
441 assert(_owner == Self, "invariant");
442 assert(_succ != Self, "invariant");
443 assert(_Responsible != Self, "invariant");
444 return;
445 }
446
447 // The Spin failed -- Enqueue and park the thread ...
448 assert(_succ != Self, "invariant");
449 assert(_owner != Self, "invariant");
450 assert(_Responsible != Self, "invariant");
451
452 // Enqueue "Self" on ObjectMonitor's _cxq.
453 //
454 // Node acts as a proxy for Self.
455 // As an aside, if we were ever to rewrite the synchronization code mostly
456 // in Java, WaitNodes, ObjectMonitors, and Events would become 1st-class
457 // Java objects. This would avoid awkward lifecycle and liveness issues,
458 // as well as eliminate a subset of ABA issues.
459 // TODO: eliminate ObjectWaiter and enqueue either Threads or Events.
460
461 ObjectWaiter node(Self);
462 Self->_ParkEvent->reset();
463 node._prev = (ObjectWaiter *) 0xBAD;
464 node.TState = ObjectWaiter::TS_CXQ;
465
466 // Push "Self" onto the front of the _cxq.
467 // Once on cxq/EntryList, Self stays on-queue until it acquires the lock.
468 // Note that spinning tends to reduce the rate at which threads
469 // enqueue and dequeue on EntryList|cxq.
470 ObjectWaiter * nxt;
471 for (;;) {
472 node._next = nxt = _cxq;
473 if (Atomic::cmpxchg(&node, &_cxq, nxt) == nxt) break;
474
475 // Interference - the CAS failed because _cxq changed. Just retry.
476 // As an optional optimization we retry the lock.
477 if (TryLock (Self) > 0) {
478 assert(_succ != Self, "invariant");
479 assert(_owner == Self, "invariant");
480 assert(_Responsible != Self, "invariant");
481 return;
482 }
483 }
484
485 // Check for cxq|EntryList edge transition to non-null. This indicates
486 // the onset of contention. While contention persists exiting threads
487 // will use a ST:MEMBAR:LD 1-1 exit protocol. When contention abates exit
488 // operations revert to the faster 1-0 mode. This enter operation may interleave
489 // (race) a concurrent 1-0 exit operation, resulting in stranding, so we
490 // arrange for one of the contending threads to use a timed park() operation
491 // to detect and recover from the race. (Stranding is a form of progress failure
492 // where the monitor is unlocked but all the contending threads remain parked).
493 // That is, at least one of the contended threads will periodically poll _owner.
494 // One of the contending threads will become the designated "Responsible" thread.
495 // The Responsible thread uses a timed park instead of a normal indefinite park
496 // operation -- it periodically wakes and checks for and recovers from potential
497 // strandings admitted by 1-0 exit operations. We need at most one Responsible
498 // thread per-monitor at any given moment. Only threads on cxq|EntryList may
499 // be responsible for a monitor.
500 //
501 // Currently, one of the contended threads takes on the added role of "Responsible".
502 // A viable alternative would be to use a dedicated "stranding checker" thread
503 // that periodically iterated over all the threads (or active monitors) and unparked
504 // successors where there was risk of stranding. This would help eliminate the
505 // timer scalability issues we see on some platforms as we'd only have one thread
506 // -- the checker -- parked on a timer.
507
508 if (nxt == NULL && _EntryList == NULL) {
509 // Try to assume the role of responsible thread for the monitor.
510 // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=Self }
511 Atomic::replace_if_null(Self, &_Responsible);
512 }
513
514 // The lock might have been released while this thread was occupied queueing
515 // itself onto _cxq. To close the race and avoid "stranding" and
516 // progress-liveness failure we must resample-retry _owner before parking.
517 // Note the Dekker/Lamport duality: ST cxq; MEMBAR; LD Owner.
518 // In this case the ST-MEMBAR is accomplished with CAS().
519 //
520 // TODO: Defer all thread state transitions until park-time.
521 // Since state transitions are heavy and inefficient we'd like
522 // to defer the state transitions until absolutely necessary,
523 // and in doing so avoid some transitions ...
524
525 int nWakeups = 0;
526 int recheckInterval = 1;
527
528 for (;;) {
529
530 if (TryLock(Self) > 0) break;
531 assert(_owner != Self, "invariant");
532
533 // park self
534 if (_Responsible == Self) {
535 Self->_ParkEvent->park((jlong) recheckInterval);
536 // Increase the recheckInterval, but clamp the value.
537 recheckInterval *= 8;
538 if (recheckInterval > MAX_RECHECK_INTERVAL) {
539 recheckInterval = MAX_RECHECK_INTERVAL;
540 }
541 } else {
542 Self->_ParkEvent->park();
543 }
544
545 if (TryLock(Self) > 0) break;
546
547 // The lock is still contested.
548 // Keep a tally of the # of futile wakeups.
549 // Note that the counter is not protected by a lock or updated by atomics.
550 // That is by design - we trade "lossy" counters which are exposed to
551 // races during updates for a lower probe effect.
552
553 // This PerfData object can be used in parallel with a safepoint.
554 // See the work around in PerfDataManager::destroy().
555 OM_PERFDATA_OP(FutileWakeups, inc());
556 ++nWakeups;
557
558 // Assuming this is not a spurious wakeup we'll normally find _succ == Self.
559 // We can defer clearing _succ until after the spin completes
560 // TrySpin() must tolerate being called with _succ == Self.
561 // Try yet another round of adaptive spinning.
562 if (TrySpin(Self) > 0) break;
563
564 // We can find that we were unpark()ed and redesignated _succ while
565 // we were spinning. That's harmless. If we iterate and call park(),
566 // park() will consume the event and return immediately and we'll
567 // just spin again. This pattern can repeat, leaving _succ to simply
568 // spin on a CPU.
569
570 if (_succ == Self) _succ = NULL;
571
572 // Invariant: after clearing _succ a thread *must* retry _owner before parking.
573 OrderAccess::fence();
574 }
575
576 // Egress :
577 // Self has acquired the lock -- Unlink Self from the cxq or EntryList.
578 // Normally we'll find Self on the EntryList .
579 // From the perspective of the lock owner (this thread), the
580 // EntryList is stable and cxq is prepend-only.
581 // The head of cxq is volatile but the interior is stable.
582 // In addition, Self.TState is stable.
583
584 assert(_owner == Self, "invariant");
585 assert(object() != NULL, "invariant");
586 // I'd like to write:
587 // guarantee (((oop)(object()))->mark() == markOopDesc::encode(this), "invariant") ;
588 // but as we're at a safepoint that's not safe.
589
590 UnlinkAfterAcquire(Self, &node);
591 if (_succ == Self) _succ = NULL;
592
593 assert(_succ != Self, "invariant");
594 if (_Responsible == Self) {
595 _Responsible = NULL;
596 OrderAccess::fence(); // Dekker pivot-point
597
598 // We may leave threads on cxq|EntryList without a designated
599 // "Responsible" thread. This is benign. When this thread subsequently
600 // exits the monitor it can "see" such preexisting "old" threads --
601 // threads that arrived on the cxq|EntryList before the fence, above --
602 // by LDing cxq|EntryList. Newly arrived threads -- that is, threads
603 // that arrive on cxq after the ST:MEMBAR, above -- will set Responsible
604 // non-null and elect a new "Responsible" timer thread.
605 //
606 // This thread executes:
607 // ST Responsible=null; MEMBAR (in enter epilogue - here)
608 // LD cxq|EntryList (in subsequent exit)
609 //
610 // Entering threads in the slow/contended path execute:
611 // ST cxq=nonnull; MEMBAR; LD Responsible (in enter prolog)
612 // The (ST cxq; MEMBAR) is accomplished with CAS().
613 //
614 // The MEMBAR, above, prevents the LD of cxq|EntryList in the subsequent
615 // exit operation from floating above the ST Responsible=null.
616 }
617
618 // We've acquired ownership with CAS().
619 // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics.
620 // But since the CAS() this thread may have also stored into _succ,
621 // EntryList, cxq or Responsible. These meta-data updates must be
622 // visible __before this thread subsequently drops the lock.
623 // Consider what could occur if we didn't enforce this constraint --
624 // STs to monitor meta-data and user-data could reorder with (become
625 // visible after) the ST in exit that drops ownership of the lock.
626 // Some other thread could then acquire the lock, but observe inconsistent
627 // or old monitor meta-data and heap data. That violates the JMM.
628 // To that end, the 1-0 exit() operation must have at least STST|LDST
629 // "release" barrier semantics. Specifically, there must be at least a
630 // STST|LDST barrier in exit() before the ST of null into _owner that drops
631 // the lock. The barrier ensures that changes to monitor meta-data and data
632 // protected by the lock will be visible before we release the lock, and
633 // therefore before some other thread (CPU) has a chance to acquire the lock.
634 // See also: http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
635 //
636 // Critically, any prior STs to _succ or EntryList must be visible before
637 // the ST of null into _owner in the *subsequent* (following) corresponding
638 // monitorexit. Recall too, that in 1-0 mode monitorexit does not necessarily
639 // execute a serializing instruction.
640
641 return;
642 }
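// Illustrative sketch (hypothetical helpers, not HotSpot code) of the
// "Responsible" thread's loop above: instead of an indefinite park it uses a
// timed park whose interval grows geometrically but is clamped, so a
// stranding admitted by a 1-0 exit is eventually noticed. try_lock() and
// park_for_millis() stand in for TryLock(Self) and Self->_ParkEvent->park(ms).
#if 0
static void responsible_wait_for_lock(bool (*try_lock)(), void (*park_for_millis)(long)) {
  long recheck_interval = 1;               // start with a very short nap
  while (!try_lock()) {
    park_for_millis(recheck_interval);     // timed park: wake up and re-poll the owner
    recheck_interval *= 8;                 // back off geometrically ...
    if (recheck_interval > 1000) {         // ... clamped (cf. MAX_RECHECK_INTERVAL)
      recheck_interval = 1000;
    }
  }
}
#endif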
643
644 // ReenterI() is a specialized inline form of the latter half of the
645 // contended slow-path from EnterI(). We use ReenterI() only for
646 // monitor reentry in wait().
647 //
648 // In the future we should reconcile EnterI() and ReenterI().
649
650 void ObjectMonitor::ReenterI(Thread * Self, ObjectWaiter * SelfNode) {
651 assert(Self != NULL, "invariant");
652 assert(SelfNode != NULL, "invariant");
653 assert(SelfNode->_thread == Self, "invariant");
654 assert(_waiters > 0, "invariant");
655 assert(((oop)(object()))->mark() == markOopDesc::encode(this), "invariant");
656 assert(((JavaThread *)Self)->thread_state() != _thread_blocked, "invariant");
657 JavaThread * jt = (JavaThread *) Self;
658
659 int nWakeups = 0;
660 for (;;) {
661 ObjectWaiter::TStates v = SelfNode->TState;
662 guarantee(v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant");
663 assert(_owner != Self, "invariant");
664
665 if (TryLock(Self) > 0) break;
666 if (TrySpin(Self) > 0) break;
667
668 // State transition wrappers around park() ...
669 // ReenterI() wisely defers state transitions until
670 // it's clear we must park the thread.
671 {
672 OSThreadContendState osts(Self->osthread());
673 ThreadBlockInVM tbivm(jt);
674
675 // cleared by handle_special_suspend_equivalent_condition()
676 // or java_suspend_self()
677 jt->set_suspend_equivalent();
678 Self->_ParkEvent->park();
679
680 // were we externally suspended while we were waiting?
681 for (;;) {
682 if (!ExitSuspendEquivalent(jt)) break;
683 if (_succ == Self) { _succ = NULL; OrderAccess::fence(); }
684 jt->java_suspend_self();
685 jt->set_suspend_equivalent();
686 }
687 }
688
689 // Try again, but just so we distinguish between futile wakeups and
690 // successful wakeups. The following test isn't algorithmically
691 // necessary, but it helps us maintain sensible statistics.
692 if (TryLock(Self) > 0) break;
693
694 // The lock is still contested.
695 // Keep a tally of the # of futile wakeups.
696 // Note that the counter is not protected by a lock or updated by atomics.
697 // That is by design - we trade "lossy" counters which are exposed to
698 // races during updates for a lower probe effect.
699 ++nWakeups;
700
701 // Assuming this is not a spurious wakeup we'll normally
702 // find that _succ == Self.
703 if (_succ == Self) _succ = NULL;
704
705 // Invariant: after clearing _succ a contending thread
706 // *must* retry _owner before parking.
707 OrderAccess::fence();
708
709 // This PerfData object can be used in parallel with a safepoint.
710 // See the work around in PerfDataManager::destroy().
711 OM_PERFDATA_OP(FutileWakeups, inc());
712 }
713
714 // Self has acquired the lock -- Unlink Self from the cxq or EntryList .
715 // Normally we'll find Self on the EntryList.
716 // Unlinking from the EntryList is constant-time and atomic-free.
717 // From the perspective of the lock owner (this thread), the
718 // EntryList is stable and cxq is prepend-only.
719 // The head of cxq is volatile but the interior is stable.
720 // In addition, Self.TState is stable.
721
722 assert(_owner == Self, "invariant");
723 assert(((oop)(object()))->mark() == markOopDesc::encode(this), "invariant");
724 UnlinkAfterAcquire(Self, SelfNode);
725 if (_succ == Self) _succ = NULL;
726 assert(_succ != Self, "invariant");
727 SelfNode->TState = ObjectWaiter::TS_RUN;
728 OrderAccess::fence(); // see comments at the end of EnterI()
729 }
730
731 // By convention we unlink a contending thread from EntryList|cxq immediately
732 // after the thread acquires the lock in ::enter(). Equally, we could defer
733 // unlinking the thread until ::exit()-time.
734
735 void ObjectMonitor::UnlinkAfterAcquire(Thread *Self, ObjectWaiter *SelfNode) {
736 assert(_owner == Self, "invariant");
737 assert(SelfNode->_thread == Self, "invariant");
738
739 if (SelfNode->TState == ObjectWaiter::TS_ENTER) {
740 // Normal case: remove Self from the DLL EntryList .
741 // This is a constant-time operation.
742 ObjectWaiter * nxt = SelfNode->_next;
743 ObjectWaiter * prv = SelfNode->_prev;
744 if (nxt != NULL) nxt->_prev = prv;
745 if (prv != NULL) prv->_next = nxt;
746 if (SelfNode == _EntryList) _EntryList = nxt;
747 assert(nxt == NULL || nxt->TState == ObjectWaiter::TS_ENTER, "invariant");
748 assert(prv == NULL || prv->TState == ObjectWaiter::TS_ENTER, "invariant");
749 } else {
750 assert(SelfNode->TState == ObjectWaiter::TS_CXQ, "invariant");
751 // Inopportune interleaving -- Self is still on the cxq.
752 // This usually means the enqueue of self raced an exiting thread.
753 // Normally we'll find Self near the front of the cxq, so
754 // dequeueing is typically fast. If need be we can accelerate
755 // this with some MCS/CHL-like bidirectional list hints and advisory
756 // back-links so dequeueing from the interior will normally operate
757 // in constant-time.
758 // Dequeue Self from either the head (with CAS) or from the interior
759 // with a linear-time scan and normal non-atomic memory operations.
760 // CONSIDER: if Self is on the cxq then simply drain cxq into EntryList
761 // and then unlink Self from EntryList. We have to drain eventually,
762 // so it might as well be now.
763
764 ObjectWaiter * v = _cxq;
765 assert(v != NULL, "invariant");
766 if (v != SelfNode || Atomic::cmpxchg(SelfNode->_next, &_cxq, v) != v) {
767 // The CAS above can fail from interference IFF a "RAT" arrived.
768 // In that case Self must be in the interior and can no longer be
769 // at the head of cxq.
770 if (v == SelfNode) {
771 assert(_cxq != v, "invariant");
772 v = _cxq; // CAS above failed - start scan at head of list
773 }
774 ObjectWaiter * p;
775 ObjectWaiter * q = NULL;
776 for (p = v; p != NULL && p != SelfNode; p = p->_next) {
777 q = p;
778 assert(p->TState == ObjectWaiter::TS_CXQ, "invariant");
779 }
780 assert(v != SelfNode, "invariant");
781 assert(p == SelfNode, "Node not found on cxq");
782 assert(p != _cxq, "invariant");
783 assert(q != NULL, "invariant");
784 assert(q->_next == p, "invariant");
785 q->_next = p->_next;
786 }
787 }
788
789 #ifdef ASSERT
790 // Diagnostic hygiene ...
791 SelfNode->_prev = (ObjectWaiter *) 0xBAD;
792 SelfNode->_next = (ObjectWaiter *) 0xBAD;
793 SelfNode->TState = ObjectWaiter::TS_RUN;
794 #endif
795 }
796
797 // -----------------------------------------------------------------------------
798 // Exit support
799 //
800 // exit()
801 // ~~~~~~
802 // Note that the collector can't reclaim the objectMonitor or deflate
803 // the object out from underneath the thread calling ::exit() as the
804 // thread calling ::exit() never transitions to a stable state.
805 // This inhibits GC, which in turn inhibits asynchronous (and
806 // inopportune) reclamation of "this".
807 //
808 // We'd like to assert that: (THREAD->thread_state() != _thread_blocked) ;
809 // There's one exception to the claim above, however. EnterI() can call
810 // exit() to drop a lock if the acquirer has been externally suspended.
811 // In that case exit() is called with _thread_state as _thread_blocked,
812 // but the monitor's _count field is > 0, which inhibits reclamation.
813 //
814 // 1-0 exit
815 // ~~~~~~~~
816 // ::exit() uses a canonical 1-1 idiom with a MEMBAR although some of
817 // the fast-path operators have been optimized so the common ::exit()
818 // operation is 1-0, e.g., see macroAssembler_x86.cpp: fast_unlock().
819 // The code emitted by fast_unlock() elides the usual MEMBAR. This
820 // greatly improves latency -- MEMBAR and CAS having considerable local
821 // latency on modern processors -- but at the cost of "stranding". Absent the
822 // MEMBAR, a thread in fast_unlock() can race a thread in the slow
823 // ::enter() path, resulting in the entering thread being stranded
824 // and a progress-liveness failure. Stranding is extremely rare.
825 // We use timers (timed park operations) & periodic polling to detect
826 // and recover from stranding. Potentially stranded threads periodically
827 // wake up and poll the lock. See the usage of the _Responsible variable.
828 //
829 // The CAS() in enter provides for safety and exclusion, while the CAS or
830 // MEMBAR in exit provides for progress and avoids stranding. 1-0 locking
831 // eliminates the CAS/MEMBAR from the exit path, but it admits stranding.
832 // We detect and recover from stranding with timers.
833 //
834 // If a thread transiently strands it'll park until (a) another
835 // thread acquires the lock and then drops the lock, at which time the
836 // exiting thread will notice and unpark the stranded thread, or, (b)
837 // the timer expires. If the lock is high traffic then the stranding latency
838 // will be low due to (a). If the lock is low traffic then the odds of
839 // stranding are lower, although the worst-case stranding latency
840 // is longer. Critically, we don't want to put excessive load in the
841 // platform's timer subsystem. We want to minimize both the timer injection
842 // rate (timers created/sec) as well as the number of timers active at
843 // any one time. (more precisely, we want to minimize timer-seconds, which is
844 // the integral of the # of active timers at any instant over time).
845 // Both impinge on OS scalability. Given that, at most one thread parked on
846 // a monitor will use a timer.
847 //
848 // There is also the risk of a futile wake-up. If we drop the lock
849 // another thread can reacquire the lock immediately, and we can
850 // then wake a thread unnecessarily. This is benign, and we've
851 // structured the code so the windows are short and the frequency
852 // of such futile wakeups is low.
853
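// Illustrative sketch (hypothetical names, std::atomic model -- not HotSpot
// code) of the exit shape described above: drop the lock with a release
// store, fence, and only then decide whether a successor must be woken. If a
// waiter is queued and no heir presumptive exists, the exiting thread must
// reacquire ownership before it may touch the entry list and wake someone.
#if 0
#include <atomic>

struct DemoExitMonitor {
  std::atomic<void*> owner{nullptr};
  std::atomic<void*> entry_list{nullptr};
  std::atomic<void*> cxq{nullptr};
  std::atomic<void*> succ{nullptr};

  void exit(void* self) {
    owner.store(nullptr, std::memory_order_release);      // 1-0: drop the lock
    std::atomic_thread_fence(std::memory_order_seq_cst);  // ST owner vs LD of the queues
    if ((entry_list.load() == nullptr && cxq.load() == nullptr) ||
        succ.load() != nullptr) {
      return;                       // nobody waiting, or an heir presumptive already exists
    }
    void* expected = nullptr;
    if (!owner.compare_exchange_strong(expected, self)) {
      return;                       // someone else took the lock; succession is now its duty
    }
    // ... owner again: drain cxq / pick a successor / unpark it, then exit again ...
  }
};
#endif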
854 void ObjectMonitor::exit(bool not_suspended, TRAPS) {
855 Thread * const Self = THREAD;
856 if (THREAD != _owner) {
857 if (THREAD->is_lock_owned((address) _owner)) {
858 // Transmute _owner from a BasicLock pointer to a Thread address.
859 // We don't need to hold _mutex for this transition.
860 // Non-null to Non-null is safe as long as all readers can
861 // tolerate either flavor.
862 assert(_recursions == 0, "invariant");
863 _owner = THREAD;
864 _recursions = 0;
865 } else {
866 // Apparent unbalanced locking ...
867 // Naively we'd like to throw IllegalMonitorStateException.
868 // As a practical matter we can neither allocate nor throw an
869 // exception as ::exit() can be called from leaf routines.
870 // see x86_32.ad Fast_Unlock() and the I1 and I2 properties.
871 // Upon deeper reflection, however, in a properly run JVM the only
872 // way we should encounter this situation is in the presence of
873 // unbalanced JNI locking. TODO: CheckJNICalls.
874 // See also: CR4414101
875 assert(false, "Non-balanced monitor enter/exit! Likely JNI locking");
876 return;
877 }
878 }
879
880 if (_recursions != 0) {
881 _recursions--; // this is simple recursive enter
882 return;
883 }
884
885 // Invariant: after setting Responsible=null a thread must execute
886 // a MEMBAR or other serializing instruction before fetching EntryList|cxq.
887 _Responsible = NULL;
888
889 #if INCLUDE_JFR
890 // get the owner's thread id for the MonitorEnter event
891 // if it is enabled and the thread isn't suspended
892 if (not_suspended && EventJavaMonitorEnter::is_enabled()) {
893 _previous_owner_tid = JFR_THREAD_ID(Self);
894 }
895 #endif
896
897 for (;;) {
898 assert(THREAD == _owner, "invariant");
899
900 // release semantics: prior loads and stores from within the critical section
901 // must not float (reorder) past the following store that drops the lock.
902 // On SPARC that requires MEMBAR #loadstore|#storestore.
903 // But of course in TSO #loadstore|#storestore is not required.
904 OrderAccess::release_store(&_owner, (void*)NULL); // drop the lock
905 OrderAccess::storeload(); // See if we need to wake a successor
906 if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
907 return;
908 }
909 // Other threads are blocked trying to acquire the lock.
910
911 // Normally the exiting thread is responsible for ensuring succession,
912 // but if other successors are ready or other entering threads are spinning
913 // then this thread can simply store NULL into _owner and exit without
914 // waking a successor. The existence of spinners or ready successors
915 // guarantees proper succession (liveness). Responsibility passes to the
916 // ready or running successors. The exiting thread delegates the duty.
917 // More precisely, if a successor already exists this thread is absolved
918 // of the responsibility of waking (unparking) one.
919 //
920 // The _succ variable is critical to reducing futile wakeup frequency.
921 // _succ identifies the "heir presumptive" thread that has been made
922 // ready (unparked) but that has not yet run. We need only one such
923 // successor thread to guarantee progress.
924 // See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
925 // section 3.3 "Futile Wakeup Throttling" for details.
926 //
927 // Note that spinners in Enter() also set _succ non-null.
928 // In the current implementation spinners opportunistically set
929 // _succ so that exiting threads might avoid waking a successor.
930 // Another less appealing alternative would be for the exiting thread
931 // to drop the lock and then spin briefly to see if a spinner managed
932 // to acquire the lock. If so, the exiting thread could exit
933 // immediately without waking a successor, otherwise the exiting
934 // thread would need to dequeue and wake a successor.
935 // (Note that we'd need to make the post-drop spin short, but no
936 // shorter than the worst-case round-trip cache-line migration time.
937 // The dropped lock needs to become visible to the spinner, and then
938 // the acquisition of the lock by the spinner must become visible to
939 // the exiting thread).
940
941 // It appears that an heir-presumptive (successor) must be made ready.
942 // Only the current lock owner can manipulate the EntryList or
943 // drain _cxq, so we need to reacquire the lock. If we fail
944 // to reacquire the lock the responsibility for ensuring succession
945 // falls to the new owner.
946 //
947 if (!Atomic::replace_if_null(THREAD, &_owner)) {
948 return;
949 }
950
951 guarantee(_owner == THREAD, "invariant");
952
953 ObjectWaiter * w = NULL;
954
955 w = _EntryList;
956 if (w != NULL) {
957 // I'd like to write: guarantee (w->_thread != Self).
958 // But in practice an exiting thread may find itself on the EntryList.
959 // Let's say thread T1 calls O.wait(). Wait() enqueues T1 on O's waitset and
960 // then calls exit(). Exit releases the lock by setting O._owner to NULL.
961 // Let's say T1 then stalls. T2 acquires O and calls O.notify(). The
962 // notify() operation moves T1 from O's waitset to O's EntryList. T2 then
963 // releases the lock "O". T2 resumes immediately after the ST of null into
964 // _owner, above. T2 notices that the EntryList is populated, so it
965 // reacquires the lock and then finds itself on the EntryList.
966 // Given all that, we have to tolerate the circumstance where "w" is
967 // associated with Self.
968 assert(w->TState == ObjectWaiter::TS_ENTER, "invariant");
969 ExitEpilog(Self, w);
970 return;
971 }
972
973 // If we find that both _cxq and EntryList are null then just
974 // re-run the exit protocol from the top.
975 w = _cxq;
976 if (w == NULL) continue;
977
978 // Drain _cxq into EntryList - bulk transfer.
979 // First, detach _cxq.
980 // The following loop is tantamount to: w = swap(&cxq, NULL)
981 for (;;) {
982 assert(w != NULL, "Invariant");
983 ObjectWaiter * u = Atomic::cmpxchg((ObjectWaiter*)NULL, &_cxq, w);
984 if (u == w) break;
985 w = u;
986 }
987
988 assert(w != NULL, "invariant");
989 assert(_EntryList == NULL, "invariant");
990
991 // Convert the LIFO SLL anchored by _cxq into a DLL.
992 // The list reorganization step operates in O(LENGTH(w)) time.
993 // It's critical that this step operate quickly as
994 // "Self" still holds the outer-lock, restricting parallelism
995 // and effectively lengthening the critical section.
996 // Invariant: s chases t chases u.
997 // TODO-FIXME: consider changing EntryList from a DLL to a CDLL so
998 // we have faster access to the tail.
999
1000 _EntryList = w;
1001 ObjectWaiter * q = NULL;
1002 ObjectWaiter * p;
1003 for (p = w; p != NULL; p = p->_next) {
1004 guarantee(p->TState == ObjectWaiter::TS_CXQ, "Invariant");
1005 p->TState = ObjectWaiter::TS_ENTER;
1006 p->_prev = q;
1007 q = p;
1008 }
1009
1010 // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = NULL
1011 // The MEMBAR is satisfied by the release_store() operation in ExitEpilog().
1012
1013 // See if we can abdicate to a spinner instead of waking a thread.
1014 // A primary goal of the implementation is to reduce the
1015 // context-switch rate.
1016 if (_succ != NULL) continue;
1017
1018 w = _EntryList;
1019 if (w != NULL) {
1020 guarantee(w->TState == ObjectWaiter::TS_ENTER, "invariant");
1021 ExitEpilog(Self, w);
1022 return;
1023 }
1024 }
1025 }
1026
1027 // ExitSuspendEquivalent:
1028 // A faster alternate to handle_special_suspend_equivalent_condition()
1029 //
1030 // handle_special_suspend_equivalent_condition() unconditionally
1031 // acquires the SR_lock. On some platforms uncontended MutexLocker()
1032 // operations have high latency. Note that in ::enter() we call HSSEC
1033 // while holding the monitor, so we effectively lengthen the critical sections.
1034 //
1035 // There are a number of possible solutions:
1036 //
1037 // A. To ameliorate the problem we might also defer state transitions
1038 // to as late as possible -- just prior to parking.
1039 // Given that, we'd call HSSEC after having returned from park(),
1040 // but before attempting to acquire the monitor. This is only a
1041 // partial solution. It avoids calling HSSEC while holding the
1042 // monitor (good), but it still increases successor reacquisition latency --
1043 // the interval between unparking a successor and the time the successor
1044 // resumes and retries the lock. See ReenterI(), which defers state transitions.
1045 // If we use this technique we can also avoid EnterI()-exit() loop
1046 // in ::enter() where we iteratively drop the lock and then attempt
1047 // to reacquire it after suspending.
1048 //
1049 // B. In the future we might fold all the suspend bits into a
1050 // composite per-thread suspend flag and then update it with CAS().
1051 // Alternately, a Dekker-like mechanism with multiple variables
1052 // would suffice:
1053 // ST Self->_suspend_equivalent = false
1054 // MEMBAR
1055 // LD Self->_suspend_flags
1056
1057 bool ObjectMonitor::ExitSuspendEquivalent(JavaThread * jSelf) {
1058 return jSelf->handle_special_suspend_equivalent_condition();
1059 }
1060
1061
1062 void ObjectMonitor::ExitEpilog(Thread * Self, ObjectWaiter * Wakee) {
1063 assert(_owner == Self, "invariant");
1064
1065 // Exit protocol:
1066 // 1. ST _succ = wakee
1067 // 2. membar #loadstore|#storestore;
1068 // 3. ST _owner = NULL
1069 // 4. unpark(wakee)
1070
1071 _succ = Wakee->_thread;
1072 ParkEvent * Trigger = Wakee->_event;
1073
1074 // Hygiene -- once we've set _owner = NULL we can't safely dereference Wakee again.
1075 // The thread associated with Wakee may have grabbed the lock and "Wakee" may be
1076 // out-of-scope (non-extant).
1077 Wakee = NULL;
1078
1079 // Drop the lock
1080 OrderAccess::release_store(&_owner, (void*)NULL);
1081 OrderAccess::fence(); // ST _owner vs LD in unpark()
1082
1083 DTRACE_MONITOR_PROBE(contended__exit, this, object(), Self);
1084 Trigger->unpark();
1085
1086 // Maintain stats and report events to JVMTI
1087 OM_PERFDATA_OP(Parks, inc());
1088 }
1089
1090
1091 // -----------------------------------------------------------------------------
1092 // Class Loader deadlock handling.
1093 //
1094 // complete_exit exits a lock returning recursion count
1095 // complete_exit/reenter operate as a wait without waiting
1096 // complete_exit requires an inflated monitor
1097 // The _owner field is not always the Thread addr even with an
1098 // inflated monitor, e.g. the monitor can be inflated by a non-owning
1099 // thread due to contention.
1100 intptr_t ObjectMonitor::complete_exit(TRAPS) {
1101 Thread * const Self = THREAD;
1102 assert(Self->is_Java_thread(), "Must be Java thread!");
1103 JavaThread *jt = (JavaThread *)THREAD;
1104
1105 assert(InitDone, "Unexpectedly not initialized");
1106
1107 if (THREAD != _owner) {
1108 if (THREAD->is_lock_owned ((address)_owner)) {
1109 assert(_recursions == 0, "internal state error");
1110 _owner = THREAD; // Convert from basiclock addr to Thread addr
1111 _recursions = 0;
1112 }
1113 }
1114
1115 guarantee(Self == _owner, "complete_exit not owner");
1116 intptr_t save = _recursions; // record the old recursion count
1117 _recursions = 0; // set the recursion level to be 0
1118 exit(true, Self); // exit the monitor
1119 guarantee(_owner != Self, "invariant");
1120 return save;
1121 }
1122
1123 // reenter() enters a lock and sets recursion count
1124 // complete_exit/reenter operate as a wait without waiting
1125 void ObjectMonitor::reenter(intptr_t recursions, TRAPS) {
1126 Thread * const Self = THREAD;
1127 assert(Self->is_Java_thread(), "Must be Java thread!");
1128 JavaThread *jt = (JavaThread *)THREAD;
1129
1130 guarantee(_owner != Self, "reenter already owner");
1131 enter(THREAD); // enter the monitor
1132 guarantee(_recursions == 0, "reenter recursion");
1133 _recursions = recursions;
1134 return;
1135 }
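// Illustrative, hypothetical caller (not part of this file) showing the
// intended pairing of the two operations above for code that must fully
// release a possibly-recursive monitor and later restore it unchanged.
#if 0
static void demo_release_and_restore(ObjectMonitor* mon, Thread* THREAD) {
  intptr_t saved = mon->complete_exit(THREAD);   // drop the lock, remember the recursion count
  // ... run code that must not hold the monitor ...
  mon->reenter(saved, THREAD);                   // reacquire and restore the recursion count
}
#endif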
1136
1137
1138 // -----------------------------------------------------------------------------
1139 // A macro is used below because there may already be a pending
1140 // exception which should not abort the execution of the routines
1141 // which use this (which is why we don't put this into check_slow and
1142 // call it with a CHECK argument).
1143
1144 #define CHECK_OWNER() \
1145 do { \
1146 if (THREAD != _owner) { \
1147 if (THREAD->is_lock_owned((address) _owner)) { \
1148 _owner = THREAD; /* Convert from basiclock addr to Thread addr */ \
1149 _recursions = 0; \
1150 } else { \
1151 THROW(vmSymbols::java_lang_IllegalMonitorStateException()); \
1152 } \
1153 } \
1154 } while (false)
1155
1156 // check_slow() is a misnomer. It's called simply to throw an IMSX exception.
1157 // TODO-FIXME: remove check_slow() -- it's likely dead.
1158
1159 void ObjectMonitor::check_slow(TRAPS) {
1160 assert(THREAD != _owner && !THREAD->is_lock_owned((address) _owner), "must not be owner");
1161 THROW_MSG(vmSymbols::java_lang_IllegalMonitorStateException(), "current thread not owner");
1162 }
1163
1164 static void post_monitor_wait_event(EventJavaMonitorWait* event,
1165 ObjectMonitor* monitor,
1166 jlong notifier_tid,
1167 jlong timeout,
1168 bool timedout) {
1169 assert(event != NULL, "invariant");
1170 assert(monitor != NULL, "invariant");
1171 event->set_monitorClass(((oop)monitor->object())->klass());
1172 event->set_timeout(timeout);
1173 event->set_address((uintptr_t)monitor->object_addr());
1174 event->set_notifier(notifier_tid);
1175 event->set_timedOut(timedout);
1176 event->commit();
1177 }
1178
1179 // -----------------------------------------------------------------------------
1180 // Wait/Notify/NotifyAll
1181 //
1182 // Note: a subset of changes to ObjectMonitor::wait()
1183 // will need to be replicated in complete_exit
1184 void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) {
1185 Thread * const Self = THREAD;
1186 assert(Self->is_Java_thread(), "Must be Java thread!");
1187 JavaThread *jt = (JavaThread *)THREAD;
1188
1189 assert(InitDone, "Unexpectedly not initialized");
1190
1191 // Throw IMSX or IEX.
1192 CHECK_OWNER();
1193
1194 EventJavaMonitorWait event;
1195
1196 // check for a pending interrupt
1197 if (interruptible && Thread::is_interrupted(Self, true) && !HAS_PENDING_EXCEPTION) {
1198 // post monitor waited event. Note that this is past-tense, we are done waiting.
1199 if (JvmtiExport::should_post_monitor_waited()) {
1200 // Note: 'false' parameter is passed here because the
1201 // wait was not timed out due to thread interrupt.
1202 JvmtiExport::post_monitor_waited(jt, this, false);
1203
1204 // In this short circuit of the monitor wait protocol, the
1205 // current thread never drops ownership of the monitor and
1206 // never gets added to the wait queue so the current thread
1207 // cannot be made the successor. This means that the
1208 // JVMTI_EVENT_MONITOR_WAITED event handler cannot accidentally
1209 // consume an unpark() meant for the ParkEvent associated with
1210 // this ObjectMonitor.
1211 }
1212 if (event.should_commit()) {
1213 post_monitor_wait_event(&event, this, 0, millis, false);
1214 }
1215 THROW(vmSymbols::java_lang_InterruptedException());
1216 return;
1217 }
1218
1219 assert(Self->_Stalled == 0, "invariant");
1220 Self->_Stalled = intptr_t(this);
1221 jt->set_current_waiting_monitor(this);
1222
1223 // create a node to be put into the queue
1224 // Critically, after we reset() the event but prior to park(), we must check
1225 // for a pending interrupt.
1226 ObjectWaiter node(Self);
1227 node.TState = ObjectWaiter::TS_WAIT;
1228 Self->_ParkEvent->reset();
1229 OrderAccess::fence(); // ST into Event; membar ; LD interrupted-flag
1230
1231 // Enter the waiting queue, which is a circular doubly linked list in this case
1232 // but it could be a priority queue or any data structure.
1233 // _WaitSetLock protects the wait queue. Normally the wait queue is accessed only
1234 // by the owner of the monitor *except* in the case where park()
1235 // returns because of a timeout or interrupt. Contention is exceptionally rare
1236 // so we use a simple spin-lock instead of a heavier-weight blocking lock.
1237
1238 Thread::SpinAcquire(&_WaitSetLock, "WaitSet - add");
1239 AddWaiter(&node);
1240 Thread::SpinRelease(&_WaitSetLock);
1241
1242 _Responsible = NULL;
1243
1244 intptr_t save = _recursions; // record the old recursion count
1245 _waiters++; // increment the number of waiters
1246 _recursions = 0; // set the recursion level to be 0
1247 exit(true, Self); // exit the monitor
1248 guarantee(_owner != Self, "invariant");
1249
1250 // The thread is on the WaitSet list - now park() it.
1251 // On MP systems it's conceivable that a brief spin before we park
1252 // could be profitable.
1253 //
1254 // TODO-FIXME: change the following logic to a loop of the form
1255 // while (!timeout && !interrupted && _notified == 0) park()
1256
1257 int ret = OS_OK;
1258 int WasNotified = 0;
1259 { // State transition wrappers
1260 OSThread* osthread = Self->osthread();
1261 OSThreadWaitState osts(osthread, true);
1262 {
1263 ThreadBlockInVM tbivm(jt);
1264 // Thread is in thread_blocked state and oop access is unsafe.
1265 jt->set_suspend_equivalent();
1266
1267 if (interruptible && (Thread::is_interrupted(THREAD, false) || HAS_PENDING_EXCEPTION)) {
1268 // Intentionally empty
1269 } else if (node._notified == 0) {
1270 if (millis <= 0) {
1271 Self->_ParkEvent->park();
1272 } else {
1273 ret = Self->_ParkEvent->park(millis);
1274 }
1275 }
1276
1277 // were we externally suspended while we were waiting?
1278 if (ExitSuspendEquivalent (jt)) {
1279 // TODO-FIXME: add -- if succ == Self then succ = null.
1280 jt->java_suspend_self();
1281 }
1282
1283 } // Exit thread safepoint: transition _thread_blocked -> _thread_in_vm
1284
1285 // Node may be on the WaitSet, the EntryList (or cxq), or in transition
1286 // from the WaitSet to the EntryList.
1287 // See if we need to remove Node from the WaitSet.
1288 // We use double-checked locking to avoid grabbing _WaitSetLock
1289 // if the thread is not on the wait queue.
1290 //
1291 // Note that we don't need a fence before the fetch of TState.
1292 // In the worst case we'll fetch an old, stale value of TS_WAIT previously
1293 // written by this thread. (perhaps the fetch might even be satisfied
1294 // by a look-aside into the processor's own store buffer, although given
1295 // the length of the code path between the prior ST and this load that's
1296 // highly unlikely). If the following LD fetches a stale TS_WAIT value
1297 // then we'll acquire the lock and then re-fetch a fresh TState value.
1298 // That is, we fail toward safety.
1299
1300 if (node.TState == ObjectWaiter::TS_WAIT) {
1301 Thread::SpinAcquire(&_WaitSetLock, "WaitSet - unlink");
1302 if (node.TState == ObjectWaiter::TS_WAIT) {
1303 DequeueSpecificWaiter(&node); // unlink from WaitSet
1304 assert(node._notified == 0, "invariant");
1305 node.TState = ObjectWaiter::TS_RUN;
1306 }
1307 Thread::SpinRelease(&_WaitSetLock);
1308 }
1309
1310 // The thread is now either on off-list (TS_RUN),
1311 // on the EntryList (TS_ENTER), or on the cxq (TS_CXQ).
1312 // The Node's TState variable is stable from the perspective of this thread.
1313 // No other threads will asynchronously modify TState.
1314 guarantee(node.TState != ObjectWaiter::TS_WAIT, "invariant");
1315 OrderAccess::loadload();
1316 if (_succ == Self) _succ = NULL;
1317 WasNotified = node._notified;
1318
    // Reentry phase -- reacquire the monitor.
    // re-enter contended monitor after object.wait().
    // retain OBJECT_WAIT state until re-enter successfully completes
    // Thread state is thread_in_vm and oop access is again safe,
    // although the raw address of the object may have changed.
    // (Don't cache naked oops over safepoints, of course).

    // post monitor waited event. Note that this is past-tense, we are done waiting.
    if (JvmtiExport::should_post_monitor_waited()) {
      JvmtiExport::post_monitor_waited(jt, this, ret == OS_TIMEOUT);

      if (node._notified != 0 && _succ == Self) {
        // In this part of the monitor wait-notify-reenter protocol it
        // is possible (and normal) for another thread to do a fastpath
        // monitor enter-exit while this thread is still trying to get
        // to the reenter portion of the protocol.
        //
        // The ObjectMonitor was notified and the current thread is
        // the successor which also means that an unpark() has already
        // been done. The JVMTI_EVENT_MONITOR_WAITED event handler can
        // consume the unpark() that was done when the successor was
        // set because the same ParkEvent is shared between Java
        // monitors and JVM/TI RawMonitors (for now).
        //
        // We redo the unpark() to ensure forward progress, i.e., we
        // don't want all pending threads hanging (parked) with none
        // entering the unlocked monitor.
        node._event->unpark();
      }
    }

    if (event.should_commit()) {
      post_monitor_wait_event(&event, this, node._notifier_tid, millis, ret == OS_TIMEOUT);
    }

    OrderAccess::fence();

    assert(Self->_Stalled != 0, "invariant");
    Self->_Stalled = 0;

    assert(_owner != Self, "invariant");
    ObjectWaiter::TStates v = node.TState;
    if (v == ObjectWaiter::TS_RUN) {
      enter(Self);
    } else {
      guarantee(v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant");
      ReenterI(Self, &node);
      node.wait_reenter_end(this);
    }

    // Self has reacquired the lock.
    // Lifecycle - the node representing Self must not appear on any queues.
    // Node is about to go out-of-scope, but even if it were immortal we wouldn't
    // want residual elements associated with this thread left on any lists.
    guarantee(node.TState == ObjectWaiter::TS_RUN, "invariant");
    assert(_owner == Self, "invariant");
    assert(_succ != Self, "invariant");
  } // OSThreadWaitState()

  jt->set_current_waiting_monitor(NULL);

  guarantee(_recursions == 0, "invariant");
  _recursions = save;      // restore the old recursion count
  _waiters--;              // decrement the number of waiters

  // Verify a few postconditions
  assert(_owner == Self, "invariant");
  assert(_succ != Self, "invariant");
  assert(((oop)(object()))->mark() == markOopDesc::encode(this), "invariant");

  // check if the notification happened
  if (!WasNotified) {
    // no, it could be timeout or Thread.interrupt() or both
    // check for interrupt event, otherwise it is timeout
    if (interruptible && Thread::is_interrupted(Self, true) && !HAS_PENDING_EXCEPTION) {
      THROW(vmSymbols::java_lang_InterruptedException());
    }
  }

  // NOTE: a spurious wakeup is treated as a timeout.
  // Monitor notify has precedence over thread interrupt.
}
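
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): the TODO-FIXME inside
// ObjectMonitor::wait() above suggests recasting the park() sequence as a
// single loop of the form "while (!timeout && !interrupted && _notified == 0)
// park()".  The self-contained helper below shows that shape.  The hook names
// (timed_out, interrupted, notified, do_park) are hypothetical placeholders
// supplied by a caller; they are not HotSpot APIs.
struct WaitLoopHooksSketch {
  bool (*timed_out)();     // has a timed wait expired?
  bool (*interrupted)();   // was the thread interrupted?
  bool (*notified)();      // did a notify() select this waiter?
  void (*do_park)();       // block until unparked (or until timeout)
};

static void wait_loop_sketch(const WaitLoopHooksSketch& h) {
  // Re-check every exit condition after each wakeup, so a spurious wakeup
  // simply falls through to another park() instead of being misreported.
  while (!h.timed_out() && !h.interrupted() && !h.notified()) {
    h.do_park();
  }
}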


// Consider:
// If the lock is cool (cxq == null && succ == null) and we're on an MP system
// then instead of transferring a thread from the WaitSet to the EntryList
// we might just dequeue a thread from the WaitSet and directly unpark() it.

void ObjectMonitor::INotify(Thread * Self) {
  Thread::SpinAcquire(&_WaitSetLock, "WaitSet - notify");
  ObjectWaiter * iterator = DequeueWaiter();
  if (iterator != NULL) {
    guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant");
    guarantee(iterator->_notified == 0, "invariant");
    // Disposition - what might we do with iterator ?
    // a.  add it directly to the EntryList - either tail (policy == 1)
    //     or head (policy == 0).
    // b.  push it onto the front of the _cxq (policy == 2).
    // For now we use (b).

    iterator->TState = ObjectWaiter::TS_ENTER;

    iterator->_notified = 1;
    iterator->_notifier_tid = JFR_THREAD_ID(Self);

    ObjectWaiter * list = _EntryList;
    if (list != NULL) {
      assert(list->_prev == NULL, "invariant");
      assert(list->TState == ObjectWaiter::TS_ENTER, "invariant");
      assert(list != iterator, "invariant");
    }

    // prepend to cxq
    if (list == NULL) {
      iterator->_next = iterator->_prev = NULL;
      _EntryList = iterator;
    } else {
      iterator->TState = ObjectWaiter::TS_CXQ;
      for (;;) {
        ObjectWaiter * front = _cxq;
        iterator->_next = front;
        if (Atomic::cmpxchg(iterator, &_cxq, front) == front) {
          break;
        }
      }
    }

    // _WaitSetLock protects the wait queue, not the EntryList.  We could
    // move the add-to-EntryList operation, above, outside the critical section
    // protected by _WaitSetLock.  In practice that's not useful.  With the
    // exception of wait() timeouts and interrupts the monitor owner
    // is the only thread that grabs _WaitSetLock.  There's almost no contention
    // on _WaitSetLock so it's not profitable to reduce the length of the
    // critical section.

    iterator->wait_reenter_begin(this);
  }
  Thread::SpinRelease(&_WaitSetLock);
}
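
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): the retry loop in INotify()
// that pushes the notified waiter onto _cxq is a classic lock-free "push onto
// the front of a singly linked list" via compare-and-swap.  The standalone
// version below makes that shape explicit.  The cas parameter stands in for
// an atomic compare-and-swap primitive (such as Atomic::cmpxchg used above);
// the types and names here are hypothetical and only for exposition.
struct CxqNodeSketch {
  CxqNodeSketch* _next;
};

// cas(dest, compare, exchange) must atomically install 'exchange' in '*dest'
// iff '*dest == compare', returning the value that was observed in '*dest'.
typedef CxqNodeSketch* (*CasFnSketch)(CxqNodeSketch** dest,
                                      CxqNodeSketch* compare,
                                      CxqNodeSketch* exchange);

static void cxq_push_sketch(CxqNodeSketch** head, CxqNodeSketch* node, CasFnSketch cas) {
  for (;;) {
    CxqNodeSketch* front = *head;       // snapshot the current front
    node->_next = front;                // link ahead of it
    if (cas(head, front, node) == front) {
      return;                           // nobody raced us; push succeeded
    }
    // CAS failed because another thread pushed concurrently -- retry.
  }
}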

// Consider: a not-uncommon synchronization bug is to use notify() when
// notifyAll() is more appropriate, potentially resulting in stranded
// threads; this is one example of a lost wakeup.  A useful diagnostic
// option is to force all notify() operations to behave as notifyAll().
//
// Note: We can also detect many such problems with a "minimum wait".
// When the "minimum wait" is set to a small non-zero timeout value
// and the program no longer hangs (whereas it did without the minimum wait),
// that suggests a lost-wakeup bug.
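
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): one way to realize the
// "minimum wait" diagnostic described above is to turn every untimed wait
// into a short timed wait, so a lost notification can no longer hang the
// program forever.  The constant and helper below are hypothetical -- they
// are not an existing HotSpot flag or function.
static jlong apply_minimum_wait_sketch(jlong millis) {
  const jlong MinimumWaitMillis = 1;   // hypothetical small, non-zero floor
  // An untimed wait (millis <= 0) becomes a short timed wait; timed waits
  // keep their caller-specified timeout.
  return (millis <= 0) ? MinimumWaitMillis : millis;
}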

void ObjectMonitor::notify(TRAPS) {
  CHECK_OWNER();
  if (_WaitSet == NULL) {
    return;
  }
  DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
  INotify(THREAD);
  OM_PERFDATA_OP(Notifications, inc(1));
}


// The current implementation of notifyAll() transfers the waiters one-at-a-time
// from the waitset to the EntryList.  This could be done more efficiently with a
// single bulk transfer but in practice it's not time-critical.  Beware too,
// that in prepend-mode we invert the order of the waiters.  Let's say that the
// waitset is "ABCD" and the EntryList is "XYZ".  After a notifyAll() in prepend
// mode the waitset will be empty and the EntryList will be "DCBAXYZ".
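
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): the "DCBAXYZ" example above
// is simply the usual property that repeatedly prepending elements reverses
// their order.  The tiny helper below demonstrates it on plain C strings:
// prepending 'A', 'B', 'C', 'D' (in that order) onto "XYZ" yields "DCBAXYZ".
// The function and buffer handling are purely for exposition.
static void prepend_inversion_sketch(char* out, size_t out_len,
                                     const char* waiters, const char* entry_list) {
  size_t n = strlen(waiters);
  size_t m = strlen(entry_list);
  if (out_len < n + m + 1) return;       // caller must supply enough space
  // Simulate notifyAll() in prepend mode: each waiter, taken front-to-back
  // from the wait set, is pushed onto the front of the entry list.
  for (size_t i = 0; i < n; i++) {
    out[n - 1 - i] = waiters[i];         // waiter i lands at the mirrored position
  }
  memcpy(out + n, entry_list, m + 1);    // existing entry list (and NUL) follows
}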

void ObjectMonitor::notifyAll(TRAPS) {
  CHECK_OWNER();
  if (_WaitSet == NULL) {
    return;
  }

  DTRACE_MONITOR_PROBE(notifyAll, this, object(), THREAD);
  int tally = 0;
  while (_WaitSet != NULL) {
    tally++;
    INotify(THREAD);
  }

  OM_PERFDATA_OP(Notifications, inc(tally));
}
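
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): the comment before
// notifyAll() notes that the waiters could be moved with a single bulk
// transfer instead of one INotify() per waiter.  The standalone helper below
// shows the idea on a simplified structure: detach the whole circular wait
// list in O(1), then walk it once to build a forward-linked chain that could
// be spliced onto an entry list.  The type and names are hypothetical
// simplifications of ObjectWaiter.
struct BulkNodeSketch {
  BulkNodeSketch* _next;
  BulkNodeSketch* _prev;
};

// Returns the head of a NULL-terminated, singly linked chain containing every
// node that was on the circular list rooted at *wait_set, preserving FIFO
// order; *wait_set is left empty.
static BulkNodeSketch* bulk_detach_sketch(BulkNodeSketch** wait_set) {
  BulkNodeSketch* head = *wait_set;
  if (head == NULL) return NULL;
  *wait_set = NULL;                      // the wait set is emptied in one step
  BulkNodeSketch* tail = head->_prev;    // circular list: head->_prev is the tail
  tail->_next = NULL;                    // break the cycle to get a linear chain
  for (BulkNodeSketch* p = head; p != NULL; p = p->_next) {
    p->_prev = NULL;                     // scrub back-links; chain is now singly linked
  }
  return head;
}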

// -----------------------------------------------------------------------------
// Adaptive Spinning Support
//
// Adaptive spin-then-block - rational spinning
//
// Note that we spin "globally" on _owner with a classic SMP-polite TATAS
// algorithm.  On high order SMP systems it would be better to start with
// a brief global spin and then revert to spinning locally.  In the spirit of MCS/CLH,
// a contending thread could enqueue itself on the cxq and then spin locally
// on a thread-specific variable such as its ParkEvent._Event flag.
// That's left as an exercise for the reader.  Note that global spinning is
// not problematic on Niagara, as the L2 cache serves the interconnect and
// has both low latency and massive bandwidth.
//
// Broadly, we can fix the spin frequency -- that is, the % of contended lock
// acquisition attempts where we opt to spin -- at 100% and vary the spin count
// (duration) or we can fix the count at approximately the duration of
// a context switch and vary the frequency.  Of course we could also
// vary both satisfying K == Frequency * Duration, where K is adaptive by monitor.
// For a description of 'Adaptive spin-then-block mutual exclusion in
// multi-threaded processing,' see U.S. Pat. No. 8046758.
//
// This implementation varies the duration "D", where D varies with
// the success rate of recent spin attempts.  (D is capped at approximately
// the length of a round-trip context switch).  The success rate for recent
// spin attempts is a good predictor of the success rate of future spin
// attempts.  The mechanism adapts automatically to varying critical
// section length (lock modality), system load and degree of parallelism.
// D is maintained per-monitor in _SpinDuration and is initialized
// optimistically.  Spin frequency is fixed at 100%.
//
// Note that _SpinDuration is volatile, but we update it without locks
// or atomics.  The code is designed so that _SpinDuration stays within
// a reasonable range even in the presence of races.  The arithmetic
// operations on _SpinDuration are closed over the domain of legal values,
// so at worst a race will install an older but still legal value.
// At the very worst this introduces some apparent non-determinism.
// We might spin when we shouldn't or vice-versa, but since the spin
// counts are relatively short, even in the worst case, the effect is harmless.
//
// Care must be taken that a low "D" value does not become
// an absorbing state.  Transient spinning failures -- when spinning
// is overall profitable -- should not cause the system to converge
// on low "D" values.  We want spinning to be stable and predictable
// and fairly responsive to change and at the same time we don't want
// it to oscillate, become metastable, be "too" non-deterministic,
// or converge on or enter undesirable stable absorbing states.
//
// We implement a feedback-based control system -- using past behavior
// to predict future behavior.  We face two issues: (a) if the
// input signal is random then the spin predictor won't provide optimal
// results, and (b) if the signal frequency is too high then the control
// system, which has some natural response lag, will "chase" the signal.
// (b) can arise from multimodal lock hold times.  Transient preemption
// can also result in apparent bimodal lock hold times.
// Although sub-optimal, neither condition is particularly harmful, as
// in the worst-case we'll spin when we shouldn't or vice-versa.
// The maximum spin duration is rather short so the failure modes aren't bad.
// To be conservative, I've tuned the gain in the system to bias toward
// _not spinning.  Relatedly, the system can sometimes enter a mode where it
// "rings" or oscillates between spinning and not spinning.  This happens
// when spinning is just on the cusp of profitability, however, so the
// situation is not dire.  The state is benign -- there's no need to add
// hysteresis control to damp the transition rate between spinning and
// not spinning.
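
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): a stripped-down version of
// the "fixed frequency, variable duration" policy described above.  The spin
// budget grows additively after a successful spin (toward a cap) and shrinks
// by a penalty after an unsuccessful one, mirroring the Knob_Bonus /
// Knob_Penalty / Knob_SpinLimit arithmetic used by TrySpin() below.  The
// try_lock / pause hooks and all constants here are hypothetical.
struct AdaptiveSpinSketch {
  int _duration;                     // analogous to _SpinDuration
  static const int Limit   = 5000;   // cap, like Knob_SpinLimit
  static const int Bonus   = 100;    // additive increase on success
  static const int Penalty = 200;    // decrease on failure

  // Returns true if try_lock() succeeded within the current spin budget.
  bool spin_then_report(bool (*try_lock)(), void (*pause)()) {
    for (int ctr = _duration; ctr > 0; ctr--) {
      if (try_lock()) {
        if (_duration < Limit) _duration += Bonus;   // spinning paid off: spin longer next time
        return true;
      }
      pause();                                       // be polite to the sibling hyperthread
    }
    if (_duration > 0) {
      _duration -= Penalty;                          // spinning failed: shrink the budget
      if (_duration < 0) _duration = 0;
    }
    return false;                                    // caller should now block (park)
  }
};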

// Spinning: Fixed frequency (100%), vary duration
int ObjectMonitor::TrySpin(Thread * Self) {
  // Dumb, brutal spin.  Good for comparative measurements against adaptive spinning.
  int ctr = Knob_FixedSpin;
  if (ctr != 0) {
    while (--ctr >= 0) {
      if (TryLock(Self) > 0) return 1;
      SpinPause();
    }
    return 0;
  }

  for (ctr = Knob_PreSpin + 1; --ctr >= 0;) {
    if (TryLock(Self) > 0) {
      // Increase _SpinDuration ...
      // Note that we don't clamp SpinDuration precisely at SpinLimit.
      // Raising _SpinDuration to the poverty line is key.
      int x = _SpinDuration;
      if (x < Knob_SpinLimit) {
        if (x < Knob_Poverty) x = Knob_Poverty;
        _SpinDuration = x + Knob_BonusB;
      }
      return 1;
    }
    SpinPause();
  }

  // Admission control - verify preconditions for spinning
  //
  // We always spin a little bit, just to prevent _SpinDuration == 0 from
  // becoming an absorbing state.  Put another way, we spin briefly to
  // sample, just in case the system load, parallelism, contention, or lock
  // modality changed.
  //
  // Consider the following alternative:
  // Periodically set _SpinDuration = _SpinLimit and try a long/full
  // spin attempt.  "Periodically" might mean after a tally of
  // the # of failed spin attempts (or iterations) reaches some threshold.
  // This takes us into the realm of 1-out-of-N spinning, where we
  // hold the duration constant but vary the frequency.

  ctr = _SpinDuration;
  if (ctr <= 0) return 0;

  if (NotRunnable(Self, (Thread *) _owner)) {
    return 0;
  }

  // We're good to spin ... spin ingress.
  // CONSIDER: use Prefetch::write() to avoid RTS->RTO upgrades
  // when preparing to LD...CAS _owner, etc and the CAS is likely
  // to succeed.
  if (_succ == NULL) {
    _succ = Self;
  }
  Thread * prv = NULL;

  // There are three ways to exit the following loop:
  // 1.  A successful spin where this thread has acquired the lock.
  // 2.  Spin failure with prejudice
  // 3.  Spin failure without prejudice

  while (--ctr >= 0) {

    // Periodic polling -- Check for pending GC
    // Threads may spin while they're unsafe.
    // We don't want spinning threads to delay the JVM from reaching
    // a stop-the-world safepoint or to steal cycles from GC.
    // If we detect a pending safepoint we abort in order that
    // (a) this thread, if unsafe, doesn't delay the safepoint, and (b)
    // this thread, if safe, doesn't steal cycles from GC.
    // This is in keeping with the "no loitering in runtime" rule.
    // We periodically check to see if there's a safepoint pending.
    if ((ctr & 0xFF) == 0) {
      if (SafepointMechanism::should_block(Self)) {
        goto Abort;           // abrupt spin egress
      }
      SpinPause();
    }

    // Probe _owner with TATAS
    // If this thread observes the monitor transition or flicker
    // from locked to unlocked to locked, then the odds that this
    // thread will acquire the lock in this spin attempt go down
    // considerably.  The same argument applies if the CAS fails
    // or if we observe _owner change from one non-null value to
    // another non-null value.  In such cases we might abort
    // the spin without prejudice or apply a "penalty" to the
    // spin count-down variable "ctr", reducing it by 100, say.

    Thread * ox = (Thread *) _owner;
    if (ox == NULL) {
      ox = (Thread*)Atomic::cmpxchg(Self, &_owner, (void*)NULL);
      if (ox == NULL) {
        // The CAS succeeded -- this thread acquired ownership
        // Take care of some bookkeeping to exit spin state.
        if (_succ == Self) {
          _succ = NULL;
        }

        // Increase _SpinDuration :
        // The spin was successful (profitable) so we tend toward
        // longer spin attempts in the future.
        // CONSIDER: factor "ctr" into the _SpinDuration adjustment.
        // If we acquired the lock early in the spin cycle it
        // makes sense to increase _SpinDuration proportionally.
        // Note that we don't clamp SpinDuration precisely at SpinLimit.
        int x = _SpinDuration;
        if (x < Knob_SpinLimit) {
          if (x < Knob_Poverty) x = Knob_Poverty;
          _SpinDuration = x + Knob_Bonus;
        }
        return 1;
      }

      // The CAS failed ... we can take any of the following actions:
      // * penalize: ctr -= CASPenalty
      // * exit spin with prejudice -- goto Abort;
      // * exit spin without prejudice.
      // * Since CAS is high-latency, retry again immediately.
      prv = ox;
      goto Abort;
    }

    // Did lock ownership change hands ?
    if (ox != prv && prv != NULL) {
      goto Abort;
    }
    prv = ox;

    // Abort the spin if the owner is not executing.
    // The owner must be executing in order to drop the lock.
    // Spinning while the owner is OFFPROC is idiocy.
    // Consider: ctr -= RunnablePenalty ;
    if (NotRunnable(Self, ox)) {
      goto Abort;
    }
    if (_succ == NULL) {
      _succ = Self;
    }
  }

  // Spin failed with prejudice -- reduce _SpinDuration.
  // TODO: Use an AIMD-like policy to adjust _SpinDuration.
  // AIMD is globally stable.
  {
    int x = _SpinDuration;
    if (x > 0) {
      // Consider an AIMD scheme like: x -= (x >> 3) + 100
      // This is globally stable and tends to damp the response.
      x -= Knob_Penalty;
      if (x < 0) x = 0;
      _SpinDuration = x;
    }
  }

 Abort:
  if (_succ == Self) {
    _succ = NULL;
    // Invariant: after setting succ=null a contending thread
    // must recheck-retry _owner before parking.  This usually happens
    // in the normal usage of TrySpin(), but it's safest
    // to make TrySpin() as foolproof as possible.
    OrderAccess::fence();
    if (TryLock(Self) > 0) return 1;
  }
  return 0;
}
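
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): the TODO above suggests an
// AIMD (additive-increase / multiplicative-decrease) policy for
// _SpinDuration.  The helper below shows what such an update rule looks like,
// using the "x -= (x >> 3) + 100" style decrease mentioned in the comment;
// the constants are hypothetical.
static int aimd_adjust_sketch(int duration, bool spin_succeeded) {
  const int Limit    = 5000;    // upper bound, like Knob_SpinLimit
  const int Increase = 100;     // additive increase on success
  if (spin_succeeded) {
    duration += Increase;                       // additive increase
    if (duration > Limit) duration = Limit;
  } else {
    duration -= (duration >> 3) + 100;          // proportional-plus-constant decrease
    if (duration < 0) duration = 0;
  }
  return duration;
}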

// NotRunnable() -- informed spinning
//
// Don't bother spinning if the owner is not eligible to drop the lock.
// Spin only if the owner thread is _thread_in_Java or _thread_in_vm.
// The thread must be runnable in order to drop the lock in timely fashion.
// If the _owner is not runnable then spinning will not likely be
// successful (profitable).
//
// Beware -- the thread referenced by _owner could have died
// so a simple fetch from _owner->_thread_state might trap.
// Instead, we use SafeFetchXX() to safely LD _owner->_thread_state.
// Because of the lifecycle issues, the _thread_state values
// observed by NotRunnable() might be garbage.  NotRunnable must
// tolerate this and consider the observed _thread_state value
// as advisory.
//
// Beware too, that _owner is sometimes a BasicLock address and sometimes
// a thread pointer.
// Alternately, we might tag the type (thread pointer vs basiclock pointer)
// with the LSB of _owner.  Another option would be to probabilistically probe
// the putative _owner->TypeTag value.
//
// Checking _thread_state isn't perfect.  Even if the thread is
// in_java it might be blocked on a page-fault or have been preempted
// and sitting on a ready/dispatch queue.
//
// The return value from NotRunnable() is *advisory* -- the
// result is based on sampling and is not necessarily coherent.
// The caller must tolerate false-negative and false-positive errors.
// Spinning, in general, is probabilistic anyway.


int ObjectMonitor::NotRunnable(Thread * Self, Thread * ox) {
  // Check ox->TypeTag == 2BAD.
  if (ox == NULL) return 0;

  // Avoid transitive spinning ...
  // Say T1 spins or blocks trying to acquire L.  T1._Stalled is set to L.
  // Immediately after T1 acquires L it's possible that T2, also
  // spinning on L, will see L.Owner=T1 and T1._Stalled=L.
  // This occurs transiently after T1 acquired L but before
  // T1 managed to clear T1.Stalled.  T2 does not need to abort
  // its spin in this circumstance.
  intptr_t BlockedOn = SafeFetchN((intptr_t *) &ox->_Stalled, intptr_t(1));

  if (BlockedOn == 1) return 1;
  if (BlockedOn != 0) {
    return BlockedOn != intptr_t(this) && _owner == ox;
  }

  assert(sizeof(((JavaThread *)ox)->_thread_state) == sizeof(int), "invariant");
  int jst = SafeFetch32((int *) &((JavaThread *) ox)->_thread_state, -1);
  // consider also: jst != _thread_in_Java -- but that's overspecific.
  return jst == _thread_blocked || jst == _thread_in_native;
}
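
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): because NotRunnable() is
// only *advisory*, a caller may use it solely to cut a spin short -- never to
// decide correctness.  The standalone pattern below shows that shape: a
// possibly stale hint can end the loop early, but lock acquisition itself is
// always established by try_lock().  The hook names are hypothetical.
static bool spin_with_advisory_hint_sketch(int budget,
                                           bool (*try_lock)(),
                                           bool (*owner_looks_blocked)(),
                                           void (*pause)()) {
  while (budget-- > 0) {
    if (try_lock()) {
      return true;                 // correctness comes only from try_lock()
    }
    if (owner_looks_blocked()) {
      return false;                // stale hint? worst case we stop spinning early -- still correct
    }
    pause();
  }
  return false;
}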


// -----------------------------------------------------------------------------
// WaitSet management ...

ObjectWaiter::ObjectWaiter(Thread* thread) {
  _next = NULL;
  _prev = NULL;
  _notified = 0;
  _notifier_tid = 0;
  TState = TS_RUN;
  _thread = thread;
  _event = thread->_ParkEvent;
  _active = false;
  assert(_event != NULL, "invariant");
}

void ObjectWaiter::wait_reenter_begin(ObjectMonitor * const mon) {
  JavaThread *jt = (JavaThread *)this->_thread;
  _active = JavaThreadBlockedOnMonitorEnterState::wait_reenter_begin(jt, mon);
}

void ObjectWaiter::wait_reenter_end(ObjectMonitor * const mon) {
  JavaThread *jt = (JavaThread *)this->_thread;
  JavaThreadBlockedOnMonitorEnterState::wait_reenter_end(jt, _active);
}

inline void ObjectMonitor::AddWaiter(ObjectWaiter* node) {
  assert(node != NULL, "should not add NULL node");
  assert(node->_prev == NULL, "node already in list");
  assert(node->_next == NULL, "node already in list");
  // put node at end of queue (circular doubly linked list)
  if (_WaitSet == NULL) {
    _WaitSet = node;
    node->_prev = node;
    node->_next = node;
  } else {
    ObjectWaiter* head = _WaitSet;
    ObjectWaiter* tail = head->_prev;
    assert(tail->_next == head, "invariant check");
    tail->_next = node;
    head->_prev = node;
    node->_next = head;
    node->_prev = tail;
  }
}

inline ObjectWaiter* ObjectMonitor::DequeueWaiter() {
  // dequeue the very first waiter
  ObjectWaiter* waiter = _WaitSet;
  if (waiter) {
    DequeueSpecificWaiter(waiter);
  }
  return waiter;
}

inline void ObjectMonitor::DequeueSpecificWaiter(ObjectWaiter* node) {
  assert(node != NULL, "should not dequeue NULL node");
  assert(node->_prev != NULL, "node already removed from list");
  assert(node->_next != NULL, "node already removed from list");
  // when the waiter has woken up because of interrupt,
  // timeout or other spurious wake-up, dequeue the
  // waiter from waiting list
  ObjectWaiter* next = node->_next;
  if (next == node) {
    assert(node->_prev == node, "invariant check");
    _WaitSet = NULL;
  } else {
    ObjectWaiter* prev = node->_prev;
    assert(prev->_next == node, "invariant check");
    assert(next->_prev == node, "invariant check");
    next->_prev = prev;
    prev->_next = next;
    if (_WaitSet == node) {
      _WaitSet = next;
    }
  }
  node->_next = NULL;
  node->_prev = NULL;
}
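
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the VM logic): the WaitSet is a circular
// doubly linked list whose head is _WaitSet and whose tail is reachable as
// _WaitSet->_prev.  The self-contained walk below shows how the structure is
// traversed and how its invariants can be checked; the node type is a
// hypothetical stand-in for ObjectWaiter.
struct WaitSetNodeSketch {
  WaitSetNodeSketch* _next;
  WaitSetNodeSketch* _prev;
};

// Counts the waiters and verifies the circular-list invariants, returning -1
// if a back-link is inconsistent.
static int wait_set_length_sketch(WaitSetNodeSketch* head) {
  if (head == NULL) return 0;            // empty wait set
  int n = 0;
  WaitSetNodeSketch* p = head;
  do {
    if (p->_next->_prev != p) return -1; // every link must have a matching back-link
    n++;
    p = p->_next;
  } while (p != head);                   // the list closes back on the head
  return n;
}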

// -----------------------------------------------------------------------------
// PerfData support
PerfCounter * ObjectMonitor::_sync_ContendedLockAttempts = NULL;
PerfCounter * ObjectMonitor::_sync_FutileWakeups = NULL;
PerfCounter * ObjectMonitor::_sync_Parks = NULL;
PerfCounter * ObjectMonitor::_sync_Notifications = NULL;
PerfCounter * ObjectMonitor::_sync_Inflations = NULL;
PerfCounter * ObjectMonitor::_sync_Deflations = NULL;
PerfLongVariable * ObjectMonitor::_sync_MonExtant = NULL;

// One-shot global initialization for the sync subsystem.
// We could also defer initialization and initialize on-demand
// the first time we call inflate().  Initialization would
// be protected - like so many things - by the MonitorCache_lock.

void ObjectMonitor::Initialize() {
  assert(!InitDone, "invariant");

  if (!os::is_MP()) {
    Knob_SpinLimit = 0;
    Knob_PreSpin = 0;
    Knob_FixedSpin = -1;
  }

  if (UsePerfData) {
    EXCEPTION_MARK;
#define NEWPERFCOUNTER(n)                                                 \
  {                                                                       \
    n = PerfDataManager::create_counter(SUN_RT, #n, PerfData::U_Events,   \
                                        CHECK);                           \
  }
#define NEWPERFVARIABLE(n)                                                \
  {                                                                       \
    n = PerfDataManager::create_variable(SUN_RT, #n, PerfData::U_Events,  \
                                         CHECK);                          \
  }
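    // For clarity, a use such as NEWPERFCOUNTER(_sync_Parks) below expands
    // (roughly, ignoring whitespace) to:
    //
    //   {
    //     _sync_Parks = PerfDataManager::create_counter(SUN_RT, "_sync_Parks",
    //                                                   PerfData::U_Events,
    //                                                   CHECK);
    //   }
    //
    // i.e. each counter is registered under its field name in the SUN_RT
    // namespace with units of "events".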
    NEWPERFCOUNTER(_sync_Inflations);
    NEWPERFCOUNTER(_sync_Deflations);
    NEWPERFCOUNTER(_sync_ContendedLockAttempts);
    NEWPERFCOUNTER(_sync_FutileWakeups);
    NEWPERFCOUNTER(_sync_Parks);
    NEWPERFCOUNTER(_sync_Notifications);
    NEWPERFVARIABLE(_sync_MonExtant);
#undef NEWPERFCOUNTER
#undef NEWPERFVARIABLE
  }

  DEBUG_ONLY(InitDone = true;)
}
