1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime
6
7import (
8	"runtime/internal/sys"
9	"unsafe"
10)
11
// mOS holds the OS-specific portion of per-thread (m) state.
// In this file it carries only a single placeholder byte.
type mOS struct {
	unused byte // placeholder; not referenced anywhere in this file
}
15
16func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32 {
17	return int32(syscall(_SYS_futex, uintptr(addr), uintptr(op), uintptr(val), uintptr(ts), uintptr(addr2), uintptr(val3)))
18}
19
20// For sched_getaffinity use the system call rather than the libc call,
21// because the system call returns the number of entries set by the kernel.
22func sched_getaffinity(pid _pid_t, cpusetsize uintptr, mask *byte) int32 {
23	return int32(syscall(_SYS_sched_getaffinity, uintptr(pid), cpusetsize, uintptr(unsafe.Pointer(mask)), 0, 0, 0))
24}
25
26// Linux futex.
27//
28//	futexsleep(uint32 *addr, uint32 val)
29//	futexwakeup(uint32 *addr)
30//
31// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
32// Futexwakeup wakes up threads sleeping on addr.
33// Futexsleep is allowed to wake up spuriously.
34
// Futex operation codes used by futexsleep and futexwakeup.
// Both operations are combined with the private flag.
const (
	_FUTEX_PRIVATE_FLAG = 1 << 7                  // 128
	_FUTEX_WAIT_PRIVATE = _FUTEX_PRIVATE_FLAG | 0 // wait operation (0) with the private flag
	_FUTEX_WAKE_PRIVATE = _FUTEX_PRIVATE_FLAG | 1 // wake operation (1) with the private flag
)
40
41// Atomically,
42//	if(*addr == val) sleep
43// Might be woken up spuriously; that's allowed.
44// Don't sleep longer than ns; ns < 0 means forever.
45//go:nosplit
46func futexsleep(addr *uint32, val uint32, ns int64) {
47	// Some Linux kernels have a bug where futex of
48	// FUTEX_WAIT returns an internal error code
49	// as an errno. Libpthread ignores the return value
50	// here, and so can we: as it says a few lines up,
51	// spurious wakeups are allowed.
52	if ns < 0 {
53		futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
54		return
55	}
56
57	var ts timespec
58	ts.setNsec(ns)
59	futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
60}
61
62// If any procs are sleeping on addr, wake up at most cnt.
63//go:nosplit
64func futexwakeup(addr *uint32, cnt uint32) {
65	ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
66	if ret >= 0 {
67		return
68	}
69
70	// I don't know that futex wakeup can return
71	// EAGAIN or EINTR, but if it does, it would be
72	// safe to loop and call futex again.
73	systemstack(func() {
74		print("futexwakeup addr=", addr, " returned ", ret, "\n")
75	})
76
77	*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
78}
79
80func getproccount() int32 {
81	// This buffer is huge (8 kB) but we are on the system stack
82	// and there should be plenty of space (64 kB).
83	// Also this is a leaf, so we're not holding up the memory for long.
84	// See golang.org/issue/11823.
85	// The suggested behavior here is to keep trying with ever-larger
86	// buffers, but we don't have a dynamic memory allocator at the
87	// moment, so that's a bit tricky and seems like overkill.
88	const maxCPUs = 64 * 1024
89	var buf [maxCPUs / 8]byte
90	r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
91	if r < 0 {
92		return 1
93	}
94	n := int32(0)
95	for _, v := range buf[:r] {
96		for v != 0 {
97			n += int32(v & 1)
98			v >>= 1
99		}
100	}
101	if n == 0 {
102		n = 1
103	}
104	return n
105}
106
// Auxiliary vector tags recognized when scanning auxv.
const (
	_AT_NULL   = 0  // terminates the vector
	_AT_PAGESZ = 6  // system physical page size
	_AT_HWCAP  = 16 // hardware capability bit vector
	_AT_RANDOM = 25 // pointer to 16 bytes of random data; introduced in 2.6.29
	_AT_HWCAP2 = 26 // hardware capability bit vector 2
)
114
var (
	// procAuxv is the NUL-terminated path of the auxiliary vector file
	// that sysargs opens when no usable auxv follows argv.
	procAuxv = []byte("/proc/self/auxv\x00")

	// addrspace_vec is the one-byte output buffer passed to mincore
	// while probing for the physical page size.
	addrspace_vec [1]byte
)
118
// mincore has no Go body; the extern directive below names the external
// symbol it refers to. In this file it is called with an address, a
// length of 1 and a one-byte result buffer, and a zero return is treated
// as success.
//extern mincore
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
121
122func sysargs(argc int32, argv **byte) {
123	n := argc + 1
124
125	// skip over argv, envp to get to auxv
126	for argv_index(argv, n) != nil {
127		n++
128	}
129
130	// skip NULL separator
131	n++
132
133	// now argv+n is auxv
134	auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
135	if sysauxv(auxv[:]) != 0 {
136		return
137	}
138	// In some situations we don't get a loader-provided
139	// auxv, such as when loaded as a library on Android.
140	// Fall back to /proc/self/auxv.
141	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
142	if fd < 0 {
143		// On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to
144		// try using mincore to detect the physical page size.
145		// mincore should return EINVAL when address is not a multiple of system page size.
146		const size = 256 << 10 // size of memory region to allocate
147		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
148		if err != 0 {
149			return
150		}
151		var n uintptr
152		for n = 4 << 10; n < size; n <<= 1 {
153			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
154			if err == 0 {
155				physPageSize = n
156				break
157			}
158		}
159		if physPageSize == 0 {
160			physPageSize = size
161		}
162		munmap(p, size)
163		return
164	}
165	var buf [128]uintptr
166	n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
167	closefd(fd)
168	if n < 0 {
169		return
170	}
171	// Make sure buf is terminated, even if we didn't read
172	// the whole file.
173	buf[len(buf)-2] = _AT_NULL
174	sysauxv(buf[:])
175}
176
177func sysauxv(auxv []uintptr) int {
178	var i int
179	for ; auxv[i] != _AT_NULL; i += 2 {
180		tag, val := auxv[i], auxv[i+1]
181		switch tag {
182		case _AT_RANDOM:
183			// The kernel provides a pointer to 16-bytes
184			// worth of random data.
185			startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
186
187			setRandomNumber(uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
188				uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24)
189
190		case _AT_PAGESZ:
191			physPageSize = val
192		}
193
194		archauxv(tag, val)
195
196		// Commented out for gccgo for now.
197		// vdsoauxv(tag, val)
198	}
199	return i / 2
200}
201
202var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
203
204func getHugePageSize() uintptr {
205	var numbuf [20]byte
206	fd := open(&sysTHPSizePath[0], 0 /* O_RDONLY */, 0)
207	if fd < 0 {
208		return 0
209	}
210	n := read(fd, noescape(unsafe.Pointer(&numbuf[0])), int32(len(numbuf)))
211	closefd(fd)
212	if n <= 0 {
213		return 0
214	}
215	l := n - 1 // remove trailing newline
216	v, ok := atoi(slicebytetostringtmp(numbuf[:l]))
217	if !ok || v < 0 {
218		v = 0
219	}
220	if v&(v-1) != 0 {
221		// v is not a power of 2
222		return 0
223	}
224	return uintptr(v)
225}
226
// osinit sets ncpu from getproccount and physHugePageSize from
// getHugePageSize.
func osinit() {
	ncpu = getproccount()
	physHugePageSize = getHugePageSize()
}
231