// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The gccgo version of mem_*.go.

package runtime

import (
	"unsafe"
)

// Functions called by C code.
//go:linkname sysAlloc
//go:linkname sysFree

//extern mmap
func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer

//extern munmap
func munmap(addr unsafe.Pointer, length uintptr) int32

//extern madvise
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32

var mmapFD = int32(-1)

var devZero = []byte("/dev/zero\x00")

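// init opens /dev/zero when the platform's mmap does not support
// anonymous mappings (_MAP_ANON == 0), so that mmapFD always refers
// to a usable backing file descriptor.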
func init() {
	if _MAP_ANON == 0 {
		mmapFD = open(&devZero[0], 0 /* O_RDONLY */, 0)
		if mmapFD < 0 {
			println("open /dev/zero: errno=", errno())
			exit(2)
		}
	}
}

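// mmap wraps the C mmap call, converting its error convention
// (returning MAP_FAILED and setting errno) into a Go-style
// (pointer, errno) pair.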
func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) (unsafe.Pointer, int) {
	p := sysMmap(addr, n, prot, flags, fd, off)
	if uintptr(p) == _MAP_FAILED {
		return nil, errno()
	}
	return p, 0
}

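// sysAlloc obtains a region of zeroed, anonymous memory from the
// kernel and accounts for it in sysStat. It returns nil if the memory
// cannot be obtained.
//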
// Don't split the stack as this function may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
	if err != 0 {
		if err == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if err == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}

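// sysUnused advises the kernel that the region [v, v+n) is not needed
// for the moment, so its physical pages may be reclaimed. The mapping
// itself remains in place.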
func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page into a 2MB huge page, bloating the process's RSS
	// by as much as 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if physHugePageSize != 0 && _MADV_NOHUGEPAGE != 0 {
		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%physHugePageSize != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (physHugePageSize - 1)
		}
		if (uintptr(v)+n)%physHugePageSize != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (physHugePageSize - 1)
		}

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+physHugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*physHugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), physHugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	if _MADV_DONTNEED != 0 {
		madvise(v, n, _MADV_DONTNEED)
	} else if _MADV_FREE != 0 {
		madvise(v, n, _MADV_FREE)
	}
}

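// sysUsed marks the region [v, v+n) as in use again after an earlier
// sysUnused.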
func sysUsed(v unsafe.Pointer, n uintptr) {
	// Partially undo the NOHUGEPAGE marks from sysUnused
	// for whole huge pages between v and v+n. This may
	// leave huge pages off at the end points v and v+n
	// even though allocations may cover these entire huge
	// pages. We could detect this and undo NOHUGEPAGE on
	// the end points as well, but it's probably not worth
	// the cost because when neighboring allocations are
	// freed sysUnused will just set NOHUGEPAGE again.
	sysHugePage(v, n)
}

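// sysHugePage re-enables transparent huge pages for the huge pages
// that lie entirely within [v, v+n).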
func sysHugePage(v unsafe.Pointer, n uintptr) {
	if physHugePageSize != 0 && _MADV_HUGEPAGE != 0 {
		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (physHugePageSize - 1)) &^ (physHugePageSize - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (physHugePageSize - 1)

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}

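// sysFree returns the region [v, v+n) to the operating system and
// subtracts it from sysStat.
//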
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}

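// sysFault remaps the region [v, v+n) with no access permissions so
// that any reference to it faults.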
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, mmapFD, 0)
}

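// sysReserve reserves the address range [v, v+n) without committing
// memory to it, by mapping it with no access permissions. It returns
// nil if the reservation fails.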
func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
	if err != 0 {
		return nil
	}
	return p
}

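// sysMap makes a previously reserved region [v, v+n) ready for use by
// remapping it read-write at the same address (MAP_FIXED) and
// accounting for it in sysStat.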
func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	if GOOS == "aix" {
		// AIX does not allow mapping a range that is already mapped,
		// so unmap the range first, even if it may not be mapped.
		munmap(v, n)
	}
	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0)
	if err == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v || err != 0 {
		throw("runtime: cannot map pages in arena address space")
	}
}