1; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
2; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
3
; Integer scratch globals, one per access size exercised by the tests below.
@var_8bit = global i8 0
@var_16bit = global i16 0
@var_32bit = global i32 0
@var_64bit = global i64 0

; Floating-point globals used by the ldst_float / ldst_double tests.
@var_float = global float 0.0
@var_double = global double 0.0
11
define void @ldst_8bit() {
; CHECK-LABEL: ldst_8bit:

; There is no architectural support for extending loads into 8-bit or
; 16-bit registers: i8 is promoted during lowering, so the extending
; load patterns below must be matched instead.

; match a sign-extending load 8-bit -> 32-bit
   %val8_sext32 = load volatile i8, i8* @var_8bit
   %val32_signed = sext i8 %val8_sext32 to i32
   store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_8bit
; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]

; match a zero-extending load volatile 8-bit -> 32-bit
  %val8_zext32 = load volatile i8, i8* @var_8bit
  %val32_unsigned = zext i8 %val8_zext32 to i32
  store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]

; match an any-extending load volatile 8-bit -> 32-bit
  %val8_anyext = load volatile i8, i8* @var_8bit
  %newval8 = add i8 %val8_anyext, 1
  store volatile i8 %newval8, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]

; match a sign-extending load volatile 8-bit -> 64-bit
  %val8_sext64 = load volatile i8, i8* @var_8bit
  %val64_signed = sext i8 %val8_sext64 to i64
  store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]

; match a zero-extending load volatile 8-bit -> 64-bit.
; This uses the fact that ldrb w0, [x0] will zero out the high 32 bits
; of x0, so it is identical to a zero-extending load to 32 bits.
  %val8_zext64 = load volatile i8, i8* @var_8bit
  %val64_unsigned = zext i8 %val8_zext64 to i64
  store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]

; truncating store volatile 32-bits to 8-bits
  %val32 = load volatile i32, i32* @var_32bit
  %val8_trunc32 = trunc i32 %val32 to i8
  store volatile i8 %val8_trunc32, i8* @var_8bit
; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]

; truncating store volatile 64-bits to 8-bits
  %val64 = load volatile i64, i64* @var_64bit
  %val8_trunc64 = trunc i64 %val64 to i8
  store volatile i8 %val8_trunc64, i8* @var_8bit
; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]

   ret void
}
65
define void @ldst_16bit() {
; CHECK-LABEL: ldst_16bit:

; There is no architectural support for extending loads into 16-bit
; registers: i16 is promoted during lowering, so the extending load
; patterns below must be matched instead.

; match a sign-extending load volatile 16-bit -> 32-bit
  %val16_sext32 = load volatile i16, i16* @var_16bit
  %val32_signed = sext i16 %val16_sext32 to i32
  store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_16bit
; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]

; match a zero-extending load volatile 16-bit -> 32-bit
  %val16_zext32 = load volatile i16, i16* @var_16bit
  %val32_unsigned = zext i16 %val16_zext32 to i32
  store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]

; match an any-extending load volatile 16-bit -> 32-bit
  %val16_anyext = load volatile i16, i16* @var_16bit
  %newval16 = add i16 %val16_anyext, 1
  store volatile i16 %newval16, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]

; match a sign-extending load volatile 16-bit -> 64-bit
  %val16_sext64 = load volatile i16, i16* @var_16bit
  %val64_signed = sext i16 %val16_sext64 to i64
  store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]

; match a zero-extending load volatile 16-bit -> 64-bit.
; This uses the fact that ldrh w0, [x0] will zero out the high 32 bits
; of x0, so it is identical to a zero-extending load to 32 bits.
  %val16_zext64 = load volatile i16, i16* @var_16bit
  %val64_unsigned = zext i16 %val16_zext64 to i64
  store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]

; truncating store volatile 32-bits to 16-bits
  %val32 = load volatile i32, i32* @var_32bit
  %val16_trunc32 = trunc i32 %val32 to i16
  store volatile i16 %val16_trunc32, i16* @var_16bit
; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]

; truncating store volatile 64-bits to 16-bits
  %val64 = load volatile i64, i64* @var_64bit
  %val16_trunc64 = trunc i64 %val64 to i16
  store volatile i16 %val16_trunc64, i16* @var_16bit
; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]

  ret void
}
119
define void @ldst_32bit() {
; CHECK-LABEL: ldst_32bit:

; Plain 32-bit round trip: no extension involved.
  %word = load volatile i32, i32* @var_32bit
  store volatile i32 %word, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]

; Zero-extension to 64 bits: a plain ldr into a w-register suffices.
  %zsrc = load volatile i32, i32* @var_32bit
  %zext64 = zext i32 %zsrc to i64
  store volatile i64 %zext64, i64* @var_64bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]

; Sign-extension to 64 bits: needs the dedicated ldrsw instruction.
  %ssrc = load volatile i32, i32* @var_32bit
  %sext64 = sext i32 %ssrc to i64
  store volatile i64 %sext64, i64* @var_64bit
; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]

; Truncation from 64 bits: only the low word is stored.
  %wide = load volatile i64, i64* @var_64bit
  %narrow = trunc i64 %wide to i32
  store volatile i32 %narrow, i32* @var_32bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]

  ret void
}
153
; Pointer globals: each holds the base address of an array, used to test
; folding of immediate offsets into load/store addressing modes.
@arr8 = global i8* null
@arr16 = global i16* null
@arr32 = global i32* null
@arr64 = global i64* null
158
; Now check that instruction selection copes with accesses more complex
; than a single symbol. Permitted immediate offsets should be folded into
; the loads and stores. Since all forms use the same addressing-mode
; Operand, it is only necessary to check the various access sizes involved.
163
define void @ldst_complex_offsets() {
; Fixed: this previously used a plain "CHECK:" for the function name, unlike
; every other function here. CHECK-LABEL isolates this function's checks so a
; failure elsewhere cannot be masked by matches bleeding across functions.
; CHECK-LABEL: ldst_complex_offsets:

; For each element size, check that the maximum scaled unsigned-immediate
; offset (#4095 elements) is folded into the load's addressing mode.

  %arr8_addr = load volatile i8*, i8** @arr8
; CHECK: adrp {{x[0-9]+}}, arr8
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr8]

  %arr8_sub1_addr = getelementptr i8, i8* %arr8_addr, i64 1
  %arr8_sub1 = load volatile i8, i8* %arr8_sub1_addr
  store volatile i8 %arr8_sub1, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1]

  %arr8_sub4095_addr = getelementptr i8, i8* %arr8_addr, i64 4095
  %arr8_sub4095 = load volatile i8, i8* %arr8_sub4095_addr
  store volatile i8 %arr8_sub4095, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095]


  %arr16_addr = load volatile i16*, i16** @arr16
; CHECK: adrp {{x[0-9]+}}, arr16
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr16]

  %arr16_sub1_addr = getelementptr i16, i16* %arr16_addr, i64 1
  %arr16_sub1 = load volatile i16, i16* %arr16_sub1_addr
  store volatile i16 %arr16_sub1, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2]

  %arr16_sub4095_addr = getelementptr i16, i16* %arr16_addr, i64 4095
  %arr16_sub4095 = load volatile i16, i16* %arr16_sub4095_addr
  store volatile i16 %arr16_sub4095, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190]


  %arr32_addr = load volatile i32*, i32** @arr32
; CHECK: adrp {{x[0-9]+}}, arr32
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr32]

  %arr32_sub1_addr = getelementptr i32, i32* %arr32_addr, i64 1
  %arr32_sub1 = load volatile i32, i32* %arr32_sub1_addr
  store volatile i32 %arr32_sub1, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4]

  %arr32_sub4095_addr = getelementptr i32, i32* %arr32_addr, i64 4095
  %arr32_sub4095 = load volatile i32, i32* %arr32_sub4095_addr
  store volatile i32 %arr32_sub4095, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380]


  %arr64_addr = load volatile i64*, i64** @arr64
; CHECK: adrp {{x[0-9]+}}, arr64
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr64]

  %arr64_sub1_addr = getelementptr i64, i64* %arr64_addr, i64 1
  %arr64_sub1 = load volatile i64, i64* %arr64_sub1_addr
  store volatile i64 %arr64_sub1, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]

  %arr64_sub4095_addr = getelementptr i64, i64* %arr64_addr, i64 4095
  %arr64_sub4095 = load volatile i64, i64* %arr64_sub4095_addr
  store volatile i64 %arr64_sub4095, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760]

  ret void
}
227
define void @ldst_float() {
; CHECK-LABEL: ldst_float:

; A volatile float round trip should use the 32-bit FP register forms;
; with -fp-armv8 disabled no s-register load/store may be emitted.
  %fpval = load volatile float, float* @var_float
; CHECK: adrp {{x[0-9]+}}, var_float
; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_float]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},

  store volatile float %fpval, float* @var_float
; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_float]
; CHECK-NOFP-NOT: str {{s[0-9]+}},

  ret void
}
242
define void @ldst_double() {
; CHECK-LABEL: ldst_double:

; A volatile double round trip should use the 64-bit FP register forms;
; with -fp-armv8 disabled no d-register load/store may be emitted.
  %fpval = load volatile double, double* @var_double
; CHECK: adrp {{x[0-9]+}}, var_double
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_double]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},

  store volatile double %fpval, double* @var_double
; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_double]
; CHECK-NOFP-NOT: str {{d[0-9]+}},

  ret void
}
257