1 /* tc-bpf.c -- Assembler for the Linux eBPF.
2 Copyright (C) 2019-2022 Free Software Foundation, Inc.
3 Contributed by Oracle, Inc.
4
5 This file is part of GAS, the GNU Assembler.
6
7 GAS is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GAS is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GAS; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street - Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "as.h"
23 #include "subsegs.h"
24 #include "symcat.h"
25 #include "opcodes/bpf-desc.h"
26 #include "opcodes/bpf-opc.h"
27 #include "cgen.h"
28 #include "elf/common.h"
29 #include "elf/bpf.h"
30 #include "dwarf2dbg.h"
31
/* Lexical character tables defined by this target.
   NOTE(review): the roles described below follow the usual GAS meaning
   of these names -- confirm against the GAS internals documentation.  */

/* Presumably: characters that start a comment anywhere on a line.  */
const char comment_chars[] = ";";
/* Presumably: characters that start a comment only at line start.  */
const char line_comment_chars[] = "#";
/* Presumably: characters that separate statements on one line.  */
const char line_separator_chars[] = "`";
/* Presumably: characters that introduce a floating point exponent.  */
const char EXP_CHARS[] = "eE";
/* Presumably: letters that mark a floating point constant.  */
const char FLT_CHARS[] = "fFdD";
37
38 /* Like s_lcomm_internal in gas/read.c but the alignment string
39 is allowed to be optional. */
40
41 static symbolS *
pe_lcomm_internal(int needs_align,symbolS * symbolP,addressT size)42 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
43 {
44 addressT align = 0;
45
46 SKIP_WHITESPACE ();
47
48 if (needs_align
49 && *input_line_pointer == ',')
50 {
51 align = parse_align (needs_align - 1);
52
53 if (align == (addressT) -1)
54 return NULL;
55 }
56 else
57 {
58 if (size >= 8)
59 align = 3;
60 else if (size >= 4)
61 align = 2;
62 else if (size >= 2)
63 align = 1;
64 else
65 align = 0;
66 }
67
68 bss_alloc (symbolP, size, align);
69 return symbolP;
70 }
71
72 static void
pe_lcomm(int needs_align)73 pe_lcomm (int needs_align)
74 {
75 s_comm_internal (needs_align * 2, pe_lcomm_internal);
76 }
77
/* The target specific pseudo-ops which we support.  Each entry pairs a
   directive name with a handler function and an integer; the integer
   is presumably the argument passed to the handler (confirm against
   the definition of pseudo_typeS).  The all-NULL entry ends the
   table.  */
const pseudo_typeS md_pseudo_table[] =
{
  { "half", cons, 2 },
  { "word", cons, 4 },
  { "dword", cons, 8 },
  /* .lcomm uses the local handler so the alignment may be omitted.  */
  { "lcomm", pe_lcomm, 1 },
  { NULL, NULL, 0 }
};
87
88
89
/* ISA handling.  */

/* Bitset holding the selected ISA.  It is created and filled in by
   md_begin, which then passes it to bpf_cgen_cpu_open.  */
static CGEN_BITSET *bpf_isa;
92
93
94
/* Command-line options processing.  */

/* Identifiers for the BPF-specific long options, numbered upwards
   from OPTION_MD_BASE.  md_longopts maps option names to these values
   and md_parse_option acts on them.  */
enum options
{
  OPTION_LITTLE_ENDIAN = OPTION_MD_BASE,  /* "EL" */
  OPTION_BIG_ENDIAN,                      /* "EB" */
  OPTION_XBPF                             /* "mxbpf" */
};
103
/* Long options accepted by this target.  None of them takes an
   argument; each maps to one of the values of enum options.  The
   entry with a NULL name ends the table.  */
struct option md_longopts[] =
{
  { "EL", no_argument, NULL, OPTION_LITTLE_ENDIAN },
  { "EB", no_argument, NULL, OPTION_BIG_ENDIAN },
  { "mxbpf", no_argument, NULL, OPTION_XBPF },
  { NULL, no_argument, NULL, 0 },
};
111
/* Size in bytes of the md_longopts table, terminator included.  */
size_t md_longopts_size = sizeof (md_longopts);

/* This target defines no short options.  */
const char * md_shortopts = "";

/* Defined outside this file; nonzero selects big-endian output (see
   md_number_to_chars and md_begin).  */
extern int target_big_endian;

/* Whether target_big_endian has been set while parsing command-line
   arguments.  */
static int set_target_endian = 0;

/* Nonzero when xBPF rather than eBPF instructions are assembled.  */
static int target_xbpf = 0;

/* Whether target_xbpf has been set while parsing command-line
   arguments.  */
static int set_xbpf = 0;
125
126 int
md_parse_option(int c,const char * arg ATTRIBUTE_UNUSED)127 md_parse_option (int c, const char * arg ATTRIBUTE_UNUSED)
128 {
129 switch (c)
130 {
131 case OPTION_BIG_ENDIAN:
132 set_target_endian = 1;
133 target_big_endian = 1;
134 break;
135 case OPTION_LITTLE_ENDIAN:
136 set_target_endian = 1;
137 target_big_endian = 0;
138 break;
139 case OPTION_XBPF:
140 set_xbpf = 1;
141 target_xbpf = 1;
142 break;
143 default:
144 return 0;
145 }
146
147 return 1;
148 }
149
/* Print a description of the BPF-specific command-line options to
   STREAM.  */

void
md_show_usage (FILE * stream)
{
  /* Both strings are passed through _() before being printed.  */
  fprintf (stream, _("\nBPF options:\n"));
  fprintf (stream, _("\
--EL generate code for a little endian machine\n\
--EB generate code for a big endian machine\n\
-mxbpf generate xBPF instructions\n"));
}
159
160
161 void
md_begin(void)162 md_begin (void)
163 {
164 /* Initialize the `cgen' interface. */
165
166 /* If not specified in the command line, use the host
167 endianness. */
168 if (!set_target_endian)
169 {
170 #ifdef WORDS_BIGENDIAN
171 target_big_endian = 1;
172 #else
173 target_big_endian = 0;
174 #endif
175 }
176
177 /* If not specified in the command line, use eBPF rather
178 than xBPF. */
179 if (!set_xbpf)
180 target_xbpf = 0;
181
182 /* Set the ISA, which depends on the target endianness. */
183 bpf_isa = cgen_bitset_create (ISA_MAX);
184 if (target_big_endian)
185 {
186 if (target_xbpf)
187 cgen_bitset_set (bpf_isa, ISA_XBPFBE);
188 else
189 cgen_bitset_set (bpf_isa, ISA_EBPFBE);
190 }
191 else
192 {
193 if (target_xbpf)
194 cgen_bitset_set (bpf_isa, ISA_XBPFLE);
195 else
196 cgen_bitset_set (bpf_isa, ISA_EBPFLE);
197 }
198
199 /* Set the machine number and endian. */
200 gas_cgen_cpu_desc = bpf_cgen_cpu_open (CGEN_CPU_OPEN_ENDIAN,
201 target_big_endian ?
202 CGEN_ENDIAN_BIG : CGEN_ENDIAN_LITTLE,
203 CGEN_CPU_OPEN_INSN_ENDIAN,
204 CGEN_ENDIAN_LITTLE,
205 CGEN_CPU_OPEN_ISAS,
206 bpf_isa,
207 CGEN_CPU_OPEN_END);
208 bpf_cgen_init_asm (gas_cgen_cpu_desc);
209
210 /* This is a callback from cgen to gas to parse operands. */
211 cgen_set_parse_operand_fn (gas_cgen_cpu_desc, gas_cgen_parse_operand);
212
213 /* Set the machine type. */
214 bfd_default_set_arch_mach (stdoutput, bfd_arch_bpf, bfd_mach_bpf);
215 }
216
217 valueT
md_section_align(segT segment,valueT size)218 md_section_align (segT segment, valueT size)
219 {
220 int align = bfd_section_alignment (segment);
221
222 return ((size + (1 << align) - 1) & -(1 << align));
223 }
224
225
226 /* Functions concerning relocs. */
227
228 /* The location from which a PC relative jump should be calculated,
229 given a PC relative reloc. */
230
231 long
md_pcrel_from_section(fixS * fixP,segT sec)232 md_pcrel_from_section (fixS *fixP, segT sec)
233 {
234 if (fixP->fx_addsy != (symbolS *) NULL
235 && (! S_IS_DEFINED (fixP->fx_addsy)
236 || (S_GET_SEGMENT (fixP->fx_addsy) != sec)
237 || S_IS_EXTERNAL (fixP->fx_addsy)
238 || S_IS_WEAK (fixP->fx_addsy)))
239 {
240 /* The symbol is undefined (or is defined but not in this section).
241 Let the linker figure it out. */
242 return 0;
243 }
244
245 return fixP->fx_where + fixP->fx_frag->fr_address;
246 }
247
248 /* Write a value out to the object file, using the appropriate endianness. */
249
250 void
md_number_to_chars(char * buf,valueT val,int n)251 md_number_to_chars (char * buf, valueT val, int n)
252 {
253 if (target_big_endian)
254 number_to_chars_bigendian (buf, val, n);
255 else
256 number_to_chars_littleendian (buf, val, n);
257 }
258
/* Build the relocation entry for fixup FIX in section SEC.  All of the
   work is delegated to gas_cgen_tc_gen_reloc, whose result is returned
   unchanged.  */

arelent *
tc_gen_reloc (asection *sec, fixS *fix)
{
  return gas_cgen_tc_gen_reloc (sec, fix);
}
264
265 /* Return the bfd reloc type for OPERAND of INSN at fixup FIXP. This
266 is called when the operand is an expression that couldn't be fully
267 resolved. Returns BFD_RELOC_NONE if no reloc type can be found.
268 *FIXP may be modified if desired. */
269
270 bfd_reloc_code_real_type
md_cgen_lookup_reloc(const CGEN_INSN * insn ATTRIBUTE_UNUSED,const CGEN_OPERAND * operand,fixS * fixP)271 md_cgen_lookup_reloc (const CGEN_INSN *insn ATTRIBUTE_UNUSED,
272 const CGEN_OPERAND *operand,
273 fixS *fixP)
274 {
275 switch (operand->type)
276 {
277 case BPF_OPERAND_OFFSET16:
278 return BFD_RELOC_BPF_16;
279 case BPF_OPERAND_IMM32:
280 return BFD_RELOC_BPF_32;
281 case BPF_OPERAND_IMM64:
282 return BFD_RELOC_BPF_64;
283 case BPF_OPERAND_DISP16:
284 fixP->fx_pcrel = 1;
285 return BFD_RELOC_BPF_DISP16;
286 case BPF_OPERAND_DISP32:
287 fixP->fx_pcrel = 1;
288 return BFD_RELOC_BPF_DISP32;
289 default:
290 break;
291 }
292 return BFD_RELOC_NONE;
293 }
294
/* *FRAGP has been relaxed to its final size, and now needs to have
   the bytes inside it modified to conform to the new size.

   Called after relaxation is finished.
   fragP->fr_type == rs_machine_dependent.
   fragP->fr_subtype is the subtype of what the address relaxed to.

   This port does not implement the conversion: md_assemble asks
   gas_cgen_finish_insn to ban relaxable insns, so any call that
   reaches this function is reported as a fatal error.  All arguments
   are unused.  */

void
md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED,
                 segT sec ATTRIBUTE_UNUSED,
                 fragS *fragP ATTRIBUTE_UNUSED)
{
  as_fatal (_("convert_frag called"));
}
309
/* Relaxation size estimate hook.  This port does not implement it:
   any call is reported as a fatal error.  Both arguments are
   unused.  */

int
md_estimate_size_before_relax (fragS *fragP ATTRIBUTE_UNUSED,
                               segT segment ATTRIBUTE_UNUSED)
{
  as_fatal (_("estimate_size_before_relax called"));
  /* Presumably never reached; gives the function a return value.  */
  return 0;
}
317
318
319 void
md_apply_fix(fixS * fixP,valueT * valP,segT seg)320 md_apply_fix (fixS *fixP, valueT *valP, segT seg)
321 {
322 /* Some fixups for instructions require special attention. This is
323 handled in the code block below. */
324 if ((int) fixP->fx_r_type >= (int) BFD_RELOC_UNUSED)
325 {
326 int opindex = (int) fixP->fx_r_type - (int) BFD_RELOC_UNUSED;
327 const CGEN_OPERAND *operand = cgen_operand_lookup_by_num (gas_cgen_cpu_desc,
328 opindex);
329 char *where;
330
331 switch (operand->type)
332 {
333 case BPF_OPERAND_DISP32:
334 /* eBPF supports two kind of CALL instructions: the so
335 called pseudo calls ("bpf to bpf") and external calls
336 ("bpf to kernel").
337
338 Both kind of calls use the same instruction (CALL).
339 However, external calls are constructed by passing a
340 constant argument to the instruction, whereas pseudo
341 calls result from expressions involving symbols. In
342 practice, instructions requiring a fixup are interpreted
343 as pseudo-calls. If we are executing this code, this is
344 a pseudo call.
345
346 The kernel expects for pseudo-calls to be annotated by
347 having BPF_PSEUDO_CALL in the SRC field of the
348 instruction. But beware the infamous nibble-swapping of
349 eBPF and take endianness into account here.
350
351 Note that the CALL instruction has only one operand, so
352 this code is executed only once per instruction. */
353 where = fixP->fx_frag->fr_literal + fixP->fx_where + 1;
354 where[0] = target_big_endian ? 0x01 : 0x10;
355 /* Fallthrough. */
356 case BPF_OPERAND_DISP16:
357 /* The PC-relative displacement fields in jump instructions
358 shouldn't be in bytes. Instead, they hold the number of
359 64-bit words to the target, _minus one_. */
360 *valP = (((long) (*valP)) - 8) / 8;
361 break;
362 default:
363 break;
364 }
365 }
366
367 /* And now invoke CGEN's handler, which will eventually install
368 *valP into the corresponding operand. */
369 gas_cgen_md_apply_fix (fixP, valP, seg);
370 }
371
372 void
md_assemble(char * str)373 md_assemble (char *str)
374 {
375 const CGEN_INSN *insn;
376 char *errmsg;
377 CGEN_FIELDS fields;
378
379 #if CGEN_INT_INSN_P
380 CGEN_INSN_INT buffer[CGEN_MAX_INSN_SIZE / sizeof (CGEN_INT_INSN_P)];
381 #else
382 unsigned char buffer[CGEN_MAX_INSN_SIZE];
383 #endif
384
385 gas_cgen_init_parse ();
386 insn = bpf_cgen_assemble_insn (gas_cgen_cpu_desc, str, &fields,
387 buffer, &errmsg);
388
389 if (insn == NULL)
390 {
391 as_bad ("%s", errmsg);
392 return;
393 }
394
395 gas_cgen_finish_insn (insn, buffer, CGEN_FIELDS_BITSIZE (&fields),
396 0, /* zero to ban relaxable insns. */
397 NULL); /* NULL so results not returned here. */
398 }
399
/* Operand hook: forwards EXPRESSIONP unchanged to
   gas_cgen_md_operand.  */

void
md_operand (expressionS *expressionP)
{
  gas_cgen_md_operand (expressionP);
}
405
406
/* Undefined symbol hook.  This port does nothing with NAME and always
   returns NULL.  */

symbolS *
md_undefined_symbol (char *name ATTRIBUTE_UNUSED)
{
  return NULL;
}
412
413
/* Turn a string in input_line_pointer into a floating point constant
   of type TYPE, and store the appropriate bytes in *LITP.  The number
   of LITTLENUMS emitted is stored in *SIZEP.  An error message is
   returned, or NULL on OK.

   The conversion itself is done by ieee_md_atof.  */

const char *
md_atof (int type, char *litP, int *sizeP)
{
  /* NOTE(review): the last argument is a constant false rather than
     something derived from target_big_endian -- confirm that this is
     intended for big-endian targets.  */
  return ieee_md_atof (type, litP, sizeP, false);
}
424