1\# --------------------------------------------------------------------------
2\#
3\#   Copyright 1996-2020 The NASM Authors - All Rights Reserved
4\M{year}{1996-2020}
5\#   See the file AUTHORS included with the NASM distribution for
6\#   the specific copyright holders.
7\#
8\#   Redistribution and use in source and binary forms, with or without
9\#   modification, are permitted provided that the following
10\#   conditions are met:
11\#
12\#   * Redistributions of source code must retain the above copyright
13\#     notice, this list of conditions and the following disclaimer.
14\#   * Redistributions in binary form must reproduce the above
15\#     copyright notice, this list of conditions and the following
16\#     disclaimer in the documentation and/or other materials provided
17\#     with the distribution.
18\#
19\#     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
20\#     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21\#     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22\#     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23\#     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
24\#     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25\#     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26\#     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27\#     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28\#     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29\#     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30\#     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31\#     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32\#
33\# --------------------------------------------------------------------------
34\#
35\# Source code to NASM documentation
36\#
37
38\M{category}{Programming}
39\M{title}{NASM - The Netwide Assembler}
40\M{author}{The NASM Development Team}
41\M{copyright_tail}{-- All Rights Reserved}
42\M{license}{This document is redistributable under the license given in the section "License".}
43\M{summary}{This file documents NASM, the Netwide Assembler: an assembler targeting the Intel x86 series of processors, with portable source.}
44\M{infoname}{NASM}
45\M{infofile}{nasm}
46\M{infotitle}{The Netwide Assembler for x86}
47\M{epslogo}{nasmlogo.eps}
48\M{logoyadj}{-72}
49
50\& version.src
51
52\IR{-D} \c{-D} option
53\IR{-E} \c{-E} option
54\IR{-F} \c{-F} option
55\IR{-I} \c{-I} option
56\IR{-L} \c{-L} option
57\IR{-M} \c{-M} option
58\IR{-MD} \c{-MD} option
59\IR{-MF} \c{-MF} option
60\IR{-MG} \c{-MG} option
61\IR{-MP} \c{-MP} option
62\IR{-MQ} \c{-MQ} option
63\IR{-MT} \c{-MT} option
64\IR{-MW} \c{-MW} option
65\IR{-O} \c{-O} option
66\IR{-P} \c{-P} option
67\IR{-U} \c{-U} option
68\IR{-X} \c{-X} option
69\IR{-a} \c{-a} option
70\IR{-d} \c{-d} option
71\IR{-e} \c{-e} option
72\IR{-f} \c{-f} option
73\IR{-g} \c{-g} option
74\IR{-i} \c{-i} option
75\IR{-l} \c{-l} option
76\IR{-o} \c{-o} option
77\IR{-p} \c{-p} option
78\IR{-s} \c{-s} option
79\IR{-u} \c{-u} option
80\IR{-v} \c{-v} option
81\IR{-W} \c{-W} option
82\IR{-Werror} \c{-Werror} option
83\IR{-Wno-error} \c{-Wno-error} option
84\IR{-w} \c{-w} option
85\IR{-Z} \c{-Z} option
86\IR{!=} \c{!=} operator
87\IR{$, here} \c{$}, Here token
88\IR{$, prefix} \c{$}, prefix
89\IR{$$} \c{$$} token
90\IR{%} \c{%} operator
91\IR{%db} \c{%} prefix to \c{DB} lists
92\IR{%%} \c{%%} operator
93\IR{%+1} \c{%+1} and \c{%-1} syntax
94\IA{%-1}{%+1}
95\IR{%0} \c{%0} parameter count
96\IR{&} \c{&} operator
97\IR{&&} \c{&&} operator
98\IR{*} \c{*} operator
99\IR{..@} \c{..@} symbol prefix
100\IR{/} \c{/} operator
101\IR{//} \c{//} operator
102\IR{<} \c{<} operator
103\IR{<<} \c{<<} operator
104\IR{<<<} \c{<<<} operator
105\IR{<=>} \c{<=>} operator
106\IR{<=} \c{<=} operator
107\IR{<>} \c{<>} operator
108\IR{<=>} \c{<=>} operator
109\IR{=} \c{=} operator
110\IR{==} \c{==} operator
111\IR{>} \c{>} operator
112\IR{>=} \c{>=} operator
113\IR{>>} \c{>>} operator
114\IR{>>>} \c{>>>} operator
115\IR{?db} \c{?}, data syntax
116\IR{?op} \c{?}, operator
117\IR{^} \c{^} operator
118\IR{^^} \c{^^} operator
119\IR{|} \c{|} operator
120\IR{||} \c{||} operator
121\IR{~} \c{~} operator
122\IR{%$} \c{%$} and \c{%$$} prefixes
123\IA{%$$}{%$}
124\IR{+ opaddition} \c{+} operator, binary
125\IR{+ opunary} \c{+} operator, unary
126\IR{+ modifier} \c{+} modifier
127\IR{- opsubtraction} \c{-} operator, binary
128\IR{- opunary} \c{-} operator, unary
129\IR{! opunary} \c{!} operator
130\IA{A16}{a16}
131\IA{A32}{a32}
132\IA{A64}{a64}
133\IA{O16}{o16}
134\IA{O32}{o32}
135\IA{O64}{o64}
136\IR{alignment, in bin sections} alignment, in \c{bin} sections
137\IR{alignment, in elf sections} alignment, in ELF sections
138\IR{alignment, in win32 sections} alignment, in \c{win32} sections
139\IR{alignment, of elf common variables} alignment, of ELF common
140variables
141\IR{alignment, in obj sections} alignment, in \c{obj} sections
142\IR{a.out, bsd version} \c{a.out}, BSD version
143\IR{a.out, linux version} \c{a.out}, Linux version
144\IR{bin} \c{bin} output format
145\IR{bitwise and} bitwise AND
146\IR{bitwise or} bitwise OR
147\IR{bitwise xor} bitwise XOR
148\IR{block ifs} block IFs
149\IR{borland pascal} Borland, Pascal
150\IR{borland's win32 compilers} Borland, Win32 compilers
151\IR{braces, after % sign} braces, after \c{%} sign
152\IR{bsd} BSD
153\IR{c calling convention} C calling convention
154\IR{c symbol names} C symbol names
155\IA{critical expressions}{critical expression}
156\IA{command line}{command-line}
157\IA{case sensitivity}{case sensitive}
158\IA{case-sensitive}{case sensitive}
159\IA{case-insensitive}{case sensitive}
160\IA{character constants}{character constant}
161\IR{codeview debugging format} CodeView debugging format
162\IR{common object file format} Common Object File Format
163\IR{common variables, alignment in elf} common variables, alignment in ELF
164\IR{common, elf extensions to} \c{COMMON}, ELF extensions to
165\IR{common, obj extensions to} \c{COMMON}, \c{obj} extensions to
166\IR{declaring structure} declaring structures
167\IR{default-wrt mechanism} default-\c{WRT} mechanism
168\IR{devpac} DevPac
169\IR{djgpp} DJGPP
170\IR{dll symbols, exporting} DLL symbols, exporting
171\IR{dll symbols, importing} DLL symbols, importing
172\IR{dos} DOS
173\IA{effective address}{effective addresses}
174\IA{effective-address}{effective addresses}
175\IR{elf} ELF
176\IR{elf, 16-bit code} ELF, 16-bit code
177\IR{elf, debug formats} ELF, debug formats
178\IR{elf shared library} ELF, shared libraries
179\IR{elf32} \c{elf32}
180\IR{elf64} \c{elf64}
181\IR{elfx32} \c{elfx32}
182\IR{executable and linkable format} Executable and Linkable Format
183\IR{extern, elf extensions to} \c{EXTERN}, \c{elf} extensions to
184\IR{extern, obj extensions to} \c{EXTERN}, \c{obj} extensions to
185\IR{extern, rdf extensions to} \c{EXTERN}, \c{rdf} extensions to
186\IR{floating-point, constants} floating-point, constants
187\IR{floating-point, packed bcd constants} floating-point, packed BCD constants
188\IR{freebsd} FreeBSD
189\IR{freelink} FreeLink
190\IR{functions, c calling convention} functions, C calling convention
191\IR{functions, pascal calling convention} functions, \c{PASCAL} calling convention
192\IR{global, aoutb extensions to} \c{GLOBAL}, \c{aoutb} extensions to
193\IR{global, elf extensions to} \c{GLOBAL}, ELF extensions to
194\IR{global, rdf extensions to} \c{GLOBAL}, \c{rdf} extensions to
195\IR{got} GOT
196\IR{got relocations} \c{GOT} relocations
197\IR{gotoff relocation} \c{GOTOFF} relocations
198\IR{gotpc relocation} \c{GOTPC} relocations
199\IR{intel number formats} Intel number formats
200\IR{linux, elf} Linux, ELF
201\IR{linux, a.out} Linux, \c{a.out}
202\IR{linux, as86} Linux, \c{as86}
203\IR{mach object file format} Mach, object file format
204\IA{mach-o}{macho}
205\IR{mach-o} Mach-O, object file format
206\IR{macho32} \c{macho32}
207\IR{macho64} \c{macho64}
208\IR{macos x} MacOS X
209\IR{masm} MASM
210\IR{masmdb} MASM, \c{DB} syntax
211\IA{memory reference}{memory references}
212\IR{minix} Minix
213\IA{misc directory}{misc subdirectory}
214\IR{misc subdirectory} \c{misc} subdirectory
215\IR{microsoft omf} Microsoft OMF
216\IR{ms-dos} MS-DOS
217\IR{ms-dos device drivers} MS-DOS device drivers
218\IR{multipush} \c{multipush} macro
219\IR{nan} NaN
220\IR{nasm version} NASM version
221\IR{nasm version history} NASM version, history
222\IR{nasm version macros} NASM version, macros
223\IR{nasm version id} NASM version, ID macro
224\IR{nasm version string} NASM version, string macro
225\IR{arithmetic negation} negation, arithmetic
226\IR{bitwise negation} negation, bitwise
227\IR{boolean negation} negation, boolean
228\IR{boolean and} boolean, AND
229\IR{boolean or} boolean, OR
230\IR{boolean xor} boolean, XOR
231\IR{netbsd} NetBSD
232\IR{nsis} NSIS
233\IR{nullsoft scriptable installer} Nullsoft Scriptable Installer
234\IA{.OBJ}{.obj}
235\IR{omf} OMF
236\IR{openbsd} OpenBSD
237\IR{operating system} operating system
238\IR{os/2} OS/2
239\IR{pascal calling convention} Pascal calling convention
240\IR{pic} PIC
241\IR{pharlap} PharLap
242\IR{plt} PLT
243\IR{plt relocations} \c{PLT} relocations
244\IA{pre-defining macros}{pre-define}
245\IA{preprocessor expressions}{preprocessor, expressions}
246\IA{preprocessor loops}{preprocessor, loops}
247\IA{preprocessor variables}{preprocessor, variables}
248\IA{rdoff subdirectory}{rdoff}
249\IR{rdoff} \c{rdoff} subdirectory
250\IR{relocatable dynamic object file format} Relocatable Dynamic
251Object File Format
252\IR{relocations, pic-specific} relocations, PIC-specific
253\IA{repeating}{repeating code}
254\IR{section alignment, in elf} section alignment, in ELF
255\IR{section alignment, in bin} section alignment, in \c{bin}
256\IR{section alignment, in obj} section alignment, in \c{obj}
257\IR{section alignment, in win32} section alignment, in \c{win32}
258\IR{section, elf extensions to} \c{SECTION}, ELF extensions to
259\IR{section, macho extensions to} \c{SECTION}, \c{macho} extensions to
260\IR{section, windows extensions to} \c{SECTION}, Windows extensions to
261\IR{segment alignment, in bin} segment alignment, in \c{bin}
262\IR{segment alignment, in obj} segment alignment, in \c{obj}
263\IR{segment, obj extensions to} \c{SEGMENT}, \c{obj} extensions to
264\IR{segment names, borland pascal} segment names, Borland Pascal
265\IR{shift command} \c{shift} command
266\IA{string constant}{string constants}
267\IR{string constants} string, constants
268\IR{string length} string, length
269\IR{string manipulation in macros} string, manipulation in macros
270\IR{align, smart} \c{ALIGN}, smart
271\IA{sectalign}{sectalign}
272\IR{solaris x86} Solaris x86
273\IA{standard section names}{standardized section names}
274\IR{symbols, exporting from dlls} symbols, exporting from DLLs
275\IR{symbols, importing from dlls} symbols, importing from DLLs
276\IR{test subdirectory} \c{test} subdirectory
277\IR{thread local storage in elf} thread local storage, in ELF
278\IR{thread local storage in mach-o} thread local storage, in \c{macho}
279\IR{tlink} \c{TLINK}
280\IR{unconditionally importing symbols} importing symbols, unconditionally
281\IR{underscore, in c symbols} underscore, in C symbols
282\IA{uninitialized storage}{storage, uninitialized}
283\IR{unicode} Unicode
284\IR{unix} Unix
285\IR{utf-8} UTF-8
286\IR{utf-16} UTF-16
287\IR{utf-32} UTF-32
288\IA{sco unix}{unix, sco}
289\IR{unix, sco} Unix, SCO
290\IA{unix system v}{unix, system v}
291\IR{unix, system v} Unix, System V
292\IR{unixware} UnixWare
293\IR{val} VAL
294\IA{version number of nasm}{nasm, version}
295\IR{visual c++} Visual C++
296\IR{win32} Win32
297\IR{win64} Win64
298\IR{windows} Windows
299\IR{windows debugging formats} Windows, debugging formats
300\# \IC{program entry point}{entry point, program}
301\# \IC{program entry point}{start point, program}
302\# \IC{MS-DOS device drivers}{device drivers, MS-DOS}
303\# \IC{16-bit mode, versus 32-bit mode}{32-bit mode, versus 16-bit mode}
304\# \IC{c symbol names}{symbol names, in C}
305
306
307\C{intro} Introduction
308
309\H{whatsnasm} What Is NASM?
310
311The Netwide Assembler, NASM, is an 80x86 and x86-64 assembler designed
312for portability and modularity. It supports a range of object file
313formats, including Linux and *BSD \c{a.out}, ELF, Mach-O, 16-bit and
31432-bit \c{.obj} (OMF) format, COFF (including its Win32 and Win64
315variants). It can also output plain binary files, Intel hex and
316Motorola S-Record formats. Its syntax is designed to be simple and
317easy to understand, similar to the syntax in the Intel Software
318Developer Manual with minimal complexity. It supports all currently
319known x86 architectural extensions, and has strong support for macros.
320
321\S{legal} \i{License}
322
323NASM is under the so-called 2-clause BSD license, also
324known as the simplified BSD license:
325
326Copyright \m{year} the NASM Authors - All rights reserved.
327
328Redistribution and use in source and binary forms, with or without
329modification, are permitted provided that the following conditions are
330met:
331
332\b Redistributions of source code must retain the above copyright
333notice, this list of conditions and the following disclaimer.
334
335\b Redistributions in binary form must reproduce the above copyright
336notice, this list of conditions and the following disclaimer in the
337documentation and/or other materials provided with the distribution.
338
339THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
340CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
341INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
342MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
343DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
344CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
345SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
346NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
347LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
348HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
349CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
350OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
351EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
352
353\C{running} Running NASM
354
355\H{syntax} NASM \i{Command-Line} Syntax
356
357To assemble a file, you issue a command of the form
358
359\c nasm -f <format> <filename> [-o <output>]
360
361For example,
362
363\c nasm -f elf myfile.asm
364
365will assemble \c{myfile.asm} into an ELF object file \c{myfile.o}. And
366
367\c nasm -f bin myfile.asm -o myfile.com
368
369will assemble \c{myfile.asm} into a raw binary file \c{myfile.com}.
370
371To produce a listing file, with the hex codes output from NASM
372displayed on the left of the original sources, use the \c{-l} option
373to give a listing file name, for example:
374
375\c nasm -f coff myfile.asm -l myfile.lst
376
377To get further usage instructions from NASM, try typing
378
379\c nasm -h
380
381The option \c{--help} is an alias for the \c{-h} option.
382
383If you use Linux but aren't sure whether your system is \c{a.out}
384or ELF, type
385
386\c file nasm
387
388(in the directory in which you put the NASM binary when you
389installed it). If it says something like
390
391\c nasm: ELF 32-bit LSB executable i386 (386 and up) Version 1
392
393then your system is \c{ELF}, and you should use the option \c{-f elf}
394when you want NASM to produce Linux object files. If it says
395
396\c nasm: Linux/i386 demand-paged executable (QMAGIC)
397
398or something similar, your system is \c{a.out}, and you should use
399\c{-f aout} instead. (Linux \c{a.out} systems have long been obsolete,
400and are rare these days.)
401
402Like Unix compilers and assemblers, NASM is silent unless it
403goes wrong: you won't see any output at all, unless it gives error
404messages.
405
406
407\S{opt-o} The \i\c{-o} Option: Specifying the Output File Name
408
409NASM will normally choose the name of your output file for you;
410precisely how it does this is dependent on the object file format.
411For Microsoft object file formats (\c{obj}, \c{win32} and \c{win64}),
412it will remove the \c{.asm} \i{extension} (or whatever extension you
413like to use - NASM doesn't care) from your source file name and
414substitute \c{.obj}. For Unix object file formats (\c{aout}, \c{as86},
415\c{coff}, \c{elf32}, \c{elf64}, \c{elfx32}, \c{ieee}, \c{macho32} and
416\c{macho64}) it will substitute \c{.o}. For \c{dbg}, \c{rdf}, \c{ith}
417and \c{srec}, it will use \c{.dbg}, \c{.rdf}, \c{.ith} and \c{.srec},
418respectively, and for the \c{bin} format it will simply remove the
419extension, so that \c{myfile.asm} produces the output file \c{myfile}.
420
421If the output file already exists, NASM will overwrite it, unless it
422has the same name as the input file, in which case it will give a
423warning and use \i\c{nasm.out} as the output file name instead.
424
425For situations in which this behaviour is unacceptable, NASM
426provides the \c{-o} command-line option, which allows you to specify
427your desired output file name. You invoke \c{-o} by following it
428with the name you wish for the output file, either with or without
429an intervening space. For example:
430
431\c nasm -f bin program.asm -o program.com
432\c nasm -f bin driver.asm -odriver.sys
433
434Note that this is a small o, and is different from a capital O, which
435is used to specify the number of optimization passes required. See \k{opt-O}.
436
437
438\S{opt-f} The \i\c{-f} Option: Specifying the \i{Output File Format}
439
440If you do not supply the \c{-f} option to NASM, it will choose an
441output file format for you itself. In the distribution versions of
442NASM, the default is always \i\c{bin}; if you've compiled your own
443copy of NASM, you can redefine \i\c{OF_DEFAULT} at compile time and
444choose what you want the default to be.
445
446Like \c{-o}, the intervening space between \c{-f} and the output
447file format is optional; so \c{-f elf} and \c{-felf} are both valid.
448
449A complete list of the available output file formats can be given by
450issuing the command \i\c{nasm -h}.
451
452
453\S{opt-l} The \i\c{-l} Option: Generating a \i{Listing File}
454
455If you supply the \c{-l} option to NASM, followed (with the usual
456optional space) by a file name, NASM will generate a
457\i{source-listing file} for you, in which addresses and generated
458code are listed on the left, and the actual source code, with
459expansions of multi-line macros (except those which specifically
460request no expansion in source listings: see \k{nolist}) on the
461right. For example:
462
463\c nasm -f elf myfile.asm -l myfile.lst
464
465If a list file is selected, you may turn off listing for a
466section of your source with \c{[list -]}, and turn it back on
467with \c{[list +]} (the default, obviously). There is no "user
468form" (without the brackets). This can be used to list only
469sections of interest, avoiding excessively long listings.
470
471\S{opt-L} The \i\c{-L} Option: Additional or Modified Listing Info
472
473Use this option to specify listing output details.
474
475Supported options are:
476
477\b \c{-Lb} show builtin macro packages (standard and \c{%use})
478
479\b \c{-Ld} show byte and repeat counts in decimal, not hex
480
481\b \c{-Le} show the preprocessed input
482
483\b \c{-Lf} ignore \c{.nolist} and force listing output
484
485\b \c{-Lm} show multi-line macro calls with expanded parameters
486
487\b \c{-Lp} output a list file in every pass, in case of errors
488
489\b \c{-Ls} show all single-line macro definitions
490
491\b \c{-Lw} flush the output after every line (very slow, mainly useful
492to debug NASM crashes)
493
494\b \c{-L+} enable \e{all} listing options except \c{-Lw} (very verbose)
495
496These options can be enabled or disabled at runtime using the
497\c{%pragma list options} directive:
498
499\c %pragma list options [+|-]flags...
500
501For example, to turn on the \c{d} and \c{m} flags but disable the
502\c{s} flag:
503
504\c %pragma list options +dm -s
505
506For forward compatibility reasons, an undefined flag will be
507ignored. Thus, a new flag introduced in a newer version of NASM can be
508specified without breaking older versions. Listing flags will always
509be a single alphanumeric character and are case sensitive.
510
511\S{opt-M} The \i\c{-M} Option: Generate \i{Makefile Dependencies}
512
513This option can be used to generate makefile dependencies on stdout.
514This can be redirected to a file for further processing. For example:
515
516\c nasm -M myfile.asm > myfile.dep
517
518
519\S{opt-MG} The \i\c{-MG} Option: Generate \i{Makefile Dependencies}
520
521This option can be used to generate makefile dependencies on stdout.
522This differs from the \c{-M} option in that if a nonexistent file is
523encountered, it is assumed to be a generated file and is added to the
524dependency list without a prefix.
525
526
527\S{opt-MF} The \i\c{-MF} Option: Set Makefile Dependency File
528
529This option can be used with the \c{-M} or \c{-MG} options to send the
530output to a file, rather than to stdout.  For example:
531
532\c nasm -M -MF myfile.dep myfile.asm
533
534
535\S{opt-MD} The \i\c{-MD} Option: Assemble and Generate Dependencies
536
537The \c{-MD} option acts as the combination of the \c{-M} and \c{-MF}
538options (i.e. a filename has to be specified.)  However, unlike the
539\c{-M} or \c{-MG} options, \c{-MD} does \e{not} inhibit the normal
540operation of the assembler.  Use this to automatically generate
541updated dependencies with every assembly session.  For example:
542
543\c nasm -f elf -o myfile.o -MD myfile.dep myfile.asm
544
545If the argument after \c{-MD} is an option rather than a filename,
546then the output filename is the first applicable one of:
547
548\b the filename set in the \c{-MF} option;
549
550\b the output filename from the \c{-o} option with \c{.d} appended;
551
552\b the input filename with the extension set to \c{.d}.
553
554
555\S{opt-MT} The \i\c{-MT} Option: Dependency Target Name
556
557The \c{-MT} option can be used to override the default name of the
558dependency target.  This is normally the same as the output filename,
559specified by the \c{-o} option.
560
561
562\S{opt-MQ} The \i\c{-MQ} Option: Dependency Target Name (Quoted)
563
564The \c{-MQ} option acts as the \c{-MT} option, except it tries to
565quote characters that have special meaning in Makefile syntax.  This
566is not foolproof, as not all characters with special meaning are
567quotable in Make.  The default output (if no \c{-MT} or \c{-MQ} option
568is specified) is automatically quoted.
569
570
571\S{opt-MP} The \i\c{-MP} Option: Emit phony targets
572
573When used with any of the dependency generation options, the \c{-MP}
574option causes NASM to emit a phony target without dependencies for
575each header file.  This prevents Make from complaining if a header
576file has been removed.
577
578
579\S{opt-MW} The \i\c{-MW} Option: Watcom Make quoting style
580
581This option causes NASM to attempt to quote dependencies according to
582Watcom Make conventions rather than POSIX Make conventions (also used
583by most other Make variants.)  This quotes \c{#} as \c{$#} rather than
584\c{\\#}, uses \c{&} rather than \c{\\} for continuation lines, and
585encloses filenames containing whitespace in double quotes.
586
587
588\S{opt-F} The \i\c{-F} Option: Selecting a \i{Debug Information Format}
589
590This option is used to select the format of the debug information
591emitted into the output file, to be used by a debugger (or \e{will}
592be). Prior to version 2.03.01, the use of this switch did \e{not} enable
593output of the selected debug info format.  Use \c{-g}, see \k{opt-g},
594to enable output.  Versions 2.03.01 and later automatically enable \c{-g}
595if \c{-F} is specified.
596
597A complete list of the available debug file formats for an output
598format can be seen by issuing the command \c{nasm -h}.  Not
599all output formats currently support debugging output.
600
601This should not be confused with the \c{-f dbg} output format option,
602see \k{dbgfmt}.
603
604
605\S{opt-g} The \i\c{-g} Option: Enabling \i{Debug Information}
606
607This option can be used to generate debugging information in the specified
608format. See \k{opt-F}. Using \c{-g} without \c{-F} results in emitting
609debug info in the default format, if any, for the selected output format.
610If no debug information is currently implemented in the selected output
611format, \c{-g} is \e{silently ignored}.
612
613
614\S{opt-X} The \i\c{-X} Option: Selecting an \i{Error Reporting Format}
615
616This option can be used to select an error reporting format for any
617error messages that might be produced by NASM.
618
619Currently, two error reporting formats may be selected.  They are
620the \c{-Xvc} option and the \c{-Xgnu} option.  The GNU format is
621the default and looks like this:
622
623\c filename.asm:65: error: specific error message
624
625where \c{filename.asm} is the name of the source file in which the
626error was detected, \c{65} is the source file line number on which
627the error was detected, \c{error} is the severity of the error (this
628could be \c{warning}), and \c{specific error message} is a more
629detailed text message which should help pinpoint the exact problem.
630
631The other format, specified by \c{-Xvc} is the style used by Microsoft
632Visual C++ and some other programs.  It looks like this:
633
634\c filename.asm(65) : error: specific error message
635
636where the only difference is that the line number is in parentheses
637instead of being delimited by colons.
638
639See also the \c{Visual C++} output format, \k{win32fmt}.
640
641\S{opt-Z} The \i\c{-Z} Option: Send Errors to a File
642
643Under \I{DOS}\c{MS-DOS} it can be difficult (though there are ways) to
644redirect the standard-error output of a program to a file. Since
645NASM usually produces its warning and \i{error messages} on
646\i\c{stderr}, this can make it hard to capture the errors if (for
647example) you want to load them into an editor.
648
649NASM therefore provides the \c{-Z} option, taking a filename argument
650which causes errors to be sent to the specified file rather than
651standard error. Therefore you can \I{redirecting errors}redirect
652the errors into a file by typing
653
654\c nasm -Z myfile.err -f obj myfile.asm
655
656In earlier versions of NASM, this option was called \c{-E}, but it was
657changed since \c{-E} is an option conventionally used for
658preprocessing only, with disastrous results.  See \k{opt-E}.
659
660\S{opt-s} The \i\c{-s} Option: Send Errors to \i\c{stdout}
661
662The \c{-s} option redirects \i{error messages} to \c{stdout} rather
663than \c{stderr}, so it can be redirected under \I{DOS}\c{MS-DOS}. To
664assemble the file \c{myfile.asm} and pipe its output to the \c{more}
665program, you can type:
666
667\c nasm -s -f obj myfile.asm | more
668
669See also the \c{-Z} option, \k{opt-Z}.
670
671
672\S{opt-i} The \i\c{-i}\I\c{-I} Option: Include File Search Directories
673
674When NASM sees the \i\c{%include} or \i\c{%pathsearch} directive in a
675source file (see \k{include}, \k{pathsearch} or \k{incbin}), it will
676search for the given file not only in the current directory, but also
677in any directories specified on the command line by the use of the
678\c{-i} option. Therefore you can include files from a \i{macro
679library}, for example, by typing
680
681\c nasm -ic:\macrolib\ -f obj myfile.asm
682
683(As usual, a space between \c{-i} and the path name is allowed, and
684optional).
685
686Prior to NASM 2.14, a path provided in the option was used as a
687verbatim copy, and providing a path separator was up to the caller.
688One could implicitly concatenate a search path together with a filename.
689Still, this was more of a trick than something useful. Now the trailing
690path separator is always made to be present, thus \c{-ifoo} will be
691considered as the \c{-ifoo/} directory.
692
693If you want to define a \e{standard} \i{include search path},
694similar to \c{/usr/include} on Unix systems, you should place one or
695more \c{-i} directives in the \c{NASMENV} environment variable (see
696\k{nasmenv}).
697
698For Makefile compatibility with many C compilers, this option can also
699be specified as \c{-I}.
700
701
702\S{opt-p} The \i\c{-p}\I\c{-P} Option: \I{pre-including files}Pre-Include a File
703
704\I\c{%include}NASM allows you to specify files to be
705\e{pre-included} into your source file, by the use of the \c{-p}
706option. So running
707
708\c nasm myfile.asm -p myinc.inc
709
710is equivalent to running \c{nasm myfile.asm} and placing the
711directive \c{%include "myinc.inc"} at the start of the file.
712
713The \c{--include} option is also accepted.
714
715For consistency with the \c{-I}, \c{-D} and \c{-U} options, this
716option can also be specified as \c{-P}.
717
718
719
720\S{opt-d} The \i\c{-d}\I\c{-D} Option: \I{pre-defining macros}Pre-Define a Macro
721
722\I\c{%define}Just as the \c{-p} option gives an alternative to placing
723\c{%include} directives at the start of a source file, the \c{-d}
724option gives an alternative to placing a \c{%define} directive. You
725could code
726
727\c nasm myfile.asm -dFOO=100
728
729as an alternative to placing the directive
730
731\c %define FOO 100
732
733at the start of the file. You can miss off the macro value, as well:
734the option \c{-dFOO} is equivalent to coding \c{%define FOO}. This
735form of the directive may be useful for selecting \i{assembly-time
736options} which are then tested using \c{%ifdef}, for example
737\c{-dDEBUG}.
738
739For Makefile compatibility with many C compilers, this option can also
740be specified as \c{-D}.
741
742
743\S{opt-u} The \i\c{-u}\I\c{-U} Option: \I{Undefining macros}Undefine a Macro
744
745\I\c{%undef}The \c{-u} option undefines a macro that would otherwise
746have been pre-defined, either automatically or by a \c{-p} or \c{-d}
747option specified earlier on the command line.
748
749For example, the following command line:
750
751\c nasm myfile.asm -dFOO=100 -uFOO
752
753would result in \c{FOO} \e{not} being a predefined macro in the
754program. This is useful to override options specified at a different
755point in a Makefile.
756
757For Makefile compatibility with many C compilers, this option can also
758be specified as \c{-U}.
759
760
761\S{opt-E} The \i\c{-E}\I{-e} Option: Preprocess Only
762
763NASM allows the \i{preprocessor} to be run on its own, up to a
764point. Using the \c{-E} option (which requires no arguments) will
765cause NASM to preprocess its input file, expand all the macro
766references, remove all the comments and preprocessor directives, and
767print the resulting file on standard output (or save it to a file,
768if the \c{-o} option is also used).
769
770This option cannot be applied to programs which require the
771preprocessor to evaluate \I{preprocessor expressions}\i{expressions}
772which depend on the values of symbols: so code such as
773
774\c %assign tablesize ($-tablestart)
775
776will cause an error in \i{preprocess-only mode}.
777
778For compatibility with older versions of NASM, this option can also be
779written \c{-e}.  \c{-E} in older versions of NASM was the equivalent
780of the current \c{-Z} option, \k{opt-Z}.
781
782\S{opt-a} The \i\c{-a} Option: Don't Preprocess At All
783
784If NASM is being used as the back end to a compiler, it might be
785desirable to \I{suppressing preprocessing}suppress preprocessing
786completely and assume the compiler has already done it, to save time
787and increase compilation speeds. The \c{-a} option, requiring no
788argument, instructs NASM to replace its powerful \i{preprocessor}
789with a \i{stub preprocessor} which does nothing.
790
791
792\S{opt-O} The \i\c{-O} Option: Specifying \i{Multipass Optimization}
793
794Using the \c{-O} option, you can tell NASM to carry out different
795levels of optimization. Multiple flags can be specified after the
796\c{-O} options, some of which can be combined in a single option,
797e.g. \c{-Oxv}.
798
799\b \c{-O0}: No optimization. All operands take their long forms,
800        if a short form is not specified, except conditional jumps.
801        This is intended to match NASM 0.98 behavior.
802
803\b \c{-O1}: Minimal optimization. As above, but immediate operands
804        which will fit in a signed byte are optimized,
805        unless the long form is specified.  Conditional jumps default
806        to the long form unless otherwise specified.
807
808\b \c{-Ox} (where \c{x} is the actual letter \c{x}): Multipass optimization.
809        Minimize branch offsets and signed immediate bytes,
810        overriding size specification unless the \c{strict} keyword
811        has been used (see \k{strict}).  For compatibility with earlier
812        releases, the letter \c{x} may also be any number greater than
813        one. This number has no effect on the actual number of passes.
814
815\b \c{-Ov}: At the end of assembly, print the number of passes
816        actually executed.
817
818The \c{-Ox} mode is recommended for most uses, and is the default
819since NASM 2.09.
820
821Note that this is a capital \c{O}, and is different from a small \c{o}, which
822is used to specify the output file name. See \k{opt-o}.
823
824
825\S{opt-t} The \i\c{-t} Option: Enable TASM Compatibility Mode
826
827NASM includes a limited form of compatibility with Borland's \i\c{TASM}.
828When NASM's \c{-t} option is used, the following changes are made:
829
830\b local labels may be prefixed with \c{@@} instead of \c{.}
831
832\b size override is supported within brackets. In TASM compatible mode,
833a size override inside square brackets changes the size of the operand,
834and not the address type of the operand as it does in NASM syntax. E.g.
835\c{mov eax,[DWORD val]} is valid syntax in TASM compatibility mode.
836Note that you lose the ability to override the default address type for
837the instruction.
838
839\b unprefixed forms of some directives supported (\c{arg}, \c{elif},
840\c{else}, \c{endif}, \c{if}, \c{ifdef}, \c{ifdifi}, \c{ifndef},
841\c{include}, \c{local})
842
843\S{opt-w} The \i\c{-w} and \i\c{-W} Options: Enable or Disable Assembly \i{Warnings}
844
845NASM can observe many conditions during the course of assembly which
846are worth mentioning to the user, but not a sufficiently severe
847error to justify NASM refusing to generate an output file. These
848conditions are reported like errors, but come up with the word
849`warning' before the message. Warnings do not prevent NASM from
850generating an output file and returning a success status to the
851operating system.
852
853Some conditions are even less severe than that: they are only
854sometimes worth mentioning to the user. Therefore NASM supports the
855\c{-w} command-line option, which enables or disables certain
856classes of assembly warning. Such warning classes are described by a
857name, for example \c{label-orphan}; you can enable warnings of
858this class by the command-line option \c{-w+label-orphan} and
859disable it by \c{-w-label-orphan}.
860
861The current \i{warning classes} are:
862
863\& warnings.src
864
865Since version 2.15, NASM has group aliases for all prefixed warnings,
866so they can be used to enable or disable all warnings in the group.
867For example, \c{-w+float} enables all warnings with names starting with \c{float-}.
868
869Since version 2.00, NASM has also supported the \c{gcc}-like syntax
870\c{-Wwarning-class} and \c{-Wno-warning-class} instead of
871\c{-w+warning-class} and \c{-w-warning-class}, respectively; both
872syntaxes work identically.
873
874The option \c{-w+error} or \i\c{-Werror} can be used to treat warnings
875as errors.  This can be controlled on a per warning class basis
876(\c{-w+error=}\e{warning-class} or \c{-Werror=}\e{warning-class});
877if no \e{warning-class} is specified NASM treats it as
878\c{-w+error=all}; the same applies to \c{-w-error} or
879\i\c{-Wno-error},
880of course.
881
882In addition, you can control warnings in the source code itself, using
883the \i\c{[WARNING]} directive.  See \k{asmdir-warning}.
884
885
886\S{opt-v} The \i\c{-v} Option: Display \i{Version} Info
887
888Typing \c{NASM -v} will display the version of NASM which you are using,
889and the date on which it was compiled.
890
891You will need the version number if you report a bug.
892
893For command-line compatibility with Yasm, the form \i\c{--v} is also
894accepted for this option starting in NASM version 2.11.05.
895
896
897\S{opt-pfix} The \i\c{--(g|l)prefix}, \i\c{--(g|l)postfix} Options.
898
899The \c{--(g)prefix} options prepend the given argument
900to all \c{extern}, \c{common}, \c{static}, and \c{global} symbols, and the
901\c{--lprefix} option prepends to all other symbols. Similarly,
902\c{--(g)postfix} and \c{--lpostfix} options append
903the argument in exactly the same way as the \c{--xxprefix} options do.
904
905Running this:
906
907\c nasm -f macho --gprefix _
908
909is equivalent to placing the directive \c{%pragma macho gprefix _}
910at the start of the file (\k{mangling}). It will prepend the underscore
911to all global and external variables, as C requires it in some, but not all,
912system calling conventions.
913
914\S{opt-pragma} The \i\c{--pragma} Option
915
916NASM accepts an argument to the \c{--pragma} option, which is like placing
917a \c{%pragma} preprocessor statement at the beginning of the source.
918Running this:
919
920\c nasm -f macho --pragma "macho gprefix _"
921
922is equivalent to the example in \k{opt-pfix}. See \k{pragma}.
923
924
925\S{opt-before} The \i\c{--before} Option
926
927This option accepts a preprocessor statement as its argument. The example
928shown in \k{opt-pragma} is the same as running this:
929
930\c nasm -f macho --before "%pragma macho gprefix _"
931
932
933\S{opt-limit} The \i\c{--limit-X} Option
934
935This option allows the user to set up various maximum values after which
936NASM will terminate with a fatal error rather than consume an arbitrary
937amount of compute time. Each limit can be set to a positive number or
938\c{unlimited}.
939
940\b\c{--limit-passes}: Number of maximum allowed passes. Default is
941\c{unlimited}.
942
943\b\c{--limit-stalled-passes}: Maximum number of allowed unfinished
944passes. Default is 1000.
945
946\b\c{--limit-macro-levels}: Define maximum depth of macro expansion
947(in the preprocessor). Default is 10000.
948
949\b\c{--limit-macro-tokens}: Maximum number of tokens processed during
950single-line macro expansion. Default is 10000000.
951
952\b\c{--limit-mmacros}: Maximum number of multi-line macros processed
953before returning to the top-level input. Default is 100000.
954
955\b\c{--limit-rep}: Maximum number of allowed preprocessor loops, defined
956under \c{%rep}. Default is 1000000.
957
958\b\c{--limit-eval}: This number sets the boundary condition of allowed
959expression length. Default is 8192 on most systems.
960
961\b\c{--limit-lines}: Total number of source lines allowed to be
962processed. Default is 2000000000.
963
964For example, set the maximum line count to 1000:
965
966\c nasm --limit-lines 1000
967
968Limits can also be set via the directive \c{%pragma limit}, for
969example:
970
971\c %pragma limit lines 1000
972
973
974\S{opt-keep-all} The \i\c{--keep-all} Option
975
976This option prevents NASM from deleting any output files even if an
977error happens.
978
979\S{opt-no-line} The \i\c{--no-line} Option
980
981If this option is given, all \i\c{%line} directives in the source code
982are ignored. This can be useful for debugging already preprocessed
983code. See \k{line}.
984
985\S{opt-reproducible} The \i\c{--reproducible} Option
986
987If this option is given, NASM will not emit information that is
988inherently dependent on the NASM version or different from run to run
989(such as timestamps) into the output file.
990
991
992\S{nasmenv} The \i\c{NASMENV} \i{Environment} Variable
993
994If you define an environment variable called \c{NASMENV}, the program
995will interpret it as a list of extra command-line options, which are
996processed before the real command line. You can use this to define
997standard search directories for include files, by putting \c{-i}
998options in the \c{NASMENV} variable.
999
1000The value of the variable is split up at white space, so that the
1001value \c{-s -ic:\\nasmlib\\} will be treated as two separate options.
1002However, that means that the value \c{-dNAME="my name"} won't do
1003what you might want, because it will be split at the space and the
1004NASM command-line processing will get confused by the two
1005nonsensical words \c{-dNAME="my} and \c{name"}.
1006
1007To get round this, NASM provides a feature whereby, if you begin the
1008\c{NASMENV} environment variable with some character that isn't a minus
1009sign, then NASM will treat this character as the \i{separator
1010character} for options. So setting the \c{NASMENV} variable to the
1011value \c{!-s!-ic:\\nasmlib\\} is equivalent to setting it to \c{-s
1012-ic:\\nasmlib\\}, but \c{!-dNAME="my name"} will work.
1013
1014This environment variable was previously called \c{NASM}. This was
1015changed with version 0.98.31.
1016
1017
1018\H{qstart} \i{Quick Start} for \i{MASM} Users
1019
1020If you're used to writing programs with MASM, or with \i{TASM} in
1021MASM-compatible (non-Ideal) mode, or with \i\c{a86}, this section
1022attempts to outline the major differences between MASM's syntax and
1023NASM's. If you're not already used to MASM, it's probably worth
1024skipping this section.
1025
1026
1027\S{qscs} NASM Is \I{case sensitivity}Case-Sensitive
1028
1029One simple difference is that NASM is case-sensitive. It makes a
1030difference whether you call your label \c{foo}, \c{Foo} or \c{FOO}.
1031If you're assembling to \c{DOS} or \c{OS/2} \c{.OBJ} files, you can
1032invoke the \i\c{UPPERCASE} directive (documented in \k{objfmt}) to
1033ensure that all symbols exported to other code modules are forced
1034to be upper case; but even then, \e{within} a single module, NASM
1035will distinguish between labels differing only in case.
1036
1037
1038\S{qsbrackets} NASM Requires \i{Square Brackets} For \i{Memory References}
1039
1040NASM was designed with simplicity of syntax in mind. One of the
1041\i{design goals} of NASM is that it should be possible, as far as is
1042practical, for the user to look at a single line of NASM code
1043and tell what opcode is generated by it. You can't do this in MASM:
1044if you declare, for example,
1045
1046\c foo     equ     1
1047\c bar     dw      2
1048
1049then the two lines of code
1050
1051\c         mov     ax,foo
1052\c         mov     ax,bar
1053
1054generate completely different opcodes, despite having
1055identical-looking syntaxes.
1056
1057NASM avoids this undesirable situation by having a much simpler
1058syntax for memory references. The rule is simply that any access to
1059the \e{contents} of a memory location requires square brackets
1060around the address, and any access to the \e{address} of a variable
1061doesn't. So an instruction of the form \c{mov ax,foo} will
1062\e{always} refer to a compile-time constant, whether it's an \c{EQU}
1063or the address of a variable; and to access the \e{contents} of the
1064variable \c{bar}, you must code \c{mov ax,[bar]}.
1065
1066This also means that NASM has no need for MASM's \i\c{OFFSET}
1067keyword, since the MASM code \c{mov ax,offset bar} means exactly the
1068same thing as NASM's \c{mov ax,bar}. If you're trying to get
1069large amounts of MASM code to assemble sensibly under NASM, you
1070can always code \c{%idefine offset} to make the preprocessor treat
1071the \c{OFFSET} keyword as a no-op.
1072
1073This issue is even more confusing in \i\c{a86}, where declaring a
1074label with a trailing colon defines it to be a `label' as opposed to
1075a `variable' and causes \c{a86} to adopt NASM-style semantics; so in
1076\c{a86}, \c{mov ax,var} has different behaviour depending on whether
1077\c{var} was declared as \c{var: dw 0} (a label) or \c{var dw 0} (a
1078word-size variable). NASM is very simple by comparison:
1079\e{everything} is a label.
1080
1081NASM, in the interests of simplicity, also does not support the
1082\i{hybrid syntaxes} supported by MASM and its clones, such as
1083\c{mov ax,table[bx]}, where a memory reference is denoted by one
1084portion outside square brackets and another portion inside. The
1085correct syntax for the above is \c{mov ax,[table+bx]}. Likewise,
1086\c{mov ax,es:[di]} is wrong and \c{mov ax,[es:di]} is right.
1087
1088
1089\S{qstypes} NASM Doesn't Store \i{Variable Types}
1090
1091NASM, by design, chooses not to remember the types of variables you
1092declare. Whereas MASM will remember, on seeing \c{var dw 0}, that
1093you declared \c{var} as a word-size variable, and will then be able
1094to fill in the \i{ambiguity} in the size of the instruction \c{mov
1095var,2}, NASM will deliberately remember nothing about the symbol
1096\c{var} except where it begins, and so you must explicitly code
1097\c{mov word [var],2}.
1098
1099For this reason, NASM doesn't support the \c{LODS}, \c{MOVS},
1100\c{STOS}, \c{SCAS}, \c{CMPS}, \c{INS}, or \c{OUTS} instructions,
1101but only supports the forms such as \c{LODSB}, \c{MOVSW}, and
1102\c{SCASD}, which explicitly specify the size of the components of
1103the strings being manipulated.
1104
1105
1106\S{qsassume} NASM Doesn't \i\c{ASSUME}
1107
1108As part of NASM's drive for simplicity, it also does not support the
1109\c{ASSUME} directive. NASM will not keep track of what values you
1110choose to put in your segment registers, and will never
1111\e{automatically} generate a \i{segment override} prefix.
1112
1113
1114\S{qsmodel} NASM Doesn't Support \i{Memory Models}
1115
1116NASM also does not have any directives to support different 16-bit
1117memory models. The programmer has to keep track of which functions
1118are supposed to be called with a \i{far call} and which with a
1119\i{near call}, and is responsible for putting the correct form of
1120\c{RET} instruction (\c{RETN} or \c{RETF}; NASM accepts \c{RET}
1121itself as an alternate form for \c{RETN}); in addition, the
1122programmer is responsible for coding \c{CALL FAR} instructions where
1123necessary when calling \e{external} functions, and must also keep
1124track of which external variable definitions are far and which are
1125near.
1126
1127
1128\S{qsfpu} \i{Floating-Point} Differences
1129
1130NASM uses different names to refer to floating-point registers from
1131MASM: where MASM would call them \c{ST(0)}, \c{ST(1)} and so on, and
1132\i\c{a86} would call them simply \c{0}, \c{1} and so on, NASM
1133chooses to call them \c{st0}, \c{st1} etc.
1134
1135As of version 0.96, NASM now treats the instructions with
1136\i{`nowait'} forms in the same way as MASM-compatible assemblers.
1137The idiosyncratic treatment employed by 0.95 and earlier was based
1138on a misunderstanding by the authors.
1139
1140
1141\S{qsother} Other Differences
1142
1143For historical reasons, NASM uses the keyword \i\c{TWORD} where MASM
1144and compatible assemblers use \i\c{TBYTE}.
1145
1146Historically, NASM does not declare \i{uninitialized storage} in the
1147same way as MASM: where a MASM programmer might use \c{stack db 64 dup
1148(?)}, NASM requires \c{stack resb 64}, intended to be read as `reserve
114964 bytes'. For a limited amount of compatibility, since NASM treats
1150\c{?} as a valid character in symbol names, you can code \c{? equ 0}
1151and then writing \c{dw ?} will at least do something vaguely useful.
1152
1153As of NASM 2.15, the MASM syntax is also supported.
1154
1155In addition to all of this, macros and directives work completely
1156differently to MASM. See \k{preproc} and \k{directive} for further
1157details.
1158
1159\S{masm-compat} MASM compatibility package
1160
1161See \k{pkg_masm}.
1162
1163
1164\C{lang} The NASM Language
1165
1166\H{syntax} Layout of a NASM Source Line
1167
1168Like most assemblers, each NASM source line contains (unless it
1169is a macro, a preprocessor directive or an assembler directive: see
1170\k{preproc} and \k{directive}) some combination of the four fields
1171
1172\c label:    instruction operands        ; comment
1173
1174As usual, most of these fields are optional; the presence or absence
1175of any combination of a label, an instruction and a comment is allowed.
1176Of course, the operand field is either required or forbidden by the
1177presence and nature of the instruction field.
1178
1179NASM uses backslash (\\) as the line continuation character; if a line
1180ends with backslash, the next line is considered to be a part of the
1181backslash-ended line.
1182
1183NASM places no restrictions on white space within a line: labels may
1184have white space before them, or instructions may have no space
1185before them, or anything. The \i{colon} after a label is also
1186optional. (Note that this means that if you intend to code \c{lodsb}
1187alone on a line, and type \c{lodab} by accident, then that's still a
1188valid source line which does nothing but define a label. Running
1189NASM with the command-line option
1190\I{label-orphan}\c{-w+label-orphan} will cause it to warn you if
1191you define a label alone on a line without a \i{trailing colon}.)
1192
1193\i{Valid characters} in labels are letters, numbers, \c{_}, \c{$},
1194\c{#}, \c{@}, \c{~}, \c{.}, and \c{?}. The only characters which may
1195be used as the \e{first} character of an identifier are letters,
1196\c{.} (with special meaning: see \k{locallab}), \c{_} and \c{?}.
1197An identifier may also be prefixed with a \I{$, prefix}\c{$} to
1198indicate that it is intended to be read as an identifier and not a
1199reserved word; thus, if some other module you are linking with
1200defines a symbol called \c{eax}, you can refer to \c{$eax} in NASM
1201code to distinguish the symbol from the register. Maximum length of
1202an identifier is 4095 characters.
1203
1204The instruction field may contain any machine instruction: Pentium and
1205P6 instructions, FPU instructions, MMX instructions and even
1206undocumented instructions are all supported. The instruction may be
1207prefixed by \c{LOCK}, \c{REP}, \c{REPE}/\c{REPZ}, \c{REPNE}/\c{REPNZ},
1208\c{XACQUIRE}/\c{XRELEASE} or \c{BND}/\c{NOBND}, in the usual
1209way. Explicit \I{address-size prefixes}address-size and
1210\i{operand-size prefixes} \i\c{A16}, \i\c{A32}, \i\c{A64}, \i\c{O16},
1211\i\c{O32} and \i\c{O64} are provided - one example of their use is
1212given in \k{mixsize}. You can also use the name of a \I{segment
1213override}segment register as an instruction prefix: coding \c{es mov
1214[bx],ax} is equivalent to coding \c{mov [es:bx],ax}. We recommend the
1215latter syntax, since it is consistent with other syntactic features of
1216the language, but for instructions such as \c{LODSB}, which has no
1217operands and yet can require a segment override, there is no clean
1218syntactic way to proceed apart from \c{es lodsb}.
1219
1220A prefix is not required to be attached to an instruction: prefixes such as
1221\c{CS}, \c{A32}, \c{LOCK} or \c{REPE} can appear on a line by
1222themselves, and NASM will just generate the prefix bytes.
1223
1224In addition to actual machine instructions, NASM also supports a
1225number of pseudo-instructions, described in \k{pseudop}.
1226
1227Instruction \i{operands} may take a number of forms: they can be
1228registers, described simply by the register name (e.g. \c{ax},
1229\c{bp}, \c{ebx}, \c{cr0}: NASM does not use the \c{gas}-style
1230syntax in which register names must be prefixed by a \c{%} sign), or
1231they can be \i{effective addresses} (see \k{effaddr}), constants
1232(\k{const}) or expressions (\k{expr}).
1233
1234For x87 \i{floating-point} instructions, NASM accepts a wide range of
1235syntaxes: you can use two-operand forms like MASM supports, or you
1236can use NASM's native single-operand forms in most cases.
1237\# Details of
1238\# all forms of each supported instruction are given in
1239\# \k{iref}.
1240For example, you can code:
1241
1242\c         fadd    st1             ; this sets st0 := st0 + st1
1243\c         fadd    st0,st1         ; so does this
1244\c
1245\c         fadd    st1,st0         ; this sets st1 := st1 + st0
1246\c         fadd    to st1          ; so does this
1247
1248Almost any x87 floating-point instruction that references memory must
1249use one of the prefixes \i\c{DWORD}, \i\c{QWORD} or \i\c{TWORD} to
1250indicate what size of \i{memory operand} it refers to.
1251
1252
1253\H{pseudop} \i{Pseudo-Instructions}
1254
1255Pseudo-instructions are things which, though not real x86 machine
1256instructions, are used in the instruction field anyway because that's
1257the most convenient place to put them. The current pseudo-instructions
1258are \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, \i\c{DO},
1259\i\c{DY} and \i\c{DZ}; their \I{storage,
1260uninitialized}\i{uninitialized} counterparts \i\c{RESB}, \i\c{RESW},
1261\i\c{RESD}, \i\c{RESQ}, \i\c{REST}, \i\c{RESO}, \i\c{RESY} and
1262\i\c{RESZ}; the \i\c{INCBIN} command, the \i\c{EQU} command, and the
1263\i\c{TIMES} prefix.
1264
1265In this documentation, the notation "\c{Dx}" and "\c{RESx}" is used to
1266indicate all the \c{DB} and \c{RESB} type directives, respectively.
1267
1268
1269\S{db} \c{Dx}: Declaring Initialized Data
1270
1271\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, \i\c{DO}, \i\c{DY}
1272and \i\c{DZ} (collectively "\c{Dx}" in this documentation) are used,
1273much as in MASM, to declare initialized data in the output file. They
1274can be invoked in a wide range of ways:
1275\I{floating-point}\I{character constant}\I{string constant}
1276
1277\c       db    0x55                ; just the byte 0x55
1278\c       db    0x55,0x56,0x57      ; three bytes in succession
1279\c       db    'a',0x55            ; character constants are OK
1280\c       db    'hello',13,10,'$'   ; so are string constants
1281\c       dw    0x1234              ; 0x34 0x12
1282\c       dw    'a'                 ; 0x61 0x00 (it's just a number)
1283\c       dw    'ab'                ; 0x61 0x62 (character constant)
1284\c       dw    'abc'               ; 0x61 0x62 0x63 0x00 (string)
1285\c       dd    0x12345678          ; 0x78 0x56 0x34 0x12
1286\c       dd    1.234567e20         ; floating-point constant
1287\c       dq    0x123456789abcdef0  ; eight byte constant
1288\c       dq    1.234567e20         ; double-precision float
1289\c       dt    1.234567e20         ; extended-precision float
1290
1291\c{DT}, \c{DO}, \c{DY} and \c{DZ} do not accept integer
1292\i{numeric constants} as operands.
1293
1294\I{masmdb} Starting in NASM 2.15, the following \i{MASM}-like features
1295have been implemented:
1296
1297\b A \I{?db}\c{?} argument to declare \i{uninitialized storage}:
1298
1299\c       db    ?                   ; uninitialized
1300
1301\b A superset of the \i\c{DUP} syntax. The NASM version of this has
1302the following syntax specification; capital letters indicate literal
1303keywords:
1304
1305\c      dx      := DB | DW | DD | DQ | DT | DO | DY | DZ
1306\c      type    := BYTE | WORD | DWORD | QWORD | TWORD | OWORD | YWORD | ZWORD
1307\c      atom    := expression | string | float | '?'
1308\c      parlist := '(' value [, value ...] ')'
1309\c      duplist := expression DUP [type] ['%'] parlist
1310\c      list    := duplist | '%' parlist | type ['%'] parlist
1311\c      value   := atom | type value | list
1312\c
1313\c      stmt    := dx value [, value...]
1314
1315\> Note that a \e{list} needs to be prefixed with a \I{%db}\c{%} sign unless
1316prefixed by either \c{DUP} or a \e{type} in order to avoid confusing it with
1317a parenthesis starting an expression. The following expressions are all
1318valid:
1319
1320\c        db 33
1321\c        db (44)		; Integer expression
1322\c      ; db (44,55)		; Invalid - error
1323\c        db %(44,55)
1324\c        db %('XX','YY')
1325\c        db ('AA')		; Integer expression - outputs single byte
1326\c        db %('BB')		; List, containing a string
1327\c        db ?
1328\c        db 6 dup (33)
1329\c        db 6 dup (33, 34)
1330\c        db 6 dup (33, 34), 35
1331\c        db 7 dup (99)
1332\c        db 7 dup dword (?, word ?, ?)
1333\c        dw byte (?,44)
1334\c        dw 3 dup (0xcc, 4 dup byte ('PQR'), ?), 0xabcd
1335\c        dd 16 dup (0xaaaa, ?, 0xbbbbbb)
1336\c        dd 64 dup (?)
1337
1338\I{baddb} The use of \c{$} (current address) in a \c{Dx} statement is
1339undefined in the current version of NASM, \e{except in the following
1340cases}:
1341
1342\b For the first expression in the statement, either a \c{DUP} or a data
1343item.
1344
1345\b An expression of the form "\e{value}\c{ - $}", which is converted
1346to a self-relative relocation.
1347
1348Future versions of NASM are likely to produce a different result or
1349issue an error in this case.
1350
1351There is no such restriction on using \c{$$} or section-relative
1352symbols.
1353
1354\S{resb} \c{RESB} and Friends: Declaring \i{Uninitialized} Data
1355
1356\i\c{RESB}, \i\c{RESW}, \i\c{RESD}, \i\c{RESQ}, \i\c{REST},
1357\i\c{RESO}, \i\c{RESY} and \i\c{RESZ} are designed to be used in the
1358BSS section of a module: they declare \e{uninitialized} storage
1359space. Each takes a single operand, which is the number of bytes,
1360words, doublewords or whatever to reserve. The operand to a
1361\c{RESB}-type pseudo-instruction is a \i\e{critical expression}: see
1362\k{crit}.
1363
1364For example:
1365
1366\c buffer:         resb    64              ; reserve 64 bytes
1367\c wordvar:        resw    1               ; reserve a word
1368\c realarray       resq    10              ; array of ten reals
1369\c ymmval:         resy    1               ; one YMM register
1370\c zmmvals:        resz    32              ; 32 ZMM registers
1371
1372\I{masmdb} Since NASM 2.15, the MASM syntax of using \I{?db}\c{?}
1373and \i\c{DUP} in the \c{D}\e{x} directives is also supported. Thus,
1374the above example could also be written:
1375
1376\c buffer:         db      64 dup (?)      ; reserve 64 bytes
1377\c wordvar:        dw      ?               ; reserve a word
1378\c realarray       dq      10 dup (?)      ; array of ten reals
1379\c ymmval:         dy      ?               ; one YMM register
1380\c zmmvals:        dz      32 dup (?)      ; 32 ZMM registers
1381
1382
1383\S{incbin} \i\c{INCBIN}: Including External \i{Binary Files}
1384
1385\c{INCBIN} includes binary file data verbatim into the output
1386file. This can be handy for (for example) including \i{graphics} and
1387\i{sound} data directly into a game executable file. It can be called
1388in one of these three ways:
1389
1390\c     incbin  "file.dat"             ; include the whole file
1391\c     incbin  "file.dat",1024        ; skip the first 1024 bytes
1392\c     incbin  "file.dat",1024,512    ; skip the first 1024, and
1393\c                                    ; actually include at most 512
1394
1395\c{INCBIN} is both a directive and a standard macro; the standard
1396macro version searches for the file in the include file search path
1397and adds the file to the dependency lists.  This macro can be
1398overridden if desired.
1399
1400
1401\S{equ} \i\c{EQU}: Defining Constants
1402
1403\c{EQU} defines a symbol to a given constant value: when \c{EQU} is
1404used, the source line must contain a label. The action of \c{EQU} is
1405to define the given label name to the value of its (only) operand.
1406This definition is absolute, and cannot change later. So, for
1407example,
1408
1409\c message         db      'hello, world'
1410\c msglen          equ     $-message
1411
1412defines \c{msglen} to be the constant 12. \c{msglen} may not then be
1413redefined later. This is not a \i{preprocessor} definition either:
1414the value of \c{msglen} is evaluated \e{once}, using the value of
1415\c{$} (see \k{expr} for an explanation of \c{$}) at the point of
1416definition, rather than being evaluated wherever it is referenced
1417and using the value of \c{$} at the point of reference.
1418
1419
1420\S{times} \i\c{TIMES}: \i{Repeating} Instructions or Data
1421
1422The \c{TIMES} prefix causes the instruction to be assembled multiple
1423times. This is partly present as NASM's equivalent of the \i\c{DUP}
1424syntax supported by \i{MASM}-compatible assemblers, in that you can
1425code
1426
1427\c zerobuf:        times 64 db 0
1428
1429or similar things; but \c{TIMES} is more versatile than that. The
1430argument to \c{TIMES} is not just a numeric constant, but a numeric
1431\e{expression}, so you can do things like
1432
1433\c buffer: db      'hello, world'
1434\c         times 64-$+buffer db ' '
1435
1436which will store exactly enough spaces to make the total length of
1437\c{buffer} up to 64. Finally, \c{TIMES} can be applied to ordinary
1438instructions, so you can code trivial \i{unrolled loops} in it:
1439
1440\c         times 100 movsb
1441
1442Note that there is no effective difference between \c{times 100 resb
14431} and \c{resb 100}, except that the latter will be assembled about
1444100 times faster due to the internal structure of the assembler.
1445
1446The operand to \c{TIMES} is a critical expression (\k{crit}).
1447
1448Note also that \c{TIMES} can't be applied to \i{macros}: the reason
1449for this is that \c{TIMES} is processed after the macro phase, which
1450allows the argument to \c{TIMES} to contain expressions such as
1451\c{64-$+buffer} as above. To repeat more than one line of code, or a
1452complex macro, use the preprocessor \i\c{%rep} directive.
1453
1454
1455\H{effaddr} Effective Addresses
1456
1457An \i{effective address} is any operand to an instruction which
1458\I{memory reference}references memory. Effective addresses, in NASM,
1459have a very simple syntax: they consist of an expression evaluating
1460to the desired address, enclosed in \i{square brackets}. For
1461example:
1462
1463\c wordvar dw      123
1464\c         mov     ax,[wordvar]
1465\c         mov     ax,[wordvar+1]
1466\c         mov     ax,[es:wordvar+bx]
1467
1468Anything not conforming to this simple system is not a valid memory
1469reference in NASM, for example \c{es:wordvar[bx]}.
1470
1471More complicated effective addresses, such as those involving more
1472than one register, work in exactly the same way:
1473
1474\c         mov     eax,[ebx*2+ecx+offset]
1475\c         mov     ax,[bp+di+8]
1476
1477NASM is capable of doing \i{algebra} on these effective addresses,
1478so that things which don't necessarily \e{look} legal are perfectly
1479all right:
1480
1481\c     mov     eax,[ebx*5]             ; assembles as [ebx*4+ebx]
1482\c     mov     eax,[label1*2-label2]   ; ie [label1+(label1-label2)]
1483
1484Some forms of effective address have more than one assembled form;
1485in most such cases NASM will generate the smallest form it can. For
1486example, there are distinct assembled forms for the 32-bit effective
1487addresses \c{[eax*2+0]} and \c{[eax+eax]}, and NASM will generally
1488generate the latter on the grounds that the former requires four
1489bytes to store a zero offset.
1490
1491NASM has a hinting mechanism which will cause \c{[eax+ebx]} and
1492\c{[ebx+eax]} to generate different opcodes; this is occasionally
1493useful because \c{[esi+ebp]} and \c{[ebp+esi]} have different
1494default segment registers.
1495
1496However, you can force NASM to generate an effective address in a
1497particular form by the use of the keywords \c{BYTE}, \c{WORD},
1498\c{DWORD} and \c{NOSPLIT}. If you need \c{[eax+3]} to be assembled
1499using a double-word offset field instead of the one byte NASM will
1500normally generate, you can code \c{[dword eax+3]}. Similarly, you
1501can force NASM to use a byte offset for a small value which it
1502hasn't seen on the first pass (see \k{crit} for an example of such a
1503code fragment) by using \c{[byte eax+offset]}. As special cases,
1504\c{[byte eax]} will code \c{[eax+0]} with a byte offset of zero, and
1505\c{[dword eax]} will code it with a double-word offset of zero. The
1506normal form, \c{[eax]}, will be coded with no offset field.
1507
1508The form described in the previous paragraph is also useful if you
1509are trying to access data in a 32-bit segment from within 16-bit code.
1510For more information on this see the section on mixed-size addressing
1511(\k{mixaddr}). In particular, if you need to access data with a known
1512offset that is larger than will fit in a 16-bit value, and you don't
1513specify that it is a dword offset, NASM will cause the high word of
1514the offset to be lost.
1515
1516Similarly, NASM will split \c{[eax*2]} into \c{[eax+eax]} because
1517that allows the offset field to be absent and space to be saved; in
1518fact, it will also split \c{[eax*2+offset]} into
1519\c{[eax+eax+offset]}. You can combat this behaviour by the use of
1520the \c{NOSPLIT} keyword: \c{[nosplit eax*2]} will force
1521\c{[eax*2+0]} to be generated literally. \c{[nosplit eax*1]} also has the
1522same effect. Alternatively, the split EA form \c{[0, eax*2]} can be used.
1523However, \c{NOSPLIT} in \c{[nosplit eax+eax]} will be ignored because the
1524user's intention here is taken to be \c{[eax+eax]}.
1525
1526In 64-bit mode, NASM will by default generate absolute addresses.  The
1527\i\c{REL} keyword makes it produce \c{RIP}-relative addresses. Since
1528this is frequently the desired behaviour, see the \c{DEFAULT}
1529directive (\k{default}). The keyword \i\c{ABS} overrides \i\c{REL}.
1530
1531A new form of split effective address syntax is also supported. This is
1532mainly intended for mib operands as used by MPX instructions, but can
1533be used for any memory reference. The basic concept of this form is
1534splitting base and index.
1535
1536\c      mov eax,[ebx+8,ecx*4]   ; ebx=base, ecx=index, 4=scale, 8=disp
1537
1538For mib operands, there are several ways of writing the effective address,
1539depending on the tool. NASM supports all currently possible forms of mib syntax:
1540
1541\c      ; bndstx
1542\c      ; the next 5 lines are parsed identically
1543\c      ; base=rax, index=rbx, scale=1, displacement=3
1544\c      bndstx [rax+0x3,rbx], bnd0      ; NASM - split EA
1545\c      bndstx [rbx*1+rax+0x3], bnd0    ; GAS - '*1' indicates an index reg
1546\c      bndstx [rax+rbx+3], bnd0        ; GAS - without hints
1547\c      bndstx [rax+0x3], bnd0, rbx     ; ICC-1
1548\c      bndstx [rax+0x3], rbx, bnd0     ; ICC-2
1549
1550When a broadcasting decorator is used, the opsize keyword should match
1551the size of each element.
1552
1553\c      VDIVPS zmm4, zmm5, dword [rbx]{1to16}   ; single-precision float
1554\c      VDIVPS zmm4, zmm5, zword [rbx]          ; packed 512 bit memory
1555
1556
1557\H{const} \i{Constants}
1558
1559NASM understands four different types of constant: numeric,
1560character, string and floating-point.
1561
1562
1563\S{numconst} \i{Numeric Constants}
1564
1565A numeric constant is simply a number. NASM allows you to specify
1566numbers in a variety of number bases, in a variety of ways: you can
1567suffix \c{H} or \c{X}, \c{D} or \c{T}, \c{Q} or \c{O}, and \c{B} or
1568\c{Y} for \i{hexadecimal}, \i{decimal}, \i{octal} and \i{binary}
1569respectively, or you can prefix \c{0x}, for hexadecimal in the style
1570of C, or you can prefix \c{$} for hexadecimal in the style of Borland
1571Pascal or Motorola Assemblers. Note, though, that the \I{$,
1572prefix}\c{$} prefix does double duty as a prefix on identifiers (see
1573\k{syntax}), so a hex number prefixed with a \c{$} sign must have a
1574digit after the \c{$} rather than a letter.  In addition, current
1575versions of NASM accept the prefix \c{0h} for hexadecimal, \c{0d} or
1576\c{0t} for decimal, \c{0o} or \c{0q} for octal, and \c{0b} or \c{0y}
1577for binary.  Please note that unlike C, a \c{0} prefix by itself does
1578\e{not} imply an octal constant!
1579
1580Numeric constants can have underscores (\c{_}) interspersed to break
1581up long strings.
1582
1583Some examples (all producing exactly the same code):
1584
1585\c         mov     ax,200          ; decimal
1586\c         mov     ax,0200         ; still decimal
1587\c         mov     ax,0200d        ; explicitly decimal
1588\c         mov     ax,0d200        ; also decimal
1589\c         mov     ax,0c8h         ; hex
1590\c         mov     ax,$0c8         ; hex again: the 0 is required
1591\c         mov     ax,0xc8         ; hex yet again
1592\c         mov     ax,0hc8         ; still hex
1593\c         mov     ax,310q         ; octal
1594\c         mov     ax,310o         ; octal again
1595\c         mov     ax,0o310        ; octal yet again
1596\c         mov     ax,0q310        ; octal yet again
1597\c         mov     ax,11001000b    ; binary
1598\c         mov     ax,1100_1000b   ; same binary constant
1599\c         mov     ax,1100_1000y   ; same binary constant once more
1600\c         mov     ax,0b1100_1000  ; same binary constant yet again
1601\c         mov     ax,0y1100_1000  ; same binary constant yet again
1602
1603\S{strings} \I{string}\I{string constants}\i{Character Strings}
1604
1605A character string consists of up to eight characters enclosed in
1606either single quotes (\c{'...'}), double quotes (\c{"..."}) or
1607backquotes (\c{`...`}).  Single or double quotes are equivalent to
1608NASM (except of course that surrounding the constant with single
1609quotes allows double quotes to appear within it and vice versa); the
1610contents of those are represented verbatim.  Strings enclosed in
1611backquotes support C-style \c{\\}-escapes for special characters.
1612
1613
1614The following \i{escape sequences} are recognized by backquoted strings:
1615
1616\c       \'          single quote (')
1617\c       \"          double quote (")
1618\c       \`          backquote (`)
1619\c       \\\          backslash (\)
1620\c       \?          question mark (?)
1621\c       \a          BEL (ASCII 7)
1622\c       \b          BS  (ASCII 8)
1623\c       \t          TAB (ASCII 9)
1624\c       \n          LF  (ASCII 10)
1625\c       \v          VT  (ASCII 11)
1626\c       \f          FF  (ASCII 12)
1627\c       \r          CR  (ASCII 13)
1628\c       \e          ESC (ASCII 27)
1629\c       \377        Up to 3 octal digits - literal byte
1630\c       \xFF        Up to 2 hexadecimal digits - literal byte
1631\c       \u1234      4 hexadecimal digits - Unicode character
1632\c       \U12345678  8 hexadecimal digits - Unicode character
1633
1634All other escape sequences are reserved.  Note that \c{\\0}, meaning a
1635\c{NUL} character (ASCII 0), is a special case of the octal escape
1636sequence.
1637
1638\i{Unicode} characters specified with \c{\\u} or \c{\\U} are converted to
1639\i{UTF-8}.  For example, the following lines are all equivalent:
1640
1641\c       db `\u263a`            ; UTF-8 smiley face
1642\c       db `\xe2\x98\xba`      ; UTF-8 smiley face
1643\c       db 0E2h, 098h, 0BAh    ; UTF-8 smiley face
1644
1645
1646\S{chrconst} \i{Character Constants}
1647
1648A character constant consists of a string up to eight bytes long, used
1649in an expression context.  It is treated as if it was an integer.
1650
1651A character constant with more than one byte will be arranged
1652with \i{little-endian} order in mind: if you code
1653
1654\c           mov eax,'abcd'
1655
1656then the constant generated is not \c{0x61626364}, but
1657\c{0x64636261}, so that if you were then to store the value into
1658memory, it would read \c{abcd} rather than \c{dcba}. This is also
1659the sense of character constants understood by the Pentium's
1660\i\c{CPUID} instruction.
1661
1662
1663\S{strconst} \i{String Constants}
1664
1665String constants are character strings used in the context of some
1666pseudo-instructions, namely the
1667\I\c{DW}\I\c{DD}\I\c{DQ}\I\c{DT}\I\c{DO}\I\c{DY}\i\c{DB} family and
1668\i\c{INCBIN} (where it represents a filename.)  They are also used in
1669certain preprocessor directives.
1670
1671A string constant looks like a character constant, only longer. It
1672is treated as a concatenation of maximum-size character constants
1673for the conditions. So the following are equivalent:
1674
1675\c       db    'hello'               ; string constant
1676\c       db    'h','e','l','l','o'   ; equivalent character constants
1677
1678And the following are also equivalent:
1679
1680\c       dd    'ninechars'           ; doubleword string constant
1681\c       dd    'nine','char','s'     ; becomes three doublewords
1682\c       db    'ninechars',0,0,0     ; and really looks like this
1683
1684Note that when used in a string-supporting context, quoted strings are
1685treated as string constants even if they are short enough to be a
1686character constant, because otherwise \c{db 'ab'} would have the same
1687effect as \c{db 'a'}, which would be silly. Similarly, three-character
1688or four-character constants are treated as strings when they are
1689operands to \c{DW}, and so forth.
1690
1691\S{unicode} \I{UTF-16}\I{UTF-32}\i{Unicode} Strings
1692
1693The special operators \i\c{__?utf16?__}, \i\c{__?utf16le?__},
1694\i\c{__?utf16be?__}, \i\c{__?utf32?__}, \i\c{__?utf32le?__} and
1695\i\c{__?utf32be?__} allow definition of Unicode strings.  They take a
1696string in UTF-8 format and convert it to UTF-16 or UTF-32,
1697respectively.  Unless the \c{be} forms are specified, the output is
1698little-endian.
1699
1700For example:
1701
1702\c %define u(x) __?utf16?__(x)
1703\c %define w(x) __?utf32?__(x)
1704\c
1705\c       dw u('C:\WINDOWS'), 0       ; Pathname in UTF-16
1706\c       dd w(`A + B = \u206a`), 0   ; String in UTF-32
1707
1708The UTF operators can be applied either to strings passed to the
1709\c{DB} family instructions, or to character constants in an expression
1710context.
1711
1712\S{fltconst} \I{floating-point, constants}Floating-Point Constants
1713
1714\i{Floating-point} constants are acceptable only as arguments to
1715\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as
1716arguments to the special operators \i\c{__?float8?__},
1717\i\c{__?float16?__}, \i\c{__?bfloat16?__}, \i\c{__?float32?__},
1718\i\c{__?float64?__}, \i\c{__?float80m?__}, \i\c{__?float80e?__},
1719\i\c{__?float128l?__}, and \i\c{__?float128h?__}. See also \k{pkg_fp}.
1720
1721Floating-point constants are expressed in the traditional form:
1722digits, then a period, then optionally more digits, then optionally an
1723\c{E} followed by an exponent. The period is mandatory, so that NASM
1724can distinguish between \c{dd 1}, which declares an integer constant,
1725and \c{dd 1.0} which declares a floating-point constant.
1726
1727NASM also supports C99-style hexadecimal floating-point: \c{0x},
1728hexadecimal digits, period, optionally more hexadecimal digits, then
1729optionally a \c{P} followed by a \e{binary} (not hexadecimal) exponent
1730in decimal notation.  As an extension, NASM additionally supports the
1731\c{0h} and \c{$} prefixes for hexadecimal, as well as binary and octal
1732floating-point, using the \c{0b} or \c{0y} and \c{0o} or \c{0q}
1733prefixes, respectively.
1734
1735Underscores to break up groups of digits are permitted in
1736floating-point constants as well.
1737
1738Some examples:
1739
1740\c       db    -0.2                    ; "Quarter precision"
1741\c       dw    -0.5                    ; IEEE 754r/SSE5 half precision
1742\c       dd    1.2                     ; an easy one
1743\c       dd    1.222_222_222           ; underscores are permitted
1744\c       dd    0x1p+2                  ; 1.0x2^2 = 4.0
1745\c       dq    0x1p+32                 ; 1.0x2^32 = 4 294 967 296.0
1746\c       dq    1.e10                   ; 10 000 000 000.0
1747\c       dq    1.e+10                  ; synonymous with 1.e10
1748\c       dq    1.e-10                  ; 0.000 000 000 1
1749\c       dt    3.141592653589793238462 ; pi
1750\c       do    1.e+4000                ; IEEE 754r quad precision
1751
1752The 8-bit "quarter-precision" floating-point format is
1753sign:exponent:mantissa = 1:4:3 with an exponent bias of 7.  This
1754appears to be the most frequently used 8-bit floating-point format,
1755although it is not covered by any formal standard.  This is sometimes
1756called a "\i{minifloat}."
1757
1758The \i\c{bfloat16} format is effectively a compressed version of the
175932-bit single precision format, with a reduced mantissa. It is
1760effectively the same as truncating the 32-bit format to the upper 16
1761bits, except for rounding. There is no \c{D}\e{x} directive that
1762corresponds to \c{bfloat16} as it obviously has the same size as the
1763IEEE standard 16-bit half precision format, see however \k{pkg_fp}.
1764
1765The special operators are used to produce floating-point numbers in
1766other contexts.  They produce the binary representation of a specific
1767floating-point number as an integer, and can be used anywhere integer
1768constants are used in an expression.  \c{__?float80m?__} and
1769\c{__?float80e?__} produce the 64-bit mantissa and 16-bit exponent of an
177080-bit floating-point number, and \c{__?float128l?__} and
1771\c{__?float128h?__} produce the lower and upper 64-bit halves of a 128-bit
1772floating-point number, respectively.
1773
1774For example:
1775
1776\c       mov    rax,__?float64?__(3.141592653589793238462)
1777
1778... would assign the binary representation of pi as a 64-bit floating
1779point number into \c{RAX}.  This is exactly equivalent to:
1780
1781\c       mov    rax,0x400921fb54442d18
1782
1783NASM cannot do compile-time arithmetic on floating-point constants.
1784This is because NASM is designed to be portable - although it always
1785generates code to run on x86 processors, the assembler itself can
1786run on any system with an ANSI C compiler. Therefore, the assembler
1787cannot guarantee the presence of a floating-point unit capable of
1788handling the \i{Intel number formats}, and so for NASM to be able to
1789do floating arithmetic it would have to include its own complete set
1790of floating-point routines, which would significantly increase the
1791size of the assembler for very little benefit.
1792
1793The special tokens \i\c{__?Infinity?__}, \i\c{__?QNaN?__} (or
1794\i\c{__?NaN?__}) and \i\c{__?SNaN?__} can be used to generate
1795\I{infinity}infinities, quiet \i{NaN}s, and signalling NaNs,
1796respectively.  These are normally used as macros:
1797
1798\c %define Inf __?Infinity?__
1799\c %define NaN __?QNaN?__
1800\c
1801\c       dq    +1.5, -Inf, NaN         ; Double-precision constants
1802
1803The \c{%use fp} standard macro package contains a set of convenience
1804macros.  See \k{pkg_fp}.
1805
1806\S{bcdconst} \I{floating-point, packed BCD constants}Packed BCD Constants
1807
1808x87-style packed BCD constants can be used in the same contexts as
180980-bit floating-point numbers.  They are suffixed with \c{p} or
1810prefixed with \c{0p}, and can include up to 18 decimal digits.
1811
1812As with other numeric constants, underscores can be used to separate
1813digits.
1814
1815For example:
1816
1817\c       dt 12_345_678_901_245_678p
1818\c       dt -12_345_678_901_245_678p
1819\c       dt +0p33
1820\c       dt 33p
1821
1822
1823\H{expr} \i{Expressions}
1824
1825Expressions in NASM are similar in syntax to those in C.  Expressions
1826are evaluated as 64-bit integers which are then adjusted to the
1827appropriate size.
1828
1829NASM supports two special tokens in expressions, allowing
1830calculations to involve the current assembly position: the
1831\I{$, here}\c{$} and \i\c{$$} tokens. \c{$} evaluates to the assembly
1832position at the beginning of the line containing the expression; so
1833you can code an \i{infinite loop} using \c{JMP $}. \c{$$} evaluates
1834to the beginning of the current section; so you can tell how far
1835into the section you are by using \c{($-$$)}.
1836
1837The arithmetic \i{operators} provided by NASM are listed here, in
1838increasing order of \i{precedence}.
1839
1840A \e{boolean} value is true if nonzero and false if zero. The
1841operators which return a boolean value always return 1 for true and 0
1842for false.
1843
1844
1845\S{exptri} \I{?op}\c{?} ... \c{:}: Conditional Operator
1846
1847The syntax of this operator, similar to the C conditional operator, is:
1848
1849\e{boolean} \c{?} \e{trueval} \c{:} \e{falseval}
1850
1851This operator evaluates to \e{trueval} if \e{boolean} is true,
1852otherwise to \e{falseval}.
1853
1854Note that NASM allows \c{?} characters in symbol names. Therefore, it
1855is highly advisable to always put spaces around the \c{?} and \c{:}
1856characters.
1857
1858
1859\S{expbor} \i\c{||}: \i{Boolean OR} Operator
1860
1861The \c{||} operator gives a boolean OR: it evaluates to 1 if either side of
1862the expression is nonzero, otherwise 0.
1863
1864
1865\S{expbxor} \i\c{^^}: \i{Boolean XOR} Operator
1866
1867The \c{^^} operator gives a boolean XOR: it evaluates to 1 if exactly one side
1868of the expression is nonzero, otherwise 0.
1869
1870
1871\S{expband} \i\c{&&}: \i{Boolean AND} Operator
1872
1873The \c{&&} operator gives a boolean AND: it evaluates to 1 if both sides of
1874the expression are nonzero, otherwise 0.
1875
1876
1877\S{exprel} \i{Comparison Operators}
1878
1879NASM supports the following comparison operators:
1880
1881\b \i\c{=} or \i\c{==} compare for equality.
1882
1883\b \i\c{!=} or \i\c{<>} compare for inequality.
1884
1885\b \i\c{<} compares signed less than.
1886
1887\b \i\c{<=} compares signed less than or equal.
1888
1889\b \i\c{>} compares signed greater than.
1890
1891\b \i\c{>=} compares signed greater than or equal.
1892
1893These operators evaluate to 0 for false or 1 for true.
1894
1895\b \i\c{<=>} does a signed comparison, and evaluates to -1 for less
1896than, 0 for equal, and 1 for greater than.
1897
1898At this time, NASM does not provide unsigned comparison operators.
1899
1900
1901\S{expor} \i\c{|}: \i{Bitwise OR} Operator
1902
1903The \c{|} operator gives a bitwise OR, exactly as performed by the
1904\c{OR} machine instruction.
1905
1906
1907\S{expxor} \i\c{^}: \i{Bitwise XOR} Operator
1908
1909\c{^} provides the bitwise XOR operation.
1910
1911
1912\S{expand} \i\c{&}: \i{Bitwise AND} Operator
1913
1914\c{&} provides the bitwise AND operation.
1915
1916
1917\S{expshift} \i{Bit Shift} Operators
1918
1919\i\c{<<} gives a bit-shift to the left, just as it does in C. So
1920\c{5<<3} evaluates to 5 times 8, or 40. \i\c{>>} gives an \I{unsigned,
1921bit shift}\e{unsigned} (logical) bit-shift to the right; the bits
1922shifted in from the left are set to zero.
1923
1924\i\c{<<<} gives a bit-shift to the left, exactly equivalent to the
1925\c{<<} operator; it is included for completeness. \i\c{>>>} gives an
1926\I{signed, bit shift}\e{signed} (arithmetic) bit-shift to the right;
1927the bits shifted in from the left are filled with copies of the most
1928significant (sign) bit.
1929
1930
1931\S{expplmi} \I{+ opaddition}\c{+} and \I{- opsubtraction}\c{-}:
1932\i{Addition} and \i{Subtraction} Operators
1933
1934The \c{+} and \c{-} operators do perfectly ordinary addition and
1935subtraction.
1936
1937
1938\S{expmul} \i{Multiplication}, \i{Division} and \i{Modulo}
1939
1940\i\c{*} is the multiplication operator.
1941
1942\i\c{/} and \i\c{//} are both division operators: \c{/} is
1943\I{division, unsigned}\I{unsigned, division}unsigned division and \c{//} is
1944\I{division, signed}\I{signed, division}signed division.
1945
1946Similarly, \i\c{%} and \i\c{%%} provide \I{modulo,
1947unsigned}\I{unsigned, modulo}unsigned and \I{modulo, signed}\I{signed,
1948modulo}signed modulo operators respectively.
1949
1950Since the \c{%} character is used extensively by the macro
1951\i{preprocessor}, you should ensure that both the signed and unsigned
1952modulo operators are followed by white space wherever they appear.
1953
1954NASM, like ANSI C, provides no guarantees about the sensible
1955operation of the signed modulo operator. On most systems it will match
1956the signed division operator, such that:
1957
1958\c      b * (a // b) + (a %% b) = a       (b != 0)
1959
1960
1961\S{expunary} \I{operators, unary}\i{Unary Operators}
1962
1963The highest-priority operators in NASM's expression grammar are those
1964which only apply to one argument.  These are:
1965
1966\b \I{- opunary}\c{-} \I{arithmetic negation}negates (\i{2's complement}) its
1967operand.
1968
1969\b \I{+ opunary}\c{+} does nothing; it's provided for symmetry with \c{-}.
1970
1971\b \I{~ opunary}\c{~} computes the \I{negation, bitwise}\i{bitwise
1972negation} (\i{1's complement}) of its operand.
1973
1974\b \I{! opunary}\c{!} is the \I{negation, boolean}\i{boolean negation}
1975operator. It evaluates to 1 if the argument is 0, otherwise 0.
1976
1977\b \c{SEG} provides the \i{segment address} of its operand (explained in
1978more detail in \k{segwrt}).
1979
1980\b A set of additional operators with leading and trailing double
1981underscores are used to implement the \c{integer functions} of the
1982\c{ifunc} macro package, see \k{pkg_ifunc}.
1983
1984
1985\H{segwrt} \i\c{SEG} and \i\c{WRT}
1986
1987When writing large 16-bit programs, which must be split into
1988multiple \i{segments}, it is often necessary to be able to refer to
1989the \I{segment address}segment part of the address of a symbol. NASM
1990supports the \c{SEG} operator to perform this function.
1991
1992The \c{SEG} operator evaluates to the \i\e{preferred} segment base of a
1993symbol, defined as the segment base relative to which the offset of
1994the symbol makes sense. So the code
1995
1996\c         mov     ax,seg symbol
1997\c         mov     es,ax
1998\c         mov     bx,symbol
1999
2000will load \c{ES:BX} with a valid pointer to the symbol \c{symbol}.
2001
2002Things can be more complex than this: since 16-bit segments and
2003\i{groups} may \I{overlapping segments}overlap, you might occasionally
2004want to refer to some symbol using a different segment base from the
2005preferred one. NASM lets you do this, by the use of the \c{WRT}
2006(With Reference To) keyword. So you can do things like
2007
2008\c         mov     ax,weird_seg        ; weird_seg is a segment base
2009\c         mov     es,ax
2010\c         mov     bx,symbol wrt weird_seg
2011
2012to load \c{ES:BX} with a different, but functionally equivalent,
2013pointer to the symbol \c{symbol}.
2014
2015NASM supports far (inter-segment) calls and jumps by means of the
2016syntax \c{call segment:offset}, where \c{segment} and \c{offset}
2017both represent immediate values. So to call a far procedure, you
2018could code either of
2019
2020\c         call    (seg procedure):procedure
2021\c         call    weird_seg:(procedure wrt weird_seg)
2022
2023(The parentheses are included for clarity, to show the intended
2024parsing of the above instructions. They are not necessary in
2025practice.)
2026
2027NASM supports the syntax \I\c{CALL FAR}\c{call far procedure} as a
2028synonym for the first of the above usages. \c{JMP} works identically
2029to \c{CALL} in these examples.
2030
2031To declare a \i{far pointer} to a data item in a data segment, you
2032must code
2033
2034\c         dw      symbol, seg symbol
2035
2036NASM supports no convenient synonym for this, though you can always
2037invent one using the macro processor.
2038
2039
2040\H{strict} \i\c{STRICT}: Inhibiting Optimization
2041
2042When assembling with the optimizer set to level 2 or higher (see
2043\k{opt-O}), NASM will use size specifiers (\c{BYTE}, \c{WORD},
2044\c{DWORD}, \c{QWORD}, \c{TWORD}, \c{OWORD}, \c{YWORD} or \c{ZWORD}),
2045but will give them the smallest possible size. The keyword \c{STRICT}
2046can be used to inhibit optimization and force a particular operand to
2047be emitted in the specified size. For example, with the optimizer on,
2048and in \c{BITS 16} mode,
2049
2050\c         push dword 33
2051
2052is encoded in three bytes \c{66 6A 21}, whereas
2053
2054\c         push strict dword 33
2055
2056is encoded in six bytes, with a full dword immediate operand \c{66 68
205721 00 00 00}.
2058
2059With the optimizer off, the same code (six bytes) is generated whether
2060the \c{STRICT} keyword was used or not.
2061
2062
2063\H{crit} \i{Critical Expressions}
2064
2065Although NASM has an optional multi-pass optimizer, there are some
2066expressions which must be resolvable on the first pass. These are
2067called \e{Critical Expressions}.
2068
2069The first pass is used to determine the size of all the assembled
2070code and data, so that the second pass, when generating all the
2071code, knows all the symbol addresses the code refers to. So one
2072thing NASM can't handle is code whose size depends on the value of a
2073symbol declared after the code in question. For example,
2074
2075\c         times (label-$) db 0
2076\c label:  db      'Where am I?'
2077
2078The argument to \i\c{TIMES} in this case could equally legally
2079evaluate to anything at all; NASM will reject this example because
2080it cannot tell the size of the \c{TIMES} line when it first sees it.
2081It will just as firmly reject the slightly \I{paradox}paradoxical
2082code
2083
2084\c         times (label-$+1) db 0
2085\c label:  db      'NOW where am I?'
2086
2087in which \e{any} value for the \c{TIMES} argument is by definition
2088wrong!
2089
2090NASM rejects these examples by means of a concept called a
2091\e{critical expression}, which is defined to be an expression whose
2092value is required to be computable in the first pass, and which must
2093therefore depend only on symbols defined before it. The argument to
2094the \c{TIMES} prefix is a critical expression.
2095
2096\H{locallab} \i{Local Labels}
2097
2098NASM gives special treatment to symbols beginning with a \i{period}.
2099A label beginning with a single period is treated as a \e{local}
2100label, which means that it is associated with the previous non-local
2101label. So, for example:
2102
2103\c label1  ; some code
2104\c
2105\c .loop
2106\c         ; some more code
2107\c
2108\c         jne     .loop
2109\c         ret
2110\c
2111\c label2  ; some code
2112\c
2113\c .loop
2114\c         ; some more code
2115\c
2116\c         jne     .loop
2117\c         ret
2118
2119In the above code fragment, each \c{JNE} instruction jumps to the
2120line immediately before it, because the two definitions of \c{.loop}
2121are kept separate by virtue of each being associated with the
2122previous non-local label.
2123
2124This form of local label handling is borrowed from the old Amiga
2125assembler \i{DevPac}; however, NASM goes one step further, in
2126allowing access to local labels from other parts of the code. This
2127is achieved by means of \e{defining} a local label in terms of the
2128previous non-local label: the first definition of \c{.loop} above is
2129really defining a symbol called \c{label1.loop}, and the second
2130defines a symbol called \c{label2.loop}. So, if you really needed
2131to, you could write
2132
2133\c label3  ; some more code
2134\c         ; and some more
2135\c
2136\c         jmp label1.loop
2137
2138Sometimes it is useful - in a macro, for instance - to be able to
2139define a label which can be referenced from anywhere but which
2140doesn't interfere with the normal local-label mechanism. Such a
2141label can't be non-local because it would interfere with subsequent
2142definitions of, and references to, local labels; and it can't be
2143local because the macro that defined it wouldn't know the label's
2144full name. NASM therefore introduces a third type of label, which is
2145probably only useful in macro definitions: if a label begins with
2146the \I{label prefix}special prefix \i\c{..@}, then it does nothing
2147to the local label mechanism. So you could code
2148
2149\c label1:                         ; a non-local label
2150\c .local:                         ; this is really label1.local
2151\c ..@foo:                         ; this is a special symbol
2152\c label2:                         ; another non-local label
2153\c .local:                         ; this is really label2.local
2154\c
2155\c         jmp     ..@foo          ; this will jump three lines up
2156
2157NASM has the capacity to define other special symbols beginning with
2158a double period: for example, \c{..start} is used to specify the
2159entry point in the \c{obj} output format (see \k{dotdotstart}),
2160\c{..imagebase} is used to find out the offset from a base address
2161of the current image in the \c{win64} output format (see \k{win64pic}).
2162So just keep in mind that symbols beginning with a double period are
2163special.
2164
2165
2166\C{preproc} The NASM \i{Preprocessor}
2167
2168NASM contains a powerful \i{macro processor}, which supports
2169conditional assembly, multi-level file inclusion, two forms of macro
2170(single-line and multi-line), and a `context stack' mechanism for
2171extra macro power. Preprocessor directives all begin with a \c{%}
2172sign.
2173
2174The preprocessor collapses all lines which end with a backslash (\\)
2175character into a single line.  Thus:
2176
2177\c %define THIS_VERY_LONG_MACRO_NAME_IS_DEFINED_TO \\
2178\c         THIS_VALUE
2179
2180will work like a single-line macro without the backslash-newline
2181sequence.
2182
2183\H{slmacro} \i{Single-Line Macros}
2184
2185\S{define} The Normal Way: \I\c{%idefine}\i\c{%define}
2186
2187Single-line macros are defined using the \c{%define} preprocessor
2188directive. The definitions work in a similar way to C; so you can do
2189things like
2190
2191\c %define ctrl    0x1F &
2192\c %define param(a,b) ((a)+(a)*(b))
2193\c
2194\c         mov     byte [param(2,ebx)], ctrl 'D'
2195
2196which will expand to
2197
2198\c         mov     byte [(2)+(2)*(ebx)], 0x1F & 'D'
2199
2200When the expansion of a single-line macro contains tokens which
2201invoke another macro, the expansion is performed at invocation time,
2202not at definition time. Thus the code
2203
2204\c %define a(x)    1+b(x)
2205\c %define b(x)    2*x
2206\c
2207\c         mov     ax,a(8)
2208
2209will evaluate in the expected way to \c{mov ax,1+2*8}, even though
2210the macro \c{b} wasn't defined at the time of definition of \c{a}.
2211
2212Note that a single-line macro's argument list cannot be preceded by whitespace;
2213otherwise it will be treated as part of the expansion. For example:
2214
2215\c    %define foo (a,b)               ; no arguments, (a,b) is the expansion
2216\c    %define bar(a,b)                ; two arguments, empty expansion
2217
2218
2219Macros defined with \c{%define} are \i{case sensitive}: after
2220\c{%define foo bar}, only \c{foo} will expand to \c{bar}: \c{Foo} or
2221\c{FOO} will not. By using \c{%idefine} instead of \c{%define} (the
2222`i' stands for `insensitive') you can define all the case variants
2223of a macro at once, so that \c{%idefine foo bar} would cause
2224\c{foo}, \c{Foo}, \c{FOO}, \c{fOO} and so on all to expand to
2225\c{bar}.
2226
2227There is a mechanism which detects when a macro call has occurred as
2228a result of a previous expansion of the same macro, to guard against
2229\i{circular references} and infinite loops. If this happens, the
2230preprocessor will only expand the first occurrence of the macro.
2231Hence, if you code
2232
2233\c %define a(x)    1+a(x)
2234\c
2235\c         mov     ax,a(3)
2236
2237the macro \c{a(3)} will expand once, becoming \c{1+a(3)}, and will
2238then expand no further. This behaviour can be useful: see \k{32c}
2239for an example of its use.
2240
2241You can \I{overloading, single-line macros}overload single-line
2242macros: if you write
2243
2244\c %define foo(x)   1+x
2245\c %define foo(x,y) 1+x*y
2246
2247the preprocessor will be able to handle both types of macro call,
2248by counting the parameters you pass; so \c{foo(3)} will become
2249\c{1+3} whereas \c{foo(ebx,2)} will become \c{1+ebx*2}. However, if
2250you define
2251
2252\c %define foo bar
2253
2254then no other definition of \c{foo} will be accepted: a macro with
2255no parameters prohibits the definition of the same name as a macro
2256\e{with} parameters, and vice versa.
2257
2258This doesn't prevent single-line macros being \e{redefined}: you can
2259perfectly well define a macro with
2260
2261\c %define foo bar
2262
2263and then re-define it later in the same source file with
2264
2265\c %define foo baz
2266
2267Then everywhere the macro \c{foo} is invoked, it will be expanded
2268according to the most recent definition. This is particularly useful
2269when defining single-line macros with \c{%assign} (see \k{assign}).
2270
2271The following additional features were added in NASM 2.15:
2272
2273It is possible to define an empty string instead of an argument name
2274if the argument is never used. For example:
2275
2276\c    %define ereg(foo,) e %+ foo
2277\c      mov eax,ereg(dx,cx)
2278
2279A single pair of parentheses is a subcase of a single, unused argument:
2280
2281\c    %define myreg() eax
2282\c	mov edx,myreg()
2283
2284This is similar to the behavior of the C preprocessor.
2285
2286\b If declared with an \c{=}, NASM will evaluate the argument as an
2287expression after expansion.
2288
2289\b If declared with an \c{&}, the macro parameter will be
2290turned into a quoted string after expansion.
2291
2292\b If declared with a \c{+}, it is a greedy or variadic parameter; it
2293includes any subsequent commas and parameters.
2294
2295\b If declared with an \c{!}, NASM will not strip whitespace and
2296braces (useful in conjunction with \c{&}).
2297
2298For example:
2299
2300\c     %define xyzzy(=expr,&val) expr, val
2301\c     %define plugh(x) xyzzy(x,x)
2302\c     db plugh(3+5), `\0` ; Expands to: db 8, "3+5", `\0`
2303
2304You can \i{pre-define} single-line macros using the `-d' option on
2305the NASM command line: see \k{opt-d}.
2306
2307
2308\S{xdefine} Resolving \c{%define}: \I\c{%ixdefine}\i\c{%xdefine}
2309
2310To have a reference to an embedded single-line macro resolved at the
2311time that the embedding macro is \e{defined}, as opposed to when the
2312embedding macro is \e{expanded}, you need a different mechanism to the
2313one offered by \c{%define}. The solution is to use \c{%xdefine}, or
2314its \I{case sensitive}case-insensitive counterpart \c{%ixdefine}.
2315
2316Suppose you have the following code:
2317
2318\c %define  isTrue  1
2319\c %define  isFalse isTrue
2320\c %define  isTrue  0
2321\c
2322\c val1:    db      isFalse
2323\c
2324\c %define  isTrue  1
2325\c
2326\c val2:    db      isFalse
2327
2328In this case, \c{val1} is equal to 0, and \c{val2} is equal to 1.
2329This is because, when a single-line macro is defined using
2330\c{%define}, it is expanded only when it is called. As \c{isFalse}
2331expands to \c{isTrue}, the expansion will be the current value of
2332\c{isTrue}. The first time it is called that is 0, and the second
2333time it is 1.
2334
2335If you wanted \c{isFalse} to expand to the value assigned to the
2336embedded macro \c{isTrue} at the time that \c{isFalse} was defined,
2337you need to change the above code to use \c{%xdefine}.
2338
2339\c %xdefine isTrue  1
2340\c %xdefine isFalse isTrue
2341\c %xdefine isTrue  0
2342\c
2343\c val1:    db      isFalse
2344\c
2345\c %xdefine isTrue  1
2346\c
2347\c val2:    db      isFalse
2348
2349Now, each time that \c{isFalse} is called, it expands to 1,
2350as that is what the embedded macro \c{isTrue} expanded to at
2351the time that \c{isFalse} was defined.
2352
2353\c{%xdefine} and \c{%ixdefine} support argument expansion exactly the
2354same way that \c{%define} and \c{%idefine} do.
2355
2356
2357\S{indmacro} \i{Macro Indirection}: \I\c{%[}\c{%[...]}
2358
2359The \c{%[...]} construct can be used to expand macros in contexts
2360where macro expansion would otherwise not occur, including in the
2361names of other macros.  For example, if you have a set of macros named
2362\c{Foo16}, \c{Foo32} and \c{Foo64}, you could write:
2363
2364\c	mov ax,Foo%[__?BITS?__]	; The Foo value
2365
2366to use the builtin macro \c{__?BITS?__} (see \k{bitsm}) to automatically
2367select between them.  Similarly, the two statements:
2368
2369\c %xdefine Bar		Quux	; Expands due to %xdefine
2370\c %define  Bar		%[Quux]	; Expands due to %[...]
2371
2372have, in fact, exactly the same effect.
2373
2374\c{%[...]} concatenates to adjacent tokens in the same way that
2375multi-line macro parameters do; see \k{concat} for details.
2376
2377
2378\S{concat%+} Concatenating Single Line Macro Tokens: \i\c{%+}
2379
2380Individual tokens in single line macros can be concatenated, to produce
2381longer tokens for later processing. This can be useful if there are
2382several similar macros that perform similar functions.
2383
2384Please note that a space is required after \c{%+}, in order to
2385disambiguate it from the syntax \c{%+1} used in multiline macros.
2386
2387As an example, consider the following:
2388
2389\c %define BDASTART 400h                ; Start of BIOS data area
2390
2391\c struc   tBIOSDA                      ; its structure
2392\c         .COM1addr       RESW    1
2393\c         .COM2addr       RESW    1
2394\c         ; ..and so on
2395\c endstruc
2396
2397Now, if we need to access the elements of tBIOSDA in different places,
2398we can end up with:
2399
2400\c         mov     ax,BDASTART + tBIOSDA.COM1addr
2401\c         mov     bx,BDASTART + tBIOSDA.COM2addr
2402
2403This will become pretty ugly (and tedious) if used in many places, and
2404can be reduced in size significantly by using the following macro:
2405
2406\c ; Macro to access BIOS variables by their names (from tBIOSDA):
2407
2408\c %define BDA(x)  BDASTART + tBIOSDA. %+ x
2409
2410Now the above code can be written as:
2411
2412\c         mov     ax,BDA(COM1addr)
2413\c         mov     bx,BDA(COM2addr)
2414
2415Using this feature, we can simplify references to a lot of macros (and,
2416in turn, reduce typing errors).
2417
2418
2419\S{selfref%?} The Macro Name Itself: \i\c{%?} and \i\c{%??}
2420
2421The special symbols \c{%?} and \c{%??} can be used to reference the
2422macro name itself inside a macro expansion; this is supported for both
2423single- and multi-line macros.  \c{%?} refers to the macro name as
2424\e{invoked}, whereas \c{%??} refers to the macro name as
2425\e{declared}.  The two are always the same for case-sensitive
2426macros, but for case-insensitive macros, they can differ.
2427
2428For example:
2429
2430\c %imacro Foo 0
2431\c         mov %?,%??
2432\c %endmacro
2433\c
2434\c         foo
2435\c         FOO
2436
2437will expand to:
2438
2439\c         mov foo,Foo
2440\c         mov FOO,Foo
2441
2442These tokens can be used for single-line macros \e{if defined outside
2443any multi-line macros.} See below.
2444
2445\S{selfref%*?} The Single-Line Macro Name: \i\c{%*?} and \i\c{%*??}
2446
2447If the tokens \c{%?} and \c{%??} are used inside a multi-line macro,
2448they are expanded before any directives are processed. As a result,
2449
2450\c %imacro Foo 0
2451\c       %idefine Bar _%?
2452\c       mov BAR,bAr
2453\c %endmacro
2454\c
2455\c       foo
2456\c       mov eax,bar
2457
2458will expand to:
2459
2460\c       mov _foo,_foo
2461\c       mov eax,_foo
2462
2463which may or may not be what you expected. The tokens \c{%*?} and
2464\c{%*??} behave like \c{%?} and \c{%??} but are only expanded inside
2465single-line macros. Thus:
2466
2467\c %imacro Foo 0
2468\c       %idefine Bar _%*?
2469\c       mov BAR,bAr
2470\c %endmacro
2471\c
2472\c       foo
2473\c       mov eax,bar
2474
2475will expand to:
2476
2477\c       mov _BAR,_bAr
2478\c       mov eax,_bar
2479
2480The \c{%*?} can be used to make a keyword "disappear", for example in
2481case a new instruction has been used as a label in older code.  For
2482example:
2483
2484\c %idefine pause $%*?                 ; Hide the PAUSE instruction
2485
2486\c{%*?} and \c{%*??} were introduced in NASM 2.15.04.
2487
2488\S{undef} Undefining Single-Line Macros: \i\c{%undef}
2489
2490Single-line macros can be removed with the \c{%undef} directive.  For
2491example, the following sequence:
2492
2493\c %define foo bar
2494\c %undef  foo
2495\c
2496\c         mov     eax, foo
2497
2498will expand to the instruction \c{mov eax, foo}, since after
2499\c{%undef} the macro \c{foo} is no longer defined.
2500
2501Macros that would otherwise be pre-defined can be undefined
2502using the `-u' option on the NASM command line: see
2503\k{opt-u}.
2504
2505
2506\S{assign} \i{Preprocessor Variables}: \i\c{%assign}
2507
2508An alternative way to define single-line macros is by means of the
2509\c{%assign} command (and its \I{case sensitive}case-insensitive
2510counterpart \i\c{%iassign}, which differs from \c{%assign} in
2511exactly the same way that \c{%idefine} differs from \c{%define}).
2512
2513\c{%assign} is used to define single-line macros which take no
2514parameters and have a numeric value. This value can be specified in
2515the form of an expression, and it will be evaluated once, when the
2516\c{%assign} directive is processed.
2517
2518Like \c{%define}, macros defined using \c{%assign} can be re-defined
2519later, so you can do things like
2520
2521\c %assign i i+1
2522
2523to increment the numeric value of a macro.
2524
2525\c{%assign} is useful for controlling the termination of \c{%rep}
2526preprocessor loops: see \k{rep} for an example of this. Another
2527use for \c{%assign} is given in \k{16c} and \k{32c}.
2528
2529The expression passed to \c{%assign} is a \i{critical expression}
2530(see \k{crit}), and must also evaluate to a pure number (rather than
2531a relocatable reference such as a code or data address, or anything
2532involving a register).
2533
2534
2535\S{defstr} Defining Strings: \I\c{%idefstr}\i\c{%defstr}
2536
2537\c{%defstr}, and its case-insensitive counterpart \c{%idefstr}, define
2538or redefine a single-line macro without parameters but convert the
2539entire right-hand side, after macro expansion, to a quoted string
2540before definition.
2541
2542For example:
2543
2544\c %defstr test TEST
2545
2546is equivalent to
2547
2548\c %define test 'TEST'
2549
2550This can be used, for example, with the \c{%!} construct (see
2551\k{getenv}):
2552
2553\c %defstr PATH %!PATH          ; The operating system PATH variable
2554
2555
2556\S{deftok} Defining Tokens: \I\c{%ideftok}\i\c{%deftok}
2557
2558\c{%deftok}, and its case-insensitive counterpart \c{%ideftok}, define
2559or redefine a single-line macro without parameters but convert the
2560second parameter, after string conversion, to a sequence of tokens.
2561
2562For example:
2563
2564\c %deftok test 'TEST'
2565
2566is equivalent to
2567
2568\c %define test TEST
2569
2570
2571\S{defalias} Defining Aliases: \I\c{%idefalias}\i\c{%defalias}
2572
2573\c{%defalias}, and its case-insensitive counterpart \c{%idefalias}, define an
2574alias to a macro, i.e. the equivalent of a symbolic link.
2575
2576When used with various macro defining and undefining directives, it
2577affects the aliased macro. This functionality is intended for being
2578able to rename macros while retaining the legacy names.
2579
2580When an alias is defined, but the aliased macro is then undefined, the
2581aliases can legitimately point to nonexistent macros.
2582
2583The alias can be undefined using the \c{%undefalias} directive.  \e{All}
2584aliases can be undefined using the \c{%clear defalias} directive. This
2585includes backwards compatibility aliases defined by NASM itself.
2586
2587To disable aliases without undefining them, use the \c{%aliases off}
2588directive.
2589
2590To check whether an alias is defined, regardless of the existence of
2591the aliased macro, use \c{%ifdefalias}.
2592
2593For example:
2594
2595\c %defalias OLD NEW
2596\c    ; OLD and NEW both undefined
2597\c %define NEW 123
2598\c    ; OLD and NEW both 123
2599\c %undef OLD
2600\c    ; OLD and NEW both undefined
2601\c %define OLD 456
2602\c    ; OLD and NEW both 456
2603\c %undefalias OLD
2604\c    ; OLD undefined, NEW defined to 456
2605
2606\S{cond-comma} \i{Conditional Comma Operator}: \i\c{%,}
2607
2608As of version 2.15, NASM has a conditional comma operator \c{%,} that
2609expands to a comma \e{unless} followed by a null expansion, which
2610allows suppressing the comma before an empty argument. This is
2611especially useful with greedy single-line macros.
2612
2613For example, all the expressions below are valid:
2614
2615\c %define greedy(a,b,c+) a + 66 %, b * 3 %, c
2616\c
2617\c        db greedy(1,2)          ; db 1 + 66, 2 * 3
2618\c        db greedy(1,2,3)        ; db 1 + 66, 2 * 3, 3
2619\c        db greedy(1,2,3,4)      ; db 1 + 66, 2 * 3, 3, 4
2620\c        db greedy(1,2,3,4,5)    ; db 1 + 66, 2 * 3, 3, 4, 5
2621
2622
2623\H{strlen} \i{String Manipulation in Macros}
2624
2625It's often useful to be able to handle strings in macros. NASM
2626supports a few simple string handling macro operators from which
2627more complex operations can be constructed.
2628
2629All the string operators define or redefine a value (either a string
2630or a numeric value) to a single-line macro.  When producing a string
2631value, they may change the style of quoting of the input string or
2632strings, and possibly use \c{\\}-escapes inside \c{`}-quoted strings.
2633
2634\S{strcat} \i{Concatenating Strings}: \i\c{%strcat}
2635
2636The \c{%strcat} operator concatenates quoted strings and assigns the
2637result to a single-line macro.
2638
2639For example:
2640
2641\c %strcat alpha "Alpha: ", '12" screen'
2642
2643... would assign the value \c{'Alpha: 12" screen'} to \c{alpha}.
2644Similarly:
2645
2646\c %strcat beta '"foo"\', "'bar'"
2647
2648... would assign the value \c{`"foo"\\\\'bar'`} to \c{beta}.
2649
2650The use of commas to separate strings is permitted but optional.
2651
2652
2653\S{strlen} \i{String Length}: \i\c{%strlen}
2654
2655The \c{%strlen} operator assigns the length of a string to a macro.
2656For example:
2657
2658\c %strlen charcnt 'my string'
2659
2660In this example, \c{charcnt} would receive the value 9, just as
2661if an \c{%assign} had been used. In this example, \c{'my string'}
2662was a literal string but it could also have been a single-line
2663macro that expands to a string, as in the following example:
2664
2665\c %define sometext 'my string'
2666\c %strlen charcnt sometext
2667
2668As in the first case, this would result in \c{charcnt} being
2669assigned the value of 9.
2670
2671
2672\S{substr} \i{Extracting Substrings}: \i\c{%substr}
2673
2674Individual letters or substrings in strings can be extracted using the
2675\c{%substr} operator.  An example of its use is probably more useful
2676than the description:
2677
2678\c %substr mychar 'xyzw' 1       ; equivalent to %define mychar 'x'
2679\c %substr mychar 'xyzw' 2       ; equivalent to %define mychar 'y'
2680\c %substr mychar 'xyzw' 3       ; equivalent to %define mychar 'z'
2681\c %substr mychar 'xyzw' 2,2     ; equivalent to %define mychar 'yz'
2682\c %substr mychar 'xyzw' 2,-1    ; equivalent to %define mychar 'yzw'
2683\c %substr mychar 'xyzw' 2,-2    ; equivalent to %define mychar 'yz'
2684
2685As with \c{%strlen} (see \k{strlen}), the first parameter is the
2686single-line macro to be created and the second is the string. The
2687third parameter specifies the first character to be selected, and the
2688optional fourth parameter (preceded by a comma) is the length.  Note
2689that the first index is 1, not 0 and the last index is equal to the
2690value that \c{%strlen} would assign given the same string. Index
2691values out of range result in an empty string.  A negative length
2692means "until N-1 characters before the end of string", i.e. \c{-1}
2693means until end of string, \c{-2} until one character before, etc.
2694
2695
2696\H{mlmacro} \i{Multi-Line Macros}: \I\c{%imacro}\i\c{%macro}
2697
2698Multi-line macros are much more like the type of macro seen in MASM
2699and TASM: a multi-line macro definition in NASM looks something like
2700this.
2701
2702\c %macro  prologue 1
2703\c
2704\c         push    ebp
2705\c         mov     ebp,esp
2706\c         sub     esp,%1
2707\c
2708\c %endmacro
2709
2710This defines a C-like function prologue as a macro: so you would
2711invoke the macro with a call such as:
2712
2713\c myfunc:   prologue 12
2714
2715which would expand to the three lines of code
2716
2717\c myfunc: push    ebp
2718\c         mov     ebp,esp
2719\c         sub     esp,12
2720
2721The number \c{1} after the macro name in the \c{%macro} line defines
2722the number of parameters the macro \c{prologue} expects to receive.
2723The use of \c{%1} inside the macro definition refers to the first
2724parameter to the macro call. With a macro taking more than one
2725parameter, subsequent parameters would be referred to as \c{%2},
2726\c{%3} and so on.
2727
2728Multi-line macros, like single-line macros, are \i{case-sensitive},
2729unless you define them using the alternative directive \c{%imacro}.
2730
2731If you need to pass a comma as \e{part} of a parameter to a
2732multi-line macro, you can do that by enclosing the entire parameter
2733in \I{braces, around macro parameters}braces. So you could code
2734things like:
2735
2736\c %macro  silly 2
2737\c
2738\c     %2: db      %1
2739\c
2740\c %endmacro
2741\c
2742\c         silly 'a', letter_a             ; letter_a:  db 'a'
2743\c         silly 'ab', string_ab           ; string_ab: db 'ab'
2744\c         silly {13,10}, crlf             ; crlf:      db 13,10
2745
2746The behavior with regard to empty arguments at the end of multi-line
2747macros before NASM 2.15 was often very strange. For backwards
2748compatibility, NASM attempts to recognize cases where the legacy
2749behavior would give unexpected results, and issues a warning, but
2750largely tries to match the legacy behavior. This can be disabled with
2751the \c{%pragma} (see \k{pragma-preproc}):
2752
2753\c %pragma preproc sane_empty_expansion
2754
2755
2756\S{mlmacover} Overloading Multi-Line Macros\I{overloading, multi-line macros}
2757
2758As with single-line macros, multi-line macros can be overloaded by
2759defining the same macro name several times with different numbers of
2760parameters. This time, no exception is made for macros with no
2761parameters at all. So you could define
2762
2763\c %macro  prologue 0
2764\c
2765\c         push    ebp
2766\c         mov     ebp,esp
2767\c
2768\c %endmacro
2769
2770to define an alternative form of the function prologue which
2771allocates no local stack space.
2772
2773Sometimes, however, you might want to `overload' a machine
2774instruction; for example, you might want to define
2775
2776\c %macro  push 2
2777\c
2778\c         push    %1
2779\c         push    %2
2780\c
2781\c %endmacro
2782
2783so that you could code
2784
2785\c         push    ebx             ; this line is not a macro call
2786\c         push    eax,ecx         ; but this one is
2787
2788Ordinarily, NASM will give a warning for the first of the above two
2789lines, since \c{push} is now defined to be a macro, and is being
2790invoked with a number of parameters for which no definition has been
2791given. The correct code will still be generated, but the assembler
2792will give a warning. This warning can be disabled by the use of the
2793\c{-w-macro-params} command-line option (see \k{opt-w}).
2794
2795
2796\S{maclocal} \i{Macro-Local Labels}
2797
2798NASM allows you to define labels within a multi-line macro
2799definition in such a way as to make them local to the macro call: so
2800calling the same macro multiple times will use a different label
2801each time. You do this by prefixing \i\c{%%} to the label name. So
2802you can invent an instruction which executes a \c{RET} if the \c{Z}
2803flag is set by doing this:
2804
2805\c %macro  retz 0
2806\c
2807\c         jnz     %%skip
2808\c         ret
2809\c     %%skip:
2810\c
2811\c %endmacro
2812
2813You can call this macro as many times as you want, and every time
2814you call it NASM will make up a different `real' name to substitute
2815for the label \c{%%skip}. The names NASM invents are of the form
2816\c{..@2345.skip}, where the number 2345 changes with every macro
2817call. The \i\c{..@} prefix prevents macro-local labels from
2818interfering with the local label mechanism, as described in
2819\k{locallab}. You should avoid defining your own labels in this form
2820(the \c{..@} prefix, then a number, then another period) in case
2821they interfere with macro-local labels.
2822
2823These labels are really macro-local \e{tokens}, and can be used for
2824other purposes where a token unique to each macro invocation is
2825desired, e.g. to name single-line macros without using the context
2826feature (\k{ctxlocal}).
2827
2828
2829\S{mlmacgre} \i{Greedy Macro Parameters}
2830
2831Occasionally it is useful to define a macro which lumps its entire
2832command line into one parameter definition, possibly after
2833extracting one or two smaller parameters from the front. An example
2834might be a macro to write a text string to a file in MS-DOS, where
2835you might want to be able to write
2836
2837\c         writefile [filehandle],"hello, world",13,10
2838
2839NASM allows you to define the last parameter of a macro to be
2840\e{greedy}, meaning that if you invoke the macro with more
2841parameters than it expects, all the spare parameters get lumped into
2842the last defined one along with the separating commas. So if you
2843code:
2844
2845\c %macro  writefile 2+
2846\c
2847\c         jmp     %%endstr
2848\c   %%str:        db      %2
2849\c   %%endstr:
2850\c         mov     dx,%%str
2851\c         mov     cx,%%endstr-%%str
2852\c         mov     bx,%1
2853\c         mov     ah,0x40
2854\c         int     0x21
2855\c
2856\c %endmacro
2857
2858then the example call to \c{writefile} above will work as expected:
2859the text before the first comma, \c{[filehandle]}, is used as the
2860first macro parameter and expanded when \c{%1} is referred to, and
2861all the subsequent text is lumped into \c{%2} and placed after the
2862\c{db}.
2863
2864The greedy nature of the macro is indicated to NASM by the use of
2865the \I{+ modifier}\c{+} sign after the parameter count on the
2866\c{%macro} line.
2867
2868If you define a greedy macro, you are effectively telling NASM how
2869it should expand the macro given \e{any} number of parameters from
2870the actual number specified up to infinity; in this case, for
2871example, NASM now knows what to do when it sees a call to
2872\c{writefile} with 2, 3, 4 or more parameters. NASM will take this
2873into account when overloading macros, and will not allow you to
2874define another form of \c{writefile} taking 4 parameters (for
2875example).
2876
2877Of course, the above macro could have been implemented as a
2878non-greedy macro, in which case the call to it would have had to
2879look like
2880
2881\c           writefile [filehandle], {"hello, world",13,10}
2882
2883NASM provides both mechanisms for putting \i{commas in macro
2884parameters}, and you choose which one you prefer for each macro
2885definition.
2886
2887See \k{sectmac} for a better way to write the above macro.
2888
2889\S{mlmacrange} \i{Macro Parameters Range}
2890
2891NASM allows you to expand parameters via the special construct \c{%\{x:y\}}
2892where \c{x} is the first parameter index and \c{y} is the last. Any index can
2893be either negative or positive but must never be zero.
2894
2895For example
2896
2897\c %macro mpar 1-*
2898\c      db %{3:5}
2899\c %endmacro
2900\c
2901\c mpar 1,2,3,4,5,6
2902
2903expands to the \c{3,4,5} range.
2904
2905Even more, the parameters can be reversed so that
2906
2907\c %macro mpar 1-*
2908\c      db %{5:3}
2909\c %endmacro
2910\c
2911\c mpar 1,2,3,4,5,6
2912
2913expands to the \c{5,4,3} range.
2914
2915But that is not all. The parameters can also be addressed via negative
2916indices, in which case NASM counts them from the end. Those who know Python
2917may recognize the analogy.
2918
2919\c %macro mpar 1-*
2920\c      db %{-1:-3}
2921\c %endmacro
2922\c
2923\c mpar 1,2,3,4,5,6
2924
2925expands to the \c{6,5,4} range.
2926
2927Note that NASM uses a \i{comma} to separate the parameters being expanded.
2928
2929By the way, here is a trick - you might use the index \c{%\{-1:-1\}}
2930which gives you the \i{last} argument passed to a macro.
2931
2932\S{mlmacdef} \i{Default Macro Parameters}
2933
2934NASM also allows you to define a multi-line macro with a \e{range}
2935of allowable parameter counts. If you do this, you can specify
2936defaults for \i{omitted parameters}. So, for example:
2937
2938\c %macro  die 0-1 "Painful program death has occurred."
2939\c
2940\c         writefile 2,%1
2941\c         mov     ax,0x4c01
2942\c         int     0x21
2943\c
2944\c %endmacro
2945
2946This macro (which makes use of the \c{writefile} macro defined in
2947\k{mlmacgre}) can be called with an explicit error message, which it
2948will display on the error output stream before exiting, or it can be
2949called with no parameters, in which case it will use the default
2950error message supplied in the macro definition.
2951
2952In general, you supply a minimum and maximum number of parameters
2953for a macro of this type; the minimum number of parameters are then
2954required in the macro call, and then you provide defaults for the
2955optional ones. So if a macro definition began with the line
2956
2957\c %macro foobar 1-3 eax,[ebx+2]
2958
2959then it could be called with between one and three parameters, and
2960\c{%1} would always be taken from the macro call. \c{%2}, if not
2961specified by the macro call, would default to \c{eax}, and \c{%3} if
2962not specified would default to \c{[ebx+2]}.
2963
2964You can provide extra information to a macro by providing
2965too many default parameters:
2966
2967\c %macro quux 1 something
2968
2969This will trigger a warning by default; see \k{opt-w} for
2970more information.
2971When \c{quux} is invoked, it receives not one but two parameters.
2972\c{something} can be referred to as \c{%2}. The difference
2973between passing \c{something} this way and writing \c{something}
2974in the macro body is that this way \c{something} is evaluated
2975when the macro is defined, not when it is expanded.
2976
2977You may omit parameter defaults from the macro definition, in which
2978case the parameter default is taken to be blank. This can be useful
2979for macros which can take a variable number of parameters, since the
2980\i\c{%0} token (see \k{percent0}) allows you to determine how many
2981parameters were really passed to the macro call.
2982
2983This defaulting mechanism can be combined with the greedy-parameter
2984mechanism; so the \c{die} macro above could be made more powerful,
2985and more useful, by changing the first line of the definition to
2986
2987\c %macro die 0-1+ "Painful program death has occurred.",13,10
2988
2989The maximum parameter count can be infinite, denoted by \c{*}. In
2990this case, of course, it is impossible to provide a \e{full} set of
2991default parameters. Examples of this usage are shown in \k{rotate}.
2992
2993
2994\S{percent0} \i\c{%0}: \I{counting macro parameters}Macro Parameter Counter
2995
2996The parameter reference \c{%0} will return a numeric constant giving the
2997number of parameters received, that is, if \c{%0} is n then \c{%}n is the
2998last parameter. \c{%0} is mostly useful for macros that can take a variable
2999number of parameters. It can be used as an argument to \c{%rep}
3000(see \k{rep}) in order to iterate through all the parameters of a macro.
3001Examples are given in \k{rotate}.
3002
3003
3004\S{percent00} \i\c{%00}: \I{label preceding macro}Label Preceding Macro
3005
3006\c{%00} will return the label preceding the macro invocation, if any. The
3007label must be on the same line as the macro invocation, may be a local label
3008(see \k{locallab}), and need not end in a colon.
3009
3010If \c{%00} is present anywhere in the macro body, the label itself
3011will not be emitted by NASM. You can, of course, put \c{%00:}
3012explicitly at the beginning of your macro.
3013
3014
3015\S{rotate} \i\c{%rotate}: \i{Rotating Macro Parameters}
3016
3017Unix shell programmers will be familiar with the \I{shift
3018command}\c{shift} shell command, which allows the arguments passed
3019to a shell script (referenced as \c{$1}, \c{$2} and so on) to be
3020moved left by one place, so that the argument previously referenced
3021as \c{$2} becomes available as \c{$1}, and the argument previously
3022referenced as \c{$1} is no longer available at all.
3023
3024NASM provides a similar mechanism, in the form of \c{%rotate}. As
3025its name suggests, it differs from the Unix \c{shift} in that no
3026parameters are lost: parameters rotated off the left end of the
3027argument list reappear on the right, and vice versa.
3028
3029\c{%rotate} is invoked with a single numeric argument (which may be
3030an expression). The macro parameters are rotated to the left by that
3031many places. If the argument to \c{%rotate} is negative, the macro
3032parameters are rotated to the right.
3033
3034\I{iterating over macro parameters}So a pair of macros to save and
3035restore a set of registers might work as follows:
3036
3037\c %macro  multipush 1-*
3038\c
3039\c   %rep  %0
3040\c         push    %1
3041\c   %rotate 1
3042\c   %endrep
3043\c
3044\c %endmacro
3045
3046This macro invokes the \c{PUSH} instruction on each of its arguments
3047in turn, from left to right. It begins by pushing its first
3048argument, \c{%1}, then invokes \c{%rotate} to move all the arguments
3049one place to the left, so that the original second argument is now
3050available as \c{%1}. Repeating this procedure as many times as there
3051were arguments (achieved by supplying \c{%0} as the argument to
3052\c{%rep}) causes each argument in turn to be pushed.
3053
3054Note also the use of \c{*} as the maximum parameter count,
3055indicating that there is no upper limit on the number of parameters
3056you may supply to the \i\c{multipush} macro.
3057
3058It would be convenient, when using this macro, to have a \c{POP}
3059equivalent, which \e{didn't} require the arguments to be given in
3060reverse order. Ideally, you would write the \c{multipush} macro
3061call, then cut-and-paste the line to where the pop needed to be
3062done, and change the name of the called macro to \c{multipop}, and
3063the macro would take care of popping the registers in the opposite
3064order from the one in which they were pushed.
3065
3066This can be done by the following definition:
3067
3068\c %macro  multipop 1-*
3069\c
3070\c   %rep %0
3071\c   %rotate -1
3072\c         pop     %1
3073\c   %endrep
3074\c
3075\c %endmacro
3076
3077This macro begins by rotating its arguments one place to the
3078\e{right}, so that the original \e{last} argument appears as \c{%1}.
3079This is then popped, and the arguments are rotated right again, so
3080the second-to-last argument becomes \c{%1}. Thus the arguments are
3081iterated through in reverse order.
3082
3083
3084\S{concat} \i{Concatenating Macro Parameters}
3085
3086NASM can concatenate macro parameters and macro indirection constructs
3087on to other text surrounding them. This allows you to declare a family
3088of symbols, for example, in a macro definition. If, for example, you
3089wanted to generate a table of key codes along with offsets into the
3090table, you could code something like
3091
3092\c %macro keytab_entry 2
3093\c
3094\c     keypos%1    equ     $-keytab
3095\c                 db      %2
3096\c
3097\c %endmacro
3098\c
3099\c keytab:
3100\c           keytab_entry F1,128+1
3101\c           keytab_entry F2,128+2
3102\c           keytab_entry Return,13
3103
3104which would expand to
3105
3106\c keytab:
3107\c keyposF1        equ     $-keytab
3108\c                 db      128+1
3109\c keyposF2        equ     $-keytab
3110\c                 db      128+2
3111\c keyposReturn    equ     $-keytab
3112\c                 db      13
3113
3114You can just as easily concatenate text on to the other end of a
3115macro parameter, by writing \c{%1foo}.
3116
3117If you need to append a \e{digit} to a macro parameter, for example
3118defining labels \c{foo1} and \c{foo2} when passed the parameter
3119\c{foo}, you can't code \c{%11} because that would be taken as the
3120eleventh macro parameter. Instead, you must code
3121\I{braces, after % sign}\c{%\{1\}1}, which will separate the first
3122\c{1} (giving the number of the macro parameter) from the second
3123(literal text to be concatenated to the parameter).
3124
3125This concatenation can also be applied to other preprocessor in-line
3126objects, such as macro-local labels (\k{maclocal}) and context-local
3127labels (\k{ctxlocal}). In all cases, ambiguities in syntax can be
3128resolved by enclosing everything after the \c{%} sign and before the
3129literal text in braces: so \c{%\{%foo\}bar} concatenates the text
3130\c{bar} to the end of the real name of the macro-local label
3131\c{%%foo}. (This is unnecessary, since the form NASM uses for the
3132real names of macro-local labels means that the two usages
3133\c{%\{%foo\}bar} and \c{%%foobar} would both expand to the same
3134thing anyway; nevertheless, the capability is there.)
3135
3136The single-line macro indirection construct, \c{%[...]}
3137(\k{indmacro}), behaves the same way as macro parameters for the
3138purpose of concatenation.
3139
3140See also the \c{%+} operator, \k{concat%+}.
3141
3142
3143\S{mlmaccc} \i{Condition Codes as Macro Parameters}
3144
3145NASM can give special treatment to a macro parameter which contains
3146a condition code. For a start, you can refer to the macro parameter
3147\c{%1} by means of the alternative syntax \i\c{%+1}, which informs
3148NASM that this macro parameter is supposed to contain a condition
3149code, and will cause the preprocessor to report an error message if
3150the macro is called with a parameter which is \e{not} a valid
3151condition code.
3152
3153Far more usefully, though, you can refer to the macro parameter by
3154means of \i\c{%-1}, which NASM will expand as the \e{inverse}
3155condition code. So the \c{retz} macro defined in \k{maclocal} can be
3156replaced by a general \i{conditional-return macro} like this:
3157
3158\c %macro  retc 1
3159\c
3160\c         j%-1    %%skip
3161\c         ret
3162\c   %%skip:
3163\c
3164\c %endmacro
3165
3166This macro can now be invoked using calls like \c{retc ne}, which
3167will cause the conditional-jump instruction in the macro expansion
3168to come out as \c{JE}, or \c{retc po} which will make the jump a
3169\c{JPE}.
3170
3171The \c{%+1} macro-parameter reference is quite happy to interpret
3172the arguments \c{CXZ} and \c{ECXZ} as valid condition codes;
3173however, \c{%-1} will report an error if passed either of these,
3174because no inverse condition code exists.
3175
3176
3177\S{nolist} \i{Disabling Listing Expansion}\I\c{.nolist}
3178
3179When NASM is generating a listing file from your program, it will
3180generally expand multi-line macros by means of writing the macro
3181call and then listing each line of the expansion. This allows you to
3182see which instructions in the macro expansion are generating what
3183code; however, for some macros this clutters the listing up
3184unnecessarily.
3185
3186NASM therefore provides the \c{.nolist} qualifier, which you can
3187include in a macro definition to inhibit the expansion of the macro
3188in the listing file. The \c{.nolist} qualifier comes directly after
3189the number of parameters, like this:
3190
3191\c %macro foo 1.nolist
3192
3193Or like this:
3194
3195\c %macro bar 1-5+.nolist a,b,c,d,e,f,g,h
3196
3197\S{unmacro} Undefining Multi-Line Macros: \i\c{%unmacro}
3198
3199Multi-line macros can be removed with the \c{%unmacro} directive.
3200Unlike the \c{%undef} directive, however, \c{%unmacro} takes an
3201argument specification, and will only remove \i{exact matches} with
3202that argument specification.
3203
3204For example:
3205
3206\c %macro foo 1-3
3207\c         ; Do something
3208\c %endmacro
3209\c %unmacro foo 1-3
3210
3211removes the previously defined macro \c{foo}, but
3212
3213\c %macro bar 1-3
3214\c         ; Do something
3215\c %endmacro
3216\c %unmacro bar 1
3217
3218does \e{not} remove the macro \c{bar}, since the argument
3219specification does not match exactly.
3220
3221
3222\H{condasm} \i{Conditional Assembly}\I\c{%if}
3223
3224Similarly to the C preprocessor, NASM allows sections of a source
3225file to be assembled only if certain conditions are met. The general
3226syntax of this feature looks like this:
3227
3228\c %if<condition>
3229\c     ; some code which only appears if <condition> is met
3230\c %elif<condition2>
3231\c     ; only appears if <condition> is not met but <condition2> is
3232\c %else
3233\c     ; this appears if neither <condition> nor <condition2> was met
3234\c %endif
3235
3236The inverse forms \i\c{%ifn} and \i\c{%elifn} are also supported.
3237
3238The \i\c{%else} clause is optional, as is the \i\c{%elif} clause.
3239You can have more than one \c{%elif} clause as well.
3240
3241There are a number of variants of the \c{%if} directive.  Each has its
3242corresponding \c{%elif}, \c{%ifn}, and \c{%elifn} directives; for
3243example, the equivalents to the \c{%ifdef} directive are \c{%elifdef},
3244\c{%ifndef}, and \c{%elifndef}.
3245
3246\S{ifdef} \i\c{%ifdef}: Testing Single-Line Macro Existence\I{testing,
3247single-line macro existence}
3248
3249Beginning a conditional-assembly block with the line \c{%ifdef
3250MACRO} will assemble the subsequent code if, and only if, a
3251single-line macro called \c{MACRO} is defined. If not, then the
3252\c{%elif} and \c{%else} blocks (if any) will be processed instead.
3253
3254For example, when debugging a program, you might want to write code
3255such as
3256
3257\c           ; perform some function
3258\c %ifdef DEBUG
3259\c           writefile 2,"Function performed successfully",13,10
3260\c %endif
3261\c           ; go and do something else
3262
3263Then you could use the command-line option \c{-dDEBUG} to create a
3264version of the program which produced debugging messages, and remove
3265the option to generate the final release version of the program.
3266
3267You can test for a macro \e{not} being defined by using
3268\i\c{%ifndef} instead of \c{%ifdef}. You can also test for macro
3269definitions in \c{%elif} blocks by using \i\c{%elifdef} and
3270\i\c{%elifndef}.
3271
3272
3273\S{ifmacro} \i\c{%ifmacro}: Testing Multi-Line Macro
3274Existence\I{testing, multi-line macro existence}
3275
3276The \c{%ifmacro} directive operates in the same way as the \c{%ifdef}
3277directive, except that it checks for the existence of a multi-line macro.
3278
3279For example, you may be working with a large project and not have control
3280over the macros in a library. You may want to create a macro with one
3281name if it doesn't already exist, and another name if one with that name
3282does exist.
3283
3284The \c{%ifmacro} directive is considered true if defining a macro with the
3285given name and number of arguments would cause a definition conflict. For example:
3286
3287\c %ifmacro MyMacro 1-3
3288\c
3289\c      %error "MyMacro 1-3" causes a conflict with an existing macro.
3290\c
3291\c %else
3292\c
3293\c      %macro MyMacro 1-3
3294\c
3295\c              ; insert code to define the macro
3296\c
3297\c      %endmacro
3298\c
3299\c %endif
3300
3301This will create the macro "MyMacro 1-3" if no macro already exists which
3302would conflict with it, and emit an error if there would be a definition
3303conflict.
3304
3305You can test for the macro not existing by using \i\c{%ifnmacro} instead
3306of \c{%ifmacro}. Additional tests can be performed in \c{%elif} blocks by using
3307\i\c{%elifmacro} and \i\c{%elifnmacro}.
3308
3309
3310\S{ifctx} \i\c{%ifctx}: Testing the Context Stack\I{testing, context
3311stack}
3312
3313The conditional-assembly construct \c{%ifctx} will cause the
3314subsequent code to be assembled if and only if the top context on
3315the preprocessor's context stack has the same name as one of the arguments.
3316As with \c{%ifdef}, the inverse and \c{%elif} forms \i\c{%ifnctx},
3317\i\c{%elifctx} and \i\c{%elifnctx} are also supported.
3318
3319For more details of the context stack, see \k{ctxstack}. For a
3320sample use of \c{%ifctx}, see \k{blockif}.
3321
3322
3323\S{if} \i\c{%if}: Testing Arbitrary Numeric Expressions\I{testing,
3324arbitrary numeric expressions}
3325
3326The conditional-assembly construct \c{%if expr} will cause the
3327subsequent code to be assembled if and only if the value of the
3328numeric expression \c{expr} is non-zero. An example of the use of
3329this feature is in deciding when to break out of a \c{%rep}
3330preprocessor loop: see \k{rep} for a detailed example.
3331
3332The expression given to \c{%if}, and its counterpart \i\c{%elif}, is
3333a critical expression (see \k{crit}).
3334
3335
3336Like other \c{%if} constructs, \c{%if} has a counterpart
3337\i\c{%elif}, and negative forms \i\c{%ifn} and \i\c{%elifn}.
3338
3339\S{ifidn} \i\c{%ifidn} and \i\c{%ifidni}: Testing Exact Text
3340Identity\I{testing, exact text identity}
3341
3342The construct \c{%ifidn text1,text2} will cause the subsequent code
3343to be assembled if and only if \c{text1} and \c{text2}, after
3344expanding single-line macros, are identical pieces of text.
3345Differences in white space are not counted.
3346
3347\c{%ifidni} is similar to \c{%ifidn}, but is \i{case-insensitive}.
3348
3349For example, the following macro pushes a register or number on the
3350stack, and allows you to treat \c{IP} as a real register:
3351
3352\c %macro  pushparam 1
3353\c
3354\c   %ifidni %1,ip
3355\c         call    %%label
3356\c   %%label:
3357\c   %else
3358\c         push    %1
3359\c   %endif
3360\c
3361\c %endmacro
3362
3363Like other \c{%if} constructs, \c{%ifidn} has a counterpart
3364\i\c{%elifidn}, and negative forms \i\c{%ifnidn} and \i\c{%elifnidn}.
3365Similarly, \c{%ifidni} has counterparts \i\c{%elifidni},
3366\i\c{%ifnidni} and \i\c{%elifnidni}.
3367
3368\S{iftyp} \i\c{%ifid}, \i\c{%ifnum}, \i\c{%ifstr}: Testing Token
3369Types\I{testing, token types}
3370
3371Some macros will want to perform different tasks depending on
3372whether they are passed a number, a string, or an identifier. For
3373example, a string output macro might want to be able to cope with
3374being passed either a string constant or a pointer to an existing
3375string.
3376
3377The conditional assembly construct \c{%ifid}, taking one parameter
3378(which may be blank), assembles the subsequent code if and only if
3379the first token in the parameter exists and is an identifier.
3380\c{%ifnum} works similarly, but tests for the token being a numeric
3381constant; \c{%ifstr} tests for it being a string.
3382
3383For example, the \c{writefile} macro defined in \k{mlmacgre} can be
3384extended to take advantage of \c{%ifstr} in the following fashion:
3385
3386\c %macro writefile 2-3+
3387\c
3388\c   %ifstr %2
3389\c         jmp     %%endstr
3390\c     %if %0 = 3
3391\c       %%str:    db      %2,%3
3392\c     %else
3393\c       %%str:    db      %2
3394\c     %endif
3395\c       %%endstr: mov     dx,%%str
3396\c                 mov     cx,%%endstr-%%str
3397\c   %else
3398\c                 mov     dx,%2
3399\c                 mov     cx,%3
3400\c   %endif
3401\c                 mov     bx,%1
3402\c                 mov     ah,0x40
3403\c                 int     0x21
3404\c
3405\c %endmacro
3406
3407Then the \c{writefile} macro can cope with being called in either of
3408the following two ways:
3409
3410\c         writefile [file], strpointer, length
3411\c         writefile [file], "hello", 13, 10
3412
3413In the first, \c{strpointer} is used as the address of an
3414already-declared string, and \c{length} is used as its length; in
3415the second, a string is given to the macro, which therefore declares
3416it itself and works out the address and length for itself.
3417
3418Note the use of \c{%if} inside the \c{%ifstr}: this is to detect
3419whether the macro was passed two arguments (so the string would be a
3420single string constant, and \c{db %2} would be adequate) or more (in
3421which case, all but the first two would be lumped together into
3422\c{%3}, and \c{db %2,%3} would be required).
3423
3424The usual \I\c{%elifid}\I\c{%elifnum}\I\c{%elifstr}\c{%elif}...,
3425\I\c{%ifnid}\I\c{%ifnnum}\I\c{%ifnstr}\c{%ifn}..., and
3426\I\c{%elifnid}\I\c{%elifnnum}\I\c{%elifnstr}\c{%elifn}... versions
3427exist for each of \c{%ifid}, \c{%ifnum} and \c{%ifstr}.
3428
3429\S{iftoken} \i\c{%iftoken}: Test for a Single Token
3430
3431Some macros will want to do different things depending on whether they
3432are passed a single token (e.g. paste it to something else using \c{%+})
3433versus a multi-token sequence.
3434
3435The conditional assembly construct \c{%iftoken} assembles the
3436subsequent code if and only if the expanded parameters consist of
3437exactly one token, possibly surrounded by whitespace.
3438
3439For example:
3440
3441\c %iftoken 1
3442
3443will assemble the subsequent code, but
3444
3445\c %iftoken -1
3446
3447will not, since \c{-1} contains two tokens: the unary minus operator
3448\c{-}, and the number \c{1}.
3449
3450The usual \i\c{%eliftoken}, \i\c{%ifntoken}, and \i\c{%elifntoken}
3451variants are also provided.
3452
3453\S{ifempty} \i\c{%ifempty}: Test for Empty Expansion
3454
3455The conditional assembly construct \c{%ifempty} assembles the
3456subsequent code if and only if the expanded parameters do not contain
3457any tokens at all, whitespace excepted.
3458
3459The usual \i\c{%elifempty}, \i\c{%ifnempty}, and \i\c{%elifnempty}
3460variants are also provided.
3461
3462\S{ifenv} \i\c{%ifenv}: Test If Environment Variable Exists
3463
3464The conditional assembly construct \c{%ifenv} assembles the
3465subsequent code if and only if the environment variable referenced by
3466the \c{%!}\e{variable} directive exists.
3467
3468The usual \i\c{%elifenv}, \i\c{%ifnenv}, and \i\c{%elifnenv}
3469variants are also provided.
3470
3471Just as for \c{%!}\e{variable} the argument should be written as a
3472string if it contains characters that would not be legal in an
3473identifier.  See \k{getenv}.
3474
3475\H{rep} \i{Preprocessor Loops}\I{repeating code}: \i\c{%rep}
3476
3477NASM's \c{TIMES} prefix, though useful, cannot be used to invoke a
3478multi-line macro multiple times, because it is processed by NASM
3479after macros have already been expanded. Therefore NASM provides
3480another form of loop, this time at the preprocessor level: \c{%rep}.
3481
3482The directives \c{%rep} and \i\c{%endrep} (\c{%rep} takes a numeric
3483argument, which can be an expression; \c{%endrep} takes no
3484arguments) can be used to enclose a chunk of code, which is then
3485replicated as many times as specified by the preprocessor:
3486
3487\c %assign i 0
3488\c %rep    64
3489\c         inc     word [table+2*i]
3490\c %assign i i+1
3491\c %endrep
3492
3493This will generate a sequence of 64 \c{INC} instructions,
3494incrementing every word of memory from \c{[table]} to
3495\c{[table+126]}.
3496
3497For more complex termination conditions, or to break out of a repeat
3498loop part way along, you can use the \i\c{%exitrep} directive to
3499terminate the loop, like this:
3500
3501\c fibonacci:
3502\c %assign i 0
3503\c %assign j 1
3504\c %rep 100
3505\c %if j > 65535
3506\c     %exitrep
3507\c %endif
3508\c         dw j
3509\c %assign k j+i
3510\c %assign i j
3511\c %assign j k
3512\c %endrep
3513\c
3514\c fib_number equ ($-fibonacci)/2
3515
3516This produces a list of all the Fibonacci numbers that will fit in
351716 bits. Note that a maximum repeat count must still be given to
3518\c{%rep}. This is to prevent the possibility of NASM getting into an
3519infinite loop in the preprocessor, which (on multitasking or
3520multi-user systems) would typically cause all the system memory to
3521be gradually used up and other applications to start crashing.
3522
3523Note the maximum repeat count is limited to the value specified by the
3524\c{--limit-rep} option or \c{%pragma limit rep}, see \k{opt-limit}.
3525
3526
3527\H{files} Source Files and Dependencies
3528
3529These commands allow you to split your sources into multiple files.
3530
3531\S{include} \i\c{%include}: \i{Including Other Files}
3532
3533Using, once again, a very similar syntax to the C preprocessor,
3534NASM's preprocessor lets you include other source files into your
3535code. This is done by the use of the \i\c{%include} directive:
3536
3537\c %include "macros.mac"
3538
3539will include the contents of the file \c{macros.mac} into the source
3540file containing the \c{%include} directive.
3541
3542Include files are \I{searching for include files}searched for in the
3543current directory (the directory you're in when you run NASM, as
3544opposed to the location of the NASM executable or the location of
3545the source file), plus any directories specified on the NASM command
3546line using the \c{-i} option.
3547
3548The standard C idiom for preventing a file being included more than
3549once is just as applicable in NASM: if the file \c{macros.mac} has
3550the form
3551
3552\c %ifndef MACROS_MAC
3553\c     %define MACROS_MAC
3554\c     ; now define some macros
3555\c %endif
3556
3557then including the file more than once will not cause errors,
3558because the second time the file is included nothing will happen
3559because the macro \c{MACROS_MAC} will already be defined.
3560
3561You can force a file to be included even if there is no \c{%include}
3562directive that explicitly includes it, by using the \i\c{-p} option
3563on the NASM command line (see \k{opt-p}).
3564
3565
3566\S{pathsearch} \i\c{%pathsearch}: Search the Include Path
3567
3568The \c{%pathsearch} directive takes a single-line macro name and a
3569filename, and declares or redefines the specified single-line macro to
3570be the include-path-resolved version of the filename, if the file
3571exists (otherwise, it is passed unchanged.)
3572
3573For example,
3574
3575\c %pathsearch MyFoo "foo.bin"
3576
3577... with \c{-Ibins/} in the include path may end up defining the macro
3578\c{MyFoo} to be \c{"bins/foo.bin"}.
3579
3580
3581\S{depend} \i\c{%depend}: Add Dependent Files
3582
3583The \c{%depend} directive takes a filename and adds it to the list of
3584files to be emitted during dependency generation when the \c{-M} option
3585and its relatives (see \k{opt-M}) are used.  It produces no output.
3586
3587This is generally used in conjunction with \c{%pathsearch}.  For
3588example, a simplified version of the standard macro wrapper for the
3589\c{INCBIN} directive looks like:
3590
3591\c %imacro incbin 1-2+ 0
3592\c %pathsearch dep %1
3593\c %depend dep
3594\c         incbin dep,%2
3595\c %endmacro
3596
3597This first resolves the location of the file into the macro \c{dep},
3598then adds it to the dependency lists, and finally issues the
3599assembler-level \c{INCBIN} directive.
3600
3601
3602\S{use} \i\c{%use}: Include Standard Macro Package
3603
3604The \c{%use} directive is similar to \c{%include}, but rather than
3605including the contents of a file, it includes a named standard macro
3606package.  The standard macro packages are part of NASM, and are
3607described in \k{macropkg}.
3608
3609Unlike the \c{%include} directive, package names for the \c{%use}
3610directive do not require quotes, but quotes are permitted.  In NASM
36112.04 and 2.05 the unquoted form would be macro-expanded; this is no
3612longer true.  Thus, the following lines are equivalent:
3613
3614\c %use altreg
3615\c %use 'altreg'
3616
3617Standard macro packages are protected from multiple inclusion.  When a
3618standard macro package is used, a testable single-line macro of the
3619form \c{__?USE_}\e{package}\c{?__} is also defined, see \k{use_def}.
3620
3621\H{ctxstack} The \i{Context Stack}
3622
3623Having labels that are local to a macro definition is sometimes not
3624quite powerful enough: sometimes you want to be able to share labels
3625between several macro calls. An example might be a \c{REPEAT} ...
3626\c{UNTIL} loop, in which the expansion of the \c{REPEAT} macro
3627would need to be able to refer to a label which the \c{UNTIL} macro
3628had defined. However, for such a macro you would also want to be
3629able to nest these loops.
3630
3631NASM provides this level of power by means of a \e{context stack}.
3632The preprocessor maintains a stack of \e{contexts}, each of which is
3633characterized by a name. You add a new context to the stack using
3634the \i\c{%push} directive, and remove one using \i\c{%pop}. You can
3635define labels that are local to a particular context on the stack.
3636
3637
3638\S{pushpop} \i\c{%push} and \i\c{%pop}: \I{creating
3639contexts}\I{removing contexts}Creating and Removing Contexts
3640
3641The \c{%push} directive is used to create a new context and place it
3642on the top of the context stack. \c{%push} takes an optional argument,
3643which is the name of the context. For example:
3644
3645\c %push    foobar
3646
3647This pushes a new context called \c{foobar} on the stack. You can have
3648several contexts on the stack with the same name: they can still be
3649distinguished.  If no name is given, the context is unnamed (this is
3650normally used when both the \c{%push} and the \c{%pop} are inside a
3651single macro definition.)
3652
3653The directive \c{%pop}, taking one optional argument, removes the top
3654context from the context stack and destroys it, along with any
3655labels associated with it.  If an argument is given, it must match the
3656name of the current context, otherwise it will issue an error.
3657
3658
3659\S{ctxlocal} \i{Context-Local Labels}
3660
3661Just as the usage \c{%%foo} defines a label which is local to the
3662particular macro call in which it is used, the usage \I{%$}\c{%$foo}
3663is used to define a label which is local to the context on the top
3664of the context stack. So the \c{REPEAT} and \c{UNTIL} example given
3665above could be implemented by means of:
3666
3667\c %macro repeat 0
3668\c
3669\c     %push   repeat
3670\c     %$begin:
3671\c
3672\c %endmacro
3673\c
3674\c %macro until 1
3675\c
3676\c         j%-1    %$begin
3677\c     %pop
3678\c
3679\c %endmacro
3680
3681and invoked by means of, for example,
3682
3683\c         mov     cx,string
3684\c         repeat
3685\c         add     cx,3
3686\c         scasb
3687\c         until   e
3688
3689which would scan every fourth byte of a string in search of the byte
3690in \c{AL}.
3691
3692If you need to define, or access, labels local to the context
3693\e{below} the top one on the stack, you can use \I{%$$}\c{%$$foo}, or
3694\c{%$$$foo} for the context below that, and so on.
3695
3696
3697\S{ctxdefine} \i{Context-Local Single-Line Macros}
3698
3699NASM also allows you to define single-line macros which are local to
3700a particular context, in just the same way:
3701
3702\c %define %$localmac 3
3703
3704will define the single-line macro \c{%$localmac} to be local to the
3705top context on the stack. Of course, after a subsequent \c{%push},
3706it can then still be accessed by the name \c{%$$localmac}.
3707
3708
3709\S{ctxfallthrough} \i{Context Fall-Through Lookup} \e{(deprecated)}
3710
3711Context fall-through lookup (automatic searching of outer contexts)
3712is a feature that was added in NASM version 0.98.03. Unfortunately,
3713this feature is unintuitive and can result in buggy code that would
3714have otherwise been prevented by NASM's error reporting. As a result,
3715this feature has been \e{deprecated}. NASM version 2.09 will issue a
3716warning when usage of this \e{deprecated} feature is detected. Starting
3717with NASM version 2.10, usage of this \e{deprecated} feature will simply
3718result in an \e{expression syntax error}.
3719
3720An example usage of this \e{deprecated} feature follows:
3721
3722\c %macro ctxthru 0
3723\c %push ctx1
3724\c     %assign %$external 1
3725\c         %push ctx2
3726\c             %assign %$internal 1
3727\c             mov eax, %$external
3728\c             mov eax, %$internal
3729\c         %pop
3730\c %pop
3731\c %endmacro
3732
3733As demonstrated, \c{%$external} is being defined in the \c{ctx1}
3734context and referenced within the \c{ctx2} context. With context
3735fall-through lookup, referencing an undefined context-local macro
3736like this implicitly searches through all outer contexts until a match
3737is found or every context has been searched. As a result, \c{%$external}
3738referenced within the \c{ctx2} context would implicitly use \c{%$external}
3739as defined in \c{ctx1}. Most people would expect NASM to issue an error in
3740this situation because \c{%$external} was never defined within \c{ctx2} and also
3741isn't qualified with the proper context depth, \c{%$$external}.
3742
3743Here is a revision of the above example with proper context depth:
3744
3745\c %macro ctxthru 0
3746\c %push ctx1
3747\c     %assign %$external 1
3748\c         %push ctx2
3749\c             %assign %$internal 1
3750\c             mov eax, %$$external
3751\c             mov eax, %$internal
3752\c         %pop
3753\c %pop
3754\c %endmacro
3755
3756As demonstrated, \c{%$external} is still being defined in the \c{ctx1}
3757context and referenced within the \c{ctx2} context. However, the
3758reference to \c{%$external} within \c{ctx2} has been fully qualified with
3759the proper context depth, \c{%$$external}, and thus is no longer ambiguous,
3760unintuitive or erroneous.
3761
3762
3763\S{ctxrepl} \i\c{%repl}: \I{renaming contexts}Renaming a Context
3764
3765If you need to change the name of the top context on the stack (in
3766order, for example, to have it respond differently to \c{%ifctx}),
3767you can execute a \c{%pop} followed by a \c{%push}; but this will
3768have the side effect of destroying all context-local labels and
3769macros associated with the context that was just popped.
3770
3771NASM provides the directive \c{%repl}, which \e{replaces} a context
3772with a different name, without touching the associated macros and
3773labels. So you could replace the destructive code
3774
3775\c %pop
3776\c %push   newname
3777
3778with the non-destructive version \c{%repl newname}.
3779
3780
3781\S{blockif} Example Use of the \i{Context Stack}: \i{Block IFs}
3782
3783This example makes use of almost all the context-stack features,
3784including the conditional-assembly construct \i\c{%ifctx}, to
3785implement a block IF statement as a set of macros.
3786
3787\c %macro if 1
3788\c
3789\c     %push if
3790\c     j%-1  %$ifnot
3791\c
3792\c %endmacro
3793\c
3794\c %macro else 0
3795\c
3796\c   %ifctx if
3797\c         %repl   else
3798\c         jmp     %$ifend
3799\c         %$ifnot:
3800\c   %else
3801\c         %error  "expected `if' before `else'"
3802\c   %endif
3803\c
3804\c %endmacro
3805\c
3806\c %macro endif 0
3807\c
3808\c   %ifctx if
3809\c         %$ifnot:
3810\c         %pop
3811\c   %elifctx      else
3812\c         %$ifend:
3813\c         %pop
3814\c   %else
3815\c         %error  "expected `if' or `else' before `endif'"
3816\c   %endif
3817\c
3818\c %endmacro
3819
3820This code is more robust than the \c{REPEAT} and \c{UNTIL} macros
3821given in \k{ctxlocal}, because it uses conditional assembly to check
3822that the macros are issued in the right order (for example, not
3823calling \c{endif} before \c{if}) and issues a \c{%error} if they're
3824not.
3825
3826In addition, the \c{endif} macro has to be able to cope with the two
3827distinct cases of either directly following an \c{if}, or following
3828an \c{else}. It achieves this, again, by using conditional assembly
3829to do different things depending on whether the context on top of
3830the stack is \c{if} or \c{else}.
3831
3832The \c{else} macro has to preserve the context on the stack, in
3833order to have the \c{%$ifnot} referred to by the \c{if} macro be the
3834same as the one defined by the \c{endif} macro, but has to change
3835the context's name so that \c{endif} will know there was an
3836intervening \c{else}. It does this by the use of \c{%repl}.
3837
3838A sample usage of these macros might look like:
3839
3840\c         cmp     ax,bx
3841\c
3842\c         if ae
3843\c                cmp     bx,cx
3844\c
3845\c                if ae
3846\c                        mov     ax,cx
3847\c                else
3848\c                        mov     ax,bx
3849\c                endif
3850\c
3851\c         else
3852\c                cmp     ax,cx
3853\c
3854\c                if ae
3855\c                        mov     ax,cx
3856\c                endif
3857\c
3858\c         endif
3859
3860The block-\c{IF} macros handle nesting quite happily, by means of
3861pushing another context, describing the inner \c{if}, on top of the
3862one describing the outer \c{if}; thus \c{else} and \c{endif} always
3863refer to the last unmatched \c{if} or \c{else}.
3864
3865
3866\H{stackrel} \i{Stack Relative Preprocessor Directives}
3867
3868The following preprocessor directives provide a way to use
3869labels to refer to local variables allocated on the stack.
3870
3871\b\c{%arg}  (see \k{arg})
3872
3873\b\c{%stacksize}  (see \k{stacksize})
3874
3875\b\c{%local}  (see \k{local})
3876
3877
3878\S{arg} \i\c{%arg} Directive
3879
3880The \c{%arg} directive is used to simplify the handling of
3881parameters passed on the stack. Stack based parameter passing
3882is used by many high level languages, including C, C++ and Pascal.
3883
3884While NASM has macros which attempt to duplicate this
3885functionality (see \k{16cmacro}), the syntax is not particularly
3886convenient to use and is not TASM compatible. Here is an example
3887which shows the use of \c{%arg} without any external macros:
3888
3889\c some_function:
3890\c
3891\c     %push     mycontext        ; save the current context
3892\c     %stacksize large           ; tell NASM to use bp
3893\c     %arg      i:word, j_ptr:word
3894\c
3895\c         mov     ax,[i]
3896\c         mov     bx,[j_ptr]
3897\c         add     ax,[bx]
3898\c         ret
3899\c
3900\c     %pop                       ; restore original context
3901
3902This is similar to the procedure defined in \k{16cmacro} and adds
3903the value in \c{i} to the value pointed to by \c{j_ptr} and returns the
3904sum in the \c{ax} register. See \k{pushpop} for an explanation of
3905\c{%push} and \c{%pop} and the use of context stacks.
3906
3907
3908\S{stacksize} \i\c{%stacksize} Directive
3909
3910The \c{%stacksize} directive is used in conjunction with the
3911\c{%arg} (see \k{arg}) and the \c{%local} (see \k{local}) directives.
3912It tells NASM the default size to use for subsequent \c{%arg} and
3913\c{%local} directives. The \c{%stacksize} directive takes one
3914required argument which is one of \c{flat}, \c{flat64}, \c{large} or \c{small}.
3915
3916\c %stacksize flat
3917
3918This form causes NASM to use stack-based parameter addressing
3919relative to \c{ebp} and it assumes that a near form of call was used
3920to get to this label (i.e. that \c{eip} is on the stack).
3921
3922\c %stacksize flat64
3923
3924This form causes NASM to use stack-based parameter addressing
3925relative to \c{rbp} and it assumes that a near form of call was used
3926to get to this label (i.e. that \c{rip} is on the stack).
3927
3928\c %stacksize large
3929
3930This form uses \c{bp} to do stack-based parameter addressing and
3931assumes that a far form of call was used to get to this address
3932(i.e. that \c{ip} and \c{cs} are on the stack).
3933
3934\c %stacksize small
3935
3936This form also uses \c{bp} to address stack parameters, but it is
3937different from \c{large} because it also assumes that the old value
3938of bp is pushed onto the stack (i.e. it expects an \c{ENTER}
3939instruction). In other words, it expects that \c{bp}, \c{ip} and
3940\c{cs} are on the top of the stack, underneath any local space which
3941may have been allocated by \c{ENTER}. This form is probably most
3942useful when used in combination with the \c{%local} directive
3943(see \k{local}).
3944
3945
3946\S{local} \i\c{%local} Directive
3947
3948The \c{%local} directive is used to simplify the use of local
3949temporary stack variables allocated in a stack frame. Automatic
3950local variables in C are an example of this kind of variable. The
3951\c{%local} directive is most useful when used with the \c{%stacksize}
3952directive (see \k{stacksize}) and is also compatible with the \c{%arg} directive
3953(see \k{arg}). It allows simplified reference to variables on the
3954stack which have been allocated typically by using the \c{ENTER}
3955instruction.
3956\# (see \k{insENTER} for a description of that instruction).
3957An example of its use is the following:
3958
3959\c silly_swap:
3960\c
3961\c     %push mycontext             ; save the current context
3962\c     %stacksize small            ; tell NASM to use bp
3963\c     %assign %$localsize 0       ; see text for explanation
3964\c     %local old_ax:word, old_dx:word
3965\c
3966\c         enter   %$localsize,0   ; see text for explanation
3967\c         mov     [old_ax],ax     ; swap ax & bx
3968\c         mov     [old_dx],dx     ; and swap dx & cx
3969\c         mov     ax,bx
3970\c         mov     dx,cx
3971\c         mov     bx,[old_ax]
3972\c         mov     cx,[old_dx]
3973\c         leave                   ; restore old bp
3974\c         ret                     ;
3975\c
3976\c     %pop                        ; restore original context
3977
3978The \c{%$localsize} variable is used internally by the
3979\c{%local} directive and \e{must} be defined within the
3980current context before the \c{%local} directive may be used.
3981Failure to do so will result in one expression syntax error for
3982each \c{%local} variable declared. It then may be used in
3983the construction of an appropriately sized ENTER instruction
3984as shown in the example.
3985
3986
3987\H{pperror} Reporting \i{User-Defined Errors}: \i\c{%error}, \i\c{%warning}, \i\c{%fatal}
3988
3989The preprocessor directive \c{%error} will cause NASM to report an
3990error if it occurs in assembled code. So if other users are going to
3991try to assemble your source files, you can ensure that they define the
3992right macros by means of code like this:
3993
3994\c %ifdef F1
3995\c     ; do some setup
3996\c %elifdef F2
3997\c     ; do some different setup
3998\c %else
3999\c     %error "Neither F1 nor F2 was defined."
4000\c %endif
4001
4002Then any user who fails to understand the way your code is supposed
4003to be assembled will be quickly warned of their mistake, rather than
4004having to wait until the program crashes on being run and then not
4005knowing what went wrong.
4006
4007Similarly, \c{%warning} issues a warning, but allows assembly to continue:
4008
4009\c %ifdef F1
4010\c     ; do some setup
4011\c %elifdef F2
4012\c     ; do some different setup
4013\c %else
4014\c     %warning "Neither F1 nor F2 was defined, assuming F1."
4015\c     %define F1
4016\c %endif
4017
4018\c{%error} and \c{%warning} are issued only on the final assembly
4019pass.  This makes them safe to use in conjunction with tests that
4020depend on symbol values.
4021
4022\c{%fatal} terminates assembly immediately, regardless of pass.  This
4023is useful when there is no point in continuing the assembly further,
4024and doing so is likely just going to cause a spew of confusing error
4025messages.
4026
4027It is optional for the message string after \c{%error}, \c{%warning}
4028or \c{%fatal} to be quoted.  If it is \e{not}, then single-line macros
4029are expanded in it, which can be used to display more information to
4030the user.  For example:
4031
4032\c %if foo > 64
4033\c     %assign foo_over foo-64
4034\c     %error foo is foo_over bytes too large
4035\c %endif
4036
4037
4038\H{pragma} \i\c{%pragma}: Setting Options
4039
4040The \c{%pragma} directive controls a number of options in
4041NASM. Pragmas are intended to remain backwards compatible, and
4042therefore an unknown \c{%pragma} directive is not an error.
4043
4044The various pragmas are documented with the options they affect.
4045
4046The general structure of a NASM pragma is:
4047
4048\c{%pragma} \e{namespace} \e{directive} [\e{arguments...}]
4049
4050Currently defined namespaces are:
4051
4052\b \c{ignore}: this \c{%pragma} is unconditionally ignored.
4053
4054\b \c{preproc}: preprocessor, see \k{pragma-preproc}.
4055
4056\b \c{limit}: resource limits, see \k{opt-limit}.
4057
4058\b \c{asm}: the parser and assembler proper. Currently no such pragmas
4059are defined.
4060
4061\b \c{list}: listing options, see \k{opt-L}.
4062
4063\b \c{file}: general file handling options. Currently no such pragmas
4064are defined.
4065
4066\b \c{input}: input file handling options. Currently no such pragmas
4067are defined.
4068
4069\b \c{output}: output format options.
4070
4071\b \c{debug}: debug format options.
4072
4073In addition, the name of any output or debug format, and sometimes
4074groups thereof, also constitute \c{%pragma} namespaces. The namespaces
4075\c{output} and \c{debug} simply refer to \e{any} output or debug
4076format, respectively.
4077
4078For example, to prepend an underscore to global symbols regardless of
4079the output format (see \k{mangling}):
4080
4081\c %pragma output gprefix _
4082
4083... whereas to prepend an underscore to global symbols only when the
4084output is either \c{win32} or \c{win64}:
4085
4086\c %pragma win gprefix _
4087
4088
4089\S{pragma-preproc} Preprocessor Pragmas
4090
4091The only preprocessor \c{%pragma} defined in NASM 2.15 is:
4092
4093\b \c{%pragma preproc sane_empty_expansion}: disables legacy
4094compatibility handling of braceless empty arguments to multi-line
4095macros. See \k{mlmacro} and \k{opt-w}.
4096
4097
4098\H{otherpreproc} \i{Other Preprocessor Directives}
4099
4100\S{line} \i\c{%line} Directive
4101
4102The \c{%line} directive is used to notify NASM that the input line
4103corresponds to a specific line number in another file.  Typically
4104this other file would be an original source file, with the current
4105NASM input being the output of a pre-processor.  The \c{%line}
4106directive allows NASM to output messages which indicate the line
4107number of the original source file, instead of the file that is being
4108read by NASM.
4109
4110This preprocessor directive is not generally used directly by
4111programmers, but may be of interest to preprocessor authors.  The
4112usage of the \c{%line} preprocessor directive is as follows:
4113
4114\c %line nnn[+mmm] [filename]
4115
4116In this directive, \c{nnn} identifies the line of the original source
4117file which this line corresponds to.  \c{mmm} is an optional parameter
4118which specifies a line increment value; each line of the input file
4119read in is considered to correspond to \c{mmm} lines of the original
4120source file.  Finally, \c{filename} is an optional parameter which
4121specifies the file name of the original source file. It may be a
4122quoted string, in which case any additional argument after the quoted
4123string will be ignored.
4124
4125After reading a \c{%line} preprocessor directive, NASM will report
4126all file name and line numbers relative to the values specified
4127therein.
4128
4129If the command line option \i\c{--no-line} is given, all \c{%line}
4130directives are ignored. This may be useful for debugging preprocessed
4131code. See \k{opt-no-line}.
4132
4133Starting in NASM 2.15, \c{%line} directives are processed before any
4134other processing takes place.
4135
4136For compatibility with the output from some other preprocessors,
4137including many C preprocessors, a \c{#} character followed by
4138whitespace \e{at the very beginning of a line} is also treated as a
4139\c{%line} directive, except that double quotes surrounding the
4140filename are treated like NASM backquotes, with \c{\\}-escaped
4141sequences decoded.
4142
4143\# This isn't a directive, it should be moved elsewhere...
4144\S{getenv} \i\c{%!}\e{variable}: Read an Environment Variable.
4145
4146The \c{%!}\e{variable} directive makes it possible to read the value of an
4147environment variable at assembly time. This could, for example, be used
4148to store the contents of an environment variable into a string, which
4149could be used at some other point in your code.
4150
4151For example, suppose that you have an environment variable \c{FOO},
4152and you want the contents of \c{FOO} to be embedded in your program as
4153a quoted string. You could do that as follows:
4154
4155\c %defstr FOO          %!FOO
4156
4157See \k{defstr} for notes on the \c{%defstr} directive.
4158
4159If the name of the environment variable contains non-identifier
4160characters, you can use string quotes to surround the name of the
4161variable, for example:
4162
4163\c %defstr C_colon      %!'C:'
4164
4165
4166\S{clear} \i\c{%clear}: Clear All Macro Definitions
4167
4168The directive \c{%clear} clears all definitions of a certain type,
4169\e{including the ones defined by NASM itself.} This can be useful when
4170preprocessing non-NASM code, or to drop backwards compatibility
4171aliases.
4172
4173The syntax is:
4174
4175\c    %clear [global|context] type...
4176
4177... where \c{context} indicates that this applies to context-local
4178macros only; the default is \c{global}.
4179
4180\c{type} can be one or more of:
4181
4182\b \c{define}   single-line macros
4183
4184\b \c{defalias} single-line macro aliases (useful to remove backwards
4185compatibility aliases)
4186
4187\b \c{alldefine} same as \c{define defalias}
4188
4189\b \c{macro}     multi-line macros
4190
4191\b \c{all}       same as \c{alldefine macro} (default)
4192
4193In NASM 2.14 and earlier, only the single syntax \c{%clear} was
4194supported, which is equivalent to \c{%clear global all}.
4195
4196
4197
4198
4199\C{stdmac} \i{Standard Macros}
4200
4201NASM defines a set of standard macros, which are already defined when
4202it starts to process any source file. If you really need a program to
4203be assembled with no pre-defined macros, you can use the \i\c{%clear}
4204directive to empty the preprocessor of everything but context-local
4205preprocessor variables and single-line macros, see \k{clear}.
4206
4207Most \i{user-level directives} (see \k{directive}) are implemented as
4208macros which invoke primitive directives; these are described in
4209\k{directive}. The rest of the standard macro set is described here.
4210
4211For compatibility with NASM versions before NASM 2.15, most standard
4212macros of the form \c{__?foo?__} have aliases of form \c{__foo__} (see
4213\k{defalias}). These can be removed with the directive \c{%clear
4214defalias}.
4215
4216
4217\H{stdmacver} \i{NASM Version Macros}
4218
4219The single-line macros \i\c{__?NASM_MAJOR?__}, \i\c{__?NASM_MINOR?__},
4220\i\c{__?NASM_SUBMINOR?__} and \i\c{__?NASM_PATCHLEVEL?__} expand to the
4221major, minor, subminor and patch level parts of the \i{version
4222number of NASM} being used. So, under NASM 0.98.32p1 for
4223example, \c{__?NASM_MAJOR?__} would be defined to be 0, \c{__?NASM_MINOR?__}
4224would be defined as 98, \c{__?NASM_SUBMINOR?__} would be defined to 32,
4225and \c{__?NASM_PATCHLEVEL?__} would be defined as 1.
4226
4227Additionally, the macro \i\c{__?NASM_SNAPSHOT?__} is defined for
4228automatically generated snapshot releases \e{only}.
4229
4230
4231\S{stdmacverid} \i\c{__?NASM_VERSION_ID?__}: \i{NASM Version ID}
4232
4233The single-line macro \c{__?NASM_VERSION_ID?__} expands to a dword integer
4234representing the full version number of the version of nasm being used.
4235The value is equivalent to \c{__?NASM_MAJOR?__}, \c{__?NASM_MINOR?__},
4236\c{__?NASM_SUBMINOR?__} and \c{__?NASM_PATCHLEVEL?__} concatenated to
4237produce a single doubleword. Hence, for 0.98.32p1, the returned number
4238would be equivalent to:
4239
4240\c         dd      0x00622001
4241
4242or
4243
4244\c         db      1,32,98,0
4245
4246Note that the above lines generate exactly the same code; the second
4247line is used just to give an indication of the order that the separate
4248values will be present in memory.
4249
4250
4251\S{stdmacverstr} \i\c{__?NASM_VER?__}: \i{NASM Version String}
4252
4253The single-line macro \c{__?NASM_VER?__} expands to a string which defines
4254the version number of nasm being used. So, under NASM 0.98.32 for example,
4255
4256\c         db      __?NASM_VER?__
4257
4258would expand to
4259
4260\c         db      "0.98.32"
4261
4262
4263\H{fileline} \i\c{__?FILE?__} and \i\c{__?LINE?__}: File Name and Line Number
4264
4265Like the C preprocessor, NASM allows the user to find out the file
4266name and line number containing the current instruction. The macro
4267\c{__?FILE?__} expands to a string constant giving the name of the
4268current input file (which may change through the course of assembly
4269if \c{%include} directives are used), and \c{__?LINE?__} expands to a
4270numeric constant giving the current line number in the input file.
4271
4272These macros could be used, for example, to communicate debugging
4273information to a macro, since invoking \c{__?LINE?__} inside a macro
4274definition (either single-line or multi-line) will return the line
4275number of the macro \e{call}, rather than \e{definition}. So to
4276determine where in a piece of code a crash is occurring, for
4277example, one could write a routine \c{stillhere}, which is passed a
4278line number in \c{EAX} and outputs something like \c{line 155: still
4279here}. You could then write a macro:
4280
4281\c %macro  notdeadyet 0
4282\c
4283\c         push    eax
4284\c         mov     eax,__?LINE?__
4285\c         call    stillhere
4286\c         pop     eax
4287\c
4288\c %endmacro
4289
4290and then pepper your code with calls to \c{notdeadyet} until you
4291find the crash point.
4292
4293
4294\H{bitsm} \i\c{__?BITS?__}: Current Code Generation Mode
4295
4296The \c{__?BITS?__} standard macro is updated every time that the BITS mode is
4297set using the \c{BITS XX} or \c{[BITS XX]} directive, where XX is a valid mode
4298number of 16, 32 or 64. \c{__?BITS?__} receives the specified mode number and
4299makes it globally available. This can be very useful for those who utilize
4300mode-dependent macros.
4301
4302\H{ofmtm} \i\c{__?OUTPUT_FORMAT?__}: Current Output Format
4303
4304The \c{__?OUTPUT_FORMAT?__} standard macro holds the current output
4305format name, as given by the \c{-f} option or NASM's default. Type
4306\c{nasm -h} for a list.
4307
4308\c %ifidn __?OUTPUT_FORMAT?__, win32
4309\c  %define NEWLINE 13, 10
4310\c %elifidn __?OUTPUT_FORMAT?__, elf32
4311\c  %define NEWLINE 10
4312\c %endif
4313
4314\H{dfmtm} \i\c{__?DEBUG_FORMAT?__}: Current Debug Format
4315
4316If debugging information generation is enabled, the
4317\c{__?DEBUG_FORMAT?__} standard macro holds the current debug format
4318name as specified by the \c{-F} or \c{-g} option or the output format
4319default. Type \c{nasm -f} \e{output} \c{y} for a list.
4320
4321\c{__?DEBUG_FORMAT?__} is not defined if debugging is not enabled, or if
4322the debug format specified is \c{null}.
4323
4324\H{datetime} Assembly Date and Time Macros
4325
4326NASM provides a variety of macros that represent the timestamp of the
4327assembly session.
4328
4329\b The \i\c{__?DATE?__} and \i\c{__?TIME?__} macros give the assembly date and
4330time as strings, in ISO 8601 format (\c{"YYYY-MM-DD"} and \c{"HH:MM:SS"},
4331respectively.)
4332
4333\b The \i\c{__?DATE_NUM?__} and \i\c{__?TIME_NUM?__} macros give the assembly
4334date and time in numeric form; in the format \c{YYYYMMDD} and
4335\c{HHMMSS} respectively.
4336
4337\b The \i\c{__?UTC_DATE?__} and \i\c{__?UTC_TIME?__} macros give the assembly
4338date and time in universal time (UTC) as strings, in ISO 8601 format
4339(\c{"YYYY-MM-DD"} and \c{"HH:MM:SS"}, respectively.)  If the host
4340platform doesn't provide UTC time, these macros are undefined.
4341
4342\b The \i\c{__?UTC_DATE_NUM?__} and \i\c{__?UTC_TIME_NUM?__} macros give the
4343assembly date and time in universal time (UTC) in numeric form; in the
4344format \c{YYYYMMDD} and \c{HHMMSS} respectively.  If the
4345host platform doesn't provide UTC time, these macros are
4346undefined.
4347
4348\b The \c{__?POSIX_TIME?__} macro is defined as a number containing the
4349number of seconds since the POSIX epoch, 1 January 1970 00:00:00 UTC;
4350excluding any leap seconds.  This is computed using UTC time if
4351available on the host platform, otherwise it is computed using the
4352local time as if it was UTC.
4353
4354All instances of time and date macros in the same assembly session
4355produce consistent output.  For example, in an assembly session
4356started at 42 seconds after midnight on January 1, 2010 in Moscow
4357(timezone UTC+3) these macros would have the following values,
4358assuming, of course, a properly configured environment with a correct
4359clock:
4360
4361\c       __?DATE?__             "2010-01-01"
4362\c       __?TIME?__             "00:00:42"
4363\c       __?DATE_NUM?__         20100101
4364\c       __?TIME_NUM?__         000042
4365\c       __?UTC_DATE?__         "2009-12-31"
4366\c       __?UTC_TIME?__         "21:00:42"
4367\c       __?UTC_DATE_NUM?__     20091231
4368\c       __?UTC_TIME_NUM?__     210042
4369\c       __?POSIX_TIME?__       1262293242
4370
4371
4372\H{use_def} \I\c{__?USE_*?__}\c{__?USE_}\e{package}\c{?__}: Package
4373Include Test
4374
4375When a standard macro package (see \k{macropkg}) is included with the
4376\c{%use} directive (see \k{use}), a single-line macro of the form
4377\c{__?USE_}\e{package}\c{?__} is automatically defined.  This allows
4378testing if a particular package is invoked or not.
4379
4380For example, if the \c{altreg} package is included (see
4381\k{pkg_altreg}), then the macro \c{__?USE_ALTREG?__} is defined.
4382
4383
4384\H{pass_macro} \i\c{__?PASS?__}: Assembly Pass
4385
4386The macro \c{__?PASS?__} is defined to be \c{1} on preparatory passes,
4387and \c{2} on the final pass.  In preprocess-only mode, it is set to
4388\c{3}, and when running only to generate dependencies (due to the
4389\c{-M} or \c{-MG} option, see \k{opt-M}) it is set to \c{0}.
4390
4391\e{Avoid using this macro if at all possible.  It is tremendously easy
4392to generate very strange errors by misusing it, and the semantics may
4393change in future versions of NASM.}
4394
4395
4396\H{strucs} \i{Structure Data Types}
4397
4398\S{struc} \i\c{STRUC} and \i\c{ENDSTRUC}: \i{Declaring Structure} Data Types
4399
4400The core of NASM contains no intrinsic means of defining data
4401structures; instead, the preprocessor is sufficiently powerful that
4402data structures can be implemented as a set of macros. The macros
4403\c{STRUC} and \c{ENDSTRUC} are used to define a structure data type.
4404
4405\c{STRUC} takes one or two parameters. The first parameter is the name
4406of the data type. The second, optional parameter is the base offset of
4407the structure. The name of the data type is defined as a symbol with
4408the value of the base offset, and the name of the data type with the
4409suffix \c{_size} appended to it is defined as an \c{EQU} giving the
4410size of the structure. Once \c{STRUC} has been issued, you are
4411defining the structure, and should define fields using the \c{RESB}
4412family of pseudo-instructions, and then invoke \c{ENDSTRUC} to finish
4413the definition.
4414
4415For example, to define a structure called \c{mytype} containing a
4416longword, a word, a byte and a string of bytes, you might code
4417
4418\c struc   mytype
4419\c
4420\c   mt_long:      resd    1
4421\c   mt_word:      resw    1
4422\c   mt_byte:      resb    1
4423\c   mt_str:       resb    32
4424\c
4425\c endstruc
4426
4427The above code defines six symbols: \c{mt_long} as 0 (the offset
4428from the beginning of a \c{mytype} structure to the longword field),
4429\c{mt_word} as 4, \c{mt_byte} as 6, \c{mt_str} as 7, \c{mytype_size}
4430as 39, and \c{mytype} itself as zero.
4431
4432The reason why the structure type name is defined at zero by default
4433is a side effect of allowing structures to work with the local label
4434mechanism: if your structure members tend to have the same names in
4435more than one structure, you can define the above structure like this:
4436
4437\c struc mytype
4438\c
4439\c   .long:        resd    1
4440\c   .word:        resw    1
4441\c   .byte:        resb    1
4442\c   .str:         resb    32
4443\c
4444\c endstruc
4445
4446This defines the offsets to the structure fields as \c{mytype.long},
4447\c{mytype.word}, \c{mytype.byte} and \c{mytype.str}.
4448
4449NASM, since it has no \e{intrinsic} structure support, does not
4450support any form of period notation to refer to the elements of a
4451structure once you have one (except the above local-label notation),
4452so code such as \c{mov ax,[mystruc.mt_word]} is not valid.
4453\c{mt_word} is a constant just like any other constant, so the
4454correct syntax is \c{mov ax,[mystruc+mt_word]} or \c{mov
4455ax,[mystruc+mytype.word]}.
4456
4457Sometimes you only have the address of the structure displaced by an
4458offset. For example, consider this standard stack frame setup:
4459
4460\c push ebp
4461\c mov ebp, esp
4462\c sub esp, 40
4463
4464In this case, you could access an element by subtracting the offset:
4465
4466\c mov [ebp - 40 + mytype.word], ax
4467
4468However, if you do not want to repeat this offset, you can use -40 as
4469a base offset:
4470
4471\c struc mytype, -40
4472
4473And access an element this way:
4474
4475\c mov [ebp + mytype.word], ax
4476
4477
4478\S{istruc} \i\c{ISTRUC}, \i\c{AT} and \i\c{IEND}: Declaring
4479\i{Instances of Structures}
4480
4481Having defined a structure type, the next thing you typically want
4482to do is to declare instances of that structure in your data
4483segment. NASM provides an easy way to do this in the \c{ISTRUC}
4484mechanism. To declare a structure of type \c{mytype} in a program,
4485you code something like this:
4486
4487\c mystruc:
4488\c     istruc mytype
4489\c
4490\c         at mt_long, dd      123456
4491\c         at mt_word, dw      1024
4492\c         at mt_byte, db      'x'
4493\c         at mt_str,  db      'hello, world', 13, 10, 0
4494\c
4495\c     iend
4496
4497The function of the \c{AT} macro is to make use of the \c{TIMES}
4498prefix to advance the assembly position to the correct point for the
4499specified structure field, and then to declare the specified data.
4500Therefore the structure fields must be declared in the same order as
4501they were specified in the structure definition.
4502
4503If the data to go in a structure field requires more than one source
4504line to specify, the remaining source lines can easily come after
4505the \c{AT} line. For example:
4506
4507\c         at mt_str,  db      123,134,145,156,167,178,189
4508\c                     db      190,100,0
4509
4510Depending on personal taste, you can also omit the code part of the
4511\c{AT} line completely, and start the structure field on the next
4512line:
4513
4514\c         at mt_str
4515\c                 db      'hello, world'
4516\c                 db      13,10,0
4517
4518\H{alignment} \i{Alignment} Control
4519
4520\S{align} \i\c{ALIGN} and \i\c{ALIGNB}: Code and Data Alignment
4521
4522The \c{ALIGN} and \c{ALIGNB} macros provide a convenient way to
4523align code or data on a word, longword, paragraph or other boundary.
4524(Some assemblers call this directive \i\c{EVEN}.) The syntax of the
4525\c{ALIGN} and \c{ALIGNB} macros is
4526
4527\c         align   4               ; align on 4-byte boundary
4528\c         align   16              ; align on 16-byte boundary
4529\c         align   8,db 0          ; pad with 0s rather than NOPs
4530\c         align   4,resb 1        ; align to 4 in the BSS
4531\c         alignb  4               ; equivalent to previous line
4532
4533Both macros require their first argument to be a power of two; they
4534both compute the number of additional bytes required to bring the
4535length of the current section up to a multiple of that power of two,
4536and then apply the \c{TIMES} prefix to their second argument to
4537perform the alignment.
4538
4539If the second argument is not specified, the default for \c{ALIGN}
4540is \c{NOP}, and the default for \c{ALIGNB} is \c{RESB 1}. So if the
4541second argument is specified, the two macros are equivalent.
4542Normally, you can just use \c{ALIGN} in code and data sections and
4543\c{ALIGNB} in BSS sections, and never need the second argument
4544except for special purposes.
4545
4546\c{ALIGN} and \c{ALIGNB}, being simple macros, perform no error
4547checking: they cannot warn you if their first argument fails to be a
4548power of two, or if their second argument generates more than one
4549byte of code. In each of these cases they will silently do the wrong
4550thing.
4551
4552\c{ALIGNB} (or \c{ALIGN} with a second argument of \c{RESB 1}) can
4553be used within structure definitions:
4554
4555\c struc mytype2
4556\c
4557\c   mt_byte:
4558\c         resb 1
4559\c         alignb 2
4560\c   mt_word:
4561\c         resw 1
4562\c         alignb 4
4563\c   mt_long:
4564\c         resd 1
4565\c   mt_str:
4566\c         resb 32
4567\c
4568\c endstruc
4569
4570This will ensure that the structure members are sensibly aligned
4571relative to the base of the structure.
4572
4573A final caveat: \c{ALIGN} and \c{ALIGNB} work relative to the
4574beginning of the \e{section}, not the beginning of the address space
4575in the final executable. Aligning to a 16-byte boundary when the
4576section you're in is only guaranteed to be aligned to a 4-byte
4577boundary, for example, is a waste of effort. Again, NASM does not
4578check that the section's alignment characteristics are sensible for
4579the use of \c{ALIGN} or \c{ALIGNB}.
4580
4581Both \c{ALIGN} and \c{ALIGNB} do call \c{SECTALIGN} macro implicitly.
4582See \k{sectalign} for details.
4583
4584See also the \c{smartalign} standard macro package, \k{pkg_smartalign}.
4585
4586
4587\S{sectalign} \i\c{SECTALIGN}: Section Alignment
4588
4589The \c{SECTALIGN} macro provides a way to modify the alignment attribute
4590of an output file section. Unlike the \c{align=} attribute (which is allowed
4591at section definition only) the \c{SECTALIGN} macro may be used at any time.
4592
4593For example the directive
4594
4595\c SECTALIGN 16
4596
4597sets the section alignment requirements to 16 bytes. Once increased, it
4598cannot be decreased; the alignment may only grow.
4599
4600Note that \c{ALIGN} (see \k{align}) calls the \c{SECTALIGN} macro implicitly
4601so the active section alignment requirements may be updated. This is the default
4602behaviour; if for some reason you do not want \c{ALIGN} to call \c{SECTALIGN}
4603at all, use the directive
4604
4605\c SECTALIGN OFF
4606
4607It is still possible to turn it on again by
4608
4609\c SECTALIGN ON
4610
4611Note that \c{SECTALIGN <ON|OFF>} affects only the \c{ALIGN}/\c{ALIGNB} directives,
4612not an explicit \c{SECTALIGN} directive.
4613
4614\C{macropkg} \i{Standard Macro Packages}
4615
4616The \i\c{%use} directive (see \k{use}) includes one of the standard
4617macro packages included with the NASM distribution and compiled into
4618the NASM binary.  It operates like the \c{%include} directive (see
4619\k{include}), but the included contents are provided by NASM itself.
4620
4621The names of standard macro packages are case insensitive and can be
4622quoted or not.
4623
4624As of version 2.15, NASM has \c{%ifusable} and \c{%ifusing} directives to help
4625the user understand whether an individual package is available in this version of
4626NASM (\c{%ifusable}) or a particular package is already loaded (\c{%ifusing}).
4627
4628
4629\H{pkg_altreg} \i\c{altreg}: \i{Alternate Register Names}
4630
4631The \c{altreg} standard macro package provides alternate register
4632names.  It provides numeric register names for all registers (not just
4633\c{R8}-\c{R15}), the Intel-defined aliases \c{R8L}-\c{R15L} for the
4634low bytes of register (as opposed to the NASM/AMD standard names
4635\c{R8B}-\c{R15B}), and the names \c{R0H}-\c{R3H} (by analogy with
4636\c{R0L}-\c{R3L}) for \c{AH}, \c{CH}, \c{DH}, and \c{BH}.
4637
4638Example use:
4639
4640\c %use altreg
4641\c
4642\c proc:
4643\c       mov r0l,r3h                    ; mov al,bh
4644\c       ret
4645
4646See also \k{reg64}.
4647
4648
4649\H{pkg_smartalign} \i\c{smartalign}\I{align, smart}: Smart \c{ALIGN} Macro
4650
4651The \c{smartalign} standard macro package provides for an \i\c{ALIGN}
4652macro which is more powerful than the default (and
4653backwards-compatible) one (see \k{align}).  When the \c{smartalign}
4654package is enabled, when \c{ALIGN} is used without a second argument,
4655NASM will generate a sequence of instructions more efficient than a
4656series of \c{NOP}.  Furthermore, if the padding exceeds a specific
4657threshold, then NASM will generate a jump over the entire padding
4658sequence.
4659
4660The specific instructions generated can be controlled with the
4661new \i\c{ALIGNMODE} macro.  This macro takes two parameters: one mode,
4662and an optional jump threshold override. If (for any reason) you need
4663to turn off the jump completely just set jump threshold value to -1
4664(or set it to \c{nojmp}). The following modes are possible:
4665
4666\b \c{generic}: Works on all x86 CPUs and should have reasonable
4667performance.  The default jump threshold is 8.  This is the
4668default.
4669
4670\b \c{nop}: Pad out with \c{NOP} instructions.  The only difference
4671compared to the standard \c{ALIGN} macro is that NASM can still jump
4672over a large padding area.  The default jump threshold is 16.
4673
4674\b \c{k7}: Optimize for the AMD K7 (Athlon/Athlon XP).  These
4675instructions should still work on all x86 CPUs.  The default jump
4676threshold is 16.
4677
4678\b \c{k8}: Optimize for the AMD K8 (Opteron/Athlon 64).  These
4679instructions should still work on all x86 CPUs.  The default jump
4680threshold is 16.
4681
4682\b \c{p6}: Optimize for Intel CPUs.  This uses the long \c{NOP}
4683instructions first introduced in Pentium Pro.  This is incompatible
4684with all CPUs of family 5 or lower, as well as some VIA CPUs and
4685several virtualization solutions.  The default jump threshold is 16.
4686
4687The macro \i\c{__?ALIGNMODE?__} is defined to contain the current
4688alignment mode.  A number of other macros beginning with \c{__?ALIGN_}
4689are used internally by this macro package.
4690
4691
4692\H{pkg_fp} \i\c{fp}: Floating-point macros
4693
4694This package contains the following floating-point convenience macros:
4695
4696\c %define Inf             __?Infinity?__
4697\c %define NaN             __?QNaN?__
4698\c %define QNaN            __?QNaN?__
4699\c %define SNaN            __?SNaN?__
4700\c
4701\c %define float8(x)       __?float8?__(x)
4702\c %define float16(x)      __?float16?__(x)
4703\c %define bfloat16(x)     __?bfloat16?__(x)
4704\c %define float32(x)      __?float32?__(x)
4705\c %define float64(x)      __?float64?__(x)
4706\c %define float80m(x)     __?float80m?__(x)
4707\c %define float80e(x)     __?float80e?__(x)
4708\c %define float128l(x)    __?float128l?__(x)
4709\c %define float128h(x)    __?float128h?__(x)
4710
4711It also defines a multi-line macro \i\c{bf16} that can be used
4712in a similar way to the \c{D}\e{x} directives for the other
4713floating-point numbers:
4714
4715\c      bf16 -3.1415, NaN, 2000.0, +Inf
4716
4717
4718\H{pkg_ifunc} \i\c{ifunc}: \i{Integer functions}
4719
4720This package contains a set of macros which implement integer
4721functions.  These are actually implemented as special operators, but
4722are most conveniently accessed via this macro package.
4723
4724The macros provided are:
4725
4726\S{ilog2} \i{Integer logarithms}
4727
4728These functions calculate the integer logarithm base 2 of their
4729argument, considered as an unsigned integer.  The only difference
4730between the functions is their respective behavior if the argument
4731provided is not a power of two.
4732
4733The function \i\c{ilog2e()} (alias \i\c{ilog2()}) generates an error if
4734the argument is not a power of two.
4735
4736The function \i\c{ilog2f()} rounds the argument down to the nearest
4737power of two; if the argument is zero it returns zero.
4738
4739The function \i\c{ilog2c()} rounds the argument up to the nearest
4740power of two.
4741
4742The functions \i\c{ilog2fw()} (alias \i\c{ilog2w()}) and
4743\i\c{ilog2cw()} generate a warning if the argument is not a power of
4744two, but otherwise behave like \c{ilog2f()} and \c{ilog2c()},
4745respectively.
4746
4747\H{pkg_masm} \i\c{masm}: \i{MASM compatibility}
4748
4749Since version 2.15, NASM has a MASM compatibility package with minimal
4750functionality, as it is intended to be used primarily with machine-generated code.
4751It does not include any "programmer-friendly" shortcuts, nor does it in any way
4752support ASSUME, symbol typing, or MASM-style structures.
4753
4754To enable the package, use the directive:
4755
4756\c      %use masm
4757
4758Currently, the MASM compatibility package emulates:
4759
4760\b The \c{FLAT} and \c{OFFSET} keywords are recognized and ignored.
4761
4762\b The \c{PTR} keyword signifies a memory reference, as if the
4763argument had been put in square brackets:
4764
4765\c      mov eax,[foo]               ; memory reference
4766\c      mov eax,dword ptr foo       ; memory reference
4767\c      mov eax,dword ptr flat:foo  ; memory reference
4768\c      mov eax,offset foo          ; address
4769\c      mov eax,foo                 ; address (ambiguous syntax in MASM)
4770
4771\b The \c{SEGMENT} ... \c{ENDS} syntax:
4772
4773\c    segname SEGMENT
4774\c        ...
4775\c    segname ENDS
4776
4777\b The \c{PROC} ... \c{ENDP} syntax:
4778
4779\c    procname PROC [FAR]
4780\c         ...
4781\c    procname ENDP
4782
4783\> \c{PROC} will also define \c{RET} as a macro expanding to either
4784\c{RETF} if \c{FAR} is specified or \c{RETN} otherwise. Any keyword
4785after \c{PROC} other than \c{FAR} is ignored.
4786
4787\b The \c{TBYTE} keyword as an alias for \c{TWORD} (see \k{qsother}).
4788
4789\b The \c{END} directive is ignored.
4790
4791\b In 64-bit mode relative addressing is the default (\c{DEFAULT REL},
4792see \k{REL & ABS}).
4793
4794In addition, NASM now natively supports, regardless of whether this
4795package is used or not:
4796
4797\b \c{?} and \c{DUP} syntax for the \c{DB} etc data declaration
4798directives (see \k{db}).
4799
4800\b \c{displacement[base+index]} syntax for memory operations, instead
4801of \c{[base+index+displacement]}.
4802
4803\b \c{seg:[addr]} instead of \c{[seg:addr]} syntax.
4804
4805\b A pure offset can be given to \c{LEA} without square brackets:
4806
4807\c      lea rax,[foo]               ; standard syntax
4808\c      lea rax,foo                 ; also accepted
4809
4810\C{directive} \i{Assembler Directives}
4811
4812NASM, though it attempts to avoid the bureaucracy of assemblers like
4813MASM and TASM, is nevertheless forced to support a \e{few}
4814directives. These are described in this chapter.
4815
4816NASM's directives come in two types: \I{user-level
4817directives}\e{user-level} directives and \I{primitive
4818directives}\e{primitive} directives. Typically, each directive has a
4819user-level form and a primitive form. In almost all cases, we
4820recommend that users use the user-level forms of the directives,
4821which are implemented as macros which call the primitive forms.
4822
4823Primitive directives are enclosed in square brackets; user-level
4824directives are not.
4825
4826In addition to the universal directives described in this chapter,
4827each object file format can optionally supply extra directives in
4828order to control particular features of that file format. These
4829\I{format-specific directives}\e{format-specific} directives are
4830documented along with the formats that implement them, in \k{outfmt}.
4831
4832
4833\H{bits} \i\c{BITS}: Specifying Target \i{Processor Mode}
4834
4835The \c{BITS} directive specifies whether NASM should generate code
4836\I{16-bit mode, versus 32-bit mode}designed to run on a processor
4837operating in 16-bit mode, 32-bit mode or 64-bit mode. The syntax is
4838\c{BITS XX}, where XX is 16, 32 or 64.
4839
4840In most cases, you should not need to use \c{BITS} explicitly. The
4841\c{aout}, \c{coff}, \c{elf*}, \c{macho}, \c{win32} and \c{win64}
4842object formats, which are designed for use in 32-bit or 64-bit
4843operating systems, all cause NASM to select 32-bit or 64-bit mode,
4844respectively, by default. The \c{obj} object format allows you
4845to specify each segment you define as either \c{USE16} or \c{USE32},
4846and NASM will set its operating mode accordingly, so the use of the
4847\c{BITS} directive is once again unnecessary.
4848
4849The most likely reason for using the \c{BITS} directive is to write
485032-bit or 64-bit code in a flat binary file; this is because the \c{bin}
4851output format defaults to 16-bit mode in anticipation of it being
4852used most frequently to write DOS \c{.COM} programs, DOS \c{.SYS}
4853device drivers and boot loader software.
4854
4855The \c{BITS} directive can also be used to generate code for a
4856different mode than the standard one for the output format.
4857
4858You do \e{not} need to specify \c{BITS 32} merely in order to use
485932-bit instructions in a 16-bit DOS program; if you do, the
4860assembler will generate incorrect code because it will be writing
4861code targeted at a 32-bit platform, to be run on a 16-bit one.
4862
4863When NASM is in \c{BITS 16} mode, instructions which use 32-bit
4864data are prefixed with an 0x66 byte, and those referring to 32-bit
4865addresses have an 0x67 prefix. In \c{BITS 32} mode, the reverse is
4866true: 32-bit instructions require no prefixes, whereas instructions
4867using 16-bit data need an 0x66 and those working on 16-bit addresses
4868need an 0x67.
4869
4870When NASM is in \c{BITS 64} mode, most instructions operate the same
4871as they do for \c{BITS 32} mode. However, there are 8 more general and
4872SSE registers, and 16-bit addressing is no longer supported.
4873
4874The default address size is 64 bits; 32-bit addressing can be selected
4875with the 0x67 prefix.  The default operand size is still 32 bits,
4876however, and the 0x66 prefix selects 16-bit operand size.  The \c{REX}
4877prefix is used both to select 64-bit operand size, and to access the
4878new registers. NASM automatically inserts REX prefixes when
4879necessary.
4880
4881When the \c{REX} prefix is used, the processor does not know how to
4882address the AH, BH, CH or DH (high 8-bit legacy) registers. Instead,
4883it is possible to access the low 8 bits of the SP, BP, SI and DI
4884registers as SPL, BPL, SIL and DIL, respectively; but only when the
4885REX prefix is used.
4886
4887The \c{BITS} directive has an exactly equivalent primitive form,
4888\c{[BITS 16]}, \c{[BITS 32]} and \c{[BITS 64]}. The user-level form is
4889a macro which has no function other than to call the primitive form.
4890
4891Note that the space is necessary, e.g. \c{BITS32} will \e{not} work!
4892
4893\S{USE16 & USE32} \i\c{USE16} & \i\c{USE32}: Aliases for BITS
4894
4895The `\c{USE16}' and `\c{USE32}' directives can be used in place of
4896`\c{BITS 16}' and `\c{BITS 32}', for compatibility with other assemblers.
4897
4898
4899\H{default} \i\c{DEFAULT}: Change the assembler defaults
4900
4901The \c{DEFAULT} directive changes the assembler defaults.  Normally,
4902NASM defaults to a mode where the programmer is expected to explicitly
4903specify most features directly.  However, this is occasionally
4904obnoxious, as the explicit form is pretty much the only one one wishes
4905to use.
4906
4907Currently, \c{DEFAULT} can set \c{REL} & \c{ABS} and \c{BND} & \c{NOBND}.
4908
4909\S{REL & ABS} \i\c{REL} & \i\c{ABS}: RIP-relative addressing
4910
4911This sets whether registerless instructions in 64-bit mode are \c{RIP}-relative
4912or not. By default, they are absolute unless overridden with the \i\c{REL}
4913specifier (see \k{effaddr}).  However, if \c{DEFAULT REL} is
4914specified, \c{REL} is default, unless overridden with the \c{ABS}
4915specifier, \e{except when used with an FS or GS segment override}.
4916
4917The special handling of \c{FS} and \c{GS} overrides is due to the
4918fact that these registers are generally used as thread pointers or
4919other special functions in 64-bit mode, and generating
4920\c{RIP}-relative addresses would be extremely confusing.
4921
4922\c{DEFAULT REL} is disabled with \c{DEFAULT ABS}.
4923
4924\S{BND & NOBND} \i\c{BND} & \i\c{NOBND}: \c{BND} prefix
4925
4926If \c{DEFAULT BND} is set, all instructions following this directive
4927that can take a bnd prefix are prefixed with bnd. To override it, the
4928\c{NOBND} prefix can be used.
4929
4930\c  DEFAULT BND
4931\c      call foo            ; BND will be prefixed
4932\c      nobnd call foo      ; BND will NOT be prefixed
4933
4934\c{DEFAULT NOBND} disables \c{DEFAULT BND}; the \c{BND} prefix will then be
4935added only when explicitly specified in code.
4936
4937\c{DEFAULT BND} is expected to be the normal configuration for writing
4938MPX-enabled code.
4939
4940\H{section} \i\c{SECTION} or \i\c{SEGMENT}: Changing and \i{Defining
4941Sections}
4942
4943\I{changing sections}\I{switching between sections}The \c{SECTION}
4944directive (\c{SEGMENT} is an exactly equivalent synonym) changes
4945which section of the output file the code you write will be
4946assembled into. In some object file formats, the number and names of
4947sections are fixed; in others, the user may make up as many as they
4948wish. Hence \c{SECTION} may sometimes give an error message, or may
4949define a new section, if you try to switch to a section that does
4950not (yet) exist.
4951
4952The Unix object formats, and the \c{bin} object format (but see
4953\k{multisec}), all support
4954the \i{standardized section names} \c{.text}, \c{.data} and \c{.bss}
4955for the code, data and uninitialized-data sections. The \c{obj}
4956format, by contrast, does not recognize these section names as being
4957special, and indeed will strip off the leading period of any section
4958name that has one.
4959
4960
4961\S{sectmac} The \i\c{__?SECT?__} Macro
4962
4963The \c{SECTION} directive is unusual in that its user-level form
4964functions differently from its primitive form. The primitive form,
4965\c{[SECTION xyz]}, simply switches the current target section to the
4966one given. The user-level form, \c{SECTION xyz}, however, first
4967defines the single-line macro \c{__?SECT?__} to be the primitive
4968\c{[SECTION]} directive which it is about to issue, and then issues
4969it. So the user-level directive
4970
4971\c         SECTION .text
4972
4973expands to the two lines
4974
4975\c %define __?SECT?__        [SECTION .text]
4976\c         [SECTION .text]
4977
4978Users may find it useful to make use of this in their own macros.
4979For example, the \c{writefile} macro defined in \k{mlmacgre} can be
4980usefully rewritten in the following more sophisticated form:
4981
4982\c %macro  writefile 2+
4983\c
4984\c         [section .data]
4985\c
4986\c   %%str:        db      %2
4987\c   %%endstr:
4988\c
4989\c         __?SECT?__
4990\c
4991\c         mov     dx,%%str
4992\c         mov     cx,%%endstr-%%str
4993\c         mov     bx,%1
4994\c         mov     ah,0x40
4995\c         int     0x21
4996\c
4997\c %endmacro
4998
4999This form of the macro, once passed a string to output, first
5000switches temporarily to the data section of the file, using the
5001primitive form of the \c{SECTION} directive so as not to modify
5002\c{__?SECT?__}. It then declares its string in the data section, and
5003then invokes \c{__?SECT?__} to switch back to \e{whichever} section
5004the user was previously working in. It thus avoids the need, in the
5005previous version of the macro, to include a \c{JMP} instruction to
5006jump over the data, and also does not fail if, in a complicated
5007\c{OBJ} format module, the user could potentially be assembling the
5008code in any of several separate code sections.
5009
5010
5011\H{absolute} \i\c{ABSOLUTE}: Defining Absolute Labels
5012
5013The \c{ABSOLUTE} directive can be thought of as an alternative form
5014of \c{SECTION}: it causes the subsequent code to be directed at no
5015physical section, but at the hypothetical section starting at the
5016given absolute address. The only instructions you can use in this
5017mode are the \c{RESB} family.
5018
5019\c{ABSOLUTE} is used as follows:
5020
5021\c absolute 0x1A
5022\c
5023\c     kbuf_chr    resw    1
5024\c     kbuf_free   resw    1
5025\c     kbuf        resw    16
5026
5027This example describes a section of the PC BIOS data area, at
5028segment address 0x40: the above code defines \c{kbuf_chr} to be
50290x1A, \c{kbuf_free} to be 0x1C, and \c{kbuf} to be 0x1E.
5030
5031The user-level form of \c{ABSOLUTE}, like that of \c{SECTION},
5032redefines the \i\c{__?SECT?__} macro when it is invoked.
5033
5034\i\c{STRUC} and \i\c{ENDSTRUC} are defined as macros which use
5035\c{ABSOLUTE} (and also \c{__?SECT?__}).
5036
5037\c{ABSOLUTE} doesn't have to take an absolute constant as an
5038argument: it can take an expression (actually, a \i{critical
5039expression}: see \k{crit}) and it can be a value in a segment. For
5040example, a TSR can re-use its setup code as run-time BSS like this:
5041
5042\c         org     100h               ; it's a .COM program
5043\c
5044\c         jmp     setup              ; setup code comes last
5045\c
5046\c         ; the resident part of the TSR goes here
5047\c setup:
5048\c         ; now write the code that installs the TSR here
5049\c
5050\c absolute setup
5051\c
5052\c runtimevar1     resw    1
5053\c runtimevar2     resd    20
5054\c
5055\c tsr_end:
5056
5057This defines some variables `on top of' the setup code, so that
5058after the setup has finished running, the space it took up can be
5059re-used as data storage for the running TSR. The symbol `tsr_end'
5060can be used to calculate the total size of the part of the TSR that
5061needs to be made resident.
5062
5063
5064\H{extern} \i\c{EXTERN}: \i{Importing Symbols} from Other Modules
5065
5066\c{EXTERN} is similar to the MASM directive \c{EXTRN} and the C
5067keyword \c{extern}: it is used to declare a symbol which is not
5068defined anywhere in the module being assembled, but is assumed to be
5069defined in some other module and needs to be referred to by this
5070one. Not every object-file format can support external variables:
5071the \c{bin} format cannot.
5072
5073The \c{EXTERN} directive takes as many arguments as you like. Each
5074argument is the name of a symbol:
5075
5076\c extern  _printf
5077\c extern  _sscanf,_fscanf
5078
5079Some object-file formats provide extra features to the \c{EXTERN}
5080directive. In all cases, the extra features are used by suffixing a
5081colon to the symbol name followed by object-format specific text.
5082For example, the \c{obj} format allows you to declare that the
5083default segment base of an external should be the group \c{dgroup}
5084by means of the directive
5085
5086\c extern  _variable:wrt dgroup
5087
5088The primitive form of \c{EXTERN} differs from the user-level form
5089only in that it can take only one argument at a time: the support
5090for multiple arguments is implemented at the preprocessor level.
5091
5092You can declare the same variable as \c{EXTERN} more than once: NASM
5093will quietly ignore the second and later redeclarations.
5094
5095If a variable is declared both \c{GLOBAL} and \c{EXTERN}, or if it is
5096declared as \c{EXTERN} and then defined, it will be treated as
5097\c{GLOBAL}. If a variable is declared both as \c{COMMON} and
5098\c{EXTERN}, it will be treated as \c{COMMON}.
5099
5100
5101\H{required} \i\c{REQUIRED}: \i{Unconditionally Importing Symbols} from Other Modules
5102
5103The \c{REQUIRED} keyword is similar to the \c{EXTERN} one. The difference
5104is that, as of version 2.15, the \c{EXTERN} keyword does not generate
5105unknown symbols, because doing so would prevent the use of common header
5106files: it might cause the linker to pull in a bunch of unnecessary modules.
5107
5108If the old behavior is required, use the \c{REQUIRED} keyword instead.
5109
5110
5111\H{global} \i\c{GLOBAL}: \i{Exporting Symbols} to Other Modules
5112
5113\c{GLOBAL} is the other end of \c{EXTERN}: if one module declares a
5114symbol as \c{EXTERN} and refers to it, then in order to prevent
5115linker errors, some other module must actually \e{define} the
5116symbol and declare it as \c{GLOBAL}. Some assemblers use the name
5117\i\c{PUBLIC} for this purpose.
5118
5119\c{GLOBAL} uses the same syntax as \c{EXTERN}, except that it must
5120refer to symbols which \e{are} defined in the same module as the
5121\c{GLOBAL} directive. For example:
5122
5123\c global _main
5124\c _main:
5125\c         ; some code
5126
5127\c{GLOBAL}, like \c{EXTERN}, allows object formats to define private
5128extensions by means of a colon. The ELF object format, for example,
5129lets you specify whether global data items are functions or data:
5130
5131\c global  hashlookup:function, hashtable:data
5132
5133Like \c{EXTERN}, the primitive form of \c{GLOBAL} differs from the
5134user-level form only in that it can take only one argument at a
5135time.
5136
5137
5138\H{common} \i\c{COMMON}: Defining Common Data Areas
5139
5140The \c{COMMON} directive is used to declare \i\e{common variables}.
5141A common variable is much like a global variable declared in the
5142uninitialized data section, so that
5143
5144\c common  intvar  4
5145
5146is similar in function to
5147
5148\c global  intvar
5149\c section .bss
5150\c
5151\c intvar  resd    1
5152
5153The difference is that if more than one module defines the same
5154common variable, then at link time those variables will be
5155\e{merged}, and references to \c{intvar} in all modules will point
5156at the same piece of memory.
5157
5158Like \c{GLOBAL} and \c{EXTERN}, \c{COMMON} supports object-format
5159specific extensions. For example, the \c{obj} format allows common
5160variables to be NEAR or FAR, and the ELF format allows you to specify
5161the alignment requirements of a common variable:
5162
5163\c common  commvar  4:near  ; works in OBJ
5164\c common  intarray 100:4   ; works in ELF: 4 byte aligned
5165
5166Once again, like \c{EXTERN} and \c{GLOBAL}, the primitive form of
5167\c{COMMON} differs from the user-level form only in that it can take
5168only one argument at a time.
5169
5170\H{static} \i\c{STATIC}: Local Symbols within Modules
5171
5172In contrast to \c{EXTERN} and \c{GLOBAL}, \c{STATIC} declares a local symbol, which
5173should nevertheless be named according to the global mangling rules (named by
5174analogy with the C keyword \c{static} as applied to functions or
5175global variables).
5176
5177\c static foo
5178\c foo:
5179\c          ; codes
5180
5181Unlike \c{GLOBAL}, \c{STATIC} does not allow object formats to accept
5182private extensions mentioned in \k{global}.
5183
5184\H{mangling} \i\c{(G|L)PREFIX}, \i\c{(G|L)POSTFIX}: Mangling Symbols
5185
5186The \c{PREFIX}, \c{GPREFIX}, \c{LPREFIX}, \c{POSTFIX}, \c{GPOSTFIX}, and
5187\c{LPOSTFIX} directives can prepend or append a string to a certain
5188type of symbol, normally to fit specific ABI conventions:
5189
5190\b\c{PREFIX}|\c{GPREFIX}: Prepend the argument to all \c{EXTERN},
5191\c{COMMON}, \c{STATIC}, and \c{GLOBAL} symbols.
5192
5193\b\c{LPREFIX}: Prepend the argument to all other symbols
5194such as local labels and backend defined symbols.
5195
5196\b\c{POSTFIX}|\c{GPOSTFIX}: Append the argument to all \c{EXTERN},
5197\c{COMMON}, \c{STATIC}, and \c{GLOBAL} symbols.
5198
5199\b\c{LPOSTFIX}: Append the argument to all other symbols
5200such as local labels and backend defined symbols.
5201
5202These are macros implemented as pragmas, and using \c{%pragma} syntax
5203can be restricted to specific backends (see \k{pragma}):
5204
5205\c %pragma macho lprefix L_
5206
5207Command line options are also available. See also \k{opt-pfix}.
5208
5209One example which supports many ABIs:
5210
5211\c ; The most common conventions
5212\c %pragma output gprefix _
5213\c %pragma output lprefix L_
5214\c ; ELF uses a different convention
5215\c %pragma elf    gprefix			; empty
5216\c %pragma elf    lprefix .L
5217
5218Some toolchains are aware of a particular prefix for their own optimization
5219options, such as code elimination. For instance, the Mach-O backend has a
5220linker that uses a simplistic naming scheme to chunk up sections into a
5221meta section. When the \c{subsections_via_symbols} directive
5222(\k{macho-ssvs}) is declared, each symbol is the start of a
5223separate block. The meta section is, then, defined to include sections
5224before the one that starts with an 'L'. \c{LPREFIX} is useful here to mark
5225all local symbols with the 'L' prefix to be excluded from the meta section.
5226It makes local symbols compatible with the particular toolchain.
5227Note that local symbols declared with \c{STATIC} (\k{static})
5228are excluded from the symbol mangling and also not marked as global.
5229
5230
5231\H{CPU} \i\c{CPU}: Defining CPU Dependencies
5232
5233The \i\c{CPU} directive restricts assembly to those instructions which
5234are available on the specified CPU.
5235
5236Options are:
5237
5238\b\c{CPU 8086}          Assemble only 8086 instruction set
5239
5240\b\c{CPU 186}           Assemble instructions up to the 80186 instruction set
5241
5242\b\c{CPU 286}           Assemble instructions up to the 286 instruction set
5243
5244\b\c{CPU 386}           Assemble instructions up to the 386 instruction set
5245
5246\b\c{CPU 486}           486 instruction set
5247
5248\b\c{CPU 586}           Pentium instruction set
5249
5250\b\c{CPU PENTIUM}       Same as 586
5251
5252\b\c{CPU 686}           P6 instruction set
5253
5254\b\c{CPU PPRO}          Same as 686
5255
5256\b\c{CPU P2}            Same as 686
5257
5258\b\c{CPU P3}            Pentium III (Katmai) instruction sets
5259
5260\b\c{CPU KATMAI}        Same as P3
5261
5262\b\c{CPU P4}            Pentium 4 (Willamette) instruction set
5263
5264\b\c{CPU WILLAMETTE}    Same as P4
5265
5266\b\c{CPU PRESCOTT}      Prescott instruction set
5267
5268\b\c{CPU X64}           x86-64 (x64/AMD64/Intel 64) instruction set
5269
5270\b\c{CPU IA64}          IA64 CPU (in x86 mode) instruction set
5271
5272All options are case insensitive.  All instructions will be selected
5273only if they apply to the selected CPU or lower.  By default, all
5274instructions are available.
5275
5276
5277\H{FLOAT} \i\c{FLOAT}: Handling of \I{floating-point, constants}floating-point constants
5278
5279By default, floating-point constants are rounded to nearest, and IEEE
5280denormals are supported.  The following options can be set to alter
5281this behaviour:
5282
5283\b\c{FLOAT DAZ}         Flush denormals to zero
5284
5285\b\c{FLOAT NODAZ}       Do not flush denormals to zero (default)
5286
5287\b\c{FLOAT NEAR}        Round to nearest (default)
5288
5289\b\c{FLOAT UP}          Round up (toward +Infinity)
5290
5291\b\c{FLOAT DOWN}        Round down (toward -Infinity)
5292
5293\b\c{FLOAT ZERO}        Round toward zero
5294
5295\b\c{FLOAT DEFAULT}     Restore default settings
5296
5297The standard macros \i\c{__?FLOAT_DAZ?__}, \i\c{__?FLOAT_ROUND?__}, and
5298\i\c{__?FLOAT?__} contain the current state, as long as the programmer
5299has avoided the use of the bracketed primitive form (\c{[FLOAT]}).
5300
5301\c{__?FLOAT?__} contains the full set of floating-point settings; this
5302value can be saved away and invoked later to restore the setting.
5303
5304
5305\H{asmdir-warning} \i\c{[WARNING]}: Enable or disable warnings
5306
5307The \c{[WARNING]} directive can be used to enable or disable classes
5308of warnings in the same way as the \c{-w} option, see \k{opt-w} for
5309more details about warning classes.
5310
5311\b \c{[warning +}\e{warning-class}\c{]} enables warnings for
5312   \e{warning-class}.
5313
5314\b \c{[warning -}\e{warning-class}\c{]} disables warnings for
5315   \e{warning-class}.
5316
5317\b \c{[warning *}\e{warning-class}\c{]} restores \e{warning-class} to
5318   the original value, either the default value or as specified on the
5319   command line.
5320
5321\b \c{[warning push]} saves the current warning state on a stack.
5322
5323\b \c{[warning pop]} restores the current warning state from the stack.
5324
5325The \c{[WARNING]} directive also accepts the \c{all}, \c{error} and
5326\c{error=}\e{warning-class} specifiers.
5327
5328No "user form" (without the brackets) currently exists.
5329
5330
5331\C{outfmt} \i{Output Formats}
5332
5333NASM is a portable assembler, designed to be able to compile on any
5334ANSI C-supporting platform and produce output to run on a variety of
5335Intel x86 operating systems. For this reason, it has a large number
5336of available output formats, selected using the \i\c{-f} option on
5337the NASM \i{command line}. Each of these formats, along with its
5338extensions to the base NASM syntax, is detailed in this chapter.
5339
5340As stated in \k{opt-o}, NASM chooses a \i{default name} for your
5341output file based on the input file name and the chosen output
5342format. This will be generated by removing the \i{extension}
5343(\c{.asm}, \c{.s}, or whatever you like to use) from the input file
5344name, and substituting an extension defined by the output format.
5345The extensions are given with each format below.
5346
5347
5348\H{binfmt} \i\c{bin}: \i{Flat-Form Binary}\I{pure binary} Output
5349
5350The \c{bin} format does not produce object files: it generates
5351nothing in the output file except the code you wrote. Such `pure
5352binary' files are used by \i{MS-DOS}: \i\c{.COM} executables and
5353\i\c{.SYS} device drivers are pure binary files. Pure binary output
5354is also useful for \i{operating system} and \i{boot loader}
5355development.
5356
5357The \c{bin} format supports \i{multiple section names}. For details of
5358how NASM handles sections in the \c{bin} format, see \k{multisec}.
5359
5360Using the \c{bin} format puts NASM by default into 16-bit mode (see
5361\k{bits}). In order to use \c{bin} to write 32-bit or 64-bit code,
5362such as an OS kernel, you need to explicitly issue the \I\c{BITS}\c{BITS 32}
5363or \I\c{BITS}\c{BITS 64} directive.
5364
5365\c{bin} has no default output file name extension: instead, it
5366leaves your file name as it is once the original extension has been
5367removed. Thus, the default is for NASM to assemble \c{binprog.asm}
5368into a binary file called \c{binprog}.
5369
5370
5371\S{org} \i\c{ORG}: Binary File \i{Program Origin}
5372
5373The \c{bin} format provides an additional directive to the list
5374given in \k{directive}: \c{ORG}. The function of the \c{ORG}
5375directive is to specify the origin address which NASM will assume
5376the program begins at when it is loaded into memory.
5377
5378For example, the following code will generate the longword
5379\c{0x00000104}:
5380
5381\c         org     0x100
5382\c         dd      label
5383\c label:
5384
5385Unlike the \c{ORG} directive provided by MASM-compatible assemblers,
5386which allows you to jump around in the object file and overwrite
5387code you have already generated, NASM's \c{ORG} does exactly what
5388the directive says: \e{origin}. Its sole function is to specify one
5389offset which is added to all internal address references within the
5390section; it does not permit any of the trickery that MASM's version
5391does. See \k{proborg} for further comments.
5392
5393
5394\S{binseg} \c{bin} Extensions to the \c{SECTION}
5395Directive\I{\c{SECTION}, \c{bin} extensions to}
5396
5397The \c{bin} output format extends the \c{SECTION} (or \c{SEGMENT})
5398directive to allow you to specify the alignment requirements of
5399segments. This is done by appending the \i\c{ALIGN} qualifier to the
5400end of the section-definition line. For example,
5401
5402\c section .data   align=16
5403
5404switches to the section \c{.data} and also specifies that it must be
5405aligned on a 16-byte boundary.
5406
5407The parameter to \c{ALIGN} specifies how many low bits of the
5408section start address must be forced to zero. The alignment value
5409given may be any power of two.\I{section alignment, in
5410bin}\I{segment alignment, in bin}\I{alignment, in bin sections}
5411
5412
5413\S{multisec} \i{Multisection}\I{bin, multisection} Support for the \c{bin} Format
5414
5415The \c{bin} format allows the use of multiple sections, of arbitrary names,
5416besides the "known" \c{.text}, \c{.data}, and \c{.bss} names.
5417
5418\b Sections may be designated \i\c{progbits} or \i\c{nobits}. Default
5419is \c{progbits} (except \c{.bss}, which defaults to \c{nobits},
5420of course).
5421
5422\b Sections can be aligned at a specified boundary following the previous
5423section with \c{align=}, or at an arbitrary byte-granular position with
5424\i\c{start=}.
5425
5426\b Sections can be given a virtual start address, which will be used
5427for the calculation of all memory references within that section
5428with \i\c{vstart=}.
5429
5430\b Sections can be ordered using \i\c{follows=}\c{<section>} or
5431\i\c{vfollows=}\c{<section>} as an alternative to specifying an explicit
5432start address.
5433
5434\b Arguments to \c{org}, \c{start}, \c{vstart}, and \c{align=} are
5435critical expressions. See \k{crit}. E.g. \c{align=(1 << ALIGN_SHIFT)}
5436- \c{ALIGN_SHIFT} must be defined before it is used here.
5437
5438\b Any code which comes before an explicit \c{SECTION} directive
5439is directed by default into the \c{.text} section.
5440
5441\b If an \c{ORG} statement is not given, \c{ORG 0} is used
5442by default.
5443
5444\b The \c{.bss} section will be placed after the last \c{progbits}
5445section, unless \c{start=}, \c{vstart=}, \c{follows=}, or \c{vfollows=}
5446has been specified.
5447
5448\b All sections are aligned on dword boundaries, unless a different
5449alignment has been specified.
5450
5451\b Sections may not overlap.
5452
5453\b NASM creates the symbol \c{section.<secname>.start} for each section,
5454which may be used in your code.
5455
5456\S{map} \i{Map Files}
5457
5458Map files can be generated in \c{-f bin} format by means of the \c{[map]}
5459option. Map types of \c{all} (default), \c{brief}, \c{sections}, \c{segments},
5460or \c{symbols} may be specified. Output may be directed to \c{stdout}
5461(default), \c{stderr}, or a specified file. E.g.
5462\c{[map symbols myfile.map]}. No "user form" exists, the square
5463brackets must be used.
5464
5465
5466\H{ithfmt} \i\c{ith}: \i{Intel Hex} Output
5467
5468The \c{ith} file format produces Intel hex-format files.  Just as the
5469\c{bin} format, this is a flat memory image format with no support for
5470relocation or linking.  It is usually used with ROM programmers and
5471similar utilities.
5472
5473All extensions supported by the \c{bin} file format are also supported by
5474the \c{ith} file format.
5475
5476\c{ith} provides a default output file-name extension of \c{.ith}.
5477
5478
5479\H{srecfmt} \i\c{srec}: \i{Motorola S-Records} Output
5480
5481The \c{srec} file format produces Motorola S-records files.  Just as the
5482\c{bin} format, this is a flat memory image format with no support for
5483relocation or linking.  It is usually used with ROM programmers and
5484similar utilities.
5485
5486All extensions supported by the \c{bin} file format are also supported by
5487the \c{srec} file format.
5488
5489\c{srec} provides a default output file-name extension of \c{.srec}.
5490
5491
5492\H{objfmt} \i\c{obj}: \i{Microsoft OMF}\I{OMF} Object Files
5493
5494The \c{obj} file format (NASM calls it \c{obj} rather than \c{omf}
5495for historical reasons) is the one produced by \i{MASM} and
5496\i{TASM}, which is typically fed to 16-bit DOS linkers to produce
5497\i\c{.EXE} files. It is also the format used by \i{OS/2}.
5498
5499\c{obj} provides a default output file-name extension of \c{.obj}.
5500
5501\c{obj} is not exclusively a 16-bit format, though: NASM has full
5502support for the 32-bit extensions to the format. In particular,
550332-bit \c{obj} format files are used by \i{Borland's Win32
5504compilers}, instead of using Microsoft's newer \i\c{win32} object
5505file format.
5506
5507The \c{obj} format does not define any special segment names: you
5508can call your segments anything you like. Typical names for segments
5509in \c{obj} format files are \c{CODE}, \c{DATA} and \c{BSS}.
5510
5511If your source file contains code before specifying an explicit
5512\c{SEGMENT} directive, then NASM will invent its own segment called
5513\i\c{__NASMDEFSEG} for you.
5514
5515When you define a segment in an \c{obj} file, NASM defines the
5516segment name as a symbol as well, so that you can access the segment
5517address of the segment. So, for example:
5518
5519\c segment data
5520\c
5521\c dvar:   dw      1234
5522\c
5523\c segment code
5524\c
5525\c function:
5526\c         mov     ax,data         ; get segment address of data
5527\c         mov     ds,ax           ; and move it into DS
5528\c         inc     word [dvar]     ; now this reference will work
5529\c         ret
5530
5531The \c{obj} format also enables the use of the \i\c{SEG} and
5532\i\c{WRT} operators, so that you can write code which does things
5533like
5534
5535\c extern  foo
5536\c
5537\c       mov   ax,seg foo            ; get preferred segment of foo
5538\c       mov   ds,ax
5539\c       mov   ax,data               ; a different segment
5540\c       mov   es,ax
5541\c       mov   ax,[ds:foo]           ; this accesses `foo'
5542\c       mov   [es:foo wrt data],bx  ; so does this
5543
5544
5545\S{objseg} \c{obj} Extensions to the \c{SEGMENT}
5546Directive\I{SEGMENT, obj extensions to}
5547
5548The \c{obj} output format extends the \c{SEGMENT} (or \c{SECTION})
5549directive to allow you to specify various properties of the segment
5550you are defining. This is done by appending extra qualifiers to the
5551end of the segment-definition line. For example,
5552
5553\c segment code private align=16
5554
5555defines the segment \c{code}, but also declares it to be a private
5556segment, and requires that the portion of it described in this code
5557module must be aligned on a 16-byte boundary.
5558
5559The available qualifiers are:
5560
5561\b \i\c{PRIVATE}, \i\c{PUBLIC}, \i\c{COMMON} and \i\c{STACK} specify
5562the combination characteristics of the segment. \c{PRIVATE} segments
5563do not get combined with any others by the linker; \c{PUBLIC} and
5564\c{STACK} segments get concatenated together at link time; and
5565\c{COMMON} segments all get overlaid on top of each other rather
5566than stuck end-to-end.
5567
5568\b \i\c{ALIGN} is used, as shown above, to specify how many low bits
5569of the segment start address must be forced to zero. The alignment
5570value given may be any power of two from 1 to 4096; in reality, the
5571only values supported are 1, 2, 4, 16, 256 and 4096, so if 8 is
5572specified it will be rounded up to 16, and 32, 64 and 128 will all
5573be rounded up to 256, and so on. Note that alignment to 4096-byte
5574boundaries is a \i{PharLap} extension to the format and may not be
5575supported by all linkers.\I{section alignment, in OBJ}\I{segment
5576alignment, in OBJ}\I{alignment, in OBJ sections}
5577
5578\b \i\c{CLASS} can be used to specify the segment class; this feature
5579indicates to the linker that segments of the same class should be
5580placed near each other in the output file. The class name can be any
5581word, e.g. \c{CLASS=CODE}.
5582
5583\b \i\c{OVERLAY}, like \c{CLASS}, is specified with an arbitrary word
5584as an argument, and provides overlay information to an
5585overlay-capable linker.
5586
5587\b Segments can be declared as \i\c{USE16} or \i\c{USE32}, which has
5588the effect of recording the choice in the object file and also
5589ensuring that NASM's default assembly mode when assembling in that
5590segment is 16-bit or 32-bit respectively.
5591
5592\b When writing \i{OS/2} object files, you should declare 32-bit
5593segments as \i\c{FLAT}, which causes the default segment base for
5594anything in the segment to be the special group \c{FLAT}, and also
5595defines the group if it is not already defined.
5596
5597\b The \c{obj} file format also allows segments to be declared as
5598having a pre-defined absolute segment address, although no linkers
5599are currently known to make sensible use of this feature;
5600nevertheless, NASM allows you to declare a segment such as
5601\c{SEGMENT SCREEN ABSOLUTE=0xB800} if you need to. The \i\c{ABSOLUTE}
5602and \c{ALIGN} keywords are mutually exclusive.
5603
5604NASM's default segment attributes are \c{PUBLIC}, \c{ALIGN=1}, no
5605class, no overlay, and \c{USE16}.
5606
5607
5608\S{group} \i\c{GROUP}: Defining Groups of Segments\I{segments, groups of}
5609
5610The \c{obj} format also allows segments to be grouped, so that a
5611single segment register can be used to refer to all the segments in
5612a group. NASM therefore supplies the \c{GROUP} directive, whereby
5613you can code
5614
5615\c segment data
5616\c
5617\c         ; some data
5618\c
5619\c segment bss
5620\c
5621\c         ; some uninitialized data
5622\c
5623\c group dgroup data bss
5624
5625which will define a group called \c{dgroup} to contain the segments
5626\c{data} and \c{bss}. Like \c{SEGMENT}, \c{GROUP} causes the group
5627name to be defined as a symbol, so that you can refer to a variable
5628\c{var} in the \c{data} segment as \c{var wrt data} or as \c{var wrt
5629dgroup}, depending on which segment value is currently in your
5630segment register.
5631
5632If you just refer to \c{var}, however, and \c{var} is declared in a
5633segment which is part of a group, then NASM will default to giving
5634you the offset of \c{var} from the beginning of the \e{group}, not
5635the \e{segment}. Therefore \c{SEG var}, also, will return the group
5636base rather than the segment base.
5637
5638NASM will allow a segment to be part of more than one group, but
5639will generate a warning if you do this. Variables declared in a
5640segment which is part of more than one group will default to being
5641relative to the first group that was defined to contain the segment.
5642
5643A group does not have to contain any segments; you can still make
5644\c{WRT} references to a group which does not contain the variable
5645you are referring to. OS/2, for example, defines the special group
5646\c{FLAT} with no segments in it.
5647
5648
5649\S{uppercase} \i\c{UPPERCASE}: Disabling Case Sensitivity in Output
5650
5651Although NASM itself is \i{case sensitive}, some OMF linkers are
5652not; therefore it can be useful for NASM to output single-case
5653object files. The \c{UPPERCASE} format-specific directive causes all
5654segment, group and symbol names that are written to the object file
5655to be forced to upper case just before being written. Within a
5656source file, NASM is still case-sensitive; but the object file can
5657be written entirely in upper case if desired.
5658
5659\c{UPPERCASE} is used alone on a line; it requires no parameters.
5660
5661
5662\S{import} \i\c{IMPORT}: Importing DLL Symbols\I{DLL symbols,
5663importing}\I{symbols, importing from DLLs}
5664
5665The \c{IMPORT} format-specific directive defines a symbol to be
5666imported from a DLL, for use if you are writing a DLL's \i{import
5667library} in NASM. You still need to declare the symbol as \c{EXTERN}
5668as well as using the \c{IMPORT} directive.
5669
5670The \c{IMPORT} directive takes two required parameters, separated by
5671white space, which are (respectively) the name of the symbol you
5672wish to import and the name of the library you wish to import it
5673from. For example:
5674
5675\c     import  WSAStartup wsock32.dll
5676
5677A third optional parameter gives the name by which the symbol is
5678known in the library you are importing it from, in case this is not
5679the same as the name you wish the symbol to be known by to your code
5680once you have imported it. For example:
5681
5682\c     import  asyncsel wsock32.dll WSAAsyncSelect
5683
5684
5685\S{export} \i\c{EXPORT}: Exporting DLL Symbols\I{DLL symbols,
5686exporting}\I{symbols, exporting from DLLs}
5687
5688The \c{EXPORT} format-specific directive defines a global symbol to
5689be exported as a DLL symbol, for use if you are writing a DLL in
5690NASM. You still need to declare the symbol as \c{GLOBAL} as well as
5691using the \c{EXPORT} directive.
5692
5693\c{EXPORT} takes one required parameter, which is the name of the
5694symbol you wish to export, as it was defined in your source file. An
5695optional second parameter (separated by white space from the first)
5696gives the \e{external} name of the symbol: the name by which you
5697wish the symbol to be known to programs using the DLL. If this name
5698is the same as the internal name, you may leave the second parameter
5699off.
5700
5701Further parameters can be given to define attributes of the exported
5702symbol. These parameters, like the second, are separated by white
5703space. If further parameters are given, the external name must also
5704be specified, even if it is the same as the internal name. The
5705available attributes are:
5706
5707\b \c{resident} indicates that the exported name is to be kept
5708resident by the system loader. This is an optimization for
5709frequently used symbols imported by name.
5710
5711\b \c{nodata} indicates that the exported symbol is a function which
5712does not make use of any initialized data.
5713
5714\b \c{parm=NNN}, where \c{NNN} is an integer, sets the number of
5715parameter words for the case in which the symbol is a call gate
5716between 32-bit and 16-bit segments.
5717
5718\b An attribute which is just a number indicates that the symbol
5719should be exported with an identifying number (ordinal), and gives
5720the desired number.
5721
5722For example:
5723
5724\c     export  myfunc
5725\c     export  myfunc TheRealMoreFormalLookingFunctionName
5726\c     export  myfunc myfunc 1234  ; export by ordinal
5727\c     export  myfunc myfunc resident parm=23 nodata
5728
5729
5730\S{dotdotstart} \i\c{..start}: Defining the \i{Program Entry
5731Point}
5732
5733\c{OMF} linkers require exactly one of the object files being linked to
5734define the program entry point, where execution will begin when the
5735program is run. If the object file that defines the entry point is
5736assembled using NASM, you specify the entry point by declaring the
5737special symbol \c{..start} at the point where you wish execution to
5738begin.
5739
5740
5741\S{objextern} \c{obj} Extensions to the \c{EXTERN}
5742Directive\I{EXTERN, obj extensions to}
5743
5744If you declare an external symbol with the directive
5745
5746\c     extern  foo
5747
5748then references such as \c{mov ax,foo} will give you the offset of
5749\c{foo} from its preferred segment base (as specified in whichever
5750module \c{foo} is actually defined in). So to access the contents of
5751\c{foo} you will usually need to do something like
5752
5753\c         mov     ax,seg foo      ; get preferred segment base
5754\c         mov     es,ax           ; move it into ES
5755\c         mov     ax,[es:foo]     ; and use offset `foo' from it
5756
5757This is a little unwieldy, particularly if you know that an external
5758is going to be accessible from a given segment or group, say
5759\c{dgroup}. So if \c{DS} already contained \c{dgroup}, you could
5760simply code
5761
5762\c         mov     ax,[foo wrt dgroup]
5763
5764However, having to type this every time you want to access \c{foo}
5765can be a pain; so NASM allows you to declare \c{foo} in the
5766alternative form
5767
5768\c     extern  foo:wrt dgroup
5769
5770This form causes NASM to pretend that the preferred segment base of
5771\c{foo} is in fact \c{dgroup}; so the expression \c{seg foo} will
5772now return \c{dgroup}, and the expression \c{foo} is equivalent to
5773\c{foo wrt dgroup}.
5774
5775This \I{default-WRT mechanism}default-\c{WRT} mechanism can be used
5776to make externals appear to be relative to any group or segment in
5777your program. It can also be applied to common variables: see
5778\k{objcommon}.
5779
5780
5781\S{objcommon} \c{obj} Extensions to the \c{COMMON}
5782Directive\I{COMMON, obj extensions to}
5783
5784The \c{obj} format allows common variables to be either near\I{near
5785common variables} or far\I{far common variables}; NASM allows you to
5786specify which your variables should be by the use of the syntax
5787
5788\c common  nearvar 2:near   ; `nearvar' is a near common
5789\c common  farvar  10:far   ; and `farvar' is far
5790
5791Far common variables may be greater in size than 64Kb, and so the
5792OMF specification says that they are declared as a number of
5793\e{elements} of a given size. So a 10-byte far common variable could
5794be declared as ten one-byte elements, five two-byte elements, two
5795five-byte elements or one ten-byte element.
5796
5797Some \c{OMF} linkers require the \I{element size, in common
5798variables}\I{common variables, element size}element size, as well as
5799the variable size, to match when resolving common variables declared
5800in more than one module. Therefore NASM must allow you to specify
5801the element size on your far common variables. This is done by the
5802following syntax:
5803
5804\c common  c_5by2  10:far 5        ; two five-byte elements
5805\c common  c_2by5  10:far 2        ; five two-byte elements
5806
5807If no element size is specified, the default is 1. Also, the \c{FAR}
5808keyword is not required when an element size is specified, since
5809only far commons may have element sizes at all. So the above
5810declarations could equivalently be
5811
5812\c common  c_5by2  10:5            ; two five-byte elements
5813\c common  c_2by5  10:2            ; five two-byte elements
5814
5815In addition to these extensions, the \c{COMMON} directive in \c{obj}
5816also supports default-\c{WRT} specification like \c{EXTERN} does
5817(explained in \k{objextern}). So you can also declare things like
5818
5819\c common  foo     10:wrt dgroup
5820\c common  bar     16:far 2:wrt data
5821\c common  baz     24:wrt data:6
5822
5823
5824\S{objdepend} Embedded File Dependency Information
5825
5826Since NASM 2.13.02, \c{obj} files contain embedded dependency file
5827information.  To suppress the generation of dependencies, use
5828
5829\c %pragma obj nodepend
5830
5831
5832\H{win32fmt} \i\c{win32}: Microsoft Win32 Object Files
5833
5834The \c{win32} output format generates Microsoft Win32 object files,
5835suitable for passing to Microsoft linkers such as \i{Visual C++}.
5836Note that Borland Win32 compilers do not use this format, but use
5837\c{obj} instead (see \k{objfmt}).
5838
5839\c{win32} provides a default output file-name extension of \c{.obj}.
5840
5841Note that although Microsoft say that Win32 object files follow the
5842\c{COFF} (Common Object File Format) standard, the object files produced
5843by Microsoft Win32 compilers are not compatible with COFF linkers
5844such as DJGPP's, and vice versa. This is due to a difference of
5845opinion over the precise semantics of PC-relative relocations. To
5846produce COFF files suitable for DJGPP, use NASM's \c{coff} output
5847format; conversely, the \c{coff} format does not produce object
5848files that Win32 linkers can generate correct output from.
5849
5850
5851\S{win32sect} \c{win32} Extensions to the \c{SECTION}
5852Directive\I{SECTION, Windows extensions to}
5853
5854Like the \c{obj} format, \c{win32} allows you to specify additional
5855information on the \c{SECTION} directive line, to control the type
5856and properties of sections you declare. Section types and properties
5857are generated automatically by NASM for the \i{standard section names}
5858\c{.text}, \c{.data} and \c{.bss}, but may still be overridden by
5859these qualifiers.
5860
5861The available qualifiers are:
5862
5863\b \c{code}, or equivalently \c{text}, defines the section to be a
5864code section. This marks the section as readable and executable, but
5865not writable, and also indicates to the linker that the type of the
5866section is code.
5867
5868\b \c{data} and \c{bss} define the section to be a data section,
5869analogously to \c{code}. Data sections are marked as readable and
5870writable, but not executable. \c{data} declares an initialized data
5871section, whereas \c{bss} declares an uninitialized data section.
5872
5873\b \c{rdata} declares an initialized data section that is readable
5874but not writable. Microsoft compilers use this section to place
5875constants in it.
5876
5877\b \c{info} defines the section to be an \i{informational section},
5878which is not included in the executable file by the linker, but may
5879(for example) pass information \e{to} the linker. For example,
5880declaring an \c{info}-type section called \i\c{.drectve} causes the
5881linker to interpret the contents of the section as command-line
5882options.
5883
5884\b \c{align=}, used with a trailing number as in \c{obj}, gives the
5885\I{section alignment, in win32}\I{alignment, in win32
5886sections}alignment requirements of the section. The maximum you may
5887specify is 64: the Win32 object file format contains no means to
5888request a greater section alignment than this. If alignment is not
5889explicitly specified, the defaults are 16-byte alignment for code
5890sections, 8-byte alignment for rdata sections and 4-byte alignment
5891for data (and BSS) sections.
5892Informational sections get a default alignment of 1 byte (no
5893alignment), though the value does not matter.
5894
5895The defaults assumed by NASM if you do not specify the above
5896qualifiers are:
5897
5898\c section .text    code  align=16
5899\c section .data    data  align=4
5900\c section .rdata   rdata align=8
5901\c section .bss     bss   align=4
5902
5903The \c{win64} format also adds:
5904
5905\c section .pdata   rdata align=4
5906\c section .xdata   rdata align=8
5907
5908Any other section name is treated by default like \c{.text}.
5909
5910\S{win32safeseh} \c{win32}: Safe Structured Exception Handling
5911
5912Among other improvements in Windows XP SP2 and Windows Server 2003,
5913Microsoft introduced the concept of "safe structured exception
5914handling." The general idea is to collect handlers' entry points in a
5915designated read-only table and have the alleged entry point verified
5916against this table before exception control is passed to the handler. In
5917order for an executable module to be equipped with such a "safe exception
5918handler table," all object modules on the linker command line have to comply
5919with certain criteria. If a single module among them does not, then
5920the table in question is omitted and the above-mentioned run-time checks
5921will not be performed for the application in question. Table omission is by
5922default silent and can therefore be easily overlooked. One can instruct the
5923linker to refuse to produce a binary without such a table by passing the
5924\c{/safeseh} command line option.
5925
5926Regardless of the merits of this run-time check, it's natural to expect
5927NASM to be capable of generating modules suitable for \c{/safeseh}
5928linking. From the developer's viewpoint the problem is two-fold:
5929
5930\b how to adapt modules not deploying exception handlers of their own;
5931
5932\b how to adapt/develop modules utilizing custom exception handling;
5933
5934The former can be easily achieved with any NASM version by adding the
5935following line to the source code:
5936
5937\c $@feat.00 equ 1
5938
5939As of version 2.03 NASM adds this absolute symbol automatically
5940(to be precise, only if it is not already present). I.e. if for whatever
5941reason the developer chooses to assign another value in the source file, it
5942is still perfectly possible to do so.
5943
5944Registering custom exception handler on the other hand requires certain
5945"magic." As of version 2.03 additional directive is implemented,
5946\c{safeseh}, which instructs the assembler to produce appropriately
5947formatted input data for above mentioned "safe exception handler
5948table." Its typical use would be:
5949
5950\c section .text
5951\c extern  _MessageBoxA@16
5952\c %if     __?NASM_VERSION_ID?__ >= 0x02030000
5953\c safeseh handler         ; register handler as "safe handler"
5954\c %endif
5955\c handler:
5956\c         push    DWORD 1 ; MB_OKCANCEL
5957\c         push    DWORD caption
5958\c         push    DWORD text
5959\c         push    DWORD 0
5960\c         call    _MessageBoxA@16
5961\c         sub     eax,1   ; incidentally suits as return value
5962\c                         ; for exception handler
5963\c         ret
5964\c global  _main
5965\c _main:
5966\c         push    DWORD handler
5967\c         push    DWORD [fs:0]
5968\c         mov     DWORD [fs:0],esp ; engage exception handler
5969\c         xor     eax,eax
5970\c         mov     eax,DWORD[eax]   ; cause exception
5971\c         pop     DWORD [fs:0]     ; disengage exception handler
5972\c         add     esp,4
5973\c         ret
5974\c text:   db      'OK to rethrow, CANCEL to generate core dump',0
5975\c caption:db      'SEGV',0
5976\c
5977\c section .drectve info
5978\c         db      '/defaultlib:user32.lib /defaultlib:msvcrt.lib '
5979
5980As you might imagine, it's perfectly possible to produce .exe binary
5981with "safe exception handler table" and yet engage unregistered
5982exception handler. Indeed, handler is engaged by simply manipulating
5983\c{[fs:0]} location at run-time, something linker has no power over,
5984run-time that is. It should be explicitly mentioned that such failure
5985to register handler's entry point with \c{safeseh} directive has
5986undesired side effect at run-time. If exception is raised and
5987unregistered handler is to be executed, the application is abruptly
5988terminated without any notification whatsoever. One can argue that
5989system could at least have logged some kind of "non-safe exception
5990handler in x.exe at address n" message in event log, but no, literally
5991no notification is provided and user is left with no clue on what
5992caused application failure.
5993
5994Finally, all mentions of linker in this paragraph refer to Microsoft
5995linker version 7.x and later. Presence of \c{@feat.00} symbol and input
5996data for "safe exception handler table" causes no backward
5997incompatibilities and "safeseh" modules generated by NASM 2.03 and
5998later can still be linked by earlier versions or non-Microsoft linkers.
5999
6000\S{codeview} Debugging formats for Windows
6001\I{Windows debugging formats}
6002
6003The \c{win32} and \c{win64} formats support the Microsoft \i{CodeView
6004debugging format}.  Currently CodeView version 8 format is supported
6005(\i\c{cv8}), but newer versions of the CodeView debugger should be
6006able to handle this format as well.
6007
6008
6009\H{win64fmt} \i\c{win64}: Microsoft Win64 Object Files
6010
6011The \c{win64} output format generates Microsoft Win64 object files,
6012which are nearly 100% identical to the \c{win32} object format (\k{win32fmt})
6013with the exception that they are meant to target 64-bit code and the x86-64
6014platform altogether. In NASM, this object format is used exactly like the
6015\c{win32} object format (\k{win32fmt}), apart from this exception.
6016
6017\S{win64pic} \c{win64}: Writing Position-Independent Code
6018
6019While \c{REL} takes good care of RIP-relative addressing, there is one
6020aspect that is easy to overlook for a Win64 programmer: indirect
6021references. Consider a switch dispatch table:
6022
6023\c         jmp     qword [dsptch+rax*8]
6024\c         ...
6025\c dsptch: dq      case0
6026\c         dq      case1
6027\c         ...
6028
6029Even a novice Win64 assembler programmer will soon realize that the code
6030is not 64-bit savvy. Most notably linker will refuse to link it with
6031
6032\c 'ADDR32' relocation to '.text' invalid without /LARGEADDRESSAWARE:NO
6033
6034So [s]he will have to split the \c{jmp} instruction as follows:
6035
6036\c         lea     rbx,[rel dsptch]
6037\c         jmp     qword [rbx+rax*8]
6038
6039What happens behind the scene is that effective address in \c{lea} is
6040encoded relative to instruction pointer, or in perfectly
6041position-independent manner. But this is only part of the problem!
6042Trouble is that in .dll context \c{caseN} relocations will make their
6043way to the final module and might have to be adjusted at .dll load
6044time; to be specific, when it can't be loaded at its preferred address. And
6045when this occurs, pages with such relocations will be rendered private
6046to current process, which kind of undermines the idea of sharing .dll.
6047But no worry, it's trivial to fix:
6048
6049\c         lea     rbx,[rel dsptch]
6050\c         add     rbx,[rbx+rax*8]
6051\c         jmp     rbx
6052\c         ...
6053\c dsptch: dq      case0-dsptch
6054\c         dq      case1-dsptch
6055\c         ...
6056
6057NASM version 2.03 and later provides another alternative, \c{wrt
6058..imagebase} operator, which returns offset from base address of the
6059current image, be it .exe or .dll module, therefore the name. For those
6060acquainted with PE-COFF format base address denotes start of
6061\c{IMAGE_DOS_HEADER} structure. Here is how to implement switch with
6062these image-relative references:
6063
6064\c         lea     rbx,[rel dsptch]
6065\c         mov     eax,[rbx+rax*4]
6066\c         sub     rbx,dsptch wrt ..imagebase
6067\c         add     rbx,rax
6068\c         jmp     rbx
6069\c         ...
6070\c dsptch: dd      case0 wrt ..imagebase
6071\c         dd      case1 wrt ..imagebase
6072
6073One can argue that the operator is redundant. Indeed, the snippet before
6074last works just fine with any NASM version and is not even Windows
6075specific... The real reason for implementing \c{wrt ..imagebase} will
6076become apparent in the next paragraph.
6077
6078It should be noted that \c{wrt ..imagebase} is defined as 32-bit
6079operand only:
6080
6081\c         dd      label wrt ..imagebase           ; ok
6082\c         dq      label wrt ..imagebase           ; bad
6083\c         mov     eax,label wrt ..imagebase       ; ok
6084\c         mov     rax,label wrt ..imagebase       ; bad
6085
6086\S{win64seh} \c{win64}: Structured Exception Handling
6087
6088Structured exception handling in Win64 is a completely different matter
6089from Win32. Upon exception program counter value is noted, and
6090linker-generated table comprising start and end addresses of all the
6091functions [in given executable module] is traversed and compared to the
6092saved program counter. Thus so called \c{UNWIND_INFO} structure is
6093identified. If it's not found, then offending subroutine is assumed to
6094be "leaf" and just mentioned lookup procedure is attempted for its
6095caller. In Win64 leaf function is such function that does not call any
6096other function \e{nor} modifies any Win64 non-volatile registers,
6097including stack pointer. The latter ensures that it's possible to
6098identify leaf function's caller by simply pulling the value from the
6099top of the stack.
6100
6101While majority of subroutines written in assembler are not calling any
6102other function, requirement for non-volatile registers' immutability
6103leaves developer with not more than 7 registers and no stack frame,
6104which is not necessarily what [s]he counted on. Customarily one would
6105meet the requirement by saving non-volatile registers on stack and
6106restoring them upon return, so what can go wrong? If [and only if] an
6107exception is raised at run-time and no \c{UNWIND_INFO} structure is
6108associated with such "leaf" function, the stack unwind procedure will
6109expect to find caller's return address on the top of stack immediately
6110followed by its frame. Given that developer pushed caller's
6111non-volatile registers on stack, would the value on top point at some
6112code segment or even addressable space? Well, developer can attempt
6113copying caller's return address to the top of stack and this would
6114actually work in some very specific circumstances. But unless developer
6115can guarantee that these circumstances are always met, it's more
6116appropriate to assume worst case scenario, i.e. stack unwind procedure
6117going berserk. Relevant question is what happens then? Application is
6118abruptly terminated without any notification whatsoever. Just like in
6119Win32 case, one can argue that system could at least have logged
6120"unwind procedure went berserk in x.exe at address n" in event log, but
6121no, no trace of failure is left.
6122
6123Now, when we understand significance of the \c{UNWIND_INFO} structure,
6124let's discuss what's in it and/or how it's processed. First of all it
6125is checked for presence of reference to custom language-specific
6126exception handler. If there is one, then it's invoked. Depending on the
6127return value, execution flow is resumed (exception is said to be
6128"handled"), \e{or} the rest of the \c{UNWIND_INFO} structure is processed as
6129follows. Besides the optional reference to a custom handler, it carries
6130information about current callee's stack frame and where non-volatile
6131registers are saved. Information is detailed enough to be able to
6132reconstruct contents of caller's non-volatile registers upon call to
6133current callee. And so caller's context is reconstructed, and then
6134unwind procedure is repeated, i.e. another \c{UNWIND_INFO} structure is
6135associated, this time, with caller's instruction pointer, which is then
6136checked for presence of reference to language-specific handler, etc.
6137The procedure is recursively repeated till exception is handled. As
6138last resort system "handles" it by generating memory core dump and
6139terminating the application.
6140
6141At the time of this writing NASM unfortunately does not
6142facilitate generation of above mentioned detailed information about
6143stack frame layout. But as of version 2.03 it implements building
6144blocks for generating structures involved in stack unwinding. As
6145simplest example, here is how to deploy custom exception handler for
6146leaf function:
6147
6148\c default rel
6149\c section .text
6150\c extern  MessageBoxA
6151\c handler:
6152\c         sub     rsp,40
6153\c         mov     rcx,0
6154\c         lea     rdx,[text]
6155\c         lea     r8,[caption]
6156\c         mov     r9,1    ; MB_OKCANCEL
6157\c         call    MessageBoxA
6158\c         sub     eax,1   ; incidentally suits as return value
6159\c                         ; for exception handler
6160\c         add     rsp,40
6161\c         ret
6162\c global  main
6163\c main:
6164\c         xor     rax,rax
6165\c         mov     rax,QWORD[rax]  ; cause exception
6166\c         ret
6167\c main_end:
6168\c text:   db      'OK to rethrow, CANCEL to generate core dump',0
6169\c caption:db      'SEGV',0
6170\c
6171\c section .pdata  rdata align=4
6172\c         dd      main wrt ..imagebase
6173\c         dd      main_end wrt ..imagebase
6174\c         dd      xmain wrt ..imagebase
6175\c section .xdata  rdata align=8
6176\c xmain:  db      9,0,0,0
6177\c         dd      handler wrt ..imagebase
6178\c section .drectve info
6179\c         db      '/defaultlib:user32.lib /defaultlib:msvcrt.lib '
6180
6181What you see in \c{.pdata} section is element of the "table comprising
6182start and end addresses of function" along with reference to associated
6183\c{UNWIND_INFO} structure. And what you see in \c{.xdata} section is
6184\c{UNWIND_INFO} structure describing function with no frame, but with
6185designated exception handler. References are \e{required} to be
6186image-relative (which is the real reason for implementing \c{wrt
6187..imagebase} operator). It should be noted that \c{rdata align=n}, as
6188well as \c{wrt ..imagebase}, are optional in these two segments'
6189contexts, i.e. can be omitted. Latter means that \e{all} 32-bit
6190references, not only above listed required ones, placed into these two
6191segments turn out image-relative. Why is it important to understand?
6192Developer is allowed to append handler-specific data to \c{UNWIND_INFO}
6193structure, and if [s]he adds a 32-bit reference, then [s]he will have
6194to remember to adjust its value to obtain the real pointer.
6195
6196As already mentioned, in Win64 terms leaf function is one that does not
6197call any other function \e{nor} modifies any non-volatile register,
6198including stack pointer. But it's not uncommon that assembler
6199programmer plans to utilize every single register and sometimes even
6200have variable stack frame. Is there anything one can do with bare
6201building blocks? I.e. besides manually composing fully-fledged
6202\c{UNWIND_INFO} structure, which would surely be considered
6203error-prone? Yes, there is. Recall that exception handler is called
6204first, before stack layout is analyzed. As it turned out, it's
6205perfectly possible to manipulate current callee's context in custom
6206handler in manner that permits further stack unwinding. General idea is
6207that handler would not actually "handle" the exception, but instead
6208restore callee's context, as it was at its entry point and thus mimic
6209leaf function. In other words, handler would simply undertake part of
6210unwinding procedure. Consider following example:
6211
6212\c function:
6213\c         mov     rax,rsp         ; copy rsp to volatile register
6214\c         push    r15             ; save non-volatile registers
6215\c         push    rbx
6216\c         push    rbp
6217\c         mov     r11,rsp         ; prepare variable stack frame
6218\c         sub     r11,rcx
6219\c         and     r11,-64
6220\c         mov     QWORD[r11],rax  ; check for exceptions
6221\c         mov     rsp,r11         ; allocate stack frame
6222\c         mov     QWORD[rsp],rax  ; save original rsp value
6223\c magic_point:
6224\c         ...
6225\c         mov     r11,QWORD[rsp]  ; pull original rsp value
6226\c         mov     rbp,QWORD[r11-24]
6227\c         mov     rbx,QWORD[r11-16]
6228\c         mov     r15,QWORD[r11-8]
6229\c         mov     rsp,r11         ; destroy frame
6230\c         ret
6231
The key point is that up to \c{magic_point} the original \c{rsp} value
remains in the chosen volatile register and no non-volatile register,
except for \c{rsp}, is modified, while past \c{magic_point} \c{rsp}
remains constant till the very end of the \c{function}. In this case
a custom language-specific exception handler would look like this:
6237
6238\c EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
6239\c         CONTEXT *context,DISPATCHER_CONTEXT *disp)
6240\c {   ULONG64 *rsp;
6241\c     if (context->Rip<(ULONG64)magic_point)
6242\c         rsp = (ULONG64 *)context->Rax;
6243\c     else
6244\c     {   rsp = ((ULONG64 **)context->Rsp)[0];
6245\c         context->Rbp = rsp[-3];
6246\c         context->Rbx = rsp[-2];
6247\c         context->R15 = rsp[-1];
6248\c     }
6249\c     context->Rsp = (ULONG64)rsp;
6250\c
6251\c     memcpy (disp->ContextRecord,context,sizeof(CONTEXT));
6252\c     RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase,
\c         disp->ControlPc,disp->FunctionEntry,disp->ContextRecord,
6254\c         &disp->HandlerData,&disp->EstablisherFrame,NULL);
6255\c     return ExceptionContinueSearch;
6256\c }
6257
6258As custom handler mimics leaf function, corresponding \c{UNWIND_INFO}
6259structure does not have to contain any information about stack frame
6260and its layout.
6261
6262\H{cofffmt} \i\c{coff}: \i{Common Object File Format}
6263
6264The \c{coff} output type produces \c{COFF} object files suitable for
6265linking with the \i{DJGPP} linker.
6266
6267\c{coff} provides a default output file-name extension of \c{.o}.
6268
6269The \c{coff} format supports the same extensions to the \c{SECTION}
6270directive as \c{win32} does, except that the \c{align} qualifier and
6271the \c{info} section type are not supported.
6272
6273\H{machofmt} \I{Mach-O}\i\c{macho32} and \i\c{macho64}: \i{Mach Object File Format}
6274
The \c{macho32} and \c{macho64} output formats produce Mach-O
6276object files suitable for linking with the \i{MacOS X} linker.
6277\i\c{macho} is a synonym for \c{macho32}.
6278
6279\c{macho} provides a default output file-name extension of \c{.o}.
6280
6281\S{machosect} \c{macho} extensions to the \c{SECTION} Directive
6282\I{SECTION, macho extensions to}
6283
6284The \c{macho} output format specifies section names in the format
6285"\e{segment}\c{,}\e{section}".  No spaces are allowed around the
6286comma.  The following flags can also be specified:
6287
6288\b \c{data} - this section contains initialized data items
6289
6290\b \c{code} - this section contains code exclusively
6291
6292\b \c{mixed} - this section contains both code and data
6293
6294\b \c{bss} - this section is uninitialized and filled with zero
6295
6296\b \c{zerofill} - same as \c{bss}
6297
6298\b \c{no_dead_strip} - inhibit dead code stripping for this section
6299
6300\b \c{live_support} - set the live support flag for this section
6301
6302\b \c{strip_static_syms} - strip static symbols for this section
6303
6304\b \c{debug} - this section contains debugging information
6305
6306\b \c{align=}\e{alignment} - specify section alignment
6307
6308The default is \c{data}, unless the section name is \c{__text} or
6309\c{__bss} in which case the default is \c{text} or \c{bss},
6310respectively.
6311
6312For compatibility with other Unix platforms, the following standard
6313names are also supported:
6314
6315\c .text    = __TEXT,__text  text
6316\c .rodata  = __DATA,__const data
6317\c .data    = __DATA,__data  data
6318\c .bss     = __DATA,__bss   bss
6319
6320If the \c{.rodata} section contains no relocations, it is instead put
6321into the \c{__TEXT,__const} section unless this section has already
6322been specified explicitly.  However, it is probably better to specify
6323\c{__TEXT,__const} and \c{__DATA,__const} explicitly as appropriate.
6324
6325\S{machotls} \i{Thread Local Storage in Mach-O}\I{TLS}: \c{macho} special
6326symbols and \i\c{WRT}
6327
6328Mach-O defines the following special symbols that can be used on the
6329right-hand side of the \c{WRT} operator:
6330
6331\b \c{..tlvp} is used to specify access to thread-local storage.
6332
6333\b \c{..gotpcrel} is used to specify references to the Global Offset
6334   Table.  The GOT is supported in the \c{macho64} format only.
6335
\S{macho-ssvs} \c{macho} specific directive \i\c{subsections_via_symbols}
6337
The directive \c{subsections_via_symbols} sets the
\c{MH_SUBSECTIONS_VIA_SYMBOLS} flag in the Mach-O header, which tells
the linker that each section can be divided into blocks (subsections)
at symbol boundaries. It is often used to let the linker eliminate
dead code.
6342
6343This directive takes no arguments.
6344
6345This is a macro implemented as a \c{%pragma}.  It can also be
6346specified in its \c{%pragma} form, in which case it will not affect
6347non-Mach-O builds of the same source code:
6348
6349\c      %pragma macho subsections_via_symbols
6350
\S{macho-nds} \c{macho} specific directive \i\c{no_dead_strip}
6352
6353The directive \c{no_dead_strip} sets the Mach-O \c{SH_NO_DEAD_STRIP}
section flag on the section containing a specific symbol.  This
6355directive takes a list of symbols as its arguments.
6356
6357This is a macro implemented as a \c{%pragma}.  It can also be
6358specified in its \c{%pragma} form, in which case it will not affect
6359non-Mach-O builds of the same source code:
6360
6361\c      %pragma macho no_dead_strip symbol...
6362
6363\S{macho-pext} \c{macho} specific extensions to the \c{GLOBAL}
6364Directive: \i\c{private_extern}
6365
6366The directive extension to \c{GLOBAL} marks the symbol with limited
6367global scope. For example, you can specify the global symbol with
6368this extension:
6369
6370\c global foo:private_extern
6371\c foo:
6372\c          ; codes
6373
Linking with the static linker will clear the private extern attribute,
but a linker option such as \c{-keep_private_externs} can prevent this.
6376
6377\H{elffmt} \i\c{elf32}, \i\c{elf64}, \i\c{elfx32}: \I{ELF}\I{linux, elf}\i{Executable and Linkable
6378Format} Object Files
6379
6380The \c{elf32}, \c{elf64} and \c{elfx32} output formats generate
\c{ELF32} and \c{ELF64} (Executable and Linkable Format) object files, as
6382used by Linux as well as \i{Unix System V}, including \i{Solaris x86},
6383\i{UnixWare} and \i{SCO Unix}. ELF provides a default output
6384file-name extension of \c{.o}. \c{elf} is a synonym for \c{elf32}.
6385
6386The \c{elfx32} format is used for the \i{x32} ABI, which is a 32-bit
6387ABI with the CPU in 64-bit mode.
6388
6389\S{abisect} ELF specific directive \i\c{osabi}
6390
6391The ELF header specifies the application binary interface for the
6392target operating system (OSABI).  This field can be set by using the
6393\c{osabi} directive with the numeric value (0-255) of the target
6394system. If this directive is not used, the default value will be "UNIX
6395System V ABI" (0) which will work on most systems which support ELF.
6396
6397\S{elfsect} ELF extensions to the \c{SECTION} Directive
6398\I{SECTION, ELF extensions to}
6399
6400Like the \c{obj} format, \c{elf} allows you to specify additional
6401information on the \c{SECTION} directive line, to control the type
6402and properties of sections you declare. Section types and properties
6403are generated automatically by NASM for the \i{standard section
6404names}, but may still be
6405overridden by these qualifiers.
6406
6407The available qualifiers are:
6408
6409\b \i\c{alloc} defines the section to be one which is loaded into
6410memory when the program is run. \i\c{noalloc} defines it to be one
6411which is not, such as an informational or comment section.
6412
6413\b \i\c{exec} defines the section to be one which should have execute
6414permission when the program is run. \i\c{noexec} defines it as one
6415which should not.
6416
6417\b \i\c{write} defines the section to be one which should be writable
6418when the program is run. \i\c{nowrite} defines it as one which should
6419not.
6420
6421\b \i\c{progbits} defines the section to be one with explicit contents
6422stored in the object file: an ordinary code or data section, for
6423example.
6424
6425\b \i\c{nobits} defines the section to be one with no explicit
6426contents given, such as a BSS section.
6427
6428\b \i\c{note} indicates that this section contains ELF notes. The
contents of ELF notes are specified using normal assembly instructions;
6430it is up to the programmer to ensure these are valid ELF notes.
6431
6432\b \i\c{preinit_array} indicates that this section contains function
6433addresses to be called before any other initialization has happened.
6434
6435\b \i\c{init_array} indicates that this section contains function
6436addresses to be called during initialization.
6437
6438\b \i\c{fini_array} indicates that this section contains function
6439pointers to be called during termination.
6440
6441\b \I{align, ELF attribute}\c{align=}, used with a trailing number as in \c{obj}, gives the
6442\I{section alignment, in elf}\I{alignment, in elf sections}alignment
6443requirements of the section.
6444
6445\b \c{byte}, \c{word}, \c{dword}, \c{qword}, \c{tword}, \c{oword},
6446\c{yword}, or \c{zword} with an optional \c{*}\i{multiplier} specify
6447the fundamental data item size for a section which contains either
6448fixed-sized data structures or strings; it also sets a default
6449alignment. This is generally used with the \c{strings} and \c{merge}
6450attributes (see below.) For example \c{byte*4} defines a unit size of
64514 bytes, with a default alignment of 1; \c{dword} also defines a unit
6452size of 4 bytes, but with a default alignment of 4. The \c{align=}
6453attribute, if specified, overrides this default alignment.
6454
6455\b \I{pointer, ELF attribute}\c{pointer} is equivalent to \c{dword}
6456for \c{elf32} or \c{elfx32}, and \c{qword} for \c{elf64}.
6457
\b \I{strings, ELF attribute}\c{strings} indicates that this section
6459contains exclusively null-terminated strings. By default these are
6460assumed to be byte strings, but a size specifier can be used to
6461override that.
6462
6463\b \i\c{merge} indicates that duplicate data elements in this section
6464should be merged with data elements from other object files. Data
elements can be either fixed-sized objects or null-terminated strings
6466(with the \c{strings} attribute.) A size specifier is required unless
6467\c{strings} is specified, in which case the size defaults to \c{byte}.
6468
6469\b \i\c{tls} defines the section to be one which contains
6470thread local variables.
6471
6472The defaults assumed by NASM if you do not specify the above
6473qualifiers are:
6474
6475\I\c{.text} \I\c{.rodata} \I\c{.lrodata} \I\c{.data} \I\c{.ldata}
\I\c{.bss} \I\c{.lbss} \I\c{.tdata} \I\c{.tbss} \I\c{.comment}
6477
6478\c section .text          progbits      alloc   exec    nowrite  align=16
6479\c section .rodata        progbits      alloc   noexec  nowrite  align=4
6480\c section .lrodata       progbits      alloc   noexec  nowrite  align=4
6481\c section .data          progbits      alloc   noexec  write    align=4
6482\c section .ldata         progbits      alloc   noexec  write    align=4
6483\c section .bss           nobits        alloc   noexec  write    align=4
6484\c section .lbss          nobits        alloc   noexec  write    align=4
6485\c section .tdata         progbits      alloc   noexec  write    align=4   tls
6486\c section .tbss          nobits        alloc   noexec  write    align=4   tls
6487\c section .comment       progbits      noalloc noexec  nowrite  align=1
6488\c section .preinit_array preinit_array alloc   noexec  nowrite  pointer
6489\c section .init_array    init_array    alloc   noexec  nowrite  pointer
6490\c section .fini_array    fini_array    alloc   noexec  nowrite  pointer
6491\c section .note          note          noalloc noexec  nowrite  align=4
6492\c section other          progbits      alloc   noexec  nowrite  align=1
6493
6494(Any section name other than those in the above table
6495 is treated by default like \c{other} in the above table.
6496 Please note that section names are case sensitive.)
6497
6498
6499\S{elfwrt} \i{Position-Independent Code}\I{PIC}: ELF Special
6500Symbols and \i\c{WRT}
6501
6502Since \c{ELF} does not support segment-base references, the \c{WRT}
6503operator is not used for its normal purpose; therefore NASM's
6504\c{elf} output format makes use of \c{WRT} for a different purpose,
6505namely the PIC-specific \I{relocations, PIC-specific}relocation
6506types.
6507
6508\c{elf} defines five special symbols which you can use as the
6509right-hand side of the \c{WRT} operator to obtain PIC relocation
6510types. They are \i\c{..gotpc}, \i\c{..gotoff}, \i\c{..got},
6511\i\c{..plt} and \i\c{..sym}. Their functions are summarized here:
6512
6513\b Referring to the symbol marking the global offset table base
6514using \c{wrt ..gotpc} will end up giving the distance from the
6515beginning of the current section to the global offset table.
6516(\i\c{_GLOBAL_OFFSET_TABLE_} is the standard symbol name used to
6517refer to the \i{GOT}.) So you would then need to add \i\c{$$} to the
6518result to get the real address of the GOT.
6519
6520\b Referring to a location in one of your own sections using \c{wrt
6521..gotoff} will give the distance from the beginning of the GOT to
6522the specified location, so that adding on the address of the GOT
6523would give the real address of the location you wanted.
6524
6525\b Referring to an external or global symbol using \c{wrt ..got}
6526causes the linker to build an entry \e{in} the GOT containing the
6527address of the symbol, and the reference gives the distance from the
6528beginning of the GOT to the entry; so you can add on the address of
6529the GOT, load from the resulting address, and end up with the
6530address of the symbol.
6531
6532\b Referring to a procedure name using \c{wrt ..plt} causes the
6533linker to build a \i{procedure linkage table} entry for the symbol,
6534and the reference gives the address of the \i{PLT} entry. You can
6535only use this in contexts which would generate a PC-relative
6536relocation normally (i.e. as the destination for \c{CALL} or
6537\c{JMP}), since ELF contains no relocation type to refer to PLT
6538entries absolutely.
6539
6540\b Referring to a symbol name using \c{wrt ..sym} causes NASM to
6541write an ordinary relocation, but instead of making the relocation
6542relative to the start of the section and then adding on the offset
6543to the symbol, it will write a relocation record aimed directly at
6544the symbol in question. The distinction is a necessary one due to a
6545peculiarity of the dynamic linker.
6546
6547A fuller explanation of how to use these relocation types to write
6548shared libraries entirely in NASM is given in \k{picdll}.
6549
6550\S{elftls} \i{Thread Local Storage in ELF}\I{TLS}: \c{elf} Special
6551Symbols and \i\c{WRT}
6552
6553\b In ELF32 mode, referring to an external or global symbol using
6554\c{wrt ..tlsie} \I\c{..tlsie}
6555causes the linker to build an entry \e{in} the GOT containing the
6556offset of the symbol within the TLS block, so you can access the value
6557of the symbol with code such as:
6558
6559\c	  mov  eax,[tid wrt ..tlsie]
6560\c	  mov  [gs:eax],ebx
6561
6562
6563\b In ELF64 or ELFx32 mode, referring to an external or global symbol using
6564\c{wrt ..gottpoff} \I\c{..gottpoff}
6565causes the linker to build an entry \e{in} the GOT containing the
6566offset of the symbol within the TLS block, so you can access the value
6567of the symbol with code such as:
6568
6569\c	  mov	rax,[rel tid wrt ..gottpoff]
6570\c	  mov	rcx,[fs:rax]
6571
6572
6573\S{elfglob} \c{elf} Extensions to the \c{GLOBAL} Directive\I{GLOBAL,
6574elf extensions to}\I{GLOBAL, aoutb extensions to}
6575
6576\c{ELF} object files can contain more information about a global
6577symbol than just its address: they can contain the \I{symbols,
6578specifying sizes}\I{size, of symbols}size of the symbol and its
6579\I{symbols, specifying types}\I{type, of symbols}type as well. These
6580are not merely debugger conveniences, but are actually necessary when
6581the program being written is a \I{elf shared library}shared
6582library. NASM therefore supports some extensions to the \c{GLOBAL}
6583directive, allowing you to specify these features.
6584
6585You can specify whether a global variable is a function or a data
6586object by suffixing the name with a colon and the word
6587\i\c{function} or \i\c{data}. (\i\c{object} is a synonym for
6588\c{data}.) For example:
6589
6590\c global   hashlookup:function, hashtable:data
6591
6592exports the global symbol \c{hashlookup} as a function and
6593\c{hashtable} as a data object.
6594
6595Optionally, you can control the ELF visibility of the symbol.  Just
6596add one of the visibility keywords: \i\c{default}, \i\c{internal},
6597\i\c{hidden}, or \i\c{protected}.  The default is \i\c{default} of
6598course.  For example, to make \c{hashlookup} hidden:
6599
6600\c global   hashlookup:function hidden
6601
Since version 2.15, it is possible to specify symbol binding. The keywords
are: \i\c{weak} to generate a weak symbol, or \i\c{strong}. The default is \i\c{strong}.
6604
6605You can also specify the size of the data associated with the
6606symbol, as a numeric expression (which may involve labels, and even
6607forward references) after the type specifier. Like this:
6608
6609\c global  hashtable:data (hashtable.end - hashtable)
6610\c
6611\c hashtable:
6612\c         db this,that,theother  ; some data here
6613\c .end:
6614
6615This makes NASM automatically calculate the length of the table and
6616place that information into the \c{ELF} symbol table.
6617
6618Declaring the type and size of global symbols is necessary when
6619writing shared library code. For more information, see
6620\k{picglobal}.
6621
6622
6623\S{elfextrn} \c{elf} Extensions to the \c{EXTERN} Directive\I{EXTERN,
6624elf extensions to}\I{EXTERN, elf extensions to}
6625
Since version 2.15 it is possible to specify the keyword \i\c{weak} to generate a weak external
reference. Example:
6628
6629\c extern weak_ref:weak
6630
6631
6632\S{elfcomm} \c{elf} Extensions to the \c{COMMON} Directive
6633\I{COMMON, elf extensions to}
6634
6635\c{ELF} also allows you to specify alignment requirements \I{common
6636variables, alignment in elf}\I{alignment, of elf common variables}on
6637common variables. This is done by putting a number (which must be a
6638power of two) after the name and size of the common variable,
6639separated (as usual) by a colon. For example, an array of
6640doublewords would benefit from 4-byte alignment:
6641
6642\c common  dwordarray 128:4
6643
6644This declares the total size of the array to be 128 bytes, and
6645requires that it be aligned on a 4-byte boundary.
6646
6647
6648\S{elf16} 16-bit code and ELF
6649\I{ELF, 16-bit code}
6650
6651Older versions of the \c{ELF32} specification did not provide
6652relocations for 8- and 16-bit values. It is now part of the formal
6653specification, and any new enough linker should support them.
6654
ELF currently has no support for segmented programming.
6656
6657\S{elfdbg} Debug formats and ELF
6658\I{ELF, debug formats}
6659
6660ELF provides debug information in \c{STABS} and \c{DWARF} formats.
6661Line number information is generated for all executable sections, but please
6662note that only the ".text" section is executable by default.
6663
6664\H{aoutfmt} \i\c{aout}: Linux \I{a.out, Linux version}\I{linux, a.out}\c{a.out} Object Files
6665
6666The \c{aout} format generates \c{a.out} object files, in the form used
6667by early Linux systems (current Linux systems use ELF, see
6668\k{elffmt}.) These differ from other \c{a.out} object files in that
6669the magic number in the first four bytes of the file is
6670different; also, some implementations of \c{a.out}, for example
6671NetBSD's, support position-independent code, which Linux's
6672implementation does not.
6673
6674\c{a.out} provides a default output file-name extension of \c{.o}.
6675
6676\c{a.out} is a very simple object format. It supports no special
6677directives, no special symbols, no use of \c{SEG} or \c{WRT}, and no
6678extensions to any standard directives. It supports only the three
6679\i{standard section names} \i\c{.text}, \i\c{.data} and \i\c{.bss}.
6680
6681
\H{aoutbfmt} \i\c{aoutb}: \i{NetBSD}/\i{FreeBSD}/\i{OpenBSD}
6683\I{a.out, BSD version}\c{a.out} Object Files
6684
6685The \c{aoutb} format generates \c{a.out} object files, in the form
6686used by the various free \c{BSD Unix} clones, \c{NetBSD}, \c{FreeBSD}
6687and \c{OpenBSD}. For simple object files, this object format is exactly
6688the same as \c{aout} except for the magic number in the first four bytes
6689of the file. However, the \c{aoutb} format supports
6690\I{PIC}\i{position-independent code} in the same way as the \c{elf}
6691format, so you can use it to write \c{BSD} \i{shared libraries}.
6692
6693\c{aoutb} provides a default output file-name extension of \c{.o}.
6694
6695\c{aoutb} supports no special directives, no special symbols, and
6696only the three \i{standard section names} \i\c{.text}, \i\c{.data}
6697and \i\c{.bss}. However, it also supports the same use of \i\c{WRT} as
6698\c{elf} does, to provide position-independent code relocation types.
6699See \k{elfwrt} for full documentation of this feature.
6700
6701\c{aoutb} also supports the same extensions to the \c{GLOBAL}
6702directive as \c{elf} does: see \k{elfglob} for documentation of
6703this.
6704
6705
6706\H{as86fmt} \c{as86}: \i{Minix}/Linux\I{linux, as86} \i\c{as86} Object Files
6707
6708The Minix/Linux 16-bit assembler \c{as86} has its own non-standard
6709object file format. Although its companion linker \i\c{ld86} produces
6710something close to ordinary \c{a.out} binaries as output, the object
6711file format used to communicate between \c{as86} and \c{ld86} is not
6712itself \c{a.out}.
6713
6714NASM supports this format, just in case it is useful, as \c{as86}.
6715\c{as86} provides a default output file-name extension of \c{.o}.
6716
6717\c{as86} is a very simple object format (from the NASM user's point
6718of view). It supports no special directives, no use of \c{SEG} or \c{WRT},
6719and no extensions to any standard directives. It supports only the three
6720\i{standard section names} \i\c{.text}, \i\c{.data} and \i\c{.bss}.  The
6721only special symbol supported is \c{..start}.
6722
6723
6724\H{rdffmt} \I{RDOFF}\i\c{rdf}: \i{Relocatable Dynamic Object File
6725Format} (deprecated)
6726
6727\e{The RDOFF format is strongly deprecated and has been disabled
6728starting in NASM 2.15.04. The RDOFF backend has been broken since at
6729least NASM 2.14. The RDOFF utilities are scheduled to be removed from
6730the NASM distribution in NASM 2.16.} If you have a strong use case for
6731the RDOFF format, file a bug report at
6732\W{https://bugs.nasm.us/}\c{https://bugs.nasm.us/} as soon as possible.
6733
6734The \c{rdf} output format produces \c{RDOFF} object files. \c{RDOFF}
6735(Relocatable Dynamic Object File Format) is a home-grown object-file
6736format, designed alongside NASM itself and reflecting in its file
6737format the internal structure of the assembler.
6738
6739\c{RDOFF} is not used by any well-known operating systems. Those
6740writing their own systems, however, may well wish to use \c{RDOFF}
6741as their object format, on the grounds that it is designed primarily
6742for simplicity and contains very little file-header bureaucracy.
6743
6744The Unix NASM archive, and the DOS archive which includes sources,
6745both contain an \I{rdoff subdirectory}\c{rdoff} subdirectory holding
6746a set of RDOFF utilities: an RDF linker, an \c{RDF} static-library
6747manager, an RDF file dump utility, and a program which will load and
6748execute an RDF executable under Linux.
6749
6750
6751\S{rdflib} Requiring a Library: The \i\c{LIBRARY} Directive
6752
6753\c{RDOFF} contains a mechanism for an object file to demand a given
6754library to be linked to the module, either at load time or run time.
6755This is done by the \c{LIBRARY} directive, which takes one argument
6756which is the name of the module:
6757
6758\c     library  mylib.rdl
6759
6760
6761\S{rdfmod} Specifying a Module Name: The \i\c{MODULE} Directive
6762
A special \c{RDOFF} header record is used to store the name of the module.
It can be used, for example, by a run-time loader to perform dynamic
linking. The \c{MODULE} directive takes one argument which is the name
of the current module:
6767
6768\c     module  mymodname
6769
Note that when you statically link modules and tell the linker to strip
the symbols from the output file, all module names will be stripped too.
To avoid this, you should start module names with \I{$, prefix}\c{$}, like:
6773
6774\c     module  $kernel.core
6775
6776
6777\S{rdfglob} \c{rdf} Extensions to the \c{GLOBAL} Directive\I{GLOBAL,
6778rdf extensions to}
6779
6780\c{RDOFF} global symbols can contain additional information needed by
6781the static linker. You can mark a global symbol as exported, thus
telling the linker not to strip it from the target executable or library
6783file. Like in \c{ELF}, you can also specify whether an exported symbol
6784is a procedure (function) or data object.
6785
Suffixing the name with a colon and the word \i\c{export} makes the
symbol exported:
6788
6789\c     global  sys_open:export
6790
To specify that an exported symbol is a procedure (function), add the
word \i\c{proc} or \i\c{function} after the declaration:
6793
6794\c     global  sys_open:export proc
6795
Similarly, to specify an exported data object, add the word \i\c{data}
6797or \i\c{object} to the directive:
6798
6799\c     global  kernel_ticks:export data
6800
6801
6802\S{rdfimpt} \c{rdf} Extensions to the \c{EXTERN} Directive\I{EXTERN,
6803rdf extensions to}
6804
6805By default the \c{EXTERN} directive in \c{RDOFF} declares a "pure external"
6806symbol (i.e. the static linker will complain if such a symbol is not resolved).
6807To declare an "imported" symbol, which must be resolved later during a dynamic
6808linking phase, \c{RDOFF} offers an additional \c{import} modifier. As in
6809\c{GLOBAL}, you can also specify whether an imported symbol is a procedure
6810(function) or data object. For example:
6811
6812\c     library $libc
6813\c     extern  _open:import
6814\c     extern  _printf:import proc
6815\c     extern  _errno:import data
6816
6817Here the directive \c{LIBRARY} is also included, which gives the dynamic linker
6818a hint as to where to find requested symbols.
6819
6820
6821\H{dbgfmt} \i\c{dbg}: Debugging Format
6822
6823The \c{dbg} format does not output an object file as such; instead,
6824it outputs a text file which contains a complete list of all the
6825transactions between the main body of NASM and the output-format
6826back end module. It is primarily intended to aid people who want to
6827write their own output drivers, so that they can get a clearer idea
6828of the various requests the main program makes of the output driver,
6829and in what order they happen.
6830
6831For simple files, one can easily use the \c{dbg} format like this:
6832
6833\c nasm -f dbg filename.asm
6834
6835which will generate a diagnostic file called \c{filename.dbg}.
6836However, this will not work well on files which were designed for a
6837different object format, because each object format defines its own
6838macros (usually user-level forms of directives), and those macros
6839will not be defined in the \c{dbg} format. Therefore it can be
6840useful to run NASM twice, in order to do the preprocessing with the
6841native object format selected:
6842
6843\c nasm -e -f rdf -o rdfprog.i rdfprog.asm
6844\c nasm -a -f dbg rdfprog.i
6845
6846This preprocesses \c{rdfprog.asm} into \c{rdfprog.i}, keeping the
6847\c{rdf} object format selected in order to make sure RDF special
6848directives are converted into primitive form correctly. Then the
6849preprocessed source is fed through the \c{dbg} format to generate
6850the final diagnostic output.
6851
6852This workaround will still typically not work for programs intended
6853for \c{obj} format, because the \c{obj} \c{SEGMENT} and \c{GROUP}
6854directives have side effects of defining the segment and group names
6855as symbols; \c{dbg} will not do this, so the program will not
6856assemble. You will have to work around that by defining the symbols
6857yourself (using \c{EXTERN}, for example) if you really need to get a
6858\c{dbg} trace of an \c{obj}-specific source file.
6859
6860\c{dbg} accepts any section name and any directives at all, and logs
6861them all to its output file.
6862
6863\c{dbg} accepts and logs any \c{%pragma}, but the specific
6864\c{%pragma}:
6865
6866\c      %pragma dbg maxdump <size>
6867
6868where \c{<size>} is either a number or \c{unlimited}, can be used to
6869control the maximum size for dumping the full contents of a
6870\c{rawdata} output object.
6871
6872
6873\C{16bit} Writing 16-bit Code (DOS, Windows 3/3.1)
6874
6875This chapter attempts to cover some of the common issues encountered
6876when writing 16-bit code to run under \c{MS-DOS} or \c{Windows 3.x}. It
6877covers how to link programs to produce \c{.EXE} or \c{.COM} files,
6878how to write \c{.SYS} device drivers, and how to interface assembly
6879language code with 16-bit C compilers and with Borland Pascal.
6880
6881
6882\H{exefiles} Producing \i\c{.EXE} Files
6883
6884Any large program written under DOS needs to be built as a \c{.EXE}
6885file: only \c{.EXE} files have the internal structure required to
6886span more than one 64K segment. \i{Windows} programs,
6887also, have to be built as \c{.EXE} files, since Windows does not
6888support the \c{.COM} format.
6889
6890In general, you generate \c{.EXE} files by using the \c{obj} output
6891format to produce one or more \i\c{.obj} files, and then linking
6892them together using a linker. However, NASM also supports the direct
6893generation of simple DOS \c{.EXE} files using the \c{bin} output
6894format (by using \c{DB} and \c{DW} to construct the \c{.EXE} file
6895header), and a macro package is supplied to do this. Thanks to
6896Yann Guidon for contributing the code for this.
6897
6898NASM may also support \c{.EXE} natively as another output format in
6899future releases.
6900
6901
6902\S{objexe} Using the \c{obj} Format To Generate \c{.EXE} Files
6903
6904This section describes the usual method of generating \c{.EXE} files
6905by linking \c{.OBJ} files together.
6906
6907Most 16-bit programming language packages come with a suitable
6908linker; if you have none of these, there is a free linker called
6909\i{VAL}\I{linker, free}, available in \c{LZH} archive format from
6910\W{ftp://x2ftp.oulu.fi/pub/msdos/programming/lang/}\i\c{x2ftp.oulu.fi}.
6911An LZH archiver can be found at
6912\W{ftp://ftp.simtel.net/pub/simtelnet/msdos/arcers}\i\c{ftp.simtel.net}.
6913There is another `free' linker (though this one doesn't come with
6914sources) called \i{FREELINK}, available from
6915\W{http://www.pcorner.com/tpc/old/3-101.html}\i\c{www.pcorner.com}.
6916A third, \i\c{djlink}, written by DJ Delorie, is available at
6917\W{http://www.delorie.com/djgpp/16bit/djlink/}\i\c{www.delorie.com}.
6918A fourth linker, \i\c{ALINK}, written by Anthony A.J. Williams, is
6919available at \W{http://alink.sourceforge.net}\i\c{alink.sourceforge.net}.
6920
6921When linking several \c{.OBJ} files into a \c{.EXE} file, you should
6922ensure that exactly one of them has a start point defined (using the
6923\I{program entry point}\i\c{..start} special symbol defined by the
6924\c{obj} format: see \k{dotdotstart}). If no module defines a start
6925point, the linker will not know what value to give the entry-point
6926field in the output file header; if more than one defines a start
6927point, the linker will not know \e{which} value to use.
6928
6929An example of a NASM source file which can be assembled to a
6930\c{.OBJ} file and linked on its own to a \c{.EXE} is given here. It
6931demonstrates the basic principles of defining a stack, initializing
6932the segment registers, and declaring a start point. This file is
6933also provided in the \I{test subdirectory}\c{test} subdirectory of
6934the NASM archives, under the name \c{objexe.asm}.
6935
6936\c segment code
6937\c
6938\c ..start:
6939\c         mov     ax,data
6940\c         mov     ds,ax
6941\c         mov     ax,stack
6942\c         mov     ss,ax
6943\c         mov     sp,stacktop
6944
6945This initial piece of code sets up \c{DS} to point to the data
6946segment, and initializes \c{SS} and \c{SP} to point to the top of
6947the provided stack. Notice that interrupts are implicitly disabled
6948for one instruction after a move into \c{SS}, precisely for this
6949situation, so that there's no chance of an interrupt occurring
6950between the loads of \c{SS} and \c{SP} and not having a stack to
6951execute on.
6952
6953Note also that the special symbol \c{..start} is defined at the
6954beginning of this code, which means that this will be the entry point
6955into the resulting executable file.
6956
6957\c         mov     dx,hello
6958\c         mov     ah,9
6959\c         int     0x21
6960
6961The above is the main program: load \c{DS:DX} with a pointer to the
6962greeting message (\c{hello} is implicitly relative to the segment
6963\c{data}, which was loaded into \c{DS} in the setup code, so the
6964full pointer is valid), and call the DOS print-string function.
6965
6966\c         mov     ax,0x4c00
6967\c         int     0x21
6968
6969This terminates the program using another DOS system call.
6970
6971\c segment data
6972\c
6973\c hello:  db      'hello, world', 13, 10, '$'
6974
6975The data segment contains the string we want to display.
6976
6977\c segment stack stack
6978\c         resb 64
6979\c stacktop:
6980
6981The above code declares a stack segment containing 64 bytes of
6982uninitialized stack space, and points \c{stacktop} at the top of it.
6983The directive \c{segment stack stack} defines a segment \e{called}
6984\c{stack}, and also of \e{type} \c{STACK}. The latter is not
6985necessary to the correct running of the program, but linkers are
6986likely to issue warnings or errors if your program has no segment of
6987type \c{STACK}.
6988
6989The above file, when assembled into a \c{.OBJ} file, will link on
6990its own to a valid \c{.EXE} file, which when run will print `hello,
6991world' and then exit.
6992
6993
6994\S{binexe} Using the \c{bin} Format To Generate \c{.EXE} Files
6995
6996The \c{.EXE} file format is simple enough that it's possible to
6997build a \c{.EXE} file by writing a pure-binary program and sticking
6998a 32-byte header on the front. This header is simple enough that it
6999can be generated using \c{DB} and \c{DW} commands by NASM itself, so
7000that you can use the \c{bin} output format to directly generate
7001\c{.EXE} files.
7002
7003Included in the NASM archives, in the \I{misc subdirectory}\c{misc}
7004subdirectory, is a file \i\c{exebin.mac} of macros. It defines three
7005macros: \i\c{EXE_begin}, \i\c{EXE_stack} and \i\c{EXE_end}.
7006
7007To produce a \c{.EXE} file using this method, you should start by
7008using \c{%include} to load the \c{exebin.mac} macro package into
7009your source file. You should then issue the \c{EXE_begin} macro call
7010(which takes no arguments) to generate the file header data. Then
7011write code as normal for the \c{bin} format - you can use all three
7012standard sections \c{.text}, \c{.data} and \c{.bss}. At the end of
7013the file you should call the \c{EXE_end} macro (again, no arguments),
7014which defines some symbols to mark section sizes, and these symbols
7015are referred to in the header code generated by \c{EXE_begin}.
7016
7017In this model, the code you end up writing starts at \c{0x100}, just
7018like a \c{.COM} file - in fact, if you strip off the 32-byte header
7019from the resulting \c{.EXE} file, you will have a valid \c{.COM}
7020program. All the segment bases are the same, so you are limited to a
702164K program, again just like a \c{.COM} file. Note that an \c{ORG}
7022directive is issued by the \c{EXE_begin} macro, so you should not
7023explicitly issue one of your own.
7024
7025You can't directly refer to your segment base value, unfortunately,
7026since this would require a relocation in the header, and things
7027would get a lot more complicated. So you should get your segment
7028base by copying it out of \c{CS} instead.
7029
7030On entry to your \c{.EXE} file, \c{SS:SP} are already set up to
7031point to the top of a 2KB stack. You can adjust the default stack
7032size of 2KB by calling the \c{EXE_stack} macro. For example, to
7033change the stack size of your program to 64 bytes, you would call
7034\c{EXE_stack 64}.
7035
7036A sample program which generates a \c{.EXE} file in this way is
7037given in the \c{test} subdirectory of the NASM archive, as
7038\c{binexe.asm}.
7039
7040
7041\H{comfiles} Producing \i\c{.COM} Files
7042
7043While large DOS programs must be written as \c{.EXE} files, small
7044ones are often better written as \c{.COM} files. \c{.COM} files are
7045pure binary, and therefore most easily produced using the \c{bin}
7046output format.
7047
7048
7049\S{combinfmt} Using the \c{bin} Format To Generate \c{.COM} Files
7050
7051\c{.COM} files expect to be loaded at offset \c{100h} into their
7052segment (though the segment may change). Execution then begins at
7053\I\c{ORG}\c{100h}, i.e. right at the start of the program. So to
7054write a \c{.COM} program, you would create a source file looking
7055like
7056
7057\c         org 100h
7058\c
7059\c section .text
7060\c
7061\c start:
7062\c         ; put your code here
7063\c
7064\c section .data
7065\c
7066\c         ; put data items here
7067\c
7068\c section .bss
7069\c
7070\c         ; put uninitialized data here
7071
7072The \c{bin} format puts the \c{.text} section first in the file, so
7073you can declare data or BSS items before beginning to write code if
7074you want to and the code will still end up at the front of the file
7075where it belongs.
7076
7077The BSS (uninitialized data) section does not take up space in the
7078\c{.COM} file itself: instead, addresses of BSS items are resolved
7079to point at space beyond the end of the file, on the grounds that
7080this will be free memory when the program is run. Therefore you
7081should not rely on your BSS being initialized to all zeros when you
7082run.
7083
7084To assemble the above program, you should use a command line like
7085
7086\c nasm myprog.asm -fbin -o myprog.com
7087
7088The \c{bin} format would produce a file called \c{myprog} if no
7089explicit output file name were specified, so you have to override it
7090and give the desired file name.
7091
7092
7093\S{comobjfmt} Using the \c{obj} Format To Generate \c{.COM} Files
7094
7095If you are writing a \c{.COM} program as more than one module, you
7096may wish to assemble several \c{.OBJ} files and link them together
7097into a \c{.COM} program. You can do this, provided you have a linker
7098capable of outputting \c{.COM} files directly (\i{TLINK} does this),
7099or alternatively a converter program such as \i\c{EXE2BIN} to
7100transform the \c{.EXE} file output from the linker into a \c{.COM}
7101file.
7102
7103If you do this, you need to take care of several things:
7104
7105\b The first object file containing code should start its code
7106segment with a line like \c{RESB 100h}. This is to ensure that the
7107code begins at offset \c{100h} relative to the beginning of the code
7108segment, so that the linker or converter program does not have to
7109adjust address references within the file when generating the
7110\c{.COM} file. Other assemblers use an \i\c{ORG} directive for this
7111purpose, but \c{ORG} in NASM is a format-specific directive to the
7112\c{bin} output format, and does not mean the same thing as it does
7113in MASM-compatible assemblers.
7114
7115\b You don't need to define a stack segment.
7116
7117\b All your segments should be in the same group, so that every time
7118your code or data references a symbol offset, all offsets are
7119relative to the same segment base. This is because, when a \c{.COM}
7120file is loaded, all the segment registers contain the same value.
7121
7122
7123\H{sysfiles} Producing \i\c{.SYS} Files
7124
7125\i{MS-DOS device drivers} - \c{.SYS} files - are pure binary files,
7126similar to \c{.COM} files, except that they start at origin zero
7127rather than \c{100h}. Therefore, if you are writing a device driver
7128using the \c{bin} format, you do not need the \c{ORG} directive,
7129since the default origin for \c{bin} is zero. Similarly, if you are
7130using \c{obj}, you do not need the \c{RESB 100h} at the start of
7131your code segment.
7132
7133\c{.SYS} files start with a header structure, containing pointers to
7134the various routines inside the driver which do the work. This
7135structure should be defined at the start of the code segment, even
7136though it is not actually code.
7137
7138For more information on the format of \c{.SYS} files, and the data
7139which has to go in the header structure, a list of books is given in
7140the Frequently Asked Questions list for the newsgroup
7141\W{news:comp.os.msdos.programmer}\i\c{comp.os.msdos.programmer}.
7142
7143
7144\H{16c} Interfacing to 16-bit C Programs
7145
7146This section covers the basics of writing assembly routines that
7147call, or are called from, C programs. To do this, you would
7148typically write an assembly module as a \c{.OBJ} file, and link it
7149with your C modules to produce a \i{mixed-language program}.
7150
7151
7152\S{16cunder} External Symbol Names
7153
7154\I{C symbol names}\I{underscore, in C symbols}C compilers have the
7155convention that the names of all global symbols (functions or data)
7156they define are formed by prefixing an underscore to the name as it
7157appears in the C program. So, for example, the function a C
7158programmer thinks of as \c{printf} appears to an assembly language
7159programmer as \c{_printf}. This means that in your assembly
7160programs, you can define symbols without a leading underscore, and
7161not have to worry about name clashes with C symbols.
7162
7163If you find the underscores inconvenient, you can define macros to
7164replace the \c{GLOBAL} and \c{EXTERN} directives as follows:
7165
7166\c %macro  cglobal 1
7167\c
7168\c   global  _%1
7169\c   %define %1 _%1
7170\c
7171\c %endmacro
7172\c
7173\c %macro  cextern 1
7174\c
7175\c   extern  _%1
7176\c   %define %1 _%1
7177\c
7178\c %endmacro
7179
7180(These forms of the macros only take one argument at a time; a
7181\c{%rep} construct could solve this.)
7182
7183If you then declare an external like this:
7184
7185\c cextern printf
7186
7187then the macro will expand it as
7188
7189\c extern  _printf
7190\c %define printf _printf
7191
7192Thereafter, you can reference \c{printf} as if it were a symbol, and
7193the preprocessor will put the leading underscore on where necessary.
7194
7195The \c{cglobal} macro works similarly. You must use \c{cglobal}
7196before defining the symbol in question, but you would have had to do
7197that anyway if you used \c{GLOBAL}.
7198
7199Also see \k{opt-pfix}.
7200
7201\S{16cmodels} \i{Memory Models}
7202
7203NASM contains no mechanism to support the various C memory models
7204directly; you have to keep track yourself of which one you are
7205writing for. This means you have to keep track of the following
7206things:
7207
7208\b In models using a single code segment (tiny, small and compact),
7209functions are near. This means that function pointers, when stored
7210in data segments or pushed on the stack as function arguments, are
721116 bits long and contain only an offset field (the \c{CS} register
7212never changes its value, and always gives the segment part of the
7213full function address), and that functions are called using ordinary
7214near \c{CALL} instructions and return using \c{RETN} (which, in
7215NASM, is synonymous with \c{RET} anyway). This means both that you
7216should write your own routines to return with \c{RETN}, and that you
7217should call external C routines with near \c{CALL} instructions.
7218
7219\b In models using more than one code segment (medium, large and
7220huge), functions are far. This means that function pointers are 32
7221bits long (consisting of a 16-bit offset followed by a 16-bit
7222segment), and that functions are called using \c{CALL FAR} (or
7223\c{CALL seg:offset}) and return using \c{RETF}. Again, you should
7224therefore write your own routines to return with \c{RETF} and use
7225\c{CALL FAR} to call external routines.
7226
7227\b In models using a single data segment (tiny, small and medium),
7228data pointers are 16 bits long, containing only an offset field (the
7229\c{DS} register doesn't change its value, and always gives the
7230segment part of the full data item address).
7231
7232\b In models using more than one data segment (compact, large and
7233huge), data pointers are 32 bits long, consisting of a 16-bit offset
7234followed by a 16-bit segment. You should still be careful not to
7235modify \c{DS} in your routines without restoring it afterwards, but
7236\c{ES} is free for you to use to access the contents of 32-bit data
7237pointers you are passed.
7238
7239\b The huge memory model allows single data items to exceed 64K in
7240size. In all other memory models, you can access the whole of a data
7241item just by doing arithmetic on the offset field of the pointer you
7242are given, whether a segment field is present or not; in huge model,
7243you have to be more careful of your pointer arithmetic.
7244
7245\b In most memory models, there is a \e{default} data segment, whose
7246segment address is kept in \c{DS} throughout the program. This data
7247segment is typically the same segment as the stack, kept in \c{SS},
7248so that functions' local variables (which are stored on the stack)
7249and global data items can both be accessed easily without changing
7250\c{DS}. Particularly large data items are typically stored in other
7251segments. However, some memory models (though not the standard
7252ones, usually) allow the assumption that \c{SS} and \c{DS} hold the
7253same value to be removed. Be careful about functions' local
7254variables in this latter case.
7255
7256In models with a single code segment, the segment is called
7257\i\c{_TEXT}, so your code segment must also go by this name in order
7258to be linked into the same place as the main code segment. In models
7259with a single data segment, or with a default data segment, it is
7260called \i\c{_DATA}.
7261
7262
7263\S{16cfunc} Function Definitions and Function Calls
7264
7265\I{functions, C calling convention}The \i{C calling convention} in
726616-bit programs is as follows. In the following description, the
7267words \e{caller} and \e{callee} are used to denote the function
7268doing the calling and the function which gets called.
7269
7270\b The caller pushes the function's parameters on the stack, one
7271after another, in reverse order (right to left, so that the first
7272argument specified to the function is pushed last).
7273
7274\b The caller then executes a \c{CALL} instruction to pass control
7275to the callee. This \c{CALL} is either near or far depending on the
7276memory model.
7277
7278\b The callee receives control, and typically (although this is not
7279actually necessary, in functions which do not need to access their
7280parameters) starts by saving the value of \c{SP} in \c{BP} so as to
7281be able to use \c{BP} as a base pointer to find its parameters on
7282the stack. However, the caller was probably doing this too, so part
7283of the calling convention states that \c{BP} must be preserved by
7284any C function. Hence the callee, if it is going to set up \c{BP} as
7285a \i\e{frame pointer}, must push the previous value first.
7286
7287\b The callee may then access its parameters relative to \c{BP}.
7288The word at \c{[BP]} holds the previous value of \c{BP} as it was
7289pushed; the next word, at \c{[BP+2]}, holds the offset part of the
7290return address, pushed implicitly by \c{CALL}. In a small-model
7291(near) function, the parameters start after that, at \c{[BP+4]}; in
7292a large-model (far) function, the segment part of the return address
7293lives at \c{[BP+4]}, and the parameters begin at \c{[BP+6]}. The
7294leftmost parameter of the function, since it was pushed last, is
7295accessible at this offset from \c{BP}; the others follow, at
7296successively greater offsets. Thus, in a function such as \c{printf}
7297which takes a variable number of parameters, the pushing of the
7298parameters in reverse order means that the function knows where to
7299find its first parameter, which tells it the number and type of the
7300remaining ones.
7301
7302\b The callee may also wish to decrease \c{SP} further, so as to
7303allocate space on the stack for local variables, which will then be
7304accessible at negative offsets from \c{BP}.
7305
7306\b The callee, if it wishes to return a value to the caller, should
7307leave the value in \c{AL}, \c{AX} or \c{DX:AX} depending on the size
7308of the value. Floating-point results are sometimes (depending on the
7309compiler) returned in \c{ST0}.
7310
7311\b Once the callee has finished processing, it restores \c{SP} from
7312\c{BP} if it had allocated local stack space, then pops the previous
7313value of \c{BP}, and returns via \c{RETN} or \c{RETF} depending on
7314memory model.
7315
7316\b When the caller regains control from the callee, the function
7317parameters are still on the stack, so it typically adds an immediate
7318constant to \c{SP} to remove them (instead of executing a number of
7319slow \c{POP} instructions). Thus, if a function is accidentally
7320called with the wrong number of parameters due to a prototype
7321mismatch, the stack will still be returned to a sensible state since
7322the caller, which \e{knows} how many parameters it pushed, does the
7323removing.
7324
7325It is instructive to compare this calling convention with that for
7326Pascal programs (described in \k{16bpfunc}). Pascal has a simpler
7327convention, since no functions have variable numbers of parameters.
7328Therefore the callee knows how many parameters it should have been
7329passed, and is able to deallocate them from the stack itself by
7330passing an immediate argument to the \c{RET} or \c{RETF}
7331instruction, so the caller does not have to do it. Also, the
7332parameters are pushed in left-to-right order, not right-to-left,
7333which means that a compiler can give better guarantees about
7334sequence points without performance suffering.
7335
7336Thus, you would define a function in C style in the following way.
7337The following example is for small model:
7338
7339\c global  _myfunc
7340\c
7341\c _myfunc:
7342\c         push    bp
7343\c         mov     bp,sp
7344\c         sub     sp,0x40         ; 64 bytes of local stack space
7345\c         mov     bx,[bp+4]       ; first parameter to function
7346\c
7347\c         ; some more code
7348\c
7349\c         mov     sp,bp           ; undo "sub sp,0x40" above
7350\c         pop     bp
7351\c         ret
7352
7353For a large-model function, you would replace \c{RET} by \c{RETF},
7354and look for the first parameter at \c{[BP+6]} instead of
7355\c{[BP+4]}. Of course, if one of the parameters is a pointer, then
7356the offsets of \e{subsequent} parameters will change depending on
7357the memory model as well: far pointers take up four bytes on the
7358stack when passed as a parameter, whereas near pointers take up two.
7359
7360At the other end of the process, to call a C function from your
7361assembly code, you would do something like this:
7362
7363\c extern  _printf
7364\c
7365\c       ; and then, further down...
7366\c
7367\c       push    word [myint]        ; one of my integer variables
7368\c       push    word mystring       ; pointer into my data segment
7369\c       call    _printf
7370\c       add     sp,byte 4           ; `byte' saves space
7371\c
7372\c       ; then those data items...
7373\c
7374\c segment _DATA
7375\c
7376\c myint         dw    1234
7377\c mystring      db    'This number -> %d <- should be 1234',10,0
7378
7379This piece of code is the small-model assembly equivalent of the C
7380code
7381
7382\c     int myint = 1234;
7383\c     printf("This number -> %d <- should be 1234\n", myint);
7384
7385In large model, the function-call code might look more like this. In
7386this example, it is assumed that \c{DS} already holds the segment
7387base of the segment \c{_DATA}. If not, you would have to initialize
7388it first.
7389
7390\c       push    word [myint]
7391\c       push    word seg mystring   ; Now push the segment, and...
7392\c       push    word mystring       ; ... offset of "mystring"
7393\c       call    far _printf
7394\c       add     sp,byte 6
7395
7396The integer value still takes up one word on the stack, since large
7397model does not affect the size of the \c{int} data type. The first
7398argument (pushed last) to \c{printf}, however, is a data pointer,
7399and therefore has to contain a segment and offset part. The segment
7400should be stored second in memory, and therefore must be pushed
7401first. (Of course, \c{PUSH DS} would have been a shorter instruction
7402than \c{PUSH WORD SEG mystring}, if \c{DS} was set up as the above
7403example assumed.) Then the actual call becomes a far call, since
7404functions expect far calls in large model; and \c{SP} has to be
7405increased by 6 rather than 4 afterwards to make up for the extra
7406word of parameters.
7407
7408
7409\S{16cdata} Accessing Data Items
7410
7411To get at the contents of C variables, or to declare variables which
7412C can access, you need only declare the names as \c{GLOBAL} or
7413\c{EXTERN}. (Again, the names require leading underscores, as stated
7414in \k{16cunder}.) Thus, a C variable declared as \c{int i} can be
7415accessed from assembler as
7416
7417\c extern _i
7418\c
7419\c         mov ax,[_i]
7420
7421And to declare your own integer variable which C programs can access
7422as \c{extern int j}, you do this (making sure you are assembling in
7423the \c{_DATA} segment, if necessary):
7424
7425\c global  _j
7426\c
7427\c _j      dw      0
7428
7429To access a C array, you need to know the size of the components of
7430the array. For example, \c{int} variables are two bytes long, so if
7431a C program declares an array as \c{int a[10]}, you can access
7432\c{a[3]} by coding \c{mov ax,[_a+6]}. (The byte offset 6 is obtained
7433by multiplying the desired array index, 3, by the size of the array
7434element, 2.) The sizes of the C base types in 16-bit compilers are:
74351 for \c{char}, 2 for \c{short} and \c{int}, 4 for \c{long} and
7436\c{float}, and 8 for \c{double}.
7437
7438To access a C \i{data structure}, you need to know the offset from
7439the base of the structure to the field you are interested in. You
7440can either do this by converting the C structure definition into a
7441NASM structure definition (using \i\c{STRUC}), or by calculating the
7442one offset and using just that.
7443
7444To do either of these, you should read your C compiler's manual to
7445find out how it organizes data structures. NASM gives no special
7446alignment to structure members in its own \c{STRUC} macro, so you
7447have to specify alignment yourself if the C compiler generates it.
7448Typically, you might find that a structure like
7449
7450\c struct {
7451\c     char c;
7452\c     int i;
7453\c } foo;
7454
7455might be four bytes long rather than three, since the \c{int} field
7456would be aligned to a two-byte boundary. However, this sort of
7457feature tends to be a configurable option in the C compiler, either
7458using command-line options or \c{#pragma} lines, so you have to find
7459out how your own compiler does it.
7460
7461
7462\S{16cmacro} \i\c{c16.mac}: Helper Macros for the 16-bit C Interface
7463
7464Included in the NASM archives, in the \I{misc subdirectory}\c{misc}
7465directory, is a file \c{c16.mac} of macros. It defines three macros:
7466\i\c{proc}, \i\c{arg} and \i\c{endproc}. These are intended to be
7467used for C-style procedure definitions, and they automate a lot of
7468the work involved in keeping track of the calling convention.
7469
7470(An alternative, TASM-compatible form of \c{arg} is also now built
7471into NASM's preprocessor. See \k{stackrel} for details.)
7472
7473An example of an assembly function using the macro set is given
7474here:
7475
7476\c proc    _nearproc
7477\c
7478\c %$i     arg
7479\c %$j     arg
7480\c         mov     ax,[bp + %$i]
7481\c         mov     bx,[bp + %$j]
7482\c         add     ax,[bx]
7483\c
7484\c endproc
7485
7486This defines \c{_nearproc} to be a procedure taking two arguments,
7487the first (\c{i}) an integer and the second (\c{j}) a pointer to an
7488integer. It returns \c{i + *j}.
7489
7490Note that the \c{arg} macro has an \c{EQU} as the first line of its
7491expansion, and since the label before the macro call gets prepended
7492to the first line of the expanded macro, the \c{EQU} works, defining
7493\c{%$i} to be an offset from \c{BP}. A context-local variable is
7494used, local to the context pushed by the \c{proc} macro and popped
7495by the \c{endproc} macro, so that the same argument name can be used
7496in later procedures. Of course, you don't \e{have} to do that.
7497
7498The macro set produces code for near functions (tiny, small and
7499compact-model code) by default. You can have it generate far
7500functions (medium, large and huge-model code) by means of coding
7501\I\c{FARCODE}\c{%define FARCODE}. This changes the kind of return
7502instruction generated by \c{endproc}, and also changes the starting
7503point for the argument offsets. The macro set contains no intrinsic
7504dependency on whether data pointers are far or not.
7505
7506\c{arg} can take an optional parameter, giving the size of the
7507argument. If no size is given, 2 is assumed, since it is likely that
7508many function parameters will be of type \c{int}.
7509
7510The large-model equivalent of the above function would look like this:
7511
7512\c %define FARCODE
7513\c
7514\c proc    _farproc
7515\c
7516\c %$i     arg
7517\c %$j     arg     4
7518\c         mov     ax,[bp + %$i]
7519\c         mov     bx,[bp + %$j]
7520\c         mov     es,[bp + %$j + 2]
7521\c         add     ax,[es:bx]
7522\c
7523\c endproc
7524
7525This makes use of the argument to the \c{arg} macro to define a
7526parameter of size 4, because \c{j} is now a far pointer. When we
7527load from \c{j}, we must load a segment and an offset.
7528
7529
7530\H{16bp} Interfacing to \i{Borland Pascal} Programs
7531
7532Interfacing to Borland Pascal programs is similar in concept to
7533interfacing to 16-bit C programs. The differences are:
7534
7535\b The leading underscore required for interfacing to C programs is
7536not required for Pascal.
7537
7538\b The memory model is always large: functions are far, data
7539pointers are far, and no data item can be more than 64K long.
7540(Actually, some functions are near, but only those functions that
7541are local to a Pascal unit and never called from outside it. All
7542assembly functions that Pascal calls, and all Pascal functions that
7543assembly routines are able to call, are far.) However, all static
7544data declared in a Pascal program goes into the default data
7545segment, which is the one whose segment address will be in \c{DS}
7546when control is passed to your assembly code. The only things that
7547do not live in the default data segment are local variables (they
7548live in the stack segment) and dynamically allocated variables. All
7549data \e{pointers}, however, are far.
7550
7551\b The function calling convention is different - described below.
7552
7553\b Some data types, such as strings, are stored differently.
7554
7555\b There are restrictions on the segment names you are allowed to
7556use - Borland Pascal will ignore code or data declared in a segment
7557it doesn't like the name of. The restrictions are described below.
7558
7559
7560\S{16bpfunc} The Pascal Calling Convention
7561
7562\I{functions, Pascal calling convention}\I{Pascal calling
7563convention}The 16-bit Pascal calling convention is as follows. In
7564the following description, the words \e{caller} and \e{callee} are
7565used to denote the function doing the calling and the function which
7566gets called.
7567
7568\b The caller pushes the function's parameters on the stack, one
7569after another, in normal order (left to right, so that the first
7570argument specified to the function is pushed first).
7571
7572\b The caller then executes a far \c{CALL} instruction to pass
7573control to the callee.
7574
7575\b The callee receives control, and typically (although this is not
7576actually necessary, in functions which do not need to access their
7577parameters) starts by saving the value of \c{SP} in \c{BP} so as to
7578be able to use \c{BP} as a base pointer to find its parameters on
7579the stack. However, the caller was probably doing this too, so part
7580of the calling convention states that \c{BP} must be preserved by
7581any function. Hence the callee, if it is going to set up \c{BP} as a
7582\i{frame pointer}, must push the previous value first.
7583
7584\b The callee may then access its parameters relative to \c{BP}.
7585The word at \c{[BP]} holds the previous value of \c{BP} as it was
7586pushed. The next word, at \c{[BP+2]}, holds the offset part of the
7587return address, and the next one at \c{[BP+4]} the segment part. The
7588parameters begin at \c{[BP+6]}. The rightmost parameter of the
7589function, since it was pushed last, is accessible at this offset
7590from \c{BP}; the others follow, at successively greater offsets.
7591
7592\b The callee may also wish to decrease \c{SP} further, so as to
7593allocate space on the stack for local variables, which will then be
7594accessible at negative offsets from \c{BP}.
7595
7596\b The callee, if it wishes to return a value to the caller, should
7597leave the value in \c{AL}, \c{AX} or \c{DX:AX} depending on the size
7598of the value. Floating-point results are returned in \c{ST0}.
7599Results of type \c{Real} (Borland's own custom floating-point data
7600type, not handled directly by the FPU) are returned in \c{DX:BX:AX}.
7601To return a result of type \c{String}, the caller pushes a pointer
7602to a temporary string before pushing the parameters, and the callee
7603places the returned string value at that location. The pointer is
7604not a parameter, and should not be removed from the stack by the
7605\c{RETF} instruction.
7606
7607\b Once the callee has finished processing, it restores \c{SP} from
7608\c{BP} if it had allocated local stack space, then pops the previous
7609value of \c{BP}, and returns via \c{RETF}. It uses the form of
7610\c{RETF} with an immediate parameter, giving the number of bytes
7611taken up by the parameters on the stack. This causes the parameters
7612to be removed from the stack as a side effect of the return
7613instruction.
7614
7615\b When the caller regains control from the callee, the function
7616parameters have already been removed from the stack, so it needs to
7617do nothing further.
7618
7619Thus, you would define a function in Pascal style, taking two
7620\c{Integer}-type parameters, in the following way:
7621
7622\c global  myfunc
7623\c
7624\c myfunc: push    bp
7625\c         mov     bp,sp
7626\c         sub     sp,0x40         ; 64 bytes of local stack space
7627\c         mov     bx,[bp+8]       ; first parameter to function
7628\c         mov     bx,[bp+6]       ; second parameter to function
7629\c
7630\c         ; some more code
7631\c
7632\c         mov     sp,bp           ; undo "sub sp,0x40" above
7633\c         pop     bp
7634\c         retf    4               ; total size of params is 4
7635
7636At the other end of the process, to call a Pascal function from your
7637assembly code, you would do something like this:
7638
7639\c extern  SomeFunc
7640\c
7641\c        ; and then, further down...
7642\c
7643\c        push   word seg mystring   ; Now push the segment, and...
7644\c        push   word mystring       ; ... offset of "mystring"
7645\c        push   word [myint]        ; one of my variables
7646\c        call   far SomeFunc
7647
7648This is equivalent to the Pascal code
7649
7650\c procedure SomeFunc(String: PChar; Int: Integer);
7651\c     SomeFunc(@mystring, myint);
7652
7653
7654\S{16bpseg} Borland Pascal \I{segment names, Borland Pascal}Segment
7655Name Restrictions
7656
7657Since Borland Pascal's internal unit file format is completely
7658different from \c{OBJ}, it only makes a very sketchy job of actually
7659reading and understanding the various information contained in a
7660real \c{OBJ} file when it links that in. Therefore an object file
7661intended to be linked to a Pascal program must obey a number of
7662restrictions:
7663
7664\b Procedures and functions must be in a segment whose name is
7665either \c{CODE}, \c{CSEG}, or something ending in \c{_TEXT}.
7666
7667\b Initialized data must be in a segment whose name is either
7668\c{CONST} or something ending in \c{_DATA}.
7669
7670\b Uninitialized data must be in a segment whose name is either
7671\c{DATA}, \c{DSEG}, or something ending in \c{_BSS}.
7672
7673\b Any other segments in the object file are completely ignored.
7674\c{GROUP} directives and segment attributes are also ignored.
7675
7676
7677\S{16bpmacro} Using \i\c{c16.mac} With Pascal Programs
7678
7679The \c{c16.mac} macro package, described in \k{16cmacro}, can also
7680be used to simplify writing functions to be called from Pascal
7681programs, if you code \I\c{PASCAL}\c{%define PASCAL}. This
7682definition ensures that functions are far (it implies
7683\i\c{FARCODE}), and also causes procedure return instructions to be
7684generated with an operand.
7685
7686Defining \c{PASCAL} does not change the code which calculates the
7687argument offsets; you must declare your function's arguments in
7688reverse order. For example:
7689
7690\c %define PASCAL
7691\c
7692\c proc    _pascalproc
7693\c
7694\c %$j     arg 4
7695\c %$i     arg
7696\c         mov     ax,[bp + %$i]
7697\c         mov     bx,[bp + %$j]
7698\c         mov     es,[bp + %$j + 2]
7699\c         add     ax,[bx]
7700\c
7701\c endproc
7702
7703This defines the same routine, conceptually, as the example in
7704\k{16cmacro}: it defines a function taking two arguments, an integer
7705and a pointer to an integer, which returns the sum of the integer
7706and the contents of the pointer. The only difference between this
7707code and the large-model C version is that \c{PASCAL} is defined
7708instead of \c{FARCODE}, and that the arguments are declared in
7709reverse order.
7710
7711
7712\C{32bit} Writing 32-bit Code (Unix, Win32, DJGPP)
7713
7714This chapter attempts to cover some of the common issues involved
7715when writing 32-bit code, to run under \i{Win32} or Unix, or to be
7716linked with C code generated by a Unix-style C compiler such as
7717\i{DJGPP}. It covers how to write assembly code to interface with
771832-bit C routines, and how to write position-independent code for
7719shared libraries.
7720
7721Almost all 32-bit code, and in particular all code running under
7722\c{Win32}, \c{DJGPP} or any of the PC Unix variants, runs in \I{flat
7723memory model}\e{flat} memory model. This means that the segment registers
7724and paging have already been set up to give you the same 32-bit 4GB
7725address space no matter what segment you work relative to, and that
7726you should ignore all segment registers completely. When writing
7727flat-model application code, you never need to use a segment
7728override or modify any segment register, and the code-section
7729addresses you pass to \c{CALL} and \c{JMP} live in the same address
7730space as the data-section addresses you access your variables by and
7731the stack-section addresses you access local variables and procedure
7732parameters by. Every address is 32 bits long and contains only an
7733offset part.
7734
7735
7736\H{32c} Interfacing to 32-bit C Programs
7737
7738A lot of the discussion in \k{16c}, about interfacing to 16-bit C
7739programs, still applies when working in 32 bits. The absence of
7740memory models or segmentation worries simplifies things a lot.
7741
7742
7743\S{32cunder} External Symbol Names
7744
7745Most 32-bit C compilers share the convention used by 16-bit
7746compilers, that the names of all global symbols (functions or data)
7747they define are formed by prefixing an underscore to the name as it
7748appears in the C program. However, not all of them do: the \c{ELF}
7749specification states that C symbols do \e{not} have a leading
7750underscore on their assembly-language names.
7751
7752The older Linux \c{a.out} C compiler, all \c{Win32} compilers,
7753\c{DJGPP}, and \c{NetBSD} and \c{FreeBSD}, all use the leading
7754underscore; for these compilers, the macros \c{cextern} and
7755\c{cglobal}, as given in \k{16cunder}, will still work. For \c{ELF},
7756though, the leading underscore should not be used.
7757
7758See also \k{opt-pfix}.
7759
7760\S{32cfunc} Function Definitions and Function Calls
7761
7762\I{functions, C calling convention}The \i{C calling convention}
7763in 32-bit programs is as follows. In the following description,
7764the words \e{caller} and \e{callee} are used to denote
7765the function doing the calling and the function which gets called.
7766
7767\b The caller pushes the function's parameters on the stack, one
7768after another, in reverse order (right to left, so that the first
7769argument specified to the function is pushed last).
7770
7771\b The caller then executes a near \c{CALL} instruction to pass
7772control to the callee.
7773
7774\b The callee receives control, and typically (although this is not
7775actually necessary, in functions which do not need to access their
7776parameters) starts by saving the value of \c{ESP} in \c{EBP} so as
7777to be able to use \c{EBP} as a base pointer to find its parameters
7778on the stack. However, the caller was probably doing this too, so
7779part of the calling convention states that \c{EBP} must be preserved
7780by any C function. Hence the callee, if it is going to set up
7781\c{EBP} as a \i{frame pointer}, must push the previous value first.
7782
7783\b The callee may then access its parameters relative to \c{EBP}.
7784The doubleword at \c{[EBP]} holds the previous value of \c{EBP} as
7785it was pushed; the next doubleword, at \c{[EBP+4]}, holds the return
7786address, pushed implicitly by \c{CALL}. The parameters start after
7787that, at \c{[EBP+8]}. The leftmost parameter of the function, since
7788it was pushed last, is accessible at this offset from \c{EBP}; the
7789others follow, at successively greater offsets. Thus, in a function
7790such as \c{printf} which takes a variable number of parameters, the
7791pushing of the parameters in reverse order means that the function
7792knows where to find its first parameter, which tells it the number
7793and type of the remaining ones.
7794
7795\b The callee may also wish to decrease \c{ESP} further, so as to
7796allocate space on the stack for local variables, which will then be
7797accessible at negative offsets from \c{EBP}.
7798
7799\b The callee, if it wishes to return a value to the caller, should
7800leave the value in \c{AL}, \c{AX} or \c{EAX} depending on the size
7801of the value. Floating-point results are typically returned in
7802\c{ST0}.
7803
7804\b Once the callee has finished processing, it restores \c{ESP} from
7805\c{EBP} if it had allocated local stack space, then pops the previous
7806value of \c{EBP}, and returns via \c{RET} (equivalently, \c{RETN}).
7807
7808\b When the caller regains control from the callee, the function
7809parameters are still on the stack, so it typically adds an immediate
7810constant to \c{ESP} to remove them (instead of executing a number of
7811slow \c{POP} instructions). Thus, if a function is accidentally
7812called with the wrong number of parameters due to a prototype
7813mismatch, the stack will still be returned to a sensible state since
7814the caller, which \e{knows} how many parameters it pushed, does the
7815removing.
7816
7817There is an alternative calling convention used by Win32 programs
7818for Windows API calls, and also for functions called \e{by} the
7819Windows API such as window procedures: they follow what Microsoft
7820calls the \c{__stdcall} convention. This is slightly closer to the
7821Pascal convention, in that the callee clears the stack by passing a
7822parameter to the \c{RET} instruction. However, the parameters are
7823still pushed in right-to-left order.
7824
7825Thus, you would define a function in C style in the following way:
7826
7827\c global  _myfunc
7828\c
7829\c _myfunc:
7830\c         push    ebp
7831\c         mov     ebp,esp
7832\c         sub     esp,0x40        ; 64 bytes of local stack space
7833\c         mov     ebx,[ebp+8]     ; first parameter to function
7834\c
7835\c         ; some more code
7836\c
7837\c         leave                   ; mov esp,ebp / pop ebp
7838\c         ret
7839
7840At the other end of the process, to call a C function from your
7841assembly code, you would do something like this:
7842
7843\c extern  _printf
7844\c
7845\c         ; and then, further down...
7846\c
7847\c         push    dword [myint]   ; one of my integer variables
7848\c         push    dword mystring  ; pointer into my data segment
7849\c         call    _printf
7850\c         add     esp,byte 8      ; `byte' saves space
7851\c
7852\c         ; then those data items...
7853\c
7854\c segment _DATA
7855\c
7856\c myint       dd   1234
7857\c mystring    db   'This number -> %d <- should be 1234',10,0
7858
7859This piece of code is the assembly equivalent of the C code
7860
7861\c     int myint = 1234;
7862\c     printf("This number -> %d <- should be 1234\n", myint);
7863
7864
7865\S{32cdata} Accessing Data Items
7866
7867To get at the contents of C variables, or to declare variables which
7868C can access, you need only declare the names as \c{GLOBAL} or
7869\c{EXTERN}. (Again, the names require leading underscores, as stated
7870in \k{32cunder}.) Thus, a C variable declared as \c{int i} can be
7871accessed from assembler as
7872
7873\c           extern _i
7874\c           mov eax,[_i]
7875
7876And to declare your own integer variable which C programs can access
7877as \c{extern int j}, you do this (making sure you are assembling in
7878the \c{_DATA} segment, if necessary):
7879
7880\c           global _j
7881\c _j        dd 0
7882
7883To access a C array, you need to know the size of the components of
7884the array. For example, \c{int} variables are four bytes long, so if
7885a C program declares an array as \c{int a[10]}, you can access
7886\c{a[3]} by coding \c{mov eax,[_a+12]}. (The byte offset 12 is obtained
7887by multiplying the desired array index, 3, by the size of the array
7888element, 4.) The sizes of the C base types in 32-bit compilers are:
78891 for \c{char}, 2 for \c{short}, 4 for \c{int}, \c{long} and
7890\c{float}, and 8 for \c{double}. Pointers, being 32-bit addresses,
7891are also 4 bytes long.
7892
7893To access a C \i{data structure}, you need to know the offset from
7894the base of the structure to the field you are interested in. You
7895can either do this by converting the C structure definition into a
7896NASM structure definition (using \c{STRUC}), or by calculating the
7897one offset and using just that.
7898
7899To do either of these, you should read your C compiler's manual to
7900find out how it organizes data structures. NASM gives no special
7901alignment to structure members in its own \i\c{STRUC} macro, so you
7902have to specify alignment yourself if the C compiler generates it.
7903Typically, you might find that a structure like
7904
7905\c struct {
7906\c     char c;
7907\c     int i;
7908\c } foo;
7909
7910might be eight bytes long rather than five, since the \c{int} field
7911would be aligned to a four-byte boundary. However, this sort of
7912feature is sometimes a configurable option in the C compiler, either
7913using command-line options or \c{#pragma} lines, so you have to find
7914out how your own compiler does it.
7915
7916
7917\S{32cmacro} \i\c{c32.mac}: Helper Macros for the 32-bit C Interface
7918
7919Included in the NASM archives, in the \I{misc directory}\c{misc}
7920directory, is a file \c{c32.mac} of macros. It defines three macros:
7921\i\c{proc}, \i\c{arg} and \i\c{endproc}. These are intended to be
7922used for C-style procedure definitions, and they automate a lot of
7923the work involved in keeping track of the calling convention.
7924
7925An example of an assembly function using the macro set is given
7926here:
7927
7928\c proc    _proc32
7929\c
7930\c %$i     arg
7931\c %$j     arg
7932\c         mov     eax,[ebp + %$i]
7933\c         mov     ebx,[ebp + %$j]
7934\c         add     eax,[ebx]
7935\c
7936\c endproc
7937
7938This defines \c{_proc32} to be a procedure taking two arguments, the
7939first (\c{i}) an integer and the second (\c{j}) a pointer to an
7940integer. It returns \c{i + *j}.
7941
7942Note that the \c{arg} macro has an \c{EQU} as the first line of its
7943expansion, and since the label before the macro call gets prepended
7944to the first line of the expanded macro, the \c{EQU} works, defining
7945\c{%$i} to be an offset from \c{EBP}. A context-local variable is
7946used, local to the context pushed by the \c{proc} macro and popped
7947by the \c{endproc} macro, so that the same argument name can be used
7948in later procedures. Of course, you don't \e{have} to do that.
7949
7950\c{arg} can take an optional parameter, giving the size of the
7951argument. If no size is given, 4 is assumed, since it is likely that
7952many function parameters will be of type \c{int} or pointers.
7953
7954
7955\H{picdll} Writing NetBSD/FreeBSD/OpenBSD and Linux/ELF \i{Shared
7956Libraries}
7957
7958\c{ELF} replaced the older \c{a.out} object file format under Linux
7959because it contains support for \i{position-independent code}
7960(\i{PIC}), which makes writing shared libraries much easier. NASM
7961supports the \c{ELF} position-independent code features, so you can
7962write Linux \c{ELF} shared libraries in NASM.
7963
7964\i{NetBSD}, and its close cousins \i{FreeBSD} and \i{OpenBSD}, take
7965a different approach by hacking PIC support into the \c{a.out}
7966format. NASM supports this as the \i\c{aoutb} output format, so you
7967can write \i{BSD} shared libraries in NASM too.
7968
7969The operating system loads a PIC shared library by memory-mapping
7970the library file at an arbitrarily chosen point in the address space
7971of the running process. The contents of the library's code section
7972must therefore not depend on where it is loaded in memory.
7973
7974Therefore, you cannot get at your variables by writing code like
7975this:
7976
7977\c         mov     eax,[myvar]             ; WRONG
7978
7979Instead, the linker provides an area of memory called the
7980\i\e{global offset table}, or \i{GOT}; the GOT is situated at a
7981constant distance from your library's code, so if you can find out
7982where your library is loaded (which is typically done using a
7983\c{CALL} and \c{POP} combination), you can obtain the address of the
7984GOT, and you can then load the addresses of your variables out of
7985linker-generated entries in the GOT.
7986
7987The \e{data} section of a PIC shared library does not have these
7988restrictions: since the data section is writable, it has to be
7989copied into memory anyway rather than just paged in from the library
7990file, so as long as it's being copied it can be relocated too. So
7991you can put ordinary types of relocation in the data section without
7992too much worry (but see \k{picglobal} for a caveat).
7993
7994
7995\S{picgot} Obtaining the Address of the GOT
7996
7997Each code module in your shared library should define the GOT as an
7998external symbol:
7999
8000\c extern  _GLOBAL_OFFSET_TABLE_   ; in ELF
8001\c extern  __GLOBAL_OFFSET_TABLE_  ; in BSD a.out
8002
8003At the beginning of any function in your shared library which plans
8004to access your data or BSS sections, you must first calculate the
8005address of the GOT. This is typically done by writing the function
8006in this form:
8007
8008\c func:   push    ebp
8009\c         mov     ebp,esp
8010\c         push    ebx
8011\c         call    .get_GOT
8012\c .get_GOT:
8013\c         pop     ebx
8014\c         add     ebx,_GLOBAL_OFFSET_TABLE_+$$-.get_GOT wrt ..gotpc
8015\c
8016\c         ; the function body comes here
8017\c
8018\c         mov     ebx,[ebp-4]
8019\c         mov     esp,ebp
8020\c         pop     ebp
8021\c         ret
8022
8023(For BSD, again, the symbol \c{_GLOBAL_OFFSET_TABLE_} requires a
8024second leading underscore.)
8025
8026The first two lines of this function are simply the standard C
8027prologue to set up a stack frame, and the last three lines are
8028standard C function epilogue. The third line, and the fourth to last
8029line, save and restore the \c{EBX} register, because PIC shared
8030libraries use this register to store the address of the GOT.
8031
8032The interesting bit is the \c{CALL} instruction and the following
8033two lines. The \c{CALL} and \c{POP} combination obtains the address
8034of the label \c{.get_GOT}, without having to know in advance where
8035the program was loaded (since the \c{CALL} instruction is encoded
8036relative to the current position). The \c{ADD} instruction makes use
8037of one of the special PIC relocation types: \i{GOTPC relocation}.
8038With the \i\c{WRT ..gotpc} qualifier specified, the symbol
8039referenced (here \c{_GLOBAL_OFFSET_TABLE_}, the special symbol
8040assigned to the GOT) is given as an offset from the beginning of the
8041section. (Actually, \c{ELF} encodes it as the offset from the operand
8042field of the \c{ADD} instruction, but NASM simplifies this
8043deliberately, so you do things the same way for both \c{ELF} and
8044\c{BSD}.) So the instruction then \e{adds} the beginning of the section,
8045to get the real address of the GOT, and subtracts the value of
8046\c{.get_GOT} which it knows is in \c{EBX}. Therefore, by the time
8047that instruction has finished, \c{EBX} contains the address of the GOT.
8048
8049If you didn't follow that, don't worry: it's never necessary to
8050obtain the address of the GOT by any other means, so you can put
8051those three instructions into a macro and safely ignore them:
8052
8053\c %macro  get_GOT 0
8054\c
8055\c         call    %%getgot
8056\c   %%getgot:
8057\c         pop     ebx
8058\c         add     ebx,_GLOBAL_OFFSET_TABLE_+$$-%%getgot wrt ..gotpc
8059\c
8060\c %endmacro
8061
8062\S{piclocal} Finding Your Local Data Items
8063
8064Having got the GOT, you can then use it to obtain the addresses of
8065your data items. Most variables will reside in the sections you have
8066declared; they can be accessed using the \I{GOTOFF
8067relocation}\c{..gotoff} special \I\c{WRT ..gotoff}\c{WRT} type. The
8068way this works is like this:
8069
8070\c         lea     eax,[ebx+myvar wrt ..gotoff]
8071
8072The expression \c{myvar wrt ..gotoff} is calculated, when the shared
8073library is linked, to be the offset to the local variable \c{myvar}
8074from the beginning of the GOT. Therefore, adding it to \c{EBX} as
8075above will place the real address of \c{myvar} in \c{EAX}.
8076
8077If you declare variables as \c{GLOBAL} without specifying a size for
8078them, they are shared between code modules in the library, but do
8079not get exported from the library to the program that loaded it.
8080They will still be in your ordinary data and BSS sections, so you
8081can access them in the same way as local variables, using the above
8082\c{..gotoff} mechanism.
8083
8084Note that due to a peculiarity of the way BSD \c{a.out} format
8085handles this relocation type, there must be at least one non-local
8086symbol in the same section as the address you're trying to access.
8087
8088
8089\S{picextern} Finding External and Common Data Items
8090
8091If your library needs to get at an external variable (external to
8092the \e{library}, not just to one of the modules within it), you must
8093use the \I{GOT relocations}\I\c{WRT ..got}\c{..got} type to get at
8094it. The \c{..got} type, instead of giving you the offset from the
8095GOT base to the variable, gives you the offset from the GOT base to
8096a GOT \e{entry} containing the address of the variable. The linker
8097will set up this GOT entry when it builds the library, and the
8098dynamic linker will place the correct address in it at load time. So
8099to obtain the address of an external variable \c{extvar} in \c{EAX},
8100you would code
8101
8102\c         mov     eax,[ebx+extvar wrt ..got]
8103
8104This loads the address of \c{extvar} out of an entry in the GOT. The
8105linker, when it builds the shared library, collects together every
8106relocation of type \c{..got}, and builds the GOT so as to ensure it
8107has every necessary entry present.
8108
8109Common variables must also be accessed in this way.
8110
8111
8112\S{picglobal} Exporting Symbols to the Library User
8113
8114If you want to export symbols to the user of the library, you have
8115to declare whether they are functions or data, and if they are data,
8116you have to give the size of the data item. This is because the
8117dynamic linker has to build \I{PLT}\i{procedure linkage table}
8118entries for any exported functions, and also moves exported data
8119items away from the library's data section in which they were
8120declared.
8121
8122So to export a function to users of the library, you must use
8123
8124\c global  func:function           ; declare it as a function
8125\c
8126\c func:   push    ebp
8127\c
8128\c         ; etc.
8129
8130And to export a data item such as an array, you would have to code
8131
8132\c global  array:data array.end-array      ; give the size too
8133\c
8134\c array:  resd    128
8135\c .end:
8136
8137Be careful: If you export a variable to the library user, by
8138declaring it as \c{GLOBAL} and supplying a size, the variable will
8139end up living in the data section of the main program, rather than
8140in your library's data section, where you declared it. So you will
8141have to access your own global variable with the \c{..got} mechanism
8142rather than \c{..gotoff}, as if it were external (which,
8143effectively, it has become).
8144
8145Equally, if you need to store the address of an exported global in
8146one of your data sections, you can't do it by means of the standard
8147sort of code:
8148
8149\c dataptr:        dd      global_data_item        ; WRONG
8150
8151NASM will interpret this code as an ordinary relocation, in which
8152\c{global_data_item} is merely an offset from the beginning of the
8153\c{.data} section (or whatever); so this reference will end up
8154pointing at your data section instead of at the exported global
8155which resides elsewhere.
8156
8157Instead of the above code, then, you must write
8158
8159\c dataptr:        dd      global_data_item wrt ..sym
8160
8161which makes use of the special \c{WRT} type \I\c{WRT ..sym}\c{..sym}
8162to instruct NASM to search the symbol table for a particular symbol
8163at that address, rather than just relocating by section base.
8164
8165Either method will work for functions: referring to one of your
8166functions by means of
8167
8168\c funcptr:        dd      my_function
8169
8170will give the user the address of the code you wrote, whereas
8171
8172\c funcptr:        dd      my_function wrt ..sym
8173
8174will give the address of the procedure linkage table for the
8175function, which is where the calling program will \e{believe} the
8176function lives. Either address is a valid way to call the function.
8177
8178
8179\S{picproc} Calling Procedures Outside the Library
8180
8181Calling procedures outside your shared library has to be done by
8182means of a \i\e{procedure linkage table}, or \i{PLT}. The PLT is
8183placed at a known offset from where the library is loaded, so the
8184library code can make calls to the PLT in a position-independent
8185way. Within the PLT there is code to jump to offsets contained in
8186the GOT, so function calls to other shared libraries or to routines
8187in the main program can be transparently passed off to their real
8188destinations.
8189
8190To call an external routine, you must use another special PIC
8191relocation type, \I{PLT relocations}\i\c{WRT ..plt}. This is much
8192easier than the GOT-based ones: you simply replace calls such as
8193\c{CALL printf} with the PLT-relative version \c{CALL printf WRT
8194..plt}.
8195
8196
8197\S{link} Generating the Library File
8198
8199Having written some code modules and assembled them to \c{.o} files,
8200you then generate your shared library with a command such as
8201
8202\c ld -shared -o library.so module1.o module2.o       # for ELF
8203\c ld -Bshareable -o library.so module1.o module2.o   # for BSD
8204
8205For ELF, if your shared library is going to reside in system
8206directories such as \c{/usr/lib} or \c{/lib}, it is usually worth
8207using the \i\c{-soname} flag to the linker, to store the final
8208library file name, with a version number, into the library:
8209
8210\c ld -shared -soname library.so.1 -o library.so.1.2 *.o
8211
8212You would then copy \c{library.so.1.2} into the library directory,
8213and create \c{library.so.1} as a symbolic link to it.
8214
8215
8216\C{mixsize} Mixing 16- and 32-bit Code
8217
8218This chapter tries to cover some of the issues, largely related to
8219unusual forms of addressing and jump instructions, encountered when
8220writing operating system code such as protected-mode initialization
8221routines, which require code that operates in mixed segment sizes,
8222such as code in a 16-bit segment trying to modify data in a 32-bit
8223one, or jumps between different-size segments.
8224
8225
8226\H{mixjump} Mixed-Size Jumps\I{jumps, mixed-size}
8227
8228\I{operating system, writing}\I{writing operating systems}The most
8229common form of \i{mixed-size instruction} is the one used when
8230writing a 32-bit OS: having done your setup in 16-bit mode, such as
8231loading the kernel, you then have to boot it by switching into
8232protected mode and jumping to the 32-bit kernel start address. In a
8233fully 32-bit OS, this tends to be the \e{only} mixed-size
8234instruction you need, since everything before it can be done in pure
823516-bit code, and everything after it can be pure 32-bit.
8236
8237This jump must specify a 48-bit far address, since the target
8238segment is a 32-bit one. However, it must be assembled in a 16-bit
8239segment, so just coding, for example,
8240
8241\c         jmp     0x1234:0x56789ABC       ; wrong!
8242
8243will not work, since the offset part of the address will be
8244truncated to \c{0x9ABC} and the jump will be an ordinary 16-bit far
8245one.
8246
8247The Linux kernel setup code gets round the inability of \c{as86} to
8248generate the required instruction by coding it manually, using
8249\c{DB} instructions. NASM can go one better than that, by actually
8250generating the right instruction itself. Here's how to do it right:
8251
8252\c         jmp     dword 0x1234:0x56789ABC         ; right
8253
8254\I\c{JMP DWORD}The \c{DWORD} prefix (strictly speaking, it should
8255come \e{after} the colon, since it is declaring the \e{offset} field
8256to be a doubleword; but NASM will accept either form, since both are
8257unambiguous) forces the offset part to be treated as 32-bit, on the
8258assumption that you are deliberately writing a jump from a 16-bit
8259segment to a 32-bit one.
8260
8261You can do the reverse operation, jumping from a 32-bit segment to a
826216-bit one, by means of the \c{WORD} prefix:
8263
8264\c         jmp     word 0x8765:0x4321      ; 32 to 16 bit
8265
8266If the \c{WORD} prefix is specified in 16-bit mode, or the \c{DWORD}
8267prefix in 32-bit mode, they will be ignored, since each is
8268explicitly forcing NASM into a mode it was in anyway.
8269
8270
8271\H{mixaddr} Addressing Between Different-Size Segments\I{addressing,
8272mixed-size}\I{mixed-size addressing}
8273
8274If your OS is mixed 16 and 32-bit, or if you are writing a DOS
8275extender, you are likely to have to deal with some 16-bit segments
8276and some 32-bit ones. At some point, you will probably end up
8277writing code in a 16-bit segment which has to access data in a
827832-bit segment, or vice versa.
8279
8280If the data you are trying to access in a 32-bit segment lies within
8281the first 64K of the segment, you may be able to get away with using
8282an ordinary 16-bit addressing operation for the purpose; but sooner
8283or later, you will want to do 32-bit addressing from 16-bit mode.
8284
8285The easiest way to do this is to make sure you use a register for
8286the address, since any effective address containing a 32-bit
8287register is forced to be a 32-bit address. So you can do
8288
8289\c         mov     eax,offset_into_32_bit_segment_specified_by_fs
8290\c         mov     dword [fs:eax],0x11223344
8291
8292This is fine, but slightly cumbersome (since it wastes an
8293instruction and a register) if you already know the precise offset
8294you are aiming at. The x86 architecture does allow 32-bit effective
8295addresses to specify nothing but a 4-byte offset, so why shouldn't
8296NASM be able to generate the best instruction for the purpose?
8297
8298It can. As in \k{mixjump}, you need only prefix the address with the
8299\c{DWORD} keyword, and it will be forced to be a 32-bit address:
8300
8301\c         mov     dword [fs:dword my_offset],0x11223344
8302
8303Also as in \k{mixjump}, NASM is not fussy about whether the
8304\c{DWORD} prefix comes before or after the segment override, so
8305arguably a nicer-looking way to code the above instruction is
8306
8307\c         mov     dword [dword fs:my_offset],0x11223344
8308
8309Don't confuse the \c{DWORD} prefix \e{outside} the square brackets,
8310which controls the size of the data stored at the address, with the
8311one \e{inside} the square brackets which controls the length of the
8312address itself. The two can quite easily be different:
8313
8314\c         mov     word [dword 0x12345678],0x9ABC
8315
8316This moves 16 bits of data to an address specified by a 32-bit
8317offset.
8318
8319You can also specify \c{WORD} or \c{DWORD} prefixes along with the
8320\c{FAR} prefix to indirect far jumps or calls. For example:
8321
8322\c         call    dword far [fs:word 0x4321]
8323
8324This instruction contains an address specified by a 16-bit offset;
8325it loads a 48-bit far pointer from that (16-bit segment and 32-bit
8326offset), and calls that address.
8327
8328
8329\H{mixother} Other Mixed-Size Instructions
8330
8331The other way you might want to access data might be using the
8332string instructions (\c{LODSx}, \c{STOSx} and so on) or the
8333\c{XLATB} instruction. These instructions, since they take no
8334parameters, might seem to have no easy way to make them perform
833532-bit addressing when assembled in a 16-bit segment.
8336
8337This is the purpose of NASM's \i\c{a16}, \i\c{a32} and \i\c{a64} prefixes. If
8338you are coding \c{LODSB} in a 16-bit segment but it is supposed to
8339be accessing a string in a 32-bit segment, you should load the
8340desired address into \c{ESI} and then code
8341
8342\c         a32     lodsb
8343
8344The prefix forces the addressing size to 32 bits, meaning that
8345\c{LODSB} loads from \c{[DS:ESI]} instead of \c{[DS:SI]}. To access
8346a string in a 16-bit segment when coding in a 32-bit one, the
8347corresponding \c{a16} prefix can be used.
8348
8349The \c{a16}, \c{a32} and \c{a64} prefixes can be applied to any instruction
8350in NASM's instruction table, but most of them can generate all the
8351useful forms without them. The prefixes are necessary only for
8352instructions with implicit addressing:
8353\# \c{CMPSx} (\k{insCMPSB}),
8354\# \c{SCASx} (\k{insSCASB}), \c{LODSx} (\k{insLODSB}), \c{STOSx}
8355\# (\k{insSTOSB}), \c{MOVSx} (\k{insMOVSB}), \c{INSx} (\k{insINSB}),
8356\# \c{OUTSx} (\k{insOUTSB}), and \c{XLATB} (\k{insXLATB}).
8357\c{CMPSx}, \c{SCASx}, \c{LODSx}, \c{STOSx}, \c{MOVSx}, \c{INSx},
8358\c{OUTSx}, and \c{XLATB}.
8359Also, the
8360various push and pop instructions (\c{PUSHA} and \c{POPF} as well as
8361the more usual \c{PUSH} and \c{POP}) can accept \c{a16}, \c{a32} or \c{a64}
8362prefixes to force a particular one of \c{SP}, \c{ESP} or \c{RSP} to be used
8363as a stack pointer, in case the stack segment in use is a different
8364size from the code segment.
8365
8366\c{PUSH} and \c{POP}, when applied to segment registers in 32-bit
8367mode, also have the slightly odd behaviour that they push and pop 4
8368bytes at a time, of which the top two are ignored and the bottom two
8369give the value of the segment register being manipulated. To force
8370the 16-bit behaviour of segment-register push and pop instructions,
8371you can use the operand-size prefix \i\c{o16}:
8372
8373\c         o16 push    ss
8374\c         o16 push    ds
8375
8376This code saves a doubleword of stack space by fitting two segment
8377registers into the space which would normally be consumed by pushing
8378one.
8379
8380(You can also use the \i\c{o32} prefix to force the 32-bit behaviour
8381when in 16-bit mode, but this seems less useful.)
8382
8383
8384\C{64bit} Writing 64-bit Code (Unix, Win64)
8385
8386This chapter attempts to cover some of the common issues involved when
8387writing 64-bit code, to run under \i{Win64} or Unix.  It covers how to
8388write assembly code to interface with 64-bit C routines, and how to
8389write position-independent code for shared libraries.
8390
8391All 64-bit code uses a flat memory model, since segmentation is not
8392available in 64-bit mode.  The one exception is the \c{FS} and \c{GS}
8393registers, which still add their bases.
8394
8395Position independence in 64-bit mode is significantly simpler, since
8396the processor supports \c{RIP}-relative addressing directly; see the
8397\c{REL} keyword (\k{effaddr}).  On most 64-bit platforms, it is
8398probably desirable to make that the default, using the directive
8399\c{DEFAULT REL} (\k{default}).
8400
840164-bit programming is relatively similar to 32-bit programming, but
8402of course pointers are 64 bits long; additionally, all existing
8403platforms pass arguments in registers rather than on the stack.
8404Furthermore, 64-bit platforms use SSE2 by default for floating point.
8405Please see the ABI documentation for your platform.
8406
840764-bit platforms differ in the sizes of the C/C++ fundamental
8408datatypes, not just from 32-bit platforms but from each other.  If a
8409specific size data type is desired, it is probably best to use the
8410types defined in the standard C header \c{<inttypes.h>}.
8411
8412All known 64-bit platforms except some embedded platforms require that
8413the stack is 16-byte aligned immediately before a \c{CALL} instruction.
8414Since \c{CALL} pushes an 8-byte return address, the stack pointer (\c{RSP})
8415is therefore an \e{odd} multiple of 8 bytes at the entry to a function.
8416
8417In 64-bit mode, the default operand size is still 32 bits.  When
8418loading a value into a 32-bit register (but not an 8- or 16-bit
8419register), the upper 32 bits of the corresponding 64-bit register are
8420set to zero.
8421
8422\H{reg64} Register Names in 64-bit Mode
8423
8424NASM uses the following names for general-purpose registers in 64-bit
8425mode, for 8-, 16-, 32- and 64-bit references, respectively:
8426
8427\c      AL/AH, CL/CH, DL/DH, BL/BH, SPL, BPL, SIL, DIL, R8B-R15B
8428\c      AX, CX, DX, BX, SP, BP, SI, DI, R8W-R15W
8429\c      EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D-R15D
8430\c      RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8-R15
8431
8432This is consistent with the AMD documentation and most other
8433assemblers.  The Intel documentation, however, uses the names
8434\c{R8L-R15L} for 8-bit references to the higher registers.  It is
8435possible to use those names by defining them as macros; similarly,
8436if one wants to use numeric names for the low 8 registers, define them
8437as macros.  The standard macro package \c{altreg} (see \k{pkg_altreg})
8438can be used for this purpose.
8439
8440\H{id64} Immediates and Displacements in 64-bit Mode
8441
8442In 64-bit mode, immediates and displacements are generally only 32
8443bits wide.  NASM will therefore truncate most displacements and
8444immediates to 32 bits.
8445
8446The only instruction which takes a full \i{64-bit immediate} is:
8447
8448\c      MOV reg64,imm64
8449
8450NASM will produce this instruction whenever the programmer uses
8451\c{MOV} with an immediate into a 64-bit register.  If this is not
8452desirable, simply specify the equivalent 32-bit register, which will
8453be automatically zero-extended by the processor, or specify the
8454immediate as \c{DWORD}:
8455
8456\c      mov rax,foo             ; 64-bit immediate
8457\c      mov rax,qword foo       ; (identical)
8458\c      mov eax,foo             ; 32-bit immediate, zero-extended
8459\c      mov rax,dword foo       ; 32-bit immediate, sign-extended
8460
8461The lengths of these instructions are 10, 10, 5 and 7 bytes, respectively.
8462
8463If optimization is enabled and NASM can determine at assembly time
8464that a shorter instruction will suffice, the shorter instruction will
8465be emitted unless of course \c{STRICT QWORD} or \c{STRICT DWORD} is
8466specified (see \k{strict}):
8467
8468\c      mov rax,1               ; Assembles as "mov eax,1" (5 bytes)
8469\c      mov rax,strict qword 1  ; Full 10-byte instruction
8470\c      mov rax,strict dword 1  ; 7-byte instruction
8471\c      mov rax,symbol          ; 10 bytes, not known at assembly time
8472\c      lea rax,[rel symbol]    ; 7 bytes, usually preferred by the ABI
8473
8474Note that \c{lea rax,[rel symbol]} is position-independent, whereas
8475\c{mov rax,symbol} is not.  Most ABIs prefer or even require
8476position-independent code in 64-bit mode.  However, the \c{MOV}
8477instruction is able to reference a symbol anywhere in the 64-bit
8478address space, whereas \c{LEA} is only able to access a symbol
8479within 2 GB of the instruction itself (see below.)
8480
8481The only instruction which takes a full \I{64-bit displacement}64-bit
8482\e{displacement} is a \c{MOV} that loads or stores \c{AL}, \c{AX},
8483\c{EAX} or \c{RAX} (but no other registers) from or to an absolute 64-bit address.
8484Since this is a relatively rarely used instruction (64-bit code generally uses
8485relative addressing), the programmer has to explicitly declare the
8486displacement size as \c{ABS QWORD}:
8487
8488\c      default abs
8489\c
8490\c      mov eax,[foo]           ; 32-bit absolute disp, sign-extended
8491\c      mov eax,[a32 foo]       ; 32-bit absolute disp, zero-extended
8492\c      mov eax,[qword foo]     ; 64-bit absolute disp
8493\c
8494\c      default rel
8495\c
8496\c      mov eax,[foo]           ; 32-bit relative disp
8497\c      mov eax,[a32 foo]       ; ditto, address truncated to 32 bits(!)
8498\c      mov eax,[qword foo]     ; error
8499\c      mov eax,[abs qword foo] ; 64-bit absolute disp
8500
8501A sign-extended absolute displacement can access from -2 GB to +2 GB;
8502a zero-extended absolute displacement can access from 0 to 4 GB.
8503
8504\H{unix64} Interfacing to 64-bit C Programs (Unix)
8505
8506On Unix, the 64-bit ABI as well as the x32 ABI (32-bit ABI with the
8507CPU in 64-bit mode) is defined by the documents at:
8508
8509\W{http://www.nasm.us/abi/unix64}\c{http://www.nasm.us/abi/unix64}
8510
8511Although written for AT&T-syntax assembly, the concepts apply equally
8512well for NASM-style assembly.  What follows is a simplified summary.
8513
8514The first six integer arguments (from the left) are passed in \c{RDI},
8515\c{RSI}, \c{RDX}, \c{RCX}, \c{R8}, and \c{R9}, in that order.
8516Additional integer arguments are passed on the stack.  These
8517registers, plus \c{RAX}, \c{R10} and \c{R11} are destroyed by function
8518calls, and thus are available for use by the function without saving.
8519
8520Integer return values are passed in \c{RAX} and \c{RDX}, in that order.
8521
8522Floating point is done using SSE registers, except for \c{long
8523double}, which is 80 bits (\c{TWORD}) on most platforms (Android is
8524one exception; there \c{long double} is 64 bits and treated the same
8525as \c{double}.)  Floating-point arguments are passed in \c{XMM0} to
8526\c{XMM7}; return is \c{XMM0} and \c{XMM1}.  \c{long double} values are
8527passed on the stack, and returned in \c{ST0} and \c{ST1}.
8528
8529All SSE and x87 registers are destroyed by function calls.
8530
8531On 64-bit Unix, \c{long} is 64 bits.
8532
8533Integer and SSE register arguments are counted separately, so for the case of
8534
8535\c      void foo(long a, double b, int c)
8536
8537\c{a} is passed in \c{RDI}, \c{b} in \c{XMM0}, and \c{c} in \c{ESI}.
8538
8539\H{win64} Interfacing to 64-bit C Programs (Win64)
8540
8541The Win64 ABI is described by the document at:
8542
8543\W{http://www.nasm.us/abi/win64}\c{http://www.nasm.us/abi/win64}
8544
8545What follows is a simplified summary.
8546
8547The first four integer arguments are passed in \c{RCX}, \c{RDX},
8548\c{R8} and \c{R9}, in that order.  Additional integer arguments are
8549passed on the stack.  These registers, plus \c{RAX}, \c{R10} and
8550\c{R11} are destroyed by function calls, and thus are available for
8551use by the function without saving.
8552
8553Integer return values are passed in \c{RAX} only.
8554
8555Floating point is done using SSE registers, except for \c{long
8556double}.  Floating-point arguments are passed in \c{XMM0} to \c{XMM3};
8557return is \c{XMM0} only.
8558
8559On Win64, \c{long} is 32 bits; \c{long long} or \c{__int64} is 64 bits.
8560
8561Integer and SSE register arguments are counted together, so for the case of
8562
8563\c      void foo(long long a, double b, int c)
8564
8565\c{a} is passed in \c{RCX}, \c{b} in \c{XMM1}, and \c{c} in \c{R8D}.
8566
8567\C{trouble} Troubleshooting
8568
8569This chapter describes some of the common problems that users have
8570been known to encounter with NASM, and answers them.  If you think you
8571have found a bug in NASM, please see \k{bugs}.
8572
8573
8574\H{problems} Common Problems
8575
8576\S{inefficient} NASM Generates \i{Inefficient Code}
8577
8578We sometimes get `bug' reports about NASM generating inefficient, or
8579even `wrong', code on instructions such as \c{ADD ESP,8}. This is a
8580deliberate design feature, connected to predictability of output:
8581NASM, on seeing \c{ADD ESP,8}, will generate the form of the
8582instruction which leaves room for a 32-bit offset. You need to code
8583\I\c{BYTE}\c{ADD ESP,BYTE 8} if you want the space-efficient form of
8584the instruction. This isn't a bug, it's user error: if you prefer to
8585have NASM produce the more efficient code automatically, enable
8586optimization with the \c{-O} option (see \k{opt-O}).
8587
8588
8589\S{jmprange} My Jumps are Out of Range\I{out of range, jumps}
8590
8591Similarly, people complain that when they issue \i{conditional
8592jumps} (which are \c{SHORT} by default) that try to jump too far,
8593NASM reports `short jump out of range' instead of making the jumps
8594longer.
8595
8596This, again, is partly a predictability issue, but in fact has a
8597more practical reason as well. NASM has no means of being told what
8598type of processor the code it is generating will be run on; so it
8599cannot decide for itself that it should generate \i\c{Jcc NEAR} type
8600instructions, because it doesn't know that it's working for a 386 or
8601above. Alternatively, it could replace the out-of-range short
8602\c{JNE} instruction with a very short \c{JE} instruction that jumps
8603over a \c{JMP NEAR}; this is a sensible solution for processors
8604below a 386, but hardly efficient on processors which have good
8605branch prediction \e{and} could have used \c{JNE NEAR} instead. So,
8606once again, it's up to the user, not the assembler, to decide what
8607instructions should be generated. See \k{opt-O}.
8608
8609
8610\S{proborg} \i\c{ORG} Doesn't Work
8611
8612People writing \i{boot sector} programs in the \c{bin} format often
8613complain that \c{ORG} doesn't work the way they'd like: in order to
8614place the \c{0xAA55} signature word at the end of a 512-byte boot
8615sector, people who are used to MASM tend to code
8616
8617\c         ORG 0
8618\c
8619\c         ; some boot sector code
8620\c
8621\c         ORG 510
8622\c         DW 0xAA55
8623
8624This is not the intended use of the \c{ORG} directive in NASM, and
8625will not work. The correct way to solve this problem in NASM is to
8626use the \i\c{TIMES} directive, like this:
8627
8628\c         ORG 0
8629\c
8630\c         ; some boot sector code
8631\c
8632\c         TIMES 510-($-$$) DB 0
8633\c         DW 0xAA55
8634
8635The \c{TIMES} directive will insert exactly enough zero bytes into
8636the output to move the assembly point up to 510. This method also
8637has the advantage that if you accidentally fill your boot sector too
8638full, NASM will catch the problem at assembly time and report it, so
8639you won't end up with a boot sector that you have to disassemble to
8640find out what's wrong with it.
8641
8642
8643\S{probtimes} \i\c{TIMES} Doesn't Work
8644
8645The other common problem with the above code is people who write the
8646\c{TIMES} line as
8647
8648\c         TIMES 510-$ DB 0
8649
8650by reasoning that \c{$} should be a pure number, just like 510, so
8651the difference between them is also a pure number and can happily be
8652fed to \c{TIMES}.
8653
8654NASM is a \e{modular} assembler: the various component parts are
8655designed to be easily separable for re-use, so they don't exchange
8656information unnecessarily. In consequence, the \c{bin} output
8657format, even though it has been told by the \c{ORG} directive that
8658the \c{.text} section should start at 0, does not pass that
8659information back to the expression evaluator. So from the
8660evaluator's point of view, \c{$} isn't a pure number: it's an offset
8661from a section base. Therefore the difference between \c{$} and 510
8662is also not a pure number, but involves a section base. Values
8663involving section bases cannot be passed as arguments to \c{TIMES}.
8664
8665The solution, as in the previous section, is to code the \c{TIMES}
8666line in the form
8667
8668\c         TIMES 510-($-$$) DB 0
8669
8670in which \c{$} and \c{$$} are offsets from the same section base,
8671and so their difference is a pure number. This will solve the
8672problem and generate sensible code.
8673
8674\A{ndisasm} \i{Ndisasm}
8675
8676                  The Netwide Disassembler, NDISASM
8677
8678\H{ndisintro} Introduction
8679
8680
8681The Netwide Disassembler is a small companion program to the Netwide
8682Assembler, NASM. It seemed a shame to have an x86 assembler,
8683complete with a full instruction table, and not make as much use of
8684it as possible, so here's a disassembler which shares the
8685instruction table (and some other bits of code) with NASM.
8686
8687The Netwide Disassembler does nothing except to produce
8688disassemblies of \e{binary} source files. NDISASM does not have any
8689understanding of object file formats, like \c{objdump}, and it will
8690not understand \c{DOS .EXE} files like \c{debug} will. It just
8691disassembles.
8692
8693
8694\H{ndisrun} Running NDISASM
8695
8696To disassemble a file, you will typically use a command of the form
8697
8698\c        ndisasm -b {16|32|64} filename
8699
8700NDISASM can disassemble 16-, 32- or 64-bit code equally easily,
8701provided of course that you remember to specify which it is to work
8702with. If no \i\c{-b} switch is present, NDISASM works in 16-bit mode
8703by default. The \i\c{-u} switch (for USE32) also invokes 32-bit mode.
8704
8705Two more command line options are \i\c{-r} which reports the version
8706number of NDISASM you are running, and \i\c{-h} which gives a short
8707summary of command line options.
8708
8709
8710\S{ndiscom} COM Files: Specifying an Origin
8711
8712To disassemble a \c{DOS .COM} file correctly, a disassembler must assume
8713that the first instruction in the file is loaded at address \c{0x100},
8714rather than at zero. NDISASM, which assumes by default that any file
8715you give it is loaded at zero, will therefore need to be informed of
8716this.
8717
8718The \i\c{-o} option allows you to declare a different origin for the
8719file you are disassembling. Its argument may be expressed in any of
8720the NASM numeric formats: decimal by default, if it begins with `\c{$}'
8721or `\c{0x}' or ends in `\c{H}' it's \c{hex}, if it ends in `\c{Q}' it's
8722\c{octal}, and if it ends in `\c{B}' it's \c{binary}.
8723
8724Hence, to disassemble a \c{.COM} file:
8725
8726\c        ndisasm -o100h filename.com
8727
8728will do the trick.
8729
8730
8731\S{ndissync} Code Following Data: Synchronization
8732
8733Suppose you are disassembling a file which contains some data which
8734isn't machine code, and \e{then} contains some machine code. NDISASM
8735will faithfully plough through the data section, producing machine
8736instructions wherever it can (although most of them will look
8737bizarre, and some may have unusual prefixes, e.g. `\c{FS OR AX,0x240A}'),
8738and generating `DB' instructions every so often if it's totally stumped.
8739Then it will reach the code section.
8740
8741Supposing NDISASM has just finished generating a strange machine
8742instruction from part of the data section, and its file position is
8743now one byte \e{before} the beginning of the code section. It's
8744entirely possible that another spurious instruction will get
8745generated, starting with the final byte of the data section, and
8746then the correct first instruction in the code section will not be
8747seen because the starting point skipped over it. This isn't really
8748ideal.
8749
8750To avoid this, you can specify a `\i{synchronization}' point, or indeed
8751as many synchronization points as you like (although NDISASM can
8752only handle 2147483647 sync points internally). The definition of a sync
8753point is this: NDISASM guarantees to hit sync points exactly during
8754disassembly. If it is thinking about generating an instruction which
8755would cause it to jump over a sync point, it will discard that
8756instruction and output a `\c{db}' instead. So it \e{will} start
8757disassembly exactly from the sync point, and so you \e{will} see all
8758the instructions in your code section.
8759
8760Sync points are specified using the \i\c{-s} option: they are measured
8761in terms of the program origin, not the file position. So if you
8762want to synchronize after 32 bytes of a \c{.COM} file, you would have to
8763do
8764
8765\c        ndisasm -o100h -s120h file.com
8766
8767rather than
8768
8769\c        ndisasm -o100h -s20h file.com
8770
8771As stated above, you can specify multiple sync markers if you need
8772to, just by repeating the \c{-s} option.
8773
8774
8775\S{ndisisync} Mixed Code and Data: Automatic (Intelligent) Synchronization
8776\I\c{auto-sync}
8777
8778Suppose you are disassembling the boot sector of a \c{DOS} floppy (maybe
8779it has a virus, and you need to understand the virus so that you
8780know what kinds of damage it might have done you). Typically, this
8781will contain a \c{JMP} instruction, then some data, then the rest of the
8782code. So there is a very good chance of NDISASM being \e{misaligned}
8783when the data ends and the code begins. Hence a sync point is
8784needed.
8785
8786On the other hand, why should you have to specify the sync point
8787manually? What you'd do in order to find where the sync point would
8788be, surely, would be to read the \c{JMP} instruction, and then to use
8789its target address as a sync point. So can NDISASM do that for you?
8790
8791The answer, of course, is yes: using either of the synonymous
8792switches \i\c{-a} (for automatic sync) or \i\c{-i} (for intelligent
8793sync) will enable \c{auto-sync} mode. Auto-sync mode automatically
8794generates a sync point for any forward-referring PC-relative jump or
8795call instruction that NDISASM encounters. (Since NDISASM is one-pass,
8796if it encounters a PC-relative jump whose target has already been
8797processed, there isn't much it can do about it...)
8798
8799Only PC-relative jumps are processed, since an absolute jump is
8800either through a register (in which case NDISASM doesn't know what
8801the register contains) or involves a segment address (in which case
8802the target code isn't in the same segment that NDISASM is working
8803in, and so the sync point can't be placed anywhere useful).
8804
8805For some kinds of file, this mechanism will automatically put sync
8806points in all the right places, and save you from having to place
8807any sync points manually. However, it should be stressed that
8808auto-sync mode is \e{not} guaranteed to catch all the sync points, and
8809you may still have to place some manually.
8810
8811Auto-sync mode doesn't prevent you from declaring manual sync
8812points: it just adds automatically generated ones to the ones you
8813provide. It's perfectly feasible to specify \c{-i} \e{and} some \c{-s}
8814options.
8815
8816Another caveat with auto-sync mode is that if, by some unpleasant
8817fluke, something in your data section should disassemble to a
8818PC-relative call or jump instruction, NDISASM may obediently place a
8819sync point in a totally random place, for example in the middle of
8820one of the instructions in your code section. So you may end up with
8821a wrong disassembly even if you use auto-sync. Again, there isn't
8822much I can do about this. If you have problems, you'll have to use
8823manual sync points, or use the \c{-k} option (documented below) to
8824suppress disassembly of the data area.
8825
8826
8827\S{ndisother} Other Options
8828
8829The \i\c{-e} option skips a header on the file, by ignoring the first N
8830bytes. This means that the header is \e{not} counted towards the
8831disassembly offset: if you give \c{-e10 -o10}, disassembly will start
8832at byte 10 in the file, and this will be given offset 10, not 20.
8833
8834The \i\c{-k} option is provided with two comma-separated numeric
8835arguments, the first of which is an assembly offset and the second
8836is a number of bytes to skip. This \e{will} count the skipped bytes
8837towards the assembly offset: its use is to suppress disassembly of a
8838data section which wouldn't contain anything you wanted to see
8839anyway.
8840
8841
8842\A{inslist} \i{Instruction List}
8843
8844\H{inslistintro} Introduction
8845
8846The following sections show the instructions which NASM currently supports. For each
8847instruction, there is a separate entry for each supported addressing mode. The third
8848column shows the processor type in which the instruction was introduced and,
8849 when appropriate, one or more usage flags.
8850
8851\& inslist.src
8852
8853\A{changelog} \i{NASM Version History}
8854
8855\& changes.src
8856
8857\A{source} Building NASM from Source
8858
8859The source code for NASM is available from our website,
8860\W{http://www.nasm.us/}\c{http://www.nasm.us/}; see \k{website}.
8861
8862\H{tarball} Building from a Source Archive
8863
8864The source archives available on the web site should be capable of
8865building on a number of platforms.  This is the recommended method for
8866building NASM to support platforms for which executables are not
8867available.
8868
8869On a system which has a Unix shell (\c{sh}), run:
8870
8871\c      sh configure
8872\c      make everything
8873
8874A number of options can be passed to \c{configure}; see
8875\c{sh configure --help}.
8876
8877A set of Makefiles for some other environments is also available;
8878please see the file \c{Mkfiles/README}.
8879
8880To build the installer for the Windows platform, you will need the
8881\i\e{Nullsoft Scriptable Installer}, \i{NSIS}, installed.
8882
8883To build the documentation, you will need a set of additional tools.
8884The documentation is not likely to be able to build on non-Unix
8885systems.
8886
8887\H{git} Building from the \i\c{git} Repository
8888
8889The NASM development tree is kept in a source code repository using
8890the \c{git} distributed source control system.  The link is available
8891on the website.  This is recommended only to participate in the
8892development of NASM or to assist with testing the development code.
8893
8894To build NASM from the \c{git} repository you will need a Perl
8895interpreter and, if building on a Unix system, GNU autoconf installed
8896on your system.
8897
8898To build on a Unix system, run:
8899
8900\c      sh autogen.sh
8901
8902to create the \c{configure} script and then build as listed above.
8903
8904\H{builddoc} Building the documentation
8905
8906To build the documentation, you will need a Perl interpreter, a
8907PostScript to PDF converter such as Ghostscript, and suitable fonts
8908installed on your system. The recommended (and default) fonts are
8909Adobe's Source Sans and Source Code fonts, which are freely available
8910under the SIL Open Font License.
8911
8912\A{contact} Contact Information
8913
8914\H{website} Website
8915
8916NASM has a \i{website} at
8917\W{http://www.nasm.us/}\c{http://www.nasm.us/}.
8918
8919\i{New releases}, \i{release candidates}, and \I{snapshots, daily
8920development}\i{daily development snapshots} of NASM are available from
8921the official web site in source form as well as binaries for a number
8922of common platforms.
8923
8924\S{forums} User Forums
8925
8926Users of NASM may find the Forums on the website useful.  These are,
8927however, not frequented much by the developers of NASM, so they are
8928not suitable for reporting bugs.
8929
8930\S{develcom} Development Community
8931
8932The development of NASM is coordinated primarily through the
8933\i\c{nasm-devel} mailing list.  If you wish to participate in
8934development of NASM, please join this mailing list.  Subscription
8935links and archives of past posts are available on the website.
8936
8937\H{bugs} \i{Reporting Bugs}\I{bugs}
8938
8939To report bugs in NASM, please use the \i{bug tracker} at
8940\W{http://www.nasm.us/}\c{http://www.nasm.us/} (click on "Bug
8941Tracker"), or if that fails then through one of the contacts in
8942\k{website}.
8943
8944Please read \k{qstart} first, and don't report the bug if it's
8945listed in there as a deliberate feature. (If you think the feature
8946is badly thought out, feel free to send us reasons why you think it
8947should be changed, but don't just send us mail saying `This is a
8948bug' if the documentation says we did it on purpose.) Then read
8949\k{problems}, and don't bother reporting the bug if it's listed
8950there.
8951
8952If you do report a bug, \e{please} make sure your bug report includes
8953the following information:
8954
8955\b What operating system you're running NASM under.  Linux,
8956FreeBSD, NetBSD, MacOS X, Win16, Win32, Win64, MS-DOS, OS/2, VMS,
8957whatever.
8958
8959\b If you compiled your own executable from a source archive, compiled
8960your own executable from \c{git}, used the standard distribution
8961binaries from the website, or got an executable from somewhere else
8962(e.g. a Linux distribution.) If you were using a locally built
8963executable, try to reproduce the problem using one of the standard
8964binaries, as this will make it easier for us to reproduce your problem
8965prior to fixing it.
8966
8967\b Which version of NASM you're using, and exactly how you invoked
8968it. Give us the precise command line, and the contents of the
8969\c{NASMENV} environment variable if any.
8970
8971\b Which versions of any supplementary programs you're using, and
8972how you invoked them. If the problem only becomes visible at link
8973time, tell us what linker you're using, what version of it you've
8974got, and the exact linker command line. If the problem involves
8975linking against object files generated by a compiler, tell us what
8976compiler, what version, and what command line or options you used.
8977(If you're compiling in an IDE, please try to reproduce the problem
8978with the command-line version of the compiler.)
8979
8980\b If at all possible, send us a NASM source file which exhibits the
8981problem. If this causes copyright problems (e.g. you can only
8982reproduce the bug in restricted-distribution code) then bear in mind
8983the following two points: firstly, we guarantee that any source code
8984sent to us for the purposes of debugging NASM will be used \e{only}
8985for the purposes of debugging NASM, and that we will delete all our
8986copies of it as soon as we have found and fixed the bug or bugs in
8987question; and secondly, we would prefer \e{not} to be mailed large
8988chunks of code anyway. The smaller the file, the better. A
8989three-line sample file that does nothing useful \e{except}
8990demonstrate the problem is much easier to work with than a
8991fully fledged ten-thousand-line program. (Of course, some errors
8992\e{do} only crop up in large files, so this may not be possible.)
8993
8994\b A description of what the problem actually \e{is}. `It doesn't
8995work' is \e{not} a helpful description! Please describe exactly what
8996is happening that shouldn't be, or what isn't happening that should.
8997Examples might be: `NASM generates an error message saying Line 3
8998for an error that's actually on Line 5'; `NASM generates an error
8999message that I believe it shouldn't be generating at all'; `NASM
9000fails to generate an error message that I believe it \e{should} be
9001generating'; `the object file produced from this source code crashes
9002my linker'; `the ninth byte of the output file is 66 and I think it
9003should be 77 instead'.
9004
9005\b If you believe the output file from NASM to be faulty, send it to
9006us. That allows us to determine whether our own copy of NASM
9007generates the same file, or whether the problem is related to
9008portability issues between our development platforms and yours. We
9009can handle binary files mailed to us as MIME attachments, uuencoded,
9010and even BinHex. Alternatively, we may be able to provide an FTP
9011site you can upload the suspect files to; but mailing them is easier
9012for us.
9013
9014\b Any other information or data files that might be helpful. If,
9015for example, the problem involves NASM failing to generate an object
9016file while TASM can generate an equivalent file without trouble,
9017then send us \e{both} object files, so we can see what TASM is doing
9018differently from us.
9019