1*a9fa9459Szrj /* This is the Assembler Pre-Processor
2*a9fa9459Szrj Copyright (C) 1987-2016 Free Software Foundation, Inc.
3*a9fa9459Szrj
4*a9fa9459Szrj This file is part of GAS, the GNU Assembler.
5*a9fa9459Szrj
6*a9fa9459Szrj GAS is free software; you can redistribute it and/or modify
7*a9fa9459Szrj it under the terms of the GNU General Public License as published by
8*a9fa9459Szrj the Free Software Foundation; either version 3, or (at your option)
9*a9fa9459Szrj any later version.
10*a9fa9459Szrj
11*a9fa9459Szrj GAS is distributed in the hope that it will be useful, but WITHOUT
12*a9fa9459Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13*a9fa9459Szrj or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14*a9fa9459Szrj License for more details.
15*a9fa9459Szrj
16*a9fa9459Szrj You should have received a copy of the GNU General Public License
17*a9fa9459Szrj along with GAS; see the file COPYING. If not, write to the Free
18*a9fa9459Szrj Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19*a9fa9459Szrj 02110-1301, USA. */
20*a9fa9459Szrj
21*a9fa9459Szrj /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22*a9fa9459Szrj /* App, the assembler pre-processor. This pre-processor strips out
23*a9fa9459Szrj excess spaces, turns single-quoted characters into a decimal
24*a9fa9459Szrj constant, and turns the # in # <number> <filename> <garbage> into a
25*a9fa9459Szrj .linefile. This needs better error-handling. */
26*a9fa9459Szrj
27*a9fa9459Szrj #include "as.h"
28*a9fa9459Szrj
/* Pre-standard compilers have no `const'; make it vanish there.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* Nonzero makes do_scrub_begin classify 'h' and 'H' as LEX_IS_H.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without m68k support MRI mode is a compile-time constant "off".  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
static const char * symver_state;
#endif

/* Character classification table, indexed by character code.  An
   entry of zero means the character has no special class; the other
   values are the LEX_IS_* codes below.  Filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Values stored in LEX.  No class 7 is defined here.  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST           12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST            13
#endif
#define LEX_IS_PARALLEL_SEPARATOR       14
#ifdef H_TICK_HEX
#define LEX_IS_H                        15
#endif

/* Classification predicates.  C is used directly as an index into
   LEX, so it must be in the range 0..255.  */
#define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)        (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
95*a9fa9459Szrj
96*a9fa9459Szrj /* FIXME-soon: The entire lexer/parser thingy should be
97*a9fa9459Szrj built statically at compile time rather than dynamically
98*a9fa9459Szrj each and every time the assembler is run. xoxorich. */
99*a9fa9459Szrj
100*a9fa9459Szrj void
do_scrub_begin(int m68k_mri ATTRIBUTE_UNUSED)101*a9fa9459Szrj do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
102*a9fa9459Szrj {
103*a9fa9459Szrj const char *p;
104*a9fa9459Szrj int c;
105*a9fa9459Szrj
106*a9fa9459Szrj lex[' '] = LEX_IS_WHITESPACE;
107*a9fa9459Szrj lex['\t'] = LEX_IS_WHITESPACE;
108*a9fa9459Szrj lex['\r'] = LEX_IS_WHITESPACE;
109*a9fa9459Szrj lex['\n'] = LEX_IS_NEWLINE;
110*a9fa9459Szrj lex[':'] = LEX_IS_COLON;
111*a9fa9459Szrj
112*a9fa9459Szrj #ifdef TC_M68K
113*a9fa9459Szrj scrub_m68k_mri = m68k_mri;
114*a9fa9459Szrj
115*a9fa9459Szrj if (! m68k_mri)
116*a9fa9459Szrj #endif
117*a9fa9459Szrj {
118*a9fa9459Szrj lex['"'] = LEX_IS_STRINGQUOTE;
119*a9fa9459Szrj
120*a9fa9459Szrj #if ! defined (TC_HPPA) && ! defined (TC_I370)
121*a9fa9459Szrj /* I370 uses single-quotes to delimit integer, float constants. */
122*a9fa9459Szrj lex['\''] = LEX_IS_ONECHAR_QUOTE;
123*a9fa9459Szrj #endif
124*a9fa9459Szrj
125*a9fa9459Szrj #ifdef SINGLE_QUOTE_STRINGS
126*a9fa9459Szrj lex['\''] = LEX_IS_STRINGQUOTE;
127*a9fa9459Szrj #endif
128*a9fa9459Szrj }
129*a9fa9459Szrj
130*a9fa9459Szrj /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
131*a9fa9459Szrj in state 5 of do_scrub_chars must be changed. */
132*a9fa9459Szrj
133*a9fa9459Szrj /* Note that these override the previous defaults, e.g. if ';' is a
134*a9fa9459Szrj comment char, then it isn't a line separator. */
135*a9fa9459Szrj for (p = symbol_chars; *p; ++p)
136*a9fa9459Szrj lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
137*a9fa9459Szrj
138*a9fa9459Szrj for (c = 128; c < 256; ++c)
139*a9fa9459Szrj lex[c] = LEX_IS_SYMBOL_COMPONENT;
140*a9fa9459Szrj
141*a9fa9459Szrj #ifdef tc_symbol_chars
142*a9fa9459Szrj /* This macro permits the processor to specify all characters which
143*a9fa9459Szrj may appears in an operand. This will prevent the scrubber from
144*a9fa9459Szrj discarding meaningful whitespace in certain cases. The i386
145*a9fa9459Szrj backend uses this to support prefixes, which can confuse the
146*a9fa9459Szrj scrubber as to whether it is parsing operands or opcodes. */
147*a9fa9459Szrj for (p = tc_symbol_chars; *p; ++p)
148*a9fa9459Szrj lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
149*a9fa9459Szrj #endif
150*a9fa9459Szrj
151*a9fa9459Szrj /* The m68k backend wants to be able to change comment_chars. */
152*a9fa9459Szrj #ifndef tc_comment_chars
153*a9fa9459Szrj #define tc_comment_chars comment_chars
154*a9fa9459Szrj #endif
155*a9fa9459Szrj for (p = tc_comment_chars; *p; p++)
156*a9fa9459Szrj lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
157*a9fa9459Szrj
158*a9fa9459Szrj for (p = line_comment_chars; *p; p++)
159*a9fa9459Szrj lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
160*a9fa9459Szrj
161*a9fa9459Szrj #ifndef tc_line_separator_chars
162*a9fa9459Szrj #define tc_line_separator_chars line_separator_chars
163*a9fa9459Szrj #endif
164*a9fa9459Szrj for (p = tc_line_separator_chars; *p; p++)
165*a9fa9459Szrj lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
166*a9fa9459Szrj
167*a9fa9459Szrj #ifdef tc_parallel_separator_chars
168*a9fa9459Szrj /* This macro permits the processor to specify all characters which
169*a9fa9459Szrj separate parallel insns on the same line. */
170*a9fa9459Szrj for (p = tc_parallel_separator_chars; *p; p++)
171*a9fa9459Szrj lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
172*a9fa9459Szrj #endif
173*a9fa9459Szrj
174*a9fa9459Szrj /* Only allow slash-star comments if slash is not in use.
175*a9fa9459Szrj FIXME: This isn't right. We should always permit them. */
176*a9fa9459Szrj if (lex['/'] == 0)
177*a9fa9459Szrj lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
178*a9fa9459Szrj
179*a9fa9459Szrj #ifdef TC_M68K
180*a9fa9459Szrj if (m68k_mri)
181*a9fa9459Szrj {
182*a9fa9459Szrj lex['\''] = LEX_IS_STRINGQUOTE;
183*a9fa9459Szrj lex[';'] = LEX_IS_COMMENT_START;
184*a9fa9459Szrj lex['*'] = LEX_IS_LINE_COMMENT_START;
185*a9fa9459Szrj /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
186*a9fa9459Szrj then it can't be used in an expression. */
187*a9fa9459Szrj lex['!'] = LEX_IS_LINE_COMMENT_START;
188*a9fa9459Szrj }
189*a9fa9459Szrj #endif
190*a9fa9459Szrj
191*a9fa9459Szrj #ifdef TC_V850
192*a9fa9459Szrj lex['-'] = LEX_IS_DOUBLEDASH_1ST;
193*a9fa9459Szrj #endif
194*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
195*a9fa9459Szrj lex['|'] = LEX_IS_DOUBLEBAR_1ST;
196*a9fa9459Szrj #endif
197*a9fa9459Szrj #ifdef TC_D30V
198*a9fa9459Szrj /* Must do this is we want VLIW instruction with "->" or "<-". */
199*a9fa9459Szrj lex['-'] = LEX_IS_SYMBOL_COMPONENT;
200*a9fa9459Szrj #endif
201*a9fa9459Szrj
202*a9fa9459Szrj #ifdef H_TICK_HEX
203*a9fa9459Szrj if (enable_h_tick_hex)
204*a9fa9459Szrj {
205*a9fa9459Szrj lex['h'] = LEX_IS_H;
206*a9fa9459Szrj lex['H'] = LEX_IS_H;
207*a9fa9459Szrj }
208*a9fa9459Szrj #endif
209*a9fa9459Szrj }
210*a9fa9459Szrj
/* Saved state of the scrubber.  do_scrub_chars keeps these between
   calls so that it can return at any point and resume later.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to go back to once the current string, comment or pending
   output string has been dealt with.  */
static int old_state;
/* Text still to be emitted while in state -1.  */
static const char *out_string;
static char out_buf[20];
/* Bumped each time a backslash-newline inside a string is rewritten.  */
static int add_newlines;
/* Input carried over from the previous call, or NULL if there is none,
   and its length in bytes.  */
static char *saved_input;
static size_t saved_input_len;
/* Buffer handed to the GET callback to be filled with raw input.  */
static char input_buffer[32 * 1024];
/* Progress of the ".mri" pseudo-op recognizer: the next character
   expected, or NULL when no match is in progress.  */
static const char *mri_state;
/* The most recent character accepted by that recognizer.  */
static char mri_last_ch;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

struct app_save
{
  int          state;
  int          old_state;
  const char * out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  /* Private heap copy of the pending input; NULL if there was none.  */
  char *       saved_input;
  size_t       saved_input_len;
#ifdef TC_M68K
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
};
246*a9fa9459Szrj
247*a9fa9459Szrj char *
app_push(void)248*a9fa9459Szrj app_push (void)
249*a9fa9459Szrj {
250*a9fa9459Szrj struct app_save *saved;
251*a9fa9459Szrj
252*a9fa9459Szrj saved = XNEW (struct app_save);
253*a9fa9459Szrj saved->state = state;
254*a9fa9459Szrj saved->old_state = old_state;
255*a9fa9459Szrj saved->out_string = out_string;
256*a9fa9459Szrj memcpy (saved->out_buf, out_buf, sizeof (out_buf));
257*a9fa9459Szrj saved->add_newlines = add_newlines;
258*a9fa9459Szrj if (saved_input == NULL)
259*a9fa9459Szrj saved->saved_input = NULL;
260*a9fa9459Szrj else
261*a9fa9459Szrj {
262*a9fa9459Szrj saved->saved_input = XNEWVEC (char, saved_input_len);
263*a9fa9459Szrj memcpy (saved->saved_input, saved_input, saved_input_len);
264*a9fa9459Szrj saved->saved_input_len = saved_input_len;
265*a9fa9459Szrj }
266*a9fa9459Szrj #ifdef TC_M68K
267*a9fa9459Szrj saved->scrub_m68k_mri = scrub_m68k_mri;
268*a9fa9459Szrj #endif
269*a9fa9459Szrj saved->mri_state = mri_state;
270*a9fa9459Szrj saved->mri_last_ch = mri_last_ch;
271*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
272*a9fa9459Szrj saved->symver_state = symver_state;
273*a9fa9459Szrj #endif
274*a9fa9459Szrj
275*a9fa9459Szrj /* do_scrub_begin() is not useful, just wastes time. */
276*a9fa9459Szrj
277*a9fa9459Szrj state = 0;
278*a9fa9459Szrj saved_input = NULL;
279*a9fa9459Szrj add_newlines = 0;
280*a9fa9459Szrj
281*a9fa9459Szrj return (char *) saved;
282*a9fa9459Szrj }
283*a9fa9459Szrj
284*a9fa9459Szrj void
app_pop(char * arg)285*a9fa9459Szrj app_pop (char *arg)
286*a9fa9459Szrj {
287*a9fa9459Szrj struct app_save *saved = (struct app_save *) arg;
288*a9fa9459Szrj
289*a9fa9459Szrj /* There is no do_scrub_end (). */
290*a9fa9459Szrj state = saved->state;
291*a9fa9459Szrj old_state = saved->old_state;
292*a9fa9459Szrj out_string = saved->out_string;
293*a9fa9459Szrj memcpy (out_buf, saved->out_buf, sizeof (out_buf));
294*a9fa9459Szrj add_newlines = saved->add_newlines;
295*a9fa9459Szrj if (saved->saved_input == NULL)
296*a9fa9459Szrj saved_input = NULL;
297*a9fa9459Szrj else
298*a9fa9459Szrj {
299*a9fa9459Szrj gas_assert (saved->saved_input_len <= sizeof (input_buffer));
300*a9fa9459Szrj memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
301*a9fa9459Szrj saved_input = input_buffer;
302*a9fa9459Szrj saved_input_len = saved->saved_input_len;
303*a9fa9459Szrj free (saved->saved_input);
304*a9fa9459Szrj }
305*a9fa9459Szrj #ifdef TC_M68K
306*a9fa9459Szrj scrub_m68k_mri = saved->scrub_m68k_mri;
307*a9fa9459Szrj #endif
308*a9fa9459Szrj mri_state = saved->mri_state;
309*a9fa9459Szrj mri_last_ch = saved->mri_last_ch;
310*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
311*a9fa9459Szrj symver_state = saved->symver_state;
312*a9fa9459Szrj #endif
313*a9fa9459Szrj
314*a9fa9459Szrj free (arg);
315*a9fa9459Szrj }
316*a9fa9459Szrj
/* Translate the character following a backslash into the character it
   stands for.

   CH is the character after the backslash.  The letters b, f, n, r and
   t yield the matching control character; a single or double quote
   yields itself.  Any other value, including EOF, is returned
   unchanged: no other escapes are interpreted here.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
343*a9fa9459Szrj
344*a9fa9459Szrj /* This function is called to process input characters. The GET
345*a9fa9459Szrj parameter is used to retrieve more input characters. GET should
346*a9fa9459Szrj set its parameter to point to a buffer, and return the length of
347*a9fa9459Szrj the buffer; it should return 0 at end of file. The scrubbed output
348*a9fa9459Szrj characters are put into the buffer starting at TOSTART; the TOSTART
349*a9fa9459Szrj buffer is TOLEN bytes in length. The function returns the number
350*a9fa9459Szrj of scrubbed characters put into TOSTART. This will be TOLEN unless
351*a9fa9459Szrj end of file was seen. This function is arranged as a state
352*a9fa9459Szrj machine, and saves its state so that it may return at any point.
353*a9fa9459Szrj This is the way the old code used to work. */
354*a9fa9459Szrj
355*a9fa9459Szrj size_t
do_scrub_chars(size_t (* get)(char *,size_t),char * tostart,size_t tolen)356*a9fa9459Szrj do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
357*a9fa9459Szrj {
358*a9fa9459Szrj char *to = tostart;
359*a9fa9459Szrj char *toend = tostart + tolen;
360*a9fa9459Szrj char *from;
361*a9fa9459Szrj char *fromend;
362*a9fa9459Szrj size_t fromlen;
363*a9fa9459Szrj int ch, ch2 = 0;
364*a9fa9459Szrj /* Character that started the string we're working on. */
365*a9fa9459Szrj static char quotechar;
366*a9fa9459Szrj
367*a9fa9459Szrj /*State 0: beginning of normal line
368*a9fa9459Szrj 1: After first whitespace on line (flush more white)
369*a9fa9459Szrj 2: After first non-white (opcode) on line (keep 1white)
370*a9fa9459Szrj 3: after second white on line (into operands) (flush white)
371*a9fa9459Szrj 4: after putting out a .linefile, put out digits
372*a9fa9459Szrj 5: parsing a string, then go to old-state
373*a9fa9459Szrj 6: putting out \ escape in a "d string.
374*a9fa9459Szrj 7: no longer used
375*a9fa9459Szrj 8: no longer used
376*a9fa9459Szrj 9: After seeing symbol char in state 3 (keep 1white after symchar)
377*a9fa9459Szrj 10: After seeing whitespace in state 9 (keep white before symchar)
378*a9fa9459Szrj 11: After seeing a symbol character in state 0 (eg a label definition)
379*a9fa9459Szrj -1: output string in out_string and go to the state in old_state
380*a9fa9459Szrj -2: flush text until a '*' '/' is seen, then go to state old_state
381*a9fa9459Szrj #ifdef TC_V850
382*a9fa9459Szrj 12: After seeing a dash, looking for a second dash as a start
383*a9fa9459Szrj of comment.
384*a9fa9459Szrj #endif
385*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
386*a9fa9459Szrj 13: After seeing a vertical bar, looking for a second
387*a9fa9459Szrj vertical bar as a parallel expression separator.
388*a9fa9459Szrj #endif
389*a9fa9459Szrj #ifdef TC_PREDICATE_START_CHAR
390*a9fa9459Szrj 14: After seeing a predicate start character at state 0, looking
391*a9fa9459Szrj for a predicate end character as predicate.
392*a9fa9459Szrj 15: After seeing a predicate start character at state 1, looking
393*a9fa9459Szrj for a predicate end character as predicate.
394*a9fa9459Szrj #endif
395*a9fa9459Szrj #ifdef TC_Z80
396*a9fa9459Szrj 16: After seeing an 'a' or an 'A' at the start of a symbol
397*a9fa9459Szrj 17: After seeing an 'f' or an 'F' in state 16
398*a9fa9459Szrj #endif
399*a9fa9459Szrj */
400*a9fa9459Szrj
401*a9fa9459Szrj /* I added states 9 and 10 because the MIPS ECOFF assembler uses
402*a9fa9459Szrj constructs like ``.loc 1 20''. This was turning into ``.loc
403*a9fa9459Szrj 120''. States 9 and 10 ensure that a space is never dropped in
404*a9fa9459Szrj between characters which could appear in an identifier. Ian
405*a9fa9459Szrj Taylor, ian@cygnus.com.
406*a9fa9459Szrj
407*a9fa9459Szrj I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
408*a9fa9459Szrj correctly on the PA (and any other target where colons are optional).
409*a9fa9459Szrj Jeff Law, law@cs.utah.edu.
410*a9fa9459Szrj
411*a9fa9459Szrj I added state 13 so that something like "cmp r1, r2 || trap #1" does not
412*a9fa9459Szrj get squashed into "cmp r1,r2||trap#1", with the all important space
413*a9fa9459Szrj between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
414*a9fa9459Szrj
415*a9fa9459Szrj /* This macro gets the next input character. */
416*a9fa9459Szrj
417*a9fa9459Szrj #define GET() \
418*a9fa9459Szrj (from < fromend \
419*a9fa9459Szrj ? * (unsigned char *) (from++) \
420*a9fa9459Szrj : (saved_input = NULL, \
421*a9fa9459Szrj fromlen = (*get) (input_buffer, sizeof input_buffer), \
422*a9fa9459Szrj from = input_buffer, \
423*a9fa9459Szrj fromend = from + fromlen, \
424*a9fa9459Szrj (fromlen == 0 \
425*a9fa9459Szrj ? EOF \
426*a9fa9459Szrj : * (unsigned char *) (from++))))
427*a9fa9459Szrj
428*a9fa9459Szrj /* This macro pushes a character back on the input stream. */
429*a9fa9459Szrj
430*a9fa9459Szrj #define UNGET(uch) (*--from = (uch))
431*a9fa9459Szrj
432*a9fa9459Szrj /* This macro puts a character into the output buffer. If this
433*a9fa9459Szrj character fills the output buffer, this macro jumps to the label
434*a9fa9459Szrj TOFULL. We use this rather ugly approach because we need to
435*a9fa9459Szrj handle two different termination conditions: EOF on the input
436*a9fa9459Szrj stream, and a full output buffer. It would be simpler if we
437*a9fa9459Szrj always read in the entire input stream before processing it, but
438*a9fa9459Szrj I don't want to make such a significant change to the assembler's
439*a9fa9459Szrj memory usage. */
440*a9fa9459Szrj
441*a9fa9459Szrj #define PUT(pch) \
442*a9fa9459Szrj do \
443*a9fa9459Szrj { \
444*a9fa9459Szrj *to++ = (pch); \
445*a9fa9459Szrj if (to >= toend) \
446*a9fa9459Szrj goto tofull; \
447*a9fa9459Szrj } \
448*a9fa9459Szrj while (0)
449*a9fa9459Szrj
450*a9fa9459Szrj if (saved_input != NULL)
451*a9fa9459Szrj {
452*a9fa9459Szrj from = saved_input;
453*a9fa9459Szrj fromend = from + saved_input_len;
454*a9fa9459Szrj }
455*a9fa9459Szrj else
456*a9fa9459Szrj {
457*a9fa9459Szrj fromlen = (*get) (input_buffer, sizeof input_buffer);
458*a9fa9459Szrj if (fromlen == 0)
459*a9fa9459Szrj return 0;
460*a9fa9459Szrj from = input_buffer;
461*a9fa9459Szrj fromend = from + fromlen;
462*a9fa9459Szrj }
463*a9fa9459Szrj
464*a9fa9459Szrj while (1)
465*a9fa9459Szrj {
466*a9fa9459Szrj /* The cases in this switch end with continue, in order to
467*a9fa9459Szrj branch back to the top of this while loop and generate the
468*a9fa9459Szrj next output character in the appropriate state. */
469*a9fa9459Szrj switch (state)
470*a9fa9459Szrj {
471*a9fa9459Szrj case -1:
472*a9fa9459Szrj ch = *out_string++;
473*a9fa9459Szrj if (*out_string == '\0')
474*a9fa9459Szrj {
475*a9fa9459Szrj state = old_state;
476*a9fa9459Szrj old_state = 3;
477*a9fa9459Szrj }
478*a9fa9459Szrj PUT (ch);
479*a9fa9459Szrj continue;
480*a9fa9459Szrj
481*a9fa9459Szrj case -2:
482*a9fa9459Szrj for (;;)
483*a9fa9459Szrj {
484*a9fa9459Szrj do
485*a9fa9459Szrj {
486*a9fa9459Szrj ch = GET ();
487*a9fa9459Szrj
488*a9fa9459Szrj if (ch == EOF)
489*a9fa9459Szrj {
490*a9fa9459Szrj as_warn (_("end of file in comment"));
491*a9fa9459Szrj goto fromeof;
492*a9fa9459Szrj }
493*a9fa9459Szrj
494*a9fa9459Szrj if (ch == '\n')
495*a9fa9459Szrj PUT ('\n');
496*a9fa9459Szrj }
497*a9fa9459Szrj while (ch != '*');
498*a9fa9459Szrj
499*a9fa9459Szrj while ((ch = GET ()) == '*')
500*a9fa9459Szrj ;
501*a9fa9459Szrj
502*a9fa9459Szrj if (ch == EOF)
503*a9fa9459Szrj {
504*a9fa9459Szrj as_warn (_("end of file in comment"));
505*a9fa9459Szrj goto fromeof;
506*a9fa9459Szrj }
507*a9fa9459Szrj
508*a9fa9459Szrj if (ch == '/')
509*a9fa9459Szrj break;
510*a9fa9459Szrj
511*a9fa9459Szrj UNGET (ch);
512*a9fa9459Szrj }
513*a9fa9459Szrj
514*a9fa9459Szrj state = old_state;
515*a9fa9459Szrj UNGET (' ');
516*a9fa9459Szrj continue;
517*a9fa9459Szrj
518*a9fa9459Szrj case 4:
519*a9fa9459Szrj ch = GET ();
520*a9fa9459Szrj if (ch == EOF)
521*a9fa9459Szrj goto fromeof;
522*a9fa9459Szrj else if (ch >= '0' && ch <= '9')
523*a9fa9459Szrj PUT (ch);
524*a9fa9459Szrj else
525*a9fa9459Szrj {
526*a9fa9459Szrj while (ch != EOF && IS_WHITESPACE (ch))
527*a9fa9459Szrj ch = GET ();
528*a9fa9459Szrj if (ch == '"')
529*a9fa9459Szrj {
530*a9fa9459Szrj quotechar = ch;
531*a9fa9459Szrj state = 5;
532*a9fa9459Szrj old_state = 3;
533*a9fa9459Szrj PUT (ch);
534*a9fa9459Szrj }
535*a9fa9459Szrj else
536*a9fa9459Szrj {
537*a9fa9459Szrj while (ch != EOF && ch != '\n')
538*a9fa9459Szrj ch = GET ();
539*a9fa9459Szrj state = 0;
540*a9fa9459Szrj PUT (ch);
541*a9fa9459Szrj }
542*a9fa9459Szrj }
543*a9fa9459Szrj continue;
544*a9fa9459Szrj
545*a9fa9459Szrj case 5:
546*a9fa9459Szrj /* We are going to copy everything up to a quote character,
547*a9fa9459Szrj with special handling for a backslash. We try to
548*a9fa9459Szrj optimize the copying in the simple case without using the
549*a9fa9459Szrj GET and PUT macros. */
550*a9fa9459Szrj {
551*a9fa9459Szrj char *s;
552*a9fa9459Szrj ptrdiff_t len;
553*a9fa9459Szrj
554*a9fa9459Szrj for (s = from; s < fromend; s++)
555*a9fa9459Szrj {
556*a9fa9459Szrj ch = *s;
557*a9fa9459Szrj if (ch == '\\'
558*a9fa9459Szrj || ch == quotechar
559*a9fa9459Szrj || ch == '\n')
560*a9fa9459Szrj break;
561*a9fa9459Szrj }
562*a9fa9459Szrj len = s - from;
563*a9fa9459Szrj if (len > toend - to)
564*a9fa9459Szrj len = toend - to;
565*a9fa9459Szrj if (len > 0)
566*a9fa9459Szrj {
567*a9fa9459Szrj memcpy (to, from, len);
568*a9fa9459Szrj to += len;
569*a9fa9459Szrj from += len;
570*a9fa9459Szrj if (to >= toend)
571*a9fa9459Szrj goto tofull;
572*a9fa9459Szrj }
573*a9fa9459Szrj }
574*a9fa9459Szrj
575*a9fa9459Szrj ch = GET ();
576*a9fa9459Szrj if (ch == EOF)
577*a9fa9459Szrj {
578*a9fa9459Szrj /* This buffer is here specifically so
579*a9fa9459Szrj that the UNGET below will work. */
580*a9fa9459Szrj static char one_char_buf[1];
581*a9fa9459Szrj
582*a9fa9459Szrj as_warn (_("end of file in string; '%c' inserted"), quotechar);
583*a9fa9459Szrj state = old_state;
584*a9fa9459Szrj from = fromend = one_char_buf + 1;
585*a9fa9459Szrj fromlen = 1;
586*a9fa9459Szrj UNGET ('\n');
587*a9fa9459Szrj PUT (quotechar);
588*a9fa9459Szrj }
589*a9fa9459Szrj else if (ch == quotechar)
590*a9fa9459Szrj {
591*a9fa9459Szrj state = old_state;
592*a9fa9459Szrj PUT (ch);
593*a9fa9459Szrj }
594*a9fa9459Szrj #ifndef NO_STRING_ESCAPES
595*a9fa9459Szrj else if (ch == '\\')
596*a9fa9459Szrj {
597*a9fa9459Szrj state = 6;
598*a9fa9459Szrj PUT (ch);
599*a9fa9459Szrj }
600*a9fa9459Szrj #endif
601*a9fa9459Szrj else if (scrub_m68k_mri && ch == '\n')
602*a9fa9459Szrj {
603*a9fa9459Szrj /* Just quietly terminate the string. This permits lines like
604*a9fa9459Szrj bne label loop if we haven't reach end yet. */
605*a9fa9459Szrj state = old_state;
606*a9fa9459Szrj UNGET (ch);
607*a9fa9459Szrj PUT ('\'');
608*a9fa9459Szrj }
609*a9fa9459Szrj else
610*a9fa9459Szrj {
611*a9fa9459Szrj PUT (ch);
612*a9fa9459Szrj }
613*a9fa9459Szrj continue;
614*a9fa9459Szrj
615*a9fa9459Szrj case 6:
616*a9fa9459Szrj state = 5;
617*a9fa9459Szrj ch = GET ();
618*a9fa9459Szrj switch (ch)
619*a9fa9459Szrj {
620*a9fa9459Szrj /* Handle strings broken across lines, by turning '\n' into
621*a9fa9459Szrj '\\' and 'n'. */
622*a9fa9459Szrj case '\n':
623*a9fa9459Szrj UNGET ('n');
624*a9fa9459Szrj add_newlines++;
625*a9fa9459Szrj PUT ('\\');
626*a9fa9459Szrj continue;
627*a9fa9459Szrj
628*a9fa9459Szrj case EOF:
629*a9fa9459Szrj as_warn (_("end of file in string; '%c' inserted"), quotechar);
630*a9fa9459Szrj PUT (quotechar);
631*a9fa9459Szrj continue;
632*a9fa9459Szrj
633*a9fa9459Szrj case '"':
634*a9fa9459Szrj case '\\':
635*a9fa9459Szrj case 'b':
636*a9fa9459Szrj case 'f':
637*a9fa9459Szrj case 'n':
638*a9fa9459Szrj case 'r':
639*a9fa9459Szrj case 't':
640*a9fa9459Szrj case 'v':
641*a9fa9459Szrj case 'x':
642*a9fa9459Szrj case 'X':
643*a9fa9459Szrj case '0':
644*a9fa9459Szrj case '1':
645*a9fa9459Szrj case '2':
646*a9fa9459Szrj case '3':
647*a9fa9459Szrj case '4':
648*a9fa9459Szrj case '5':
649*a9fa9459Szrj case '6':
650*a9fa9459Szrj case '7':
651*a9fa9459Szrj break;
652*a9fa9459Szrj
653*a9fa9459Szrj default:
654*a9fa9459Szrj #ifdef ONLY_STANDARD_ESCAPES
655*a9fa9459Szrj as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
656*a9fa9459Szrj #endif
657*a9fa9459Szrj break;
658*a9fa9459Szrj }
659*a9fa9459Szrj PUT (ch);
660*a9fa9459Szrj continue;
661*a9fa9459Szrj
662*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
663*a9fa9459Szrj case 13:
664*a9fa9459Szrj ch = GET ();
665*a9fa9459Szrj if (ch != '|')
666*a9fa9459Szrj abort ();
667*a9fa9459Szrj
668*a9fa9459Szrj /* Reset back to state 1 and pretend that we are parsing a
669*a9fa9459Szrj line from just after the first white space. */
670*a9fa9459Szrj state = 1;
671*a9fa9459Szrj PUT ('|');
672*a9fa9459Szrj #ifdef TC_TIC6X
673*a9fa9459Szrj /* "||^" is used for SPMASKed instructions. */
674*a9fa9459Szrj ch = GET ();
675*a9fa9459Szrj if (ch == EOF)
676*a9fa9459Szrj goto fromeof;
677*a9fa9459Szrj else if (ch == '^')
678*a9fa9459Szrj PUT ('^');
679*a9fa9459Szrj else
680*a9fa9459Szrj UNGET (ch);
681*a9fa9459Szrj #endif
682*a9fa9459Szrj continue;
683*a9fa9459Szrj #endif
684*a9fa9459Szrj #ifdef TC_Z80
685*a9fa9459Szrj case 16:
686*a9fa9459Szrj /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
687*a9fa9459Szrj ch = GET ();
688*a9fa9459Szrj if (ch == 'f' || ch == 'F')
689*a9fa9459Szrj {
690*a9fa9459Szrj state = 17;
691*a9fa9459Szrj PUT (ch);
692*a9fa9459Szrj }
693*a9fa9459Szrj else
694*a9fa9459Szrj {
695*a9fa9459Szrj state = 9;
696*a9fa9459Szrj break;
697*a9fa9459Szrj }
698*a9fa9459Szrj case 17:
699*a9fa9459Szrj /* We have seen "af" at the start of a symbol,
700*a9fa9459Szrj a ' here is a part of that symbol. */
701*a9fa9459Szrj ch = GET ();
702*a9fa9459Szrj state = 9;
703*a9fa9459Szrj if (ch == '\'')
704*a9fa9459Szrj /* Change to avoid warning about unclosed string. */
705*a9fa9459Szrj PUT ('`');
706*a9fa9459Szrj else if (ch != EOF)
707*a9fa9459Szrj UNGET (ch);
708*a9fa9459Szrj break;
709*a9fa9459Szrj #endif
710*a9fa9459Szrj }
711*a9fa9459Szrj
712*a9fa9459Szrj /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
713*a9fa9459Szrj
714*a9fa9459Szrj /* flushchar: */
715*a9fa9459Szrj ch = GET ();
716*a9fa9459Szrj
717*a9fa9459Szrj #ifdef TC_PREDICATE_START_CHAR
718*a9fa9459Szrj if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
719*a9fa9459Szrj {
720*a9fa9459Szrj state += 14;
721*a9fa9459Szrj PUT (ch);
722*a9fa9459Szrj continue;
723*a9fa9459Szrj }
724*a9fa9459Szrj else if (state == 14 || state == 15)
725*a9fa9459Szrj {
726*a9fa9459Szrj if (ch == TC_PREDICATE_END_CHAR)
727*a9fa9459Szrj {
728*a9fa9459Szrj state -= 14;
729*a9fa9459Szrj PUT (ch);
730*a9fa9459Szrj ch = GET ();
731*a9fa9459Szrj }
732*a9fa9459Szrj else
733*a9fa9459Szrj {
734*a9fa9459Szrj PUT (ch);
735*a9fa9459Szrj continue;
736*a9fa9459Szrj }
737*a9fa9459Szrj }
738*a9fa9459Szrj #endif
739*a9fa9459Szrj
740*a9fa9459Szrj recycle:
741*a9fa9459Szrj
742*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
743*a9fa9459Szrj /* We need to watch out for .symver directives. See the comment later
744*a9fa9459Szrj in this function. */
745*a9fa9459Szrj if (symver_state == NULL)
746*a9fa9459Szrj {
747*a9fa9459Szrj if ((state == 0 || state == 1) && ch == symver_pseudo[0])
748*a9fa9459Szrj symver_state = symver_pseudo + 1;
749*a9fa9459Szrj }
750*a9fa9459Szrj else
751*a9fa9459Szrj {
752*a9fa9459Szrj /* We advance to the next state if we find the right
753*a9fa9459Szrj character. */
754*a9fa9459Szrj if (ch != '\0' && (*symver_state == ch))
755*a9fa9459Szrj ++symver_state;
756*a9fa9459Szrj else if (*symver_state != '\0')
757*a9fa9459Szrj /* We did not get the expected character, or we didn't
758*a9fa9459Szrj get a valid terminating character after seeing the
759*a9fa9459Szrj entire pseudo-op, so we must go back to the beginning. */
760*a9fa9459Szrj symver_state = NULL;
761*a9fa9459Szrj else
762*a9fa9459Szrj {
763*a9fa9459Szrj /* We've read the entire pseudo-op. If this is the end
764*a9fa9459Szrj of the line, go back to the beginning. */
765*a9fa9459Szrj if (IS_NEWLINE (ch))
766*a9fa9459Szrj symver_state = NULL;
767*a9fa9459Szrj }
768*a9fa9459Szrj }
769*a9fa9459Szrj #endif /* TC_ARM && OBJ_ELF */
770*a9fa9459Szrj
771*a9fa9459Szrj #ifdef TC_M68K
772*a9fa9459Szrj /* We want to have pseudo-ops which control whether we are in
773*a9fa9459Szrj MRI mode or not. Unfortunately, since m68k MRI mode affects
774*a9fa9459Szrj the scrubber, that means that we need a special purpose
775*a9fa9459Szrj recognizer here. */
776*a9fa9459Szrj if (mri_state == NULL)
777*a9fa9459Szrj {
778*a9fa9459Szrj if ((state == 0 || state == 1)
779*a9fa9459Szrj && ch == mri_pseudo[0])
780*a9fa9459Szrj mri_state = mri_pseudo + 1;
781*a9fa9459Szrj }
782*a9fa9459Szrj else
783*a9fa9459Szrj {
784*a9fa9459Szrj /* We advance to the next state if we find the right
785*a9fa9459Szrj character, or if we need a space character and we get any
786*a9fa9459Szrj whitespace character, or if we need a '0' and we get a
787*a9fa9459Szrj '1' (this is so that we only need one state to handle
788*a9fa9459Szrj ``.mri 0'' and ``.mri 1''). */
789*a9fa9459Szrj if (ch != '\0'
790*a9fa9459Szrj && (*mri_state == ch
791*a9fa9459Szrj || (*mri_state == ' '
792*a9fa9459Szrj && lex[ch] == LEX_IS_WHITESPACE)
793*a9fa9459Szrj || (*mri_state == '0'
794*a9fa9459Szrj && ch == '1')))
795*a9fa9459Szrj {
796*a9fa9459Szrj mri_last_ch = ch;
797*a9fa9459Szrj ++mri_state;
798*a9fa9459Szrj }
799*a9fa9459Szrj else if (*mri_state != '\0'
800*a9fa9459Szrj || (lex[ch] != LEX_IS_WHITESPACE
801*a9fa9459Szrj && lex[ch] != LEX_IS_NEWLINE))
802*a9fa9459Szrj {
803*a9fa9459Szrj /* We did not get the expected character, or we didn't
804*a9fa9459Szrj get a valid terminating character after seeing the
805*a9fa9459Szrj entire pseudo-op, so we must go back to the
806*a9fa9459Szrj beginning. */
807*a9fa9459Szrj mri_state = NULL;
808*a9fa9459Szrj }
809*a9fa9459Szrj else
810*a9fa9459Szrj {
811*a9fa9459Szrj /* We've read the entire pseudo-op. mri_last_ch is
812*a9fa9459Szrj either '0' or '1' indicating whether to enter or
813*a9fa9459Szrj leave MRI mode. */
814*a9fa9459Szrj do_scrub_begin (mri_last_ch == '1');
815*a9fa9459Szrj mri_state = NULL;
816*a9fa9459Szrj
817*a9fa9459Szrj /* We continue handling the character as usual. The
818*a9fa9459Szrj main gas reader must also handle the .mri pseudo-op
819*a9fa9459Szrj to control expression parsing and the like. */
820*a9fa9459Szrj }
821*a9fa9459Szrj }
822*a9fa9459Szrj #endif
823*a9fa9459Szrj
824*a9fa9459Szrj if (ch == EOF)
825*a9fa9459Szrj {
826*a9fa9459Szrj if (state != 0)
827*a9fa9459Szrj {
828*a9fa9459Szrj as_warn (_("end of file not at end of a line; newline inserted"));
829*a9fa9459Szrj state = 0;
830*a9fa9459Szrj PUT ('\n');
831*a9fa9459Szrj }
832*a9fa9459Szrj goto fromeof;
833*a9fa9459Szrj }
834*a9fa9459Szrj
835*a9fa9459Szrj switch (lex[ch])
836*a9fa9459Szrj {
837*a9fa9459Szrj case LEX_IS_WHITESPACE:
838*a9fa9459Szrj do
839*a9fa9459Szrj {
840*a9fa9459Szrj ch = GET ();
841*a9fa9459Szrj }
842*a9fa9459Szrj while (ch != EOF && IS_WHITESPACE (ch));
843*a9fa9459Szrj if (ch == EOF)
844*a9fa9459Szrj goto fromeof;
845*a9fa9459Szrj
846*a9fa9459Szrj if (state == 0)
847*a9fa9459Szrj {
848*a9fa9459Szrj /* Preserve a single whitespace character at the
849*a9fa9459Szrj beginning of a line. */
850*a9fa9459Szrj state = 1;
851*a9fa9459Szrj UNGET (ch);
852*a9fa9459Szrj PUT (' ');
853*a9fa9459Szrj break;
854*a9fa9459Szrj }
855*a9fa9459Szrj
856*a9fa9459Szrj #ifdef KEEP_WHITE_AROUND_COLON
857*a9fa9459Szrj if (lex[ch] == LEX_IS_COLON)
858*a9fa9459Szrj {
859*a9fa9459Szrj /* Only keep this white if there's no white *after* the
860*a9fa9459Szrj colon. */
861*a9fa9459Szrj ch2 = GET ();
862*a9fa9459Szrj if (ch2 != EOF)
863*a9fa9459Szrj UNGET (ch2);
864*a9fa9459Szrj if (!IS_WHITESPACE (ch2))
865*a9fa9459Szrj {
866*a9fa9459Szrj state = 9;
867*a9fa9459Szrj UNGET (ch);
868*a9fa9459Szrj PUT (' ');
869*a9fa9459Szrj break;
870*a9fa9459Szrj }
871*a9fa9459Szrj }
872*a9fa9459Szrj #endif
873*a9fa9459Szrj if (IS_COMMENT (ch)
874*a9fa9459Szrj || ch == '/'
875*a9fa9459Szrj || IS_LINE_SEPARATOR (ch)
876*a9fa9459Szrj || IS_PARALLEL_SEPARATOR (ch))
877*a9fa9459Szrj {
878*a9fa9459Szrj if (scrub_m68k_mri)
879*a9fa9459Szrj {
880*a9fa9459Szrj /* In MRI mode, we keep these spaces. */
881*a9fa9459Szrj UNGET (ch);
882*a9fa9459Szrj PUT (' ');
883*a9fa9459Szrj break;
884*a9fa9459Szrj }
885*a9fa9459Szrj goto recycle;
886*a9fa9459Szrj }
887*a9fa9459Szrj
888*a9fa9459Szrj /* If we're in state 2 or 11, we've seen a non-white
889*a9fa9459Szrj character followed by whitespace. If the next character
890*a9fa9459Szrj is ':', this is whitespace after a label name which we
891*a9fa9459Szrj normally must ignore. In MRI mode, though, spaces are
892*a9fa9459Szrj not permitted between the label and the colon. */
893*a9fa9459Szrj if ((state == 2 || state == 11)
894*a9fa9459Szrj && lex[ch] == LEX_IS_COLON
895*a9fa9459Szrj && ! scrub_m68k_mri)
896*a9fa9459Szrj {
897*a9fa9459Szrj state = 1;
898*a9fa9459Szrj PUT (ch);
899*a9fa9459Szrj break;
900*a9fa9459Szrj }
901*a9fa9459Szrj
902*a9fa9459Szrj switch (state)
903*a9fa9459Szrj {
904*a9fa9459Szrj case 1:
905*a9fa9459Szrj /* We can arrive here if we leave a leading whitespace
906*a9fa9459Szrj character at the beginning of a line. */
907*a9fa9459Szrj goto recycle;
908*a9fa9459Szrj case 2:
909*a9fa9459Szrj state = 3;
910*a9fa9459Szrj if (to + 1 < toend)
911*a9fa9459Szrj {
912*a9fa9459Szrj /* Optimize common case by skipping UNGET/GET. */
913*a9fa9459Szrj PUT (' '); /* Sp after opco */
914*a9fa9459Szrj goto recycle;
915*a9fa9459Szrj }
916*a9fa9459Szrj UNGET (ch);
917*a9fa9459Szrj PUT (' ');
918*a9fa9459Szrj break;
919*a9fa9459Szrj case 3:
920*a9fa9459Szrj #ifndef TC_KEEP_OPERAND_SPACES
921*a9fa9459Szrj /* For TI C6X, we keep these spaces as they may separate
922*a9fa9459Szrj functional unit specifiers from operands. */
923*a9fa9459Szrj if (scrub_m68k_mri)
924*a9fa9459Szrj #endif
925*a9fa9459Szrj {
926*a9fa9459Szrj /* In MRI mode, we keep these spaces. */
927*a9fa9459Szrj UNGET (ch);
928*a9fa9459Szrj PUT (' ');
929*a9fa9459Szrj break;
930*a9fa9459Szrj }
931*a9fa9459Szrj goto recycle; /* Sp in operands */
932*a9fa9459Szrj case 9:
933*a9fa9459Szrj case 10:
934*a9fa9459Szrj #ifndef TC_KEEP_OPERAND_SPACES
935*a9fa9459Szrj if (scrub_m68k_mri)
936*a9fa9459Szrj #endif
937*a9fa9459Szrj {
938*a9fa9459Szrj /* In MRI mode, we keep these spaces. */
939*a9fa9459Szrj state = 3;
940*a9fa9459Szrj UNGET (ch);
941*a9fa9459Szrj PUT (' ');
942*a9fa9459Szrj break;
943*a9fa9459Szrj }
944*a9fa9459Szrj state = 10; /* Sp after symbol char */
945*a9fa9459Szrj goto recycle;
946*a9fa9459Szrj case 11:
947*a9fa9459Szrj if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
948*a9fa9459Szrj state = 1;
949*a9fa9459Szrj else
950*a9fa9459Szrj {
951*a9fa9459Szrj /* We know that ch is not ':', since we tested that
952*a9fa9459Szrj case above. Therefore this is not a label, so it
953*a9fa9459Szrj must be the opcode, and we've just seen the
954*a9fa9459Szrj whitespace after it. */
955*a9fa9459Szrj state = 3;
956*a9fa9459Szrj }
957*a9fa9459Szrj UNGET (ch);
958*a9fa9459Szrj PUT (' '); /* Sp after label definition. */
959*a9fa9459Szrj break;
960*a9fa9459Szrj default:
961*a9fa9459Szrj BAD_CASE (state);
962*a9fa9459Szrj }
963*a9fa9459Szrj break;
964*a9fa9459Szrj
965*a9fa9459Szrj case LEX_IS_TWOCHAR_COMMENT_1ST:
966*a9fa9459Szrj ch2 = GET ();
967*a9fa9459Szrj if (ch2 == '*')
968*a9fa9459Szrj {
969*a9fa9459Szrj for (;;)
970*a9fa9459Szrj {
971*a9fa9459Szrj do
972*a9fa9459Szrj {
973*a9fa9459Szrj ch2 = GET ();
974*a9fa9459Szrj if (ch2 != EOF && IS_NEWLINE (ch2))
975*a9fa9459Szrj add_newlines++;
976*a9fa9459Szrj }
977*a9fa9459Szrj while (ch2 != EOF && ch2 != '*');
978*a9fa9459Szrj
979*a9fa9459Szrj while (ch2 == '*')
980*a9fa9459Szrj ch2 = GET ();
981*a9fa9459Szrj
982*a9fa9459Szrj if (ch2 == EOF || ch2 == '/')
983*a9fa9459Szrj break;
984*a9fa9459Szrj
985*a9fa9459Szrj /* This UNGET will ensure that we count newlines
986*a9fa9459Szrj correctly. */
987*a9fa9459Szrj UNGET (ch2);
988*a9fa9459Szrj }
989*a9fa9459Szrj
990*a9fa9459Szrj if (ch2 == EOF)
991*a9fa9459Szrj as_warn (_("end of file in multiline comment"));
992*a9fa9459Szrj
993*a9fa9459Szrj ch = ' ';
994*a9fa9459Szrj goto recycle;
995*a9fa9459Szrj }
996*a9fa9459Szrj #ifdef DOUBLESLASH_LINE_COMMENTS
997*a9fa9459Szrj else if (ch2 == '/')
998*a9fa9459Szrj {
999*a9fa9459Szrj do
1000*a9fa9459Szrj {
1001*a9fa9459Szrj ch = GET ();
1002*a9fa9459Szrj }
1003*a9fa9459Szrj while (ch != EOF && !IS_NEWLINE (ch));
1004*a9fa9459Szrj if (ch == EOF)
1005*a9fa9459Szrj as_warn ("end of file in comment; newline inserted");
1006*a9fa9459Szrj state = 0;
1007*a9fa9459Szrj PUT ('\n');
1008*a9fa9459Szrj break;
1009*a9fa9459Szrj }
1010*a9fa9459Szrj #endif
1011*a9fa9459Szrj else
1012*a9fa9459Szrj {
1013*a9fa9459Szrj if (ch2 != EOF)
1014*a9fa9459Szrj UNGET (ch2);
1015*a9fa9459Szrj if (state == 9 || state == 10)
1016*a9fa9459Szrj state = 3;
1017*a9fa9459Szrj PUT (ch);
1018*a9fa9459Szrj }
1019*a9fa9459Szrj break;
1020*a9fa9459Szrj
1021*a9fa9459Szrj case LEX_IS_STRINGQUOTE:
1022*a9fa9459Szrj quotechar = ch;
1023*a9fa9459Szrj if (state == 10)
1024*a9fa9459Szrj {
1025*a9fa9459Szrj /* Preserve the whitespace in foo "bar". */
1026*a9fa9459Szrj UNGET (ch);
1027*a9fa9459Szrj state = 3;
1028*a9fa9459Szrj PUT (' ');
1029*a9fa9459Szrj
1030*a9fa9459Szrj /* PUT didn't jump out. We could just break, but we
1031*a9fa9459Szrj know what will happen, so optimize a bit. */
1032*a9fa9459Szrj ch = GET ();
1033*a9fa9459Szrj old_state = 3;
1034*a9fa9459Szrj }
1035*a9fa9459Szrj else if (state == 9)
1036*a9fa9459Szrj old_state = 3;
1037*a9fa9459Szrj else
1038*a9fa9459Szrj old_state = state;
1039*a9fa9459Szrj state = 5;
1040*a9fa9459Szrj PUT (ch);
1041*a9fa9459Szrj break;
1042*a9fa9459Szrj
1043*a9fa9459Szrj #ifndef IEEE_STYLE
1044*a9fa9459Szrj case LEX_IS_ONECHAR_QUOTE:
1045*a9fa9459Szrj #ifdef H_TICK_HEX
1046*a9fa9459Szrj if (state == 9 && enable_h_tick_hex)
1047*a9fa9459Szrj {
1048*a9fa9459Szrj char c;
1049*a9fa9459Szrj
1050*a9fa9459Szrj c = GET ();
1051*a9fa9459Szrj as_warn ("'%c found after symbol", c);
1052*a9fa9459Szrj UNGET (c);
1053*a9fa9459Szrj }
1054*a9fa9459Szrj #endif
1055*a9fa9459Szrj if (state == 10)
1056*a9fa9459Szrj {
1057*a9fa9459Szrj /* Preserve the whitespace in foo 'b'. */
1058*a9fa9459Szrj UNGET (ch);
1059*a9fa9459Szrj state = 3;
1060*a9fa9459Szrj PUT (' ');
1061*a9fa9459Szrj break;
1062*a9fa9459Szrj }
1063*a9fa9459Szrj ch = GET ();
1064*a9fa9459Szrj if (ch == EOF)
1065*a9fa9459Szrj {
1066*a9fa9459Szrj as_warn (_("end of file after a one-character quote; \\0 inserted"));
1067*a9fa9459Szrj ch = 0;
1068*a9fa9459Szrj }
1069*a9fa9459Szrj if (ch == '\\')
1070*a9fa9459Szrj {
1071*a9fa9459Szrj ch = GET ();
1072*a9fa9459Szrj if (ch == EOF)
1073*a9fa9459Szrj {
1074*a9fa9459Szrj as_warn (_("end of file in escape character"));
1075*a9fa9459Szrj ch = '\\';
1076*a9fa9459Szrj }
1077*a9fa9459Szrj else
1078*a9fa9459Szrj ch = process_escape (ch);
1079*a9fa9459Szrj }
1080*a9fa9459Szrj sprintf (out_buf, "%d", (int) (unsigned char) ch);
1081*a9fa9459Szrj
1082*a9fa9459Szrj /* None of these 'x constants for us. We want 'x'. */
1083*a9fa9459Szrj if ((ch = GET ()) != '\'')
1084*a9fa9459Szrj {
1085*a9fa9459Szrj #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1086*a9fa9459Szrj as_warn (_("missing close quote; (assumed)"));
1087*a9fa9459Szrj #else
1088*a9fa9459Szrj if (ch != EOF)
1089*a9fa9459Szrj UNGET (ch);
1090*a9fa9459Szrj #endif
1091*a9fa9459Szrj }
1092*a9fa9459Szrj if (strlen (out_buf) == 1)
1093*a9fa9459Szrj {
1094*a9fa9459Szrj PUT (out_buf[0]);
1095*a9fa9459Szrj break;
1096*a9fa9459Szrj }
1097*a9fa9459Szrj if (state == 9)
1098*a9fa9459Szrj old_state = 3;
1099*a9fa9459Szrj else
1100*a9fa9459Szrj old_state = state;
1101*a9fa9459Szrj state = -1;
1102*a9fa9459Szrj out_string = out_buf;
1103*a9fa9459Szrj PUT (*out_string++);
1104*a9fa9459Szrj break;
1105*a9fa9459Szrj #endif
1106*a9fa9459Szrj
1107*a9fa9459Szrj case LEX_IS_COLON:
1108*a9fa9459Szrj #ifdef KEEP_WHITE_AROUND_COLON
1109*a9fa9459Szrj state = 9;
1110*a9fa9459Szrj #else
1111*a9fa9459Szrj if (state == 9 || state == 10)
1112*a9fa9459Szrj state = 3;
1113*a9fa9459Szrj else if (state != 3)
1114*a9fa9459Szrj state = 1;
1115*a9fa9459Szrj #endif
1116*a9fa9459Szrj PUT (ch);
1117*a9fa9459Szrj break;
1118*a9fa9459Szrj
1119*a9fa9459Szrj case LEX_IS_NEWLINE:
1120*a9fa9459Szrj /* Roll out a bunch of newlines from inside comments, etc. */
1121*a9fa9459Szrj if (add_newlines)
1122*a9fa9459Szrj {
1123*a9fa9459Szrj --add_newlines;
1124*a9fa9459Szrj UNGET (ch);
1125*a9fa9459Szrj }
1126*a9fa9459Szrj /* Fall through. */
1127*a9fa9459Szrj
1128*a9fa9459Szrj case LEX_IS_LINE_SEPARATOR:
1129*a9fa9459Szrj state = 0;
1130*a9fa9459Szrj PUT (ch);
1131*a9fa9459Szrj break;
1132*a9fa9459Szrj
1133*a9fa9459Szrj case LEX_IS_PARALLEL_SEPARATOR:
1134*a9fa9459Szrj state = 1;
1135*a9fa9459Szrj PUT (ch);
1136*a9fa9459Szrj break;
1137*a9fa9459Szrj
1138*a9fa9459Szrj #ifdef TC_V850
1139*a9fa9459Szrj case LEX_IS_DOUBLEDASH_1ST:
1140*a9fa9459Szrj ch2 = GET ();
1141*a9fa9459Szrj if (ch2 != '-')
1142*a9fa9459Szrj {
1143*a9fa9459Szrj if (ch2 != EOF)
1144*a9fa9459Szrj UNGET (ch2);
1145*a9fa9459Szrj goto de_fault;
1146*a9fa9459Szrj }
1147*a9fa9459Szrj /* Read and skip to end of line. */
1148*a9fa9459Szrj do
1149*a9fa9459Szrj {
1150*a9fa9459Szrj ch = GET ();
1151*a9fa9459Szrj }
1152*a9fa9459Szrj while (ch != EOF && ch != '\n');
1153*a9fa9459Szrj
1154*a9fa9459Szrj if (ch == EOF)
1155*a9fa9459Szrj as_warn (_("end of file in comment; newline inserted"));
1156*a9fa9459Szrj
1157*a9fa9459Szrj state = 0;
1158*a9fa9459Szrj PUT ('\n');
1159*a9fa9459Szrj break;
1160*a9fa9459Szrj #endif
1161*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
1162*a9fa9459Szrj case LEX_IS_DOUBLEBAR_1ST:
1163*a9fa9459Szrj ch2 = GET ();
1164*a9fa9459Szrj if (ch2 != EOF)
1165*a9fa9459Szrj UNGET (ch2);
1166*a9fa9459Szrj if (ch2 != '|')
1167*a9fa9459Szrj goto de_fault;
1168*a9fa9459Szrj
1169*a9fa9459Szrj /* Handle '||' in two states as invoking PUT twice might
1170*a9fa9459Szrj result in the first one jumping out of this loop. We'd
1171*a9fa9459Szrj then lose track of the state and one '|' char. */
1172*a9fa9459Szrj state = 13;
1173*a9fa9459Szrj PUT ('|');
1174*a9fa9459Szrj break;
1175*a9fa9459Szrj #endif
1176*a9fa9459Szrj case LEX_IS_LINE_COMMENT_START:
1177*a9fa9459Szrj /* FIXME-someday: The two character comment stuff was badly
1178*a9fa9459Szrj thought out. On i386, we want '/' as line comment start
1179*a9fa9459Szrj AND we want C style comments. hence this hack. The
1180*a9fa9459Szrj whole lexical process should be reworked. xoxorich. */
1181*a9fa9459Szrj if (ch == '/')
1182*a9fa9459Szrj {
1183*a9fa9459Szrj ch2 = GET ();
1184*a9fa9459Szrj if (ch2 == '*')
1185*a9fa9459Szrj {
1186*a9fa9459Szrj old_state = 3;
1187*a9fa9459Szrj state = -2;
1188*a9fa9459Szrj break;
1189*a9fa9459Szrj }
1190*a9fa9459Szrj else
1191*a9fa9459Szrj {
1192*a9fa9459Szrj UNGET (ch2);
1193*a9fa9459Szrj }
1194*a9fa9459Szrj }
1195*a9fa9459Szrj
1196*a9fa9459Szrj if (state == 0 || state == 1) /* Only comment at start of line. */
1197*a9fa9459Szrj {
1198*a9fa9459Szrj int startch;
1199*a9fa9459Szrj
1200*a9fa9459Szrj startch = ch;
1201*a9fa9459Szrj
1202*a9fa9459Szrj do
1203*a9fa9459Szrj {
1204*a9fa9459Szrj ch = GET ();
1205*a9fa9459Szrj }
1206*a9fa9459Szrj while (ch != EOF && IS_WHITESPACE (ch));
1207*a9fa9459Szrj
1208*a9fa9459Szrj if (ch == EOF)
1209*a9fa9459Szrj {
1210*a9fa9459Szrj as_warn (_("end of file in comment; newline inserted"));
1211*a9fa9459Szrj PUT ('\n');
1212*a9fa9459Szrj break;
1213*a9fa9459Szrj }
1214*a9fa9459Szrj
1215*a9fa9459Szrj if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1216*a9fa9459Szrj {
1217*a9fa9459Szrj /* Not a cpp line. */
1218*a9fa9459Szrj while (ch != EOF && !IS_NEWLINE (ch))
1219*a9fa9459Szrj ch = GET ();
1220*a9fa9459Szrj if (ch == EOF)
1221*a9fa9459Szrj {
1222*a9fa9459Szrj as_warn (_("end of file in comment; newline inserted"));
1223*a9fa9459Szrj PUT ('\n');
1224*a9fa9459Szrj }
1225*a9fa9459Szrj else /* IS_NEWLINE (ch) */
1226*a9fa9459Szrj {
1227*a9fa9459Szrj /* To process non-zero add_newlines. */
1228*a9fa9459Szrj UNGET (ch);
1229*a9fa9459Szrj }
1230*a9fa9459Szrj state = 0;
1231*a9fa9459Szrj break;
1232*a9fa9459Szrj }
1233*a9fa9459Szrj /* Looks like `# 123 "filename"' from cpp. */
1234*a9fa9459Szrj UNGET (ch);
1235*a9fa9459Szrj old_state = 4;
1236*a9fa9459Szrj state = -1;
1237*a9fa9459Szrj if (scrub_m68k_mri)
1238*a9fa9459Szrj out_string = "\tlinefile ";
1239*a9fa9459Szrj else
1240*a9fa9459Szrj out_string = "\t.linefile ";
1241*a9fa9459Szrj PUT (*out_string++);
1242*a9fa9459Szrj break;
1243*a9fa9459Szrj }
1244*a9fa9459Szrj
1245*a9fa9459Szrj #ifdef TC_D10V
1246*a9fa9459Szrj /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1247*a9fa9459Szrj Trap is the only short insn that has a first operand that is
1248*a9fa9459Szrj neither register nor label.
1249*a9fa9459Szrj We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1 .
1250*a9fa9459Szrj We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1251*a9fa9459Szrj already LEX_IS_LINE_COMMENT_START. However, it is the
1252*a9fa9459Szrj only character in line_comment_chars for d10v, hence we
1253*a9fa9459Szrj can recognize it as such. */
1254*a9fa9459Szrj /* An alternative approach would be to reset the state to 1 when
1255*a9fa9459Szrj we see '||', '<-' or '->', but that seems to be overkill. */
1256*a9fa9459Szrj if (state == 10)
1257*a9fa9459Szrj PUT (' ');
1258*a9fa9459Szrj #endif
1259*a9fa9459Szrj /* We have a line comment character which is not at the
1260*a9fa9459Szrj start of a line. If this is also a normal comment
1261*a9fa9459Szrj character, fall through. Otherwise treat it as a default
1262*a9fa9459Szrj character. */
1263*a9fa9459Szrj if (strchr (tc_comment_chars, ch) == NULL
1264*a9fa9459Szrj && (! scrub_m68k_mri
1265*a9fa9459Szrj || (ch != '!' && ch != '*')))
1266*a9fa9459Szrj goto de_fault;
1267*a9fa9459Szrj if (scrub_m68k_mri
1268*a9fa9459Szrj && (ch == '!' || ch == '*' || ch == '#')
1269*a9fa9459Szrj && state != 1
1270*a9fa9459Szrj && state != 10)
1271*a9fa9459Szrj goto de_fault;
1272*a9fa9459Szrj /* Fall through. */
1273*a9fa9459Szrj case LEX_IS_COMMENT_START:
1274*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
1275*a9fa9459Szrj /* On the ARM, `@' is the comment character.
1276*a9fa9459Szrj Unfortunately this is also a special character in ELF .symver
1277*a9fa9459Szrj directives (and .type, though we deal with those another way).
1278*a9fa9459Szrj So we check if this line is such a directive, and treat
1279*a9fa9459Szrj the character as default if so. This is a hack. */
1280*a9fa9459Szrj if ((symver_state != NULL) && (*symver_state == 0))
1281*a9fa9459Szrj goto de_fault;
1282*a9fa9459Szrj #endif
1283*a9fa9459Szrj
1284*a9fa9459Szrj #ifdef TC_ARM
1285*a9fa9459Szrj /* For the ARM, care is needed not to damage occurrences of \@
1286*a9fa9459Szrj by stripping the @ onwards. Yuck. */
1287*a9fa9459Szrj if (to > tostart && *(to - 1) == '\\')
1288*a9fa9459Szrj /* Do not treat the @ as a start-of-comment. */
1289*a9fa9459Szrj goto de_fault;
1290*a9fa9459Szrj #endif
1291*a9fa9459Szrj
1292*a9fa9459Szrj #ifdef WARN_COMMENTS
1293*a9fa9459Szrj if (!found_comment)
1294*a9fa9459Szrj found_comment_file = as_where (&found_comment);
1295*a9fa9459Szrj #endif
1296*a9fa9459Szrj do
1297*a9fa9459Szrj {
1298*a9fa9459Szrj ch = GET ();
1299*a9fa9459Szrj }
1300*a9fa9459Szrj while (ch != EOF && !IS_NEWLINE (ch));
1301*a9fa9459Szrj if (ch == EOF)
1302*a9fa9459Szrj as_warn (_("end of file in comment; newline inserted"));
1303*a9fa9459Szrj state = 0;
1304*a9fa9459Szrj PUT ('\n');
1305*a9fa9459Szrj break;
1306*a9fa9459Szrj
1307*a9fa9459Szrj #ifdef H_TICK_HEX
1308*a9fa9459Szrj case LEX_IS_H:
1309*a9fa9459Szrj /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1310*a9fa9459Szrj the H' with 0x to make them gas-style hex characters. */
1311*a9fa9459Szrj if (enable_h_tick_hex)
1312*a9fa9459Szrj {
1313*a9fa9459Szrj char quot;
1314*a9fa9459Szrj
1315*a9fa9459Szrj quot = GET ();
1316*a9fa9459Szrj if (quot == '\'')
1317*a9fa9459Szrj {
1318*a9fa9459Szrj UNGET ('x');
1319*a9fa9459Szrj ch = '0';
1320*a9fa9459Szrj }
1321*a9fa9459Szrj else
1322*a9fa9459Szrj UNGET (quot);
1323*a9fa9459Szrj }
1324*a9fa9459Szrj /* FALL THROUGH */
1325*a9fa9459Szrj #endif
1326*a9fa9459Szrj
1327*a9fa9459Szrj case LEX_IS_SYMBOL_COMPONENT:
1328*a9fa9459Szrj if (state == 10)
1329*a9fa9459Szrj {
1330*a9fa9459Szrj /* This is a symbol character following another symbol
1331*a9fa9459Szrj character, with whitespace in between. We skipped
1332*a9fa9459Szrj the whitespace earlier, so output it now. */
1333*a9fa9459Szrj UNGET (ch);
1334*a9fa9459Szrj state = 3;
1335*a9fa9459Szrj PUT (' ');
1336*a9fa9459Szrj break;
1337*a9fa9459Szrj }
1338*a9fa9459Szrj
1339*a9fa9459Szrj #ifdef TC_Z80
1340*a9fa9459Szrj /* "af'" is a symbol containing '\''. */
1341*a9fa9459Szrj if (state == 3 && (ch == 'a' || ch == 'A'))
1342*a9fa9459Szrj {
1343*a9fa9459Szrj state = 16;
1344*a9fa9459Szrj PUT (ch);
1345*a9fa9459Szrj ch = GET ();
1346*a9fa9459Szrj if (ch == 'f' || ch == 'F')
1347*a9fa9459Szrj {
1348*a9fa9459Szrj state = 17;
1349*a9fa9459Szrj PUT (ch);
1350*a9fa9459Szrj break;
1351*a9fa9459Szrj }
1352*a9fa9459Szrj else
1353*a9fa9459Szrj {
1354*a9fa9459Szrj state = 9;
1355*a9fa9459Szrj if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1356*a9fa9459Szrj {
1357*a9fa9459Szrj if (ch != EOF)
1358*a9fa9459Szrj UNGET (ch);
1359*a9fa9459Szrj break;
1360*a9fa9459Szrj }
1361*a9fa9459Szrj }
1362*a9fa9459Szrj }
1363*a9fa9459Szrj #endif
1364*a9fa9459Szrj if (state == 3)
1365*a9fa9459Szrj state = 9;
1366*a9fa9459Szrj
1367*a9fa9459Szrj /* This is a common case. Quickly copy CH and all the
1368*a9fa9459Szrj following symbol component or normal characters. */
1369*a9fa9459Szrj if (to + 1 < toend
1370*a9fa9459Szrj && mri_state == NULL
1371*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
1372*a9fa9459Szrj && symver_state == NULL
1373*a9fa9459Szrj #endif
1374*a9fa9459Szrj )
1375*a9fa9459Szrj {
1376*a9fa9459Szrj char *s;
1377*a9fa9459Szrj ptrdiff_t len;
1378*a9fa9459Szrj
1379*a9fa9459Szrj for (s = from; s < fromend; s++)
1380*a9fa9459Szrj {
1381*a9fa9459Szrj int type;
1382*a9fa9459Szrj
1383*a9fa9459Szrj ch2 = *(unsigned char *) s;
1384*a9fa9459Szrj type = lex[ch2];
1385*a9fa9459Szrj if (type != 0
1386*a9fa9459Szrj && type != LEX_IS_SYMBOL_COMPONENT)
1387*a9fa9459Szrj break;
1388*a9fa9459Szrj }
1389*a9fa9459Szrj
1390*a9fa9459Szrj if (s > from)
1391*a9fa9459Szrj /* Handle the last character normally, for
1392*a9fa9459Szrj simplicity. */
1393*a9fa9459Szrj --s;
1394*a9fa9459Szrj
1395*a9fa9459Szrj len = s - from;
1396*a9fa9459Szrj
1397*a9fa9459Szrj if (len > (toend - to) - 1)
1398*a9fa9459Szrj len = (toend - to) - 1;
1399*a9fa9459Szrj
1400*a9fa9459Szrj if (len > 0)
1401*a9fa9459Szrj {
1402*a9fa9459Szrj PUT (ch);
1403*a9fa9459Szrj memcpy (to, from, len);
1404*a9fa9459Szrj to += len;
1405*a9fa9459Szrj from += len;
1406*a9fa9459Szrj if (to >= toend)
1407*a9fa9459Szrj goto tofull;
1408*a9fa9459Szrj ch = GET ();
1409*a9fa9459Szrj }
1410*a9fa9459Szrj }
1411*a9fa9459Szrj
1412*a9fa9459Szrj /* Fall through. */
1413*a9fa9459Szrj default:
1414*a9fa9459Szrj de_fault:
1415*a9fa9459Szrj /* Some relatively `normal' character. */
1416*a9fa9459Szrj if (state == 0)
1417*a9fa9459Szrj {
1418*a9fa9459Szrj state = 11; /* Now seeing label definition. */
1419*a9fa9459Szrj }
1420*a9fa9459Szrj else if (state == 1)
1421*a9fa9459Szrj {
1422*a9fa9459Szrj state = 2; /* Ditto. */
1423*a9fa9459Szrj }
1424*a9fa9459Szrj else if (state == 9)
1425*a9fa9459Szrj {
1426*a9fa9459Szrj if (!IS_SYMBOL_COMPONENT (ch))
1427*a9fa9459Szrj state = 3;
1428*a9fa9459Szrj }
1429*a9fa9459Szrj else if (state == 10)
1430*a9fa9459Szrj {
1431*a9fa9459Szrj if (ch == '\\')
1432*a9fa9459Szrj {
1433*a9fa9459Szrj /* Special handling for backslash: a backslash may
1434*a9fa9459Szrj be the beginning of a formal parameter (of a
1435*a9fa9459Szrj macro) following another symbol character, with
1436*a9fa9459Szrj whitespace in between. If that is the case, we
1437*a9fa9459Szrj output a space before the parameter. Strictly
1438*a9fa9459Szrj speaking, correct handling depends upon what the
1439*a9fa9459Szrj macro parameter expands into; if the parameter
1440*a9fa9459Szrj expands into something which does not start with
1441*a9fa9459Szrj an operand character, then we don't want to keep
1442*a9fa9459Szrj the space. We don't have enough information to
1443*a9fa9459Szrj make the right choice, so here we are making the
1444*a9fa9459Szrj choice which is more likely to be correct. */
1445*a9fa9459Szrj if (to + 1 >= toend)
1446*a9fa9459Szrj {
1447*a9fa9459Szrj /* If we're near the end of the buffer, save the
1448*a9fa9459Szrj character for the next time round. Otherwise
1449*a9fa9459Szrj we'll lose our state. */
1450*a9fa9459Szrj UNGET (ch);
1451*a9fa9459Szrj goto tofull;
1452*a9fa9459Szrj }
1453*a9fa9459Szrj *to++ = ' ';
1454*a9fa9459Szrj }
1455*a9fa9459Szrj
1456*a9fa9459Szrj state = 3;
1457*a9fa9459Szrj }
1458*a9fa9459Szrj PUT (ch);
1459*a9fa9459Szrj break;
1460*a9fa9459Szrj }
1461*a9fa9459Szrj }
1462*a9fa9459Szrj
1463*a9fa9459Szrj /*NOTREACHED*/
1464*a9fa9459Szrj
1465*a9fa9459Szrj fromeof:
1466*a9fa9459Szrj /* We have reached the end of the input. */
1467*a9fa9459Szrj return to - tostart;
1468*a9fa9459Szrj
1469*a9fa9459Szrj tofull:
1470*a9fa9459Szrj /* The output buffer is full. Save any input we have not yet
1471*a9fa9459Szrj processed. */
1472*a9fa9459Szrj if (fromend > from)
1473*a9fa9459Szrj {
1474*a9fa9459Szrj saved_input = from;
1475*a9fa9459Szrj saved_input_len = fromend - from;
1476*a9fa9459Szrj }
1477*a9fa9459Szrj else
1478*a9fa9459Szrj saved_input = NULL;
1479*a9fa9459Szrj
1480*a9fa9459Szrj return to - tostart;
1481*a9fa9459Szrj }
1482