1*fae548d3Szrj /* This is the Assembler Pre-Processor
2*fae548d3Szrj Copyright (C) 1987-2020 Free Software Foundation, Inc.
3*fae548d3Szrj
4*fae548d3Szrj This file is part of GAS, the GNU Assembler.
5*fae548d3Szrj
6*fae548d3Szrj GAS is free software; you can redistribute it and/or modify
7*fae548d3Szrj it under the terms of the GNU General Public License as published by
8*fae548d3Szrj the Free Software Foundation; either version 3, or (at your option)
9*fae548d3Szrj any later version.
10*fae548d3Szrj
11*fae548d3Szrj GAS is distributed in the hope that it will be useful, but WITHOUT
12*fae548d3Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13*fae548d3Szrj or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14*fae548d3Szrj License for more details.
15*fae548d3Szrj
16*fae548d3Szrj You should have received a copy of the GNU General Public License
17*fae548d3Szrj along with GAS; see the file COPYING. If not, write to the Free
18*fae548d3Szrj Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19*fae548d3Szrj 02110-1301, USA. */
20*fae548d3Szrj
21*fae548d3Szrj /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22*fae548d3Szrj /* App, the assembler pre-processor. This pre-processor strips out
23*fae548d3Szrj excess spaces, turns single-quoted characters into a decimal
24*fae548d3Szrj constant, and turns the # in # <number> <filename> <garbage> into a
25*fae548d3Szrj .linefile. This needs better error-handling. */
26*fae548d3Szrj
27*fae548d3Szrj #include "as.h"
28*fae548d3Szrj
/* Compilers that do not claim ISO C conformance may not understand
   `const'; make the keyword vanish unless the build already defined it.  */
#if (__STDC__ != 1)
# ifndef const
#  define const /* empty */
# endif
#endif

#if defined (H_TICK_HEX)
/* When nonzero, do_scrub_begin classifies 'h' and 'H' as LEX_IS_H.  */
int enable_h_tick_hex = 0;
#endif

#if defined (TC_M68K)
/* Nonzero while the scrubber itself is working in m68k MRI mode.  This
   is kept apart from flag_m68k_mri because the .mri pseudo-op affects
   the two flags at different times.  */
static int scrub_m68k_mri;

/* Spelling of the pseudo-op that switches MRI mode on and off.
   do_scrub_chars matches input against this pattern, also accepting
   '1' where the pattern has '0'.  */
static const char mri_pseudo[] = ".mri 0";
#else
#define scrub_m68k_mri 0
#endif

#if defined (TC_ARM) && defined (OBJ_ELF)
/* Pseudo-op for which `@' characters need special treatment, and the
   current position within it while do_scrub_chars is matching (NULL
   when no match is in progress).  */
static const char symver_pseudo[] = ".symver";
static const char *symver_state;
#endif
#if defined (TC_ARM)
/* ARM-only scrubber state; saved and restored by app_push/app_pop.  */
static char last_char;
#endif
61*fae548d3Szrj
/* Classification of every possible input byte, filled in by
   do_scrub_begin.  An entry of zero means "no special meaning".  */
static char lex[256];

/* Characters that are always symbol constituents.  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Classification codes stored in LEX.  Some codes only exist for
   particular targets.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif

/* Predicates on a character's classification.  C indexes LEX directly,
   so it must be a value in the range 0..255.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
98*fae548d3Szrj
99*fae548d3Szrj /* FIXME-soon: The entire lexer/parser thingy should be
100*fae548d3Szrj built statically at compile time rather than dynamically
101*fae548d3Szrj each and every time the assembler is run. xoxorich. */
102*fae548d3Szrj
103*fae548d3Szrj void
do_scrub_begin(int m68k_mri ATTRIBUTE_UNUSED)104*fae548d3Szrj do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
105*fae548d3Szrj {
106*fae548d3Szrj const char *p;
107*fae548d3Szrj int c;
108*fae548d3Szrj
109*fae548d3Szrj lex[' '] = LEX_IS_WHITESPACE;
110*fae548d3Szrj lex['\t'] = LEX_IS_WHITESPACE;
111*fae548d3Szrj lex['\r'] = LEX_IS_WHITESPACE;
112*fae548d3Szrj lex['\n'] = LEX_IS_NEWLINE;
113*fae548d3Szrj lex[':'] = LEX_IS_COLON;
114*fae548d3Szrj
115*fae548d3Szrj #ifdef TC_M68K
116*fae548d3Szrj scrub_m68k_mri = m68k_mri;
117*fae548d3Szrj
118*fae548d3Szrj if (! m68k_mri)
119*fae548d3Szrj #endif
120*fae548d3Szrj {
121*fae548d3Szrj lex['"'] = LEX_IS_STRINGQUOTE;
122*fae548d3Szrj
123*fae548d3Szrj #if ! defined (TC_HPPA)
124*fae548d3Szrj lex['\''] = LEX_IS_ONECHAR_QUOTE;
125*fae548d3Szrj #endif
126*fae548d3Szrj
127*fae548d3Szrj #ifdef SINGLE_QUOTE_STRINGS
128*fae548d3Szrj lex['\''] = LEX_IS_STRINGQUOTE;
129*fae548d3Szrj #endif
130*fae548d3Szrj }
131*fae548d3Szrj
132*fae548d3Szrj /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133*fae548d3Szrj in state 5 of do_scrub_chars must be changed. */
134*fae548d3Szrj
135*fae548d3Szrj /* Note that these override the previous defaults, e.g. if ';' is a
136*fae548d3Szrj comment char, then it isn't a line separator. */
137*fae548d3Szrj for (p = symbol_chars; *p; ++p)
138*fae548d3Szrj lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139*fae548d3Szrj
140*fae548d3Szrj for (c = 128; c < 256; ++c)
141*fae548d3Szrj lex[c] = LEX_IS_SYMBOL_COMPONENT;
142*fae548d3Szrj
143*fae548d3Szrj #ifdef tc_symbol_chars
144*fae548d3Szrj /* This macro permits the processor to specify all characters which
145*fae548d3Szrj may appears in an operand. This will prevent the scrubber from
146*fae548d3Szrj discarding meaningful whitespace in certain cases. The i386
147*fae548d3Szrj backend uses this to support prefixes, which can confuse the
148*fae548d3Szrj scrubber as to whether it is parsing operands or opcodes. */
149*fae548d3Szrj for (p = tc_symbol_chars; *p; ++p)
150*fae548d3Szrj lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151*fae548d3Szrj #endif
152*fae548d3Szrj
153*fae548d3Szrj /* The m68k backend wants to be able to change comment_chars. */
154*fae548d3Szrj #ifndef tc_comment_chars
155*fae548d3Szrj #define tc_comment_chars comment_chars
156*fae548d3Szrj #endif
157*fae548d3Szrj for (p = tc_comment_chars; *p; p++)
158*fae548d3Szrj lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159*fae548d3Szrj
160*fae548d3Szrj for (p = line_comment_chars; *p; p++)
161*fae548d3Szrj lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162*fae548d3Szrj
163*fae548d3Szrj #ifndef tc_line_separator_chars
164*fae548d3Szrj #define tc_line_separator_chars line_separator_chars
165*fae548d3Szrj #endif
166*fae548d3Szrj for (p = tc_line_separator_chars; *p; p++)
167*fae548d3Szrj lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
168*fae548d3Szrj
169*fae548d3Szrj #ifdef tc_parallel_separator_chars
170*fae548d3Szrj /* This macro permits the processor to specify all characters which
171*fae548d3Szrj separate parallel insns on the same line. */
172*fae548d3Szrj for (p = tc_parallel_separator_chars; *p; p++)
173*fae548d3Szrj lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
174*fae548d3Szrj #endif
175*fae548d3Szrj
176*fae548d3Szrj /* Only allow slash-star comments if slash is not in use.
177*fae548d3Szrj FIXME: This isn't right. We should always permit them. */
178*fae548d3Szrj if (lex['/'] == 0)
179*fae548d3Szrj lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
180*fae548d3Szrj
181*fae548d3Szrj #ifdef TC_M68K
182*fae548d3Szrj if (m68k_mri)
183*fae548d3Szrj {
184*fae548d3Szrj lex['\''] = LEX_IS_STRINGQUOTE;
185*fae548d3Szrj lex[';'] = LEX_IS_COMMENT_START;
186*fae548d3Szrj lex['*'] = LEX_IS_LINE_COMMENT_START;
187*fae548d3Szrj /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
188*fae548d3Szrj then it can't be used in an expression. */
189*fae548d3Szrj lex['!'] = LEX_IS_LINE_COMMENT_START;
190*fae548d3Szrj }
191*fae548d3Szrj #endif
192*fae548d3Szrj
193*fae548d3Szrj #ifdef TC_V850
194*fae548d3Szrj lex['-'] = LEX_IS_DOUBLEDASH_1ST;
195*fae548d3Szrj #endif
196*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
197*fae548d3Szrj lex['|'] = LEX_IS_DOUBLEBAR_1ST;
198*fae548d3Szrj #endif
199*fae548d3Szrj #ifdef TC_D30V
200*fae548d3Szrj /* Must do this is we want VLIW instruction with "->" or "<-". */
201*fae548d3Szrj lex['-'] = LEX_IS_SYMBOL_COMPONENT;
202*fae548d3Szrj #endif
203*fae548d3Szrj
204*fae548d3Szrj #ifdef H_TICK_HEX
205*fae548d3Szrj if (enable_h_tick_hex)
206*fae548d3Szrj {
207*fae548d3Szrj lex['h'] = LEX_IS_H;
208*fae548d3Szrj lex['H'] = LEX_IS_H;
209*fae548d3Szrj }
210*fae548d3Szrj #endif
211*fae548d3Szrj }
212*fae548d3Szrj
/* Scrubber state that must survive between calls to do_scrub_chars.  */

/* Current state of the state machine, and the state to return to once
   a temporary state (string, comment, queued output) is finished.  */
static int state;
static int old_state;

/* Text queued for output while in state -1.  */
static const char *out_string;

/* Small scratch buffer; saved and restored along with OUT_STRING.  */
static char out_buf[20];

/* Incremented each time a string is continued across a newline.  */
static int add_newlines;

/* Input left unconsumed by the previous call (NULL if none), and the
   number of bytes it holds.  */
static char *saved_input;
static size_t saved_input_len;

/* Buffer handed to the GET callback to be filled with raw input.  */
static char input_buffer[32 * 1024];

/* Progress of the ".mri" pseudo-op recognizer: position in the pattern
   (NULL when not matching) and the most recently matched character.  */
static const char *mri_state;
static char mri_last_ch;
224*fae548d3Szrj
225*fae548d3Szrj /* Data structure for saving the state of app across #include's. Note that
226*fae548d3Szrj app is called asynchronously to the parsing of the .include's, so our
227*fae548d3Szrj state at the time .include is interpreted is completely unrelated.
228*fae548d3Szrj That's why we have to save it all. */
229*fae548d3Szrj
230*fae548d3Szrj struct app_save
231*fae548d3Szrj {
232*fae548d3Szrj int state;
233*fae548d3Szrj int old_state;
234*fae548d3Szrj const char * out_string;
235*fae548d3Szrj char out_buf[sizeof (out_buf)];
236*fae548d3Szrj int add_newlines;
237*fae548d3Szrj char * saved_input;
238*fae548d3Szrj size_t saved_input_len;
239*fae548d3Szrj #ifdef TC_M68K
240*fae548d3Szrj int scrub_m68k_mri;
241*fae548d3Szrj #endif
242*fae548d3Szrj const char * mri_state;
243*fae548d3Szrj char mri_last_ch;
244*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
245*fae548d3Szrj const char * symver_state;
246*fae548d3Szrj #endif
247*fae548d3Szrj #ifdef TC_ARM
248*fae548d3Szrj char last_char;
249*fae548d3Szrj #endif
250*fae548d3Szrj };
251*fae548d3Szrj
252*fae548d3Szrj char *
app_push(void)253*fae548d3Szrj app_push (void)
254*fae548d3Szrj {
255*fae548d3Szrj struct app_save *saved;
256*fae548d3Szrj
257*fae548d3Szrj saved = XNEW (struct app_save);
258*fae548d3Szrj saved->state = state;
259*fae548d3Szrj saved->old_state = old_state;
260*fae548d3Szrj saved->out_string = out_string;
261*fae548d3Szrj memcpy (saved->out_buf, out_buf, sizeof (out_buf));
262*fae548d3Szrj saved->add_newlines = add_newlines;
263*fae548d3Szrj if (saved_input == NULL)
264*fae548d3Szrj saved->saved_input = NULL;
265*fae548d3Szrj else
266*fae548d3Szrj {
267*fae548d3Szrj saved->saved_input = XNEWVEC (char, saved_input_len);
268*fae548d3Szrj memcpy (saved->saved_input, saved_input, saved_input_len);
269*fae548d3Szrj saved->saved_input_len = saved_input_len;
270*fae548d3Szrj }
271*fae548d3Szrj #ifdef TC_M68K
272*fae548d3Szrj saved->scrub_m68k_mri = scrub_m68k_mri;
273*fae548d3Szrj #endif
274*fae548d3Szrj saved->mri_state = mri_state;
275*fae548d3Szrj saved->mri_last_ch = mri_last_ch;
276*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
277*fae548d3Szrj saved->symver_state = symver_state;
278*fae548d3Szrj #endif
279*fae548d3Szrj #ifdef TC_ARM
280*fae548d3Szrj saved->last_char = last_char;
281*fae548d3Szrj #endif
282*fae548d3Szrj
283*fae548d3Szrj /* do_scrub_begin() is not useful, just wastes time. */
284*fae548d3Szrj
285*fae548d3Szrj state = 0;
286*fae548d3Szrj saved_input = NULL;
287*fae548d3Szrj add_newlines = 0;
288*fae548d3Szrj
289*fae548d3Szrj return (char *) saved;
290*fae548d3Szrj }
291*fae548d3Szrj
292*fae548d3Szrj void
app_pop(char * arg)293*fae548d3Szrj app_pop (char *arg)
294*fae548d3Szrj {
295*fae548d3Szrj struct app_save *saved = (struct app_save *) arg;
296*fae548d3Szrj
297*fae548d3Szrj /* There is no do_scrub_end (). */
298*fae548d3Szrj state = saved->state;
299*fae548d3Szrj old_state = saved->old_state;
300*fae548d3Szrj out_string = saved->out_string;
301*fae548d3Szrj memcpy (out_buf, saved->out_buf, sizeof (out_buf));
302*fae548d3Szrj add_newlines = saved->add_newlines;
303*fae548d3Szrj if (saved->saved_input == NULL)
304*fae548d3Szrj saved_input = NULL;
305*fae548d3Szrj else
306*fae548d3Szrj {
307*fae548d3Szrj gas_assert (saved->saved_input_len <= sizeof (input_buffer));
308*fae548d3Szrj memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
309*fae548d3Szrj saved_input = input_buffer;
310*fae548d3Szrj saved_input_len = saved->saved_input_len;
311*fae548d3Szrj free (saved->saved_input);
312*fae548d3Szrj }
313*fae548d3Szrj #ifdef TC_M68K
314*fae548d3Szrj scrub_m68k_mri = saved->scrub_m68k_mri;
315*fae548d3Szrj #endif
316*fae548d3Szrj mri_state = saved->mri_state;
317*fae548d3Szrj mri_last_ch = saved->mri_last_ch;
318*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
319*fae548d3Szrj symver_state = saved->symver_state;
320*fae548d3Szrj #endif
321*fae548d3Szrj #ifdef TC_ARM
322*fae548d3Szrj last_char = saved->last_char;
323*fae548d3Szrj #endif
324*fae548d3Szrj
325*fae548d3Szrj free (arg);
326*fae548d3Szrj }
327*fae548d3Szrj
328*fae548d3Szrj /* @@ This assumes that \n &c are the same on host and target. This is not
329*fae548d3Szrj necessarily true. */
330*fae548d3Szrj
/* Translate CH, the character following a backslash, into the
   character the escape sequence denotes.  Only \b, \f, \n, \r and \t
   are translated; any other value, including a quote character, is
   returned unchanged.  */

static int
process_escape (int ch)
{
  /* Escape letters paired with the control characters they stand for.  */
  static const struct
  {
    char letter;
    char value;
  } escapes[] =
    {
      { 'b', '\b' },
      { 'f', '\f' },
      { 'n', '\n' },
      { 'r', '\r' },
      { 't', '\t' }
    };
  size_t i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (ch == escapes[i].letter)
      return escapes[i].value;

  /* Quotes and everything else stand for themselves.  */
  return ch;
}
354*fae548d3Szrj
355*fae548d3Szrj /* This function is called to process input characters. The GET
356*fae548d3Szrj parameter is used to retrieve more input characters. GET should
357*fae548d3Szrj set its parameter to point to a buffer, and return the length of
358*fae548d3Szrj the buffer; it should return 0 at end of file. The scrubbed output
359*fae548d3Szrj characters are put into the buffer starting at TOSTART; the TOSTART
360*fae548d3Szrj buffer is TOLEN bytes in length. The function returns the number
361*fae548d3Szrj of scrubbed characters put into TOSTART. This will be TOLEN unless
362*fae548d3Szrj end of file was seen. This function is arranged as a state
363*fae548d3Szrj machine, and saves its state so that it may return at any point.
364*fae548d3Szrj This is the way the old code used to work. */
365*fae548d3Szrj
366*fae548d3Szrj size_t
do_scrub_chars(size_t (* get)(char *,size_t),char * tostart,size_t tolen)367*fae548d3Szrj do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
368*fae548d3Szrj {
369*fae548d3Szrj char *to = tostart;
370*fae548d3Szrj char *toend = tostart + tolen;
371*fae548d3Szrj char *from;
372*fae548d3Szrj char *fromend;
373*fae548d3Szrj size_t fromlen;
374*fae548d3Szrj int ch, ch2 = 0;
375*fae548d3Szrj /* Character that started the string we're working on. */
376*fae548d3Szrj static char quotechar;
377*fae548d3Szrj
378*fae548d3Szrj /*State 0: beginning of normal line
379*fae548d3Szrj 1: After first whitespace on line (flush more white)
380*fae548d3Szrj 2: After first non-white (opcode) on line (keep 1white)
381*fae548d3Szrj 3: after second white on line (into operands) (flush white)
382*fae548d3Szrj 4: after putting out a .linefile, put out digits
383*fae548d3Szrj 5: parsing a string, then go to old-state
384*fae548d3Szrj 6: putting out \ escape in a "d string.
385*fae548d3Szrj 7: no longer used
386*fae548d3Szrj 8: no longer used
387*fae548d3Szrj 9: After seeing symbol char in state 3 (keep 1white after symchar)
388*fae548d3Szrj 10: After seeing whitespace in state 9 (keep white before symchar)
389*fae548d3Szrj 11: After seeing a symbol character in state 0 (eg a label definition)
390*fae548d3Szrj -1: output string in out_string and go to the state in old_state
391*fae548d3Szrj -2: flush text until a '*' '/' is seen, then go to state old_state
392*fae548d3Szrj #ifdef TC_V850
393*fae548d3Szrj 12: After seeing a dash, looking for a second dash as a start
394*fae548d3Szrj of comment.
395*fae548d3Szrj #endif
396*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
397*fae548d3Szrj 13: After seeing a vertical bar, looking for a second
398*fae548d3Szrj vertical bar as a parallel expression separator.
399*fae548d3Szrj #endif
400*fae548d3Szrj #ifdef TC_PREDICATE_START_CHAR
401*fae548d3Szrj 14: After seeing a predicate start character at state 0, looking
402*fae548d3Szrj for a predicate end character as predicate.
403*fae548d3Szrj 15: After seeing a predicate start character at state 1, looking
404*fae548d3Szrj for a predicate end character as predicate.
405*fae548d3Szrj #endif
406*fae548d3Szrj #ifdef TC_Z80
407*fae548d3Szrj 16: After seeing an 'a' or an 'A' at the start of a symbol
408*fae548d3Szrj 17: After seeing an 'f' or an 'F' in state 16
409*fae548d3Szrj #endif
410*fae548d3Szrj */
411*fae548d3Szrj
412*fae548d3Szrj /* I added states 9 and 10 because the MIPS ECOFF assembler uses
413*fae548d3Szrj constructs like ``.loc 1 20''. This was turning into ``.loc
414*fae548d3Szrj 120''. States 9 and 10 ensure that a space is never dropped in
415*fae548d3Szrj between characters which could appear in an identifier. Ian
416*fae548d3Szrj Taylor, ian@cygnus.com.
417*fae548d3Szrj
418*fae548d3Szrj I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
419*fae548d3Szrj correctly on the PA (and any other target where colons are optional).
420*fae548d3Szrj Jeff Law, law@cs.utah.edu.
421*fae548d3Szrj
422*fae548d3Szrj I added state 13 so that something like "cmp r1, r2 || trap #1" does not
423*fae548d3Szrj get squashed into "cmp r1,r2||trap#1", with the all important space
424*fae548d3Szrj between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
425*fae548d3Szrj
426*fae548d3Szrj /* This macro gets the next input character. */
427*fae548d3Szrj
428*fae548d3Szrj #define GET() \
429*fae548d3Szrj (from < fromend \
430*fae548d3Szrj ? * (unsigned char *) (from++) \
431*fae548d3Szrj : (saved_input = NULL, \
432*fae548d3Szrj fromlen = (*get) (input_buffer, sizeof input_buffer), \
433*fae548d3Szrj from = input_buffer, \
434*fae548d3Szrj fromend = from + fromlen, \
435*fae548d3Szrj (fromlen == 0 \
436*fae548d3Szrj ? EOF \
437*fae548d3Szrj : * (unsigned char *) (from++))))
438*fae548d3Szrj
439*fae548d3Szrj /* This macro pushes a character back on the input stream. */
440*fae548d3Szrj
441*fae548d3Szrj #define UNGET(uch) (*--from = (uch))
442*fae548d3Szrj
443*fae548d3Szrj /* This macro puts a character into the output buffer. If this
444*fae548d3Szrj character fills the output buffer, this macro jumps to the label
445*fae548d3Szrj TOFULL. We use this rather ugly approach because we need to
446*fae548d3Szrj handle two different termination conditions: EOF on the input
447*fae548d3Szrj stream, and a full output buffer. It would be simpler if we
448*fae548d3Szrj always read in the entire input stream before processing it, but
449*fae548d3Szrj I don't want to make such a significant change to the assembler's
450*fae548d3Szrj memory usage. */
451*fae548d3Szrj
452*fae548d3Szrj #define PUT(pch) \
453*fae548d3Szrj do \
454*fae548d3Szrj { \
455*fae548d3Szrj *to++ = (pch); \
456*fae548d3Szrj if (to >= toend) \
457*fae548d3Szrj goto tofull; \
458*fae548d3Szrj } \
459*fae548d3Szrj while (0)
460*fae548d3Szrj
461*fae548d3Szrj if (saved_input != NULL)
462*fae548d3Szrj {
463*fae548d3Szrj from = saved_input;
464*fae548d3Szrj fromend = from + saved_input_len;
465*fae548d3Szrj }
466*fae548d3Szrj else
467*fae548d3Szrj {
468*fae548d3Szrj fromlen = (*get) (input_buffer, sizeof input_buffer);
469*fae548d3Szrj if (fromlen == 0)
470*fae548d3Szrj return 0;
471*fae548d3Szrj from = input_buffer;
472*fae548d3Szrj fromend = from + fromlen;
473*fae548d3Szrj }
474*fae548d3Szrj
475*fae548d3Szrj while (1)
476*fae548d3Szrj {
477*fae548d3Szrj /* The cases in this switch end with continue, in order to
478*fae548d3Szrj branch back to the top of this while loop and generate the
479*fae548d3Szrj next output character in the appropriate state. */
480*fae548d3Szrj switch (state)
481*fae548d3Szrj {
482*fae548d3Szrj case -1:
483*fae548d3Szrj ch = *out_string++;
484*fae548d3Szrj if (*out_string == '\0')
485*fae548d3Szrj {
486*fae548d3Szrj state = old_state;
487*fae548d3Szrj old_state = 3;
488*fae548d3Szrj }
489*fae548d3Szrj PUT (ch);
490*fae548d3Szrj continue;
491*fae548d3Szrj
492*fae548d3Szrj case -2:
493*fae548d3Szrj for (;;)
494*fae548d3Szrj {
495*fae548d3Szrj do
496*fae548d3Szrj {
497*fae548d3Szrj ch = GET ();
498*fae548d3Szrj
499*fae548d3Szrj if (ch == EOF)
500*fae548d3Szrj {
501*fae548d3Szrj as_warn (_("end of file in comment"));
502*fae548d3Szrj goto fromeof;
503*fae548d3Szrj }
504*fae548d3Szrj
505*fae548d3Szrj if (ch == '\n')
506*fae548d3Szrj PUT ('\n');
507*fae548d3Szrj }
508*fae548d3Szrj while (ch != '*');
509*fae548d3Szrj
510*fae548d3Szrj while ((ch = GET ()) == '*')
511*fae548d3Szrj ;
512*fae548d3Szrj
513*fae548d3Szrj if (ch == EOF)
514*fae548d3Szrj {
515*fae548d3Szrj as_warn (_("end of file in comment"));
516*fae548d3Szrj goto fromeof;
517*fae548d3Szrj }
518*fae548d3Szrj
519*fae548d3Szrj if (ch == '/')
520*fae548d3Szrj break;
521*fae548d3Szrj
522*fae548d3Szrj UNGET (ch);
523*fae548d3Szrj }
524*fae548d3Szrj
525*fae548d3Szrj state = old_state;
526*fae548d3Szrj UNGET (' ');
527*fae548d3Szrj continue;
528*fae548d3Szrj
529*fae548d3Szrj case 4:
530*fae548d3Szrj ch = GET ();
531*fae548d3Szrj if (ch == EOF)
532*fae548d3Szrj goto fromeof;
533*fae548d3Szrj else if (ch >= '0' && ch <= '9')
534*fae548d3Szrj PUT (ch);
535*fae548d3Szrj else
536*fae548d3Szrj {
537*fae548d3Szrj while (ch != EOF && IS_WHITESPACE (ch))
538*fae548d3Szrj ch = GET ();
539*fae548d3Szrj if (ch == '"')
540*fae548d3Szrj {
541*fae548d3Szrj quotechar = ch;
542*fae548d3Szrj state = 5;
543*fae548d3Szrj old_state = 3;
544*fae548d3Szrj PUT (ch);
545*fae548d3Szrj }
546*fae548d3Szrj else
547*fae548d3Szrj {
548*fae548d3Szrj while (ch != EOF && ch != '\n')
549*fae548d3Szrj ch = GET ();
550*fae548d3Szrj state = 0;
551*fae548d3Szrj PUT (ch);
552*fae548d3Szrj }
553*fae548d3Szrj }
554*fae548d3Szrj continue;
555*fae548d3Szrj
556*fae548d3Szrj case 5:
557*fae548d3Szrj /* We are going to copy everything up to a quote character,
558*fae548d3Szrj with special handling for a backslash. We try to
559*fae548d3Szrj optimize the copying in the simple case without using the
560*fae548d3Szrj GET and PUT macros. */
561*fae548d3Szrj {
562*fae548d3Szrj char *s;
563*fae548d3Szrj ptrdiff_t len;
564*fae548d3Szrj
565*fae548d3Szrj for (s = from; s < fromend; s++)
566*fae548d3Szrj {
567*fae548d3Szrj ch = *s;
568*fae548d3Szrj if (ch == '\\'
569*fae548d3Szrj || ch == quotechar
570*fae548d3Szrj || ch == '\n')
571*fae548d3Szrj break;
572*fae548d3Szrj }
573*fae548d3Szrj len = s - from;
574*fae548d3Szrj if (len > toend - to)
575*fae548d3Szrj len = toend - to;
576*fae548d3Szrj if (len > 0)
577*fae548d3Szrj {
578*fae548d3Szrj memcpy (to, from, len);
579*fae548d3Szrj to += len;
580*fae548d3Szrj from += len;
581*fae548d3Szrj if (to >= toend)
582*fae548d3Szrj goto tofull;
583*fae548d3Szrj }
584*fae548d3Szrj }
585*fae548d3Szrj
586*fae548d3Szrj ch = GET ();
587*fae548d3Szrj if (ch == EOF)
588*fae548d3Szrj {
589*fae548d3Szrj /* This buffer is here specifically so
590*fae548d3Szrj that the UNGET below will work. */
591*fae548d3Szrj static char one_char_buf[1];
592*fae548d3Szrj
593*fae548d3Szrj as_warn (_("end of file in string; '%c' inserted"), quotechar);
594*fae548d3Szrj state = old_state;
595*fae548d3Szrj from = fromend = one_char_buf + 1;
596*fae548d3Szrj fromlen = 1;
597*fae548d3Szrj UNGET ('\n');
598*fae548d3Szrj PUT (quotechar);
599*fae548d3Szrj }
600*fae548d3Szrj else if (ch == quotechar)
601*fae548d3Szrj {
602*fae548d3Szrj state = old_state;
603*fae548d3Szrj PUT (ch);
604*fae548d3Szrj }
605*fae548d3Szrj else if (TC_STRING_ESCAPES && ch == '\\')
606*fae548d3Szrj {
607*fae548d3Szrj state = 6;
608*fae548d3Szrj PUT (ch);
609*fae548d3Szrj }
610*fae548d3Szrj else if (scrub_m68k_mri && ch == '\n')
611*fae548d3Szrj {
612*fae548d3Szrj /* Just quietly terminate the string. This permits lines like
613*fae548d3Szrj bne label loop if we haven't reach end yet. */
614*fae548d3Szrj state = old_state;
615*fae548d3Szrj UNGET (ch);
616*fae548d3Szrj PUT ('\'');
617*fae548d3Szrj }
618*fae548d3Szrj else
619*fae548d3Szrj {
620*fae548d3Szrj PUT (ch);
621*fae548d3Szrj }
622*fae548d3Szrj continue;
623*fae548d3Szrj
624*fae548d3Szrj case 6:
625*fae548d3Szrj state = 5;
626*fae548d3Szrj ch = GET ();
627*fae548d3Szrj switch (ch)
628*fae548d3Szrj {
629*fae548d3Szrj /* Handle strings broken across lines, by turning '\n' into
630*fae548d3Szrj '\\' and 'n'. */
631*fae548d3Szrj case '\n':
632*fae548d3Szrj UNGET ('n');
633*fae548d3Szrj add_newlines++;
634*fae548d3Szrj PUT ('\\');
635*fae548d3Szrj continue;
636*fae548d3Szrj
637*fae548d3Szrj case EOF:
638*fae548d3Szrj as_warn (_("end of file in string; '%c' inserted"), quotechar);
639*fae548d3Szrj PUT (quotechar);
640*fae548d3Szrj continue;
641*fae548d3Szrj
642*fae548d3Szrj case '"':
643*fae548d3Szrj case '\\':
644*fae548d3Szrj case 'b':
645*fae548d3Szrj case 'f':
646*fae548d3Szrj case 'n':
647*fae548d3Szrj case 'r':
648*fae548d3Szrj case 't':
649*fae548d3Szrj case 'v':
650*fae548d3Szrj case 'x':
651*fae548d3Szrj case 'X':
652*fae548d3Szrj case '0':
653*fae548d3Szrj case '1':
654*fae548d3Szrj case '2':
655*fae548d3Szrj case '3':
656*fae548d3Szrj case '4':
657*fae548d3Szrj case '5':
658*fae548d3Szrj case '6':
659*fae548d3Szrj case '7':
660*fae548d3Szrj break;
661*fae548d3Szrj
662*fae548d3Szrj default:
663*fae548d3Szrj #ifdef ONLY_STANDARD_ESCAPES
664*fae548d3Szrj as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
665*fae548d3Szrj #endif
666*fae548d3Szrj break;
667*fae548d3Szrj }
668*fae548d3Szrj PUT (ch);
669*fae548d3Szrj continue;
670*fae548d3Szrj
671*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
672*fae548d3Szrj case 13:
673*fae548d3Szrj ch = GET ();
674*fae548d3Szrj if (ch != '|')
675*fae548d3Szrj abort ();
676*fae548d3Szrj
677*fae548d3Szrj /* Reset back to state 1 and pretend that we are parsing a
678*fae548d3Szrj line from just after the first white space. */
679*fae548d3Szrj state = 1;
680*fae548d3Szrj PUT ('|');
681*fae548d3Szrj #ifdef TC_TIC6X
682*fae548d3Szrj /* "||^" is used for SPMASKed instructions. */
683*fae548d3Szrj ch = GET ();
684*fae548d3Szrj if (ch == EOF)
685*fae548d3Szrj goto fromeof;
686*fae548d3Szrj else if (ch == '^')
687*fae548d3Szrj PUT ('^');
688*fae548d3Szrj else
689*fae548d3Szrj UNGET (ch);
690*fae548d3Szrj #endif
691*fae548d3Szrj continue;
692*fae548d3Szrj #endif
693*fae548d3Szrj #ifdef TC_Z80
694*fae548d3Szrj case 16:
695*fae548d3Szrj /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
696*fae548d3Szrj ch = GET ();
697*fae548d3Szrj if (ch == 'f' || ch == 'F')
698*fae548d3Szrj {
699*fae548d3Szrj state = 17;
700*fae548d3Szrj PUT (ch);
701*fae548d3Szrj }
702*fae548d3Szrj else
703*fae548d3Szrj {
704*fae548d3Szrj state = 9;
705*fae548d3Szrj break;
706*fae548d3Szrj }
707*fae548d3Szrj /* Fall through. */
708*fae548d3Szrj case 17:
709*fae548d3Szrj /* We have seen "af" at the start of a symbol,
710*fae548d3Szrj a ' here is a part of that symbol. */
711*fae548d3Szrj ch = GET ();
712*fae548d3Szrj state = 9;
713*fae548d3Szrj if (ch == '\'')
714*fae548d3Szrj /* Change to avoid warning about unclosed string. */
715*fae548d3Szrj PUT ('`');
716*fae548d3Szrj else if (ch != EOF)
717*fae548d3Szrj UNGET (ch);
718*fae548d3Szrj break;
719*fae548d3Szrj #endif
720*fae548d3Szrj }
721*fae548d3Szrj
722*fae548d3Szrj /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
723*fae548d3Szrj
724*fae548d3Szrj /* flushchar: */
725*fae548d3Szrj ch = GET ();
726*fae548d3Szrj
727*fae548d3Szrj #ifdef TC_PREDICATE_START_CHAR
728*fae548d3Szrj if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
729*fae548d3Szrj {
730*fae548d3Szrj state += 14;
731*fae548d3Szrj PUT (ch);
732*fae548d3Szrj continue;
733*fae548d3Szrj }
734*fae548d3Szrj else if (state == 14 || state == 15)
735*fae548d3Szrj {
736*fae548d3Szrj if (ch == TC_PREDICATE_END_CHAR)
737*fae548d3Szrj {
738*fae548d3Szrj state -= 14;
739*fae548d3Szrj PUT (ch);
740*fae548d3Szrj ch = GET ();
741*fae548d3Szrj }
742*fae548d3Szrj else
743*fae548d3Szrj {
744*fae548d3Szrj PUT (ch);
745*fae548d3Szrj continue;
746*fae548d3Szrj }
747*fae548d3Szrj }
748*fae548d3Szrj #endif
749*fae548d3Szrj
750*fae548d3Szrj recycle:
751*fae548d3Szrj
752*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
753*fae548d3Szrj /* We need to watch out for .symver directives. See the comment later
754*fae548d3Szrj in this function. */
755*fae548d3Szrj if (symver_state == NULL)
756*fae548d3Szrj {
757*fae548d3Szrj if ((state == 0 || state == 1) && ch == symver_pseudo[0])
758*fae548d3Szrj symver_state = symver_pseudo + 1;
759*fae548d3Szrj }
760*fae548d3Szrj else
761*fae548d3Szrj {
762*fae548d3Szrj /* We advance to the next state if we find the right
763*fae548d3Szrj character. */
764*fae548d3Szrj if (ch != '\0' && (*symver_state == ch))
765*fae548d3Szrj ++symver_state;
766*fae548d3Szrj else if (*symver_state != '\0')
767*fae548d3Szrj /* We did not get the expected character, or we didn't
768*fae548d3Szrj get a valid terminating character after seeing the
769*fae548d3Szrj entire pseudo-op, so we must go back to the beginning. */
770*fae548d3Szrj symver_state = NULL;
771*fae548d3Szrj else
772*fae548d3Szrj {
773*fae548d3Szrj /* We've read the entire pseudo-op. If this is the end
774*fae548d3Szrj of the line, go back to the beginning. */
775*fae548d3Szrj if (IS_NEWLINE (ch))
776*fae548d3Szrj symver_state = NULL;
777*fae548d3Szrj }
778*fae548d3Szrj }
779*fae548d3Szrj #endif /* TC_ARM && OBJ_ELF */
780*fae548d3Szrj
781*fae548d3Szrj #ifdef TC_M68K
782*fae548d3Szrj /* We want to have pseudo-ops which control whether we are in
783*fae548d3Szrj MRI mode or not. Unfortunately, since m68k MRI mode affects
784*fae548d3Szrj the scrubber, that means that we need a special purpose
785*fae548d3Szrj recognizer here. */
786*fae548d3Szrj if (mri_state == NULL)
787*fae548d3Szrj {
788*fae548d3Szrj if ((state == 0 || state == 1)
789*fae548d3Szrj && ch == mri_pseudo[0])
790*fae548d3Szrj mri_state = mri_pseudo + 1;
791*fae548d3Szrj }
792*fae548d3Szrj else
793*fae548d3Szrj {
794*fae548d3Szrj /* We advance to the next state if we find the right
795*fae548d3Szrj character, or if we need a space character and we get any
796*fae548d3Szrj whitespace character, or if we need a '0' and we get a
797*fae548d3Szrj '1' (this is so that we only need one state to handle
798*fae548d3Szrj ``.mri 0'' and ``.mri 1''). */
799*fae548d3Szrj if (ch != '\0'
800*fae548d3Szrj && (*mri_state == ch
801*fae548d3Szrj || (*mri_state == ' '
802*fae548d3Szrj && lex[ch] == LEX_IS_WHITESPACE)
803*fae548d3Szrj || (*mri_state == '0'
804*fae548d3Szrj && ch == '1')))
805*fae548d3Szrj {
806*fae548d3Szrj mri_last_ch = ch;
807*fae548d3Szrj ++mri_state;
808*fae548d3Szrj }
809*fae548d3Szrj else if (*mri_state != '\0'
810*fae548d3Szrj || (lex[ch] != LEX_IS_WHITESPACE
811*fae548d3Szrj && lex[ch] != LEX_IS_NEWLINE))
812*fae548d3Szrj {
813*fae548d3Szrj /* We did not get the expected character, or we didn't
814*fae548d3Szrj get a valid terminating character after seeing the
815*fae548d3Szrj entire pseudo-op, so we must go back to the
816*fae548d3Szrj beginning. */
817*fae548d3Szrj mri_state = NULL;
818*fae548d3Szrj }
819*fae548d3Szrj else
820*fae548d3Szrj {
821*fae548d3Szrj /* We've read the entire pseudo-op. mri_last_ch is
822*fae548d3Szrj either '0' or '1' indicating whether to enter or
823*fae548d3Szrj leave MRI mode. */
824*fae548d3Szrj do_scrub_begin (mri_last_ch == '1');
825*fae548d3Szrj mri_state = NULL;
826*fae548d3Szrj
827*fae548d3Szrj /* We continue handling the character as usual. The
828*fae548d3Szrj main gas reader must also handle the .mri pseudo-op
829*fae548d3Szrj to control expression parsing and the like. */
830*fae548d3Szrj }
831*fae548d3Szrj }
832*fae548d3Szrj #endif
833*fae548d3Szrj
834*fae548d3Szrj if (ch == EOF)
835*fae548d3Szrj {
836*fae548d3Szrj if (state != 0)
837*fae548d3Szrj {
838*fae548d3Szrj as_warn (_("end of file not at end of a line; newline inserted"));
839*fae548d3Szrj state = 0;
840*fae548d3Szrj PUT ('\n');
841*fae548d3Szrj }
842*fae548d3Szrj goto fromeof;
843*fae548d3Szrj }
844*fae548d3Szrj
845*fae548d3Szrj switch (lex[ch])
846*fae548d3Szrj {
847*fae548d3Szrj case LEX_IS_WHITESPACE:
848*fae548d3Szrj do
849*fae548d3Szrj {
850*fae548d3Szrj ch = GET ();
851*fae548d3Szrj }
852*fae548d3Szrj while (ch != EOF && IS_WHITESPACE (ch));
853*fae548d3Szrj if (ch == EOF)
854*fae548d3Szrj goto fromeof;
855*fae548d3Szrj
856*fae548d3Szrj if (state == 0)
857*fae548d3Szrj {
858*fae548d3Szrj /* Preserve a single whitespace character at the
859*fae548d3Szrj beginning of a line. */
860*fae548d3Szrj state = 1;
861*fae548d3Szrj UNGET (ch);
862*fae548d3Szrj PUT (' ');
863*fae548d3Szrj break;
864*fae548d3Szrj }
865*fae548d3Szrj
866*fae548d3Szrj #ifdef KEEP_WHITE_AROUND_COLON
867*fae548d3Szrj if (lex[ch] == LEX_IS_COLON)
868*fae548d3Szrj {
869*fae548d3Szrj /* Only keep this white if there's no white *after* the
870*fae548d3Szrj colon. */
871*fae548d3Szrj ch2 = GET ();
872*fae548d3Szrj if (ch2 != EOF)
873*fae548d3Szrj UNGET (ch2);
874*fae548d3Szrj if (!IS_WHITESPACE (ch2))
875*fae548d3Szrj {
876*fae548d3Szrj state = 9;
877*fae548d3Szrj UNGET (ch);
878*fae548d3Szrj PUT (' ');
879*fae548d3Szrj break;
880*fae548d3Szrj }
881*fae548d3Szrj }
882*fae548d3Szrj #endif
883*fae548d3Szrj if (IS_COMMENT (ch)
884*fae548d3Szrj || ch == '/'
885*fae548d3Szrj || IS_LINE_SEPARATOR (ch)
886*fae548d3Szrj || IS_PARALLEL_SEPARATOR (ch))
887*fae548d3Szrj {
888*fae548d3Szrj if (scrub_m68k_mri)
889*fae548d3Szrj {
890*fae548d3Szrj /* In MRI mode, we keep these spaces. */
891*fae548d3Szrj UNGET (ch);
892*fae548d3Szrj PUT (' ');
893*fae548d3Szrj break;
894*fae548d3Szrj }
895*fae548d3Szrj goto recycle;
896*fae548d3Szrj }
897*fae548d3Szrj
898*fae548d3Szrj /* If we're in state 2 or 11, we've seen a non-white
899*fae548d3Szrj character followed by whitespace. If the next character
900*fae548d3Szrj is ':', this is whitespace after a label name which we
901*fae548d3Szrj normally must ignore. In MRI mode, though, spaces are
902*fae548d3Szrj not permitted between the label and the colon. */
903*fae548d3Szrj if ((state == 2 || state == 11)
904*fae548d3Szrj && lex[ch] == LEX_IS_COLON
905*fae548d3Szrj && ! scrub_m68k_mri)
906*fae548d3Szrj {
907*fae548d3Szrj state = 1;
908*fae548d3Szrj PUT (ch);
909*fae548d3Szrj break;
910*fae548d3Szrj }
911*fae548d3Szrj
912*fae548d3Szrj switch (state)
913*fae548d3Szrj {
914*fae548d3Szrj case 1:
915*fae548d3Szrj /* We can arrive here if we leave a leading whitespace
916*fae548d3Szrj character at the beginning of a line. */
917*fae548d3Szrj goto recycle;
918*fae548d3Szrj case 2:
919*fae548d3Szrj state = 3;
920*fae548d3Szrj if (to + 1 < toend)
921*fae548d3Szrj {
922*fae548d3Szrj /* Optimize common case by skipping UNGET/GET. */
923*fae548d3Szrj PUT (' '); /* Sp after opco */
924*fae548d3Szrj goto recycle;
925*fae548d3Szrj }
926*fae548d3Szrj UNGET (ch);
927*fae548d3Szrj PUT (' ');
928*fae548d3Szrj break;
929*fae548d3Szrj case 3:
930*fae548d3Szrj #ifndef TC_KEEP_OPERAND_SPACES
931*fae548d3Szrj /* For TI C6X, we keep these spaces as they may separate
932*fae548d3Szrj functional unit specifiers from operands. */
933*fae548d3Szrj if (scrub_m68k_mri)
934*fae548d3Szrj #endif
935*fae548d3Szrj {
936*fae548d3Szrj /* In MRI mode, we keep these spaces. */
937*fae548d3Szrj UNGET (ch);
938*fae548d3Szrj PUT (' ');
939*fae548d3Szrj break;
940*fae548d3Szrj }
941*fae548d3Szrj goto recycle; /* Sp in operands */
942*fae548d3Szrj case 9:
943*fae548d3Szrj case 10:
944*fae548d3Szrj #ifndef TC_KEEP_OPERAND_SPACES
945*fae548d3Szrj if (scrub_m68k_mri)
946*fae548d3Szrj #endif
947*fae548d3Szrj {
948*fae548d3Szrj /* In MRI mode, we keep these spaces. */
949*fae548d3Szrj state = 3;
950*fae548d3Szrj UNGET (ch);
951*fae548d3Szrj PUT (' ');
952*fae548d3Szrj break;
953*fae548d3Szrj }
954*fae548d3Szrj state = 10; /* Sp after symbol char */
955*fae548d3Szrj goto recycle;
956*fae548d3Szrj case 11:
957*fae548d3Szrj if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
958*fae548d3Szrj state = 1;
959*fae548d3Szrj else
960*fae548d3Szrj {
961*fae548d3Szrj /* We know that ch is not ':', since we tested that
962*fae548d3Szrj case above. Therefore this is not a label, so it
963*fae548d3Szrj must be the opcode, and we've just seen the
964*fae548d3Szrj whitespace after it. */
965*fae548d3Szrj state = 3;
966*fae548d3Szrj }
967*fae548d3Szrj UNGET (ch);
968*fae548d3Szrj PUT (' '); /* Sp after label definition. */
969*fae548d3Szrj break;
970*fae548d3Szrj default:
971*fae548d3Szrj BAD_CASE (state);
972*fae548d3Szrj }
973*fae548d3Szrj break;
974*fae548d3Szrj
975*fae548d3Szrj case LEX_IS_TWOCHAR_COMMENT_1ST:
976*fae548d3Szrj ch2 = GET ();
977*fae548d3Szrj if (ch2 == '*')
978*fae548d3Szrj {
979*fae548d3Szrj for (;;)
980*fae548d3Szrj {
981*fae548d3Szrj do
982*fae548d3Szrj {
983*fae548d3Szrj ch2 = GET ();
984*fae548d3Szrj if (ch2 != EOF && IS_NEWLINE (ch2))
985*fae548d3Szrj add_newlines++;
986*fae548d3Szrj }
987*fae548d3Szrj while (ch2 != EOF && ch2 != '*');
988*fae548d3Szrj
989*fae548d3Szrj while (ch2 == '*')
990*fae548d3Szrj ch2 = GET ();
991*fae548d3Szrj
992*fae548d3Szrj if (ch2 == EOF || ch2 == '/')
993*fae548d3Szrj break;
994*fae548d3Szrj
995*fae548d3Szrj /* This UNGET will ensure that we count newlines
996*fae548d3Szrj correctly. */
997*fae548d3Szrj UNGET (ch2);
998*fae548d3Szrj }
999*fae548d3Szrj
1000*fae548d3Szrj if (ch2 == EOF)
1001*fae548d3Szrj as_warn (_("end of file in multiline comment"));
1002*fae548d3Szrj
1003*fae548d3Szrj ch = ' ';
1004*fae548d3Szrj goto recycle;
1005*fae548d3Szrj }
1006*fae548d3Szrj #ifdef DOUBLESLASH_LINE_COMMENTS
1007*fae548d3Szrj else if (ch2 == '/')
1008*fae548d3Szrj {
1009*fae548d3Szrj do
1010*fae548d3Szrj {
1011*fae548d3Szrj ch = GET ();
1012*fae548d3Szrj }
1013*fae548d3Szrj while (ch != EOF && !IS_NEWLINE (ch));
1014*fae548d3Szrj if (ch == EOF)
1015*fae548d3Szrj as_warn ("end of file in comment; newline inserted");
1016*fae548d3Szrj state = 0;
1017*fae548d3Szrj PUT ('\n');
1018*fae548d3Szrj break;
1019*fae548d3Szrj }
1020*fae548d3Szrj #endif
1021*fae548d3Szrj else
1022*fae548d3Szrj {
1023*fae548d3Szrj if (ch2 != EOF)
1024*fae548d3Szrj UNGET (ch2);
1025*fae548d3Szrj if (state == 9 || state == 10)
1026*fae548d3Szrj state = 3;
1027*fae548d3Szrj PUT (ch);
1028*fae548d3Szrj }
1029*fae548d3Szrj break;
1030*fae548d3Szrj
1031*fae548d3Szrj case LEX_IS_STRINGQUOTE:
1032*fae548d3Szrj quotechar = ch;
1033*fae548d3Szrj if (state == 10)
1034*fae548d3Szrj {
1035*fae548d3Szrj /* Preserve the whitespace in foo "bar". */
1036*fae548d3Szrj UNGET (ch);
1037*fae548d3Szrj state = 3;
1038*fae548d3Szrj PUT (' ');
1039*fae548d3Szrj
1040*fae548d3Szrj /* PUT didn't jump out. We could just break, but we
1041*fae548d3Szrj know what will happen, so optimize a bit. */
1042*fae548d3Szrj ch = GET ();
1043*fae548d3Szrj old_state = 3;
1044*fae548d3Szrj }
1045*fae548d3Szrj else if (state == 9)
1046*fae548d3Szrj old_state = 3;
1047*fae548d3Szrj else
1048*fae548d3Szrj old_state = state;
1049*fae548d3Szrj state = 5;
1050*fae548d3Szrj PUT (ch);
1051*fae548d3Szrj break;
1052*fae548d3Szrj
1053*fae548d3Szrj case LEX_IS_ONECHAR_QUOTE:
1054*fae548d3Szrj #ifdef H_TICK_HEX
1055*fae548d3Szrj if (state == 9 && enable_h_tick_hex)
1056*fae548d3Szrj {
1057*fae548d3Szrj char c;
1058*fae548d3Szrj
1059*fae548d3Szrj c = GET ();
1060*fae548d3Szrj as_warn ("'%c found after symbol", c);
1061*fae548d3Szrj UNGET (c);
1062*fae548d3Szrj }
1063*fae548d3Szrj #endif
1064*fae548d3Szrj if (state == 10)
1065*fae548d3Szrj {
1066*fae548d3Szrj /* Preserve the whitespace in foo 'b'. */
1067*fae548d3Szrj UNGET (ch);
1068*fae548d3Szrj state = 3;
1069*fae548d3Szrj PUT (' ');
1070*fae548d3Szrj break;
1071*fae548d3Szrj }
1072*fae548d3Szrj ch = GET ();
1073*fae548d3Szrj if (ch == EOF)
1074*fae548d3Szrj {
1075*fae548d3Szrj as_warn (_("end of file after a one-character quote; \\0 inserted"));
1076*fae548d3Szrj ch = 0;
1077*fae548d3Szrj }
1078*fae548d3Szrj if (ch == '\\')
1079*fae548d3Szrj {
1080*fae548d3Szrj ch = GET ();
1081*fae548d3Szrj if (ch == EOF)
1082*fae548d3Szrj {
1083*fae548d3Szrj as_warn (_("end of file in escape character"));
1084*fae548d3Szrj ch = '\\';
1085*fae548d3Szrj }
1086*fae548d3Szrj else
1087*fae548d3Szrj ch = process_escape (ch);
1088*fae548d3Szrj }
1089*fae548d3Szrj sprintf (out_buf, "%d", (int) (unsigned char) ch);
1090*fae548d3Szrj
1091*fae548d3Szrj /* None of these 'x constants for us. We want 'x'. */
1092*fae548d3Szrj if ((ch = GET ()) != '\'')
1093*fae548d3Szrj {
1094*fae548d3Szrj #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1095*fae548d3Szrj as_warn (_("missing close quote; (assumed)"));
1096*fae548d3Szrj #else
1097*fae548d3Szrj if (ch != EOF)
1098*fae548d3Szrj UNGET (ch);
1099*fae548d3Szrj #endif
1100*fae548d3Szrj }
1101*fae548d3Szrj if (strlen (out_buf) == 1)
1102*fae548d3Szrj {
1103*fae548d3Szrj PUT (out_buf[0]);
1104*fae548d3Szrj break;
1105*fae548d3Szrj }
1106*fae548d3Szrj if (state == 9)
1107*fae548d3Szrj old_state = 3;
1108*fae548d3Szrj else
1109*fae548d3Szrj old_state = state;
1110*fae548d3Szrj state = -1;
1111*fae548d3Szrj out_string = out_buf;
1112*fae548d3Szrj PUT (*out_string++);
1113*fae548d3Szrj break;
1114*fae548d3Szrj
1115*fae548d3Szrj case LEX_IS_COLON:
1116*fae548d3Szrj #ifdef KEEP_WHITE_AROUND_COLON
1117*fae548d3Szrj state = 9;
1118*fae548d3Szrj #else
1119*fae548d3Szrj if (state == 9 || state == 10)
1120*fae548d3Szrj state = 3;
1121*fae548d3Szrj else if (state != 3)
1122*fae548d3Szrj state = 1;
1123*fae548d3Szrj #endif
1124*fae548d3Szrj PUT (ch);
1125*fae548d3Szrj break;
1126*fae548d3Szrj
1127*fae548d3Szrj case LEX_IS_NEWLINE:
1128*fae548d3Szrj /* Roll out a bunch of newlines from inside comments, etc. */
1129*fae548d3Szrj if (add_newlines)
1130*fae548d3Szrj {
1131*fae548d3Szrj --add_newlines;
1132*fae548d3Szrj UNGET (ch);
1133*fae548d3Szrj }
1134*fae548d3Szrj /* Fall through. */
1135*fae548d3Szrj
1136*fae548d3Szrj case LEX_IS_LINE_SEPARATOR:
1137*fae548d3Szrj state = 0;
1138*fae548d3Szrj PUT (ch);
1139*fae548d3Szrj break;
1140*fae548d3Szrj
1141*fae548d3Szrj case LEX_IS_PARALLEL_SEPARATOR:
1142*fae548d3Szrj state = 1;
1143*fae548d3Szrj PUT (ch);
1144*fae548d3Szrj break;
1145*fae548d3Szrj
1146*fae548d3Szrj #ifdef TC_V850
1147*fae548d3Szrj case LEX_IS_DOUBLEDASH_1ST:
1148*fae548d3Szrj ch2 = GET ();
1149*fae548d3Szrj if (ch2 != '-')
1150*fae548d3Szrj {
1151*fae548d3Szrj if (ch2 != EOF)
1152*fae548d3Szrj UNGET (ch2);
1153*fae548d3Szrj goto de_fault;
1154*fae548d3Szrj }
1155*fae548d3Szrj /* Read and skip to end of line. */
1156*fae548d3Szrj do
1157*fae548d3Szrj {
1158*fae548d3Szrj ch = GET ();
1159*fae548d3Szrj }
1160*fae548d3Szrj while (ch != EOF && ch != '\n');
1161*fae548d3Szrj
1162*fae548d3Szrj if (ch == EOF)
1163*fae548d3Szrj as_warn (_("end of file in comment; newline inserted"));
1164*fae548d3Szrj
1165*fae548d3Szrj state = 0;
1166*fae548d3Szrj PUT ('\n');
1167*fae548d3Szrj break;
1168*fae548d3Szrj #endif
1169*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
1170*fae548d3Szrj case LEX_IS_DOUBLEBAR_1ST:
1171*fae548d3Szrj ch2 = GET ();
1172*fae548d3Szrj if (ch2 != EOF)
1173*fae548d3Szrj UNGET (ch2);
1174*fae548d3Szrj if (ch2 != '|')
1175*fae548d3Szrj goto de_fault;
1176*fae548d3Szrj
1177*fae548d3Szrj /* Handle '||' in two states as invoking PUT twice might
1178*fae548d3Szrj result in the first one jumping out of this loop. We'd
1179*fae548d3Szrj then lose track of the state and one '|' char. */
1180*fae548d3Szrj state = 13;
1181*fae548d3Szrj PUT ('|');
1182*fae548d3Szrj break;
1183*fae548d3Szrj #endif
1184*fae548d3Szrj case LEX_IS_LINE_COMMENT_START:
1185*fae548d3Szrj /* FIXME-someday: The two character comment stuff was badly
1186*fae548d3Szrj thought out. On i386, we want '/' as line comment start
1187*fae548d3Szrj AND we want C style comments. hence this hack. The
1188*fae548d3Szrj whole lexical process should be reworked. xoxorich. */
1189*fae548d3Szrj if (ch == '/')
1190*fae548d3Szrj {
1191*fae548d3Szrj ch2 = GET ();
1192*fae548d3Szrj if (ch2 == '*')
1193*fae548d3Szrj {
1194*fae548d3Szrj old_state = 3;
1195*fae548d3Szrj state = -2;
1196*fae548d3Szrj break;
1197*fae548d3Szrj }
1198*fae548d3Szrj else if (ch2 != EOF)
1199*fae548d3Szrj {
1200*fae548d3Szrj UNGET (ch2);
1201*fae548d3Szrj }
1202*fae548d3Szrj }
1203*fae548d3Szrj
1204*fae548d3Szrj if (state == 0 || state == 1) /* Only comment at start of line. */
1205*fae548d3Szrj {
1206*fae548d3Szrj int startch;
1207*fae548d3Szrj
1208*fae548d3Szrj startch = ch;
1209*fae548d3Szrj
1210*fae548d3Szrj do
1211*fae548d3Szrj {
1212*fae548d3Szrj ch = GET ();
1213*fae548d3Szrj }
1214*fae548d3Szrj while (ch != EOF && IS_WHITESPACE (ch));
1215*fae548d3Szrj
1216*fae548d3Szrj if (ch == EOF)
1217*fae548d3Szrj {
1218*fae548d3Szrj as_warn (_("end of file in comment; newline inserted"));
1219*fae548d3Szrj PUT ('\n');
1220*fae548d3Szrj break;
1221*fae548d3Szrj }
1222*fae548d3Szrj
1223*fae548d3Szrj if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1224*fae548d3Szrj {
1225*fae548d3Szrj /* Not a cpp line. */
1226*fae548d3Szrj while (ch != EOF && !IS_NEWLINE (ch))
1227*fae548d3Szrj ch = GET ();
1228*fae548d3Szrj if (ch == EOF)
1229*fae548d3Szrj {
1230*fae548d3Szrj as_warn (_("end of file in comment; newline inserted"));
1231*fae548d3Szrj PUT ('\n');
1232*fae548d3Szrj }
1233*fae548d3Szrj else /* IS_NEWLINE (ch) */
1234*fae548d3Szrj {
1235*fae548d3Szrj /* To process non-zero add_newlines. */
1236*fae548d3Szrj UNGET (ch);
1237*fae548d3Szrj }
1238*fae548d3Szrj state = 0;
1239*fae548d3Szrj break;
1240*fae548d3Szrj }
1241*fae548d3Szrj /* Looks like `# 123 "filename"' from cpp. */
1242*fae548d3Szrj UNGET (ch);
1243*fae548d3Szrj old_state = 4;
1244*fae548d3Szrj state = -1;
1245*fae548d3Szrj if (scrub_m68k_mri)
1246*fae548d3Szrj out_string = "\tlinefile ";
1247*fae548d3Szrj else
1248*fae548d3Szrj out_string = "\t.linefile ";
1249*fae548d3Szrj PUT (*out_string++);
1250*fae548d3Szrj break;
1251*fae548d3Szrj }
1252*fae548d3Szrj
1253*fae548d3Szrj #ifdef TC_D10V
1254*fae548d3Szrj /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1255*fae548d3Szrj Trap is the only short insn that has a first operand that is
1256*fae548d3Szrj neither register nor label.
1257*fae548d3Szrj We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1258*fae548d3Szrj We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1259*fae548d3Szrj already LEX_IS_LINE_COMMENT_START. However, it is the
1260*fae548d3Szrj only character in line_comment_chars for d10v, hence we
1261*fae548d3Szrj can recognize it as such. */
1262*fae548d3Szrj /* An alternative approach would be to reset the state to 1 when
1263*fae548d3Szrj we see '||', '<'- or '->', but that seems to be overkill. */
1264*fae548d3Szrj if (state == 10)
1265*fae548d3Szrj PUT (' ');
1266*fae548d3Szrj #endif
1267*fae548d3Szrj /* We have a line comment character which is not at the
1268*fae548d3Szrj start of a line. If this is also a normal comment
1269*fae548d3Szrj character, fall through. Otherwise treat it as a default
1270*fae548d3Szrj character. */
1271*fae548d3Szrj if (strchr (tc_comment_chars, ch) == NULL
1272*fae548d3Szrj && (! scrub_m68k_mri
1273*fae548d3Szrj || (ch != '!' && ch != '*')))
1274*fae548d3Szrj goto de_fault;
1275*fae548d3Szrj if (scrub_m68k_mri
1276*fae548d3Szrj && (ch == '!' || ch == '*' || ch == '#')
1277*fae548d3Szrj && state != 1
1278*fae548d3Szrj && state != 10)
1279*fae548d3Szrj goto de_fault;
1280*fae548d3Szrj /* Fall through. */
1281*fae548d3Szrj case LEX_IS_COMMENT_START:
1282*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
1283*fae548d3Szrj /* On the ARM, `@' is the comment character.
1284*fae548d3Szrj Unfortunately this is also a special character in ELF .symver
1285*fae548d3Szrj directives (and .type, though we deal with those another way).
1286*fae548d3Szrj So we check if this line is such a directive, and treat
1287*fae548d3Szrj the character as default if so. This is a hack. */
1288*fae548d3Szrj if ((symver_state != NULL) && (*symver_state == 0))
1289*fae548d3Szrj goto de_fault;
1290*fae548d3Szrj #endif
1291*fae548d3Szrj
1292*fae548d3Szrj #ifdef TC_ARM
1293*fae548d3Szrj /* For the ARM, care is needed not to damage occurrences of \@
1294*fae548d3Szrj by stripping the @ onwards. Yuck. */
1295*fae548d3Szrj if ((to > tostart ? to[-1] : last_char) == '\\')
1296*fae548d3Szrj /* Do not treat the @ as a start-of-comment. */
1297*fae548d3Szrj goto de_fault;
1298*fae548d3Szrj #endif
1299*fae548d3Szrj
1300*fae548d3Szrj #ifdef WARN_COMMENTS
1301*fae548d3Szrj if (!found_comment)
1302*fae548d3Szrj found_comment_file = as_where (&found_comment);
1303*fae548d3Szrj #endif
1304*fae548d3Szrj do
1305*fae548d3Szrj {
1306*fae548d3Szrj ch = GET ();
1307*fae548d3Szrj }
1308*fae548d3Szrj while (ch != EOF && !IS_NEWLINE (ch));
1309*fae548d3Szrj if (ch == EOF)
1310*fae548d3Szrj as_warn (_("end of file in comment; newline inserted"));
1311*fae548d3Szrj state = 0;
1312*fae548d3Szrj PUT ('\n');
1313*fae548d3Szrj break;
1314*fae548d3Szrj
1315*fae548d3Szrj #ifdef H_TICK_HEX
1316*fae548d3Szrj case LEX_IS_H:
1317*fae548d3Szrj /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1318*fae548d3Szrj the H' with 0x to make them gas-style hex characters. */
1319*fae548d3Szrj if (enable_h_tick_hex)
1320*fae548d3Szrj {
1321*fae548d3Szrj char quot;
1322*fae548d3Szrj
1323*fae548d3Szrj quot = GET ();
1324*fae548d3Szrj if (quot == '\'')
1325*fae548d3Szrj {
1326*fae548d3Szrj UNGET ('x');
1327*fae548d3Szrj ch = '0';
1328*fae548d3Szrj }
1329*fae548d3Szrj else
1330*fae548d3Szrj UNGET (quot);
1331*fae548d3Szrj }
1332*fae548d3Szrj #endif
1333*fae548d3Szrj /* Fall through. */
1334*fae548d3Szrj
1335*fae548d3Szrj case LEX_IS_SYMBOL_COMPONENT:
1336*fae548d3Szrj if (state == 10)
1337*fae548d3Szrj {
1338*fae548d3Szrj /* This is a symbol character following another symbol
1339*fae548d3Szrj character, with whitespace in between. We skipped
1340*fae548d3Szrj the whitespace earlier, so output it now. */
1341*fae548d3Szrj UNGET (ch);
1342*fae548d3Szrj state = 3;
1343*fae548d3Szrj PUT (' ');
1344*fae548d3Szrj break;
1345*fae548d3Szrj }
1346*fae548d3Szrj
1347*fae548d3Szrj #ifdef TC_Z80
1348*fae548d3Szrj /* "af'" is a symbol containing '\''. */
1349*fae548d3Szrj if (state == 3 && (ch == 'a' || ch == 'A'))
1350*fae548d3Szrj {
1351*fae548d3Szrj state = 16;
1352*fae548d3Szrj PUT (ch);
1353*fae548d3Szrj ch = GET ();
1354*fae548d3Szrj if (ch == 'f' || ch == 'F')
1355*fae548d3Szrj {
1356*fae548d3Szrj state = 17;
1357*fae548d3Szrj PUT (ch);
1358*fae548d3Szrj break;
1359*fae548d3Szrj }
1360*fae548d3Szrj else
1361*fae548d3Szrj {
1362*fae548d3Szrj state = 9;
1363*fae548d3Szrj if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1364*fae548d3Szrj {
1365*fae548d3Szrj if (ch != EOF)
1366*fae548d3Szrj UNGET (ch);
1367*fae548d3Szrj break;
1368*fae548d3Szrj }
1369*fae548d3Szrj }
1370*fae548d3Szrj }
1371*fae548d3Szrj #endif
1372*fae548d3Szrj if (state == 3)
1373*fae548d3Szrj state = 9;
1374*fae548d3Szrj
1375*fae548d3Szrj /* This is a common case. Quickly copy CH and all the
1376*fae548d3Szrj following symbol component or normal characters. */
1377*fae548d3Szrj if (to + 1 < toend
1378*fae548d3Szrj && mri_state == NULL
1379*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
1380*fae548d3Szrj && symver_state == NULL
1381*fae548d3Szrj #endif
1382*fae548d3Szrj )
1383*fae548d3Szrj {
1384*fae548d3Szrj char *s;
1385*fae548d3Szrj ptrdiff_t len;
1386*fae548d3Szrj
1387*fae548d3Szrj for (s = from; s < fromend; s++)
1388*fae548d3Szrj {
1389*fae548d3Szrj int type;
1390*fae548d3Szrj
1391*fae548d3Szrj ch2 = *(unsigned char *) s;
1392*fae548d3Szrj type = lex[ch2];
1393*fae548d3Szrj if (type != 0
1394*fae548d3Szrj && type != LEX_IS_SYMBOL_COMPONENT)
1395*fae548d3Szrj break;
1396*fae548d3Szrj }
1397*fae548d3Szrj
1398*fae548d3Szrj if (s > from)
1399*fae548d3Szrj /* Handle the last character normally, for
1400*fae548d3Szrj simplicity. */
1401*fae548d3Szrj --s;
1402*fae548d3Szrj
1403*fae548d3Szrj len = s - from;
1404*fae548d3Szrj
1405*fae548d3Szrj if (len > (toend - to) - 1)
1406*fae548d3Szrj len = (toend - to) - 1;
1407*fae548d3Szrj
1408*fae548d3Szrj if (len > 0)
1409*fae548d3Szrj {
1410*fae548d3Szrj PUT (ch);
1411*fae548d3Szrj memcpy (to, from, len);
1412*fae548d3Szrj to += len;
1413*fae548d3Szrj from += len;
1414*fae548d3Szrj if (to >= toend)
1415*fae548d3Szrj goto tofull;
1416*fae548d3Szrj ch = GET ();
1417*fae548d3Szrj }
1418*fae548d3Szrj }
1419*fae548d3Szrj
1420*fae548d3Szrj /* Fall through. */
1421*fae548d3Szrj default:
1422*fae548d3Szrj de_fault:
1423*fae548d3Szrj /* Some relatively `normal' character. */
1424*fae548d3Szrj if (state == 0)
1425*fae548d3Szrj {
1426*fae548d3Szrj state = 11; /* Now seeing label definition. */
1427*fae548d3Szrj }
1428*fae548d3Szrj else if (state == 1)
1429*fae548d3Szrj {
1430*fae548d3Szrj state = 2; /* Ditto. */
1431*fae548d3Szrj }
1432*fae548d3Szrj else if (state == 9)
1433*fae548d3Szrj {
1434*fae548d3Szrj if (!IS_SYMBOL_COMPONENT (ch))
1435*fae548d3Szrj state = 3;
1436*fae548d3Szrj }
1437*fae548d3Szrj else if (state == 10)
1438*fae548d3Szrj {
1439*fae548d3Szrj if (ch == '\\')
1440*fae548d3Szrj {
1441*fae548d3Szrj /* Special handling for backslash: a backslash may
1442*fae548d3Szrj be the beginning of a formal parameter (of a
1443*fae548d3Szrj macro) following another symbol character, with
1444*fae548d3Szrj whitespace in between. If that is the case, we
1445*fae548d3Szrj output a space before the parameter. Strictly
1446*fae548d3Szrj speaking, correct handling depends upon what the
1447*fae548d3Szrj macro parameter expands into; if the parameter
1448*fae548d3Szrj expands into something which does not start with
1449*fae548d3Szrj an operand character, then we don't want to keep
1450*fae548d3Szrj the space. We don't have enough information to
1451*fae548d3Szrj make the right choice, so here we are making the
1452*fae548d3Szrj choice which is more likely to be correct. */
1453*fae548d3Szrj if (to + 1 >= toend)
1454*fae548d3Szrj {
1455*fae548d3Szrj /* If we're near the end of the buffer, save the
1456*fae548d3Szrj character for the next time round. Otherwise
1457*fae548d3Szrj we'll lose our state. */
1458*fae548d3Szrj UNGET (ch);
1459*fae548d3Szrj goto tofull;
1460*fae548d3Szrj }
1461*fae548d3Szrj *to++ = ' ';
1462*fae548d3Szrj }
1463*fae548d3Szrj
1464*fae548d3Szrj state = 3;
1465*fae548d3Szrj }
1466*fae548d3Szrj PUT (ch);
1467*fae548d3Szrj break;
1468*fae548d3Szrj }
1469*fae548d3Szrj }
1470*fae548d3Szrj
1471*fae548d3Szrj /*NOTREACHED*/
1472*fae548d3Szrj
1473*fae548d3Szrj fromeof:
1474*fae548d3Szrj /* We have reached the end of the input. */
1475*fae548d3Szrj #ifdef TC_ARM
1476*fae548d3Szrj if (to > tostart)
1477*fae548d3Szrj last_char = to[-1];
1478*fae548d3Szrj #endif
1479*fae548d3Szrj return to - tostart;
1480*fae548d3Szrj
1481*fae548d3Szrj tofull:
1482*fae548d3Szrj /* The output buffer is full. Save any input we have not yet
1483*fae548d3Szrj processed. */
1484*fae548d3Szrj if (fromend > from)
1485*fae548d3Szrj {
1486*fae548d3Szrj saved_input = from;
1487*fae548d3Szrj saved_input_len = fromend - from;
1488*fae548d3Szrj }
1489*fae548d3Szrj else
1490*fae548d3Szrj saved_input = NULL;
1491*fae548d3Szrj
1492*fae548d3Szrj #ifdef TC_ARM
1493*fae548d3Szrj if (to > tostart)
1494*fae548d3Szrj last_char = to[-1];
1495*fae548d3Szrj #endif
1496*fae548d3Szrj return to - tostart;
1497*fae548d3Szrj }
1498