xref: /dragonfly/contrib/binutils-2.34/gas/app.c (revision fae548d3)
1*fae548d3Szrj /* This is the Assembler Pre-Processor
2*fae548d3Szrj    Copyright (C) 1987-2020 Free Software Foundation, Inc.
3*fae548d3Szrj 
4*fae548d3Szrj    This file is part of GAS, the GNU Assembler.
5*fae548d3Szrj 
6*fae548d3Szrj    GAS is free software; you can redistribute it and/or modify
7*fae548d3Szrj    it under the terms of the GNU General Public License as published by
8*fae548d3Szrj    the Free Software Foundation; either version 3, or (at your option)
9*fae548d3Szrj    any later version.
10*fae548d3Szrj 
11*fae548d3Szrj    GAS is distributed in the hope that it will be useful, but WITHOUT
12*fae548d3Szrj    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13*fae548d3Szrj    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14*fae548d3Szrj    License for more details.
15*fae548d3Szrj 
16*fae548d3Szrj    You should have received a copy of the GNU General Public License
17*fae548d3Szrj    along with GAS; see the file COPYING.  If not, write to the Free
18*fae548d3Szrj    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19*fae548d3Szrj    02110-1301, USA.  */
20*fae548d3Szrj 
21*fae548d3Szrj /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22*fae548d3Szrj /* App, the assembler pre-processor.  This pre-processor strips out
23*fae548d3Szrj    excess spaces, turns single-quoted characters into a decimal
24*fae548d3Szrj    constant, and turns the # in # <number> <filename> <garbage> into a
25*fae548d3Szrj    .linefile.  This needs better error-handling.  */
26*fae548d3Szrj 
27*fae548d3Szrj #include "as.h"
28*fae548d3Szrj 
29*fae548d3Szrj #if (__STDC__ != 1)
30*fae548d3Szrj #ifndef const
31*fae548d3Szrj #define const  /* empty */
32*fae548d3Szrj #endif
33*fae548d3Szrj #endif
34*fae548d3Szrj 
35*fae548d3Szrj #ifdef H_TICK_HEX
36*fae548d3Szrj int enable_h_tick_hex = 0;
37*fae548d3Szrj #endif
38*fae548d3Szrj 
39*fae548d3Szrj #ifdef TC_M68K
40*fae548d3Szrj /* Whether we are scrubbing in m68k MRI mode.  This is different from
41*fae548d3Szrj    flag_m68k_mri, because the two flags will be affected by the .mri
42*fae548d3Szrj    pseudo-op at different times.  */
43*fae548d3Szrj static int scrub_m68k_mri;
44*fae548d3Szrj 
45*fae548d3Szrj /* The pseudo-op which switches in and out of MRI mode.  See the
46*fae548d3Szrj    comment in do_scrub_chars.  */
47*fae548d3Szrj static const char mri_pseudo[] = ".mri 0";
48*fae548d3Szrj #else
49*fae548d3Szrj #define scrub_m68k_mri 0
50*fae548d3Szrj #endif
51*fae548d3Szrj 
52*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
53*fae548d3Szrj /* The pseudo-op for which we need to special-case `@' characters.
54*fae548d3Szrj    See the comment in do_scrub_chars.  */
55*fae548d3Szrj static const char   symver_pseudo[] = ".symver";
56*fae548d3Szrj static const char * symver_state;
57*fae548d3Szrj #endif
58*fae548d3Szrj #ifdef TC_ARM
59*fae548d3Szrj static char last_char;
60*fae548d3Szrj #endif
61*fae548d3Szrj 
62*fae548d3Szrj static char lex[256];
63*fae548d3Szrj static const char symbol_chars[] =
64*fae548d3Szrj "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
65*fae548d3Szrj 
66*fae548d3Szrj #define LEX_IS_SYMBOL_COMPONENT		1
67*fae548d3Szrj #define LEX_IS_WHITESPACE		2
68*fae548d3Szrj #define LEX_IS_LINE_SEPARATOR		3
69*fae548d3Szrj #define LEX_IS_COMMENT_START		4
70*fae548d3Szrj #define LEX_IS_LINE_COMMENT_START	5
71*fae548d3Szrj #define	LEX_IS_TWOCHAR_COMMENT_1ST	6
72*fae548d3Szrj #define	LEX_IS_STRINGQUOTE		8
73*fae548d3Szrj #define	LEX_IS_COLON			9
74*fae548d3Szrj #define	LEX_IS_NEWLINE			10
75*fae548d3Szrj #define	LEX_IS_ONECHAR_QUOTE		11
76*fae548d3Szrj #ifdef TC_V850
77*fae548d3Szrj #define LEX_IS_DOUBLEDASH_1ST		12
78*fae548d3Szrj #endif
79*fae548d3Szrj #ifdef TC_M32R
80*fae548d3Szrj #define DOUBLEBAR_PARALLEL
81*fae548d3Szrj #endif
82*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
83*fae548d3Szrj #define LEX_IS_DOUBLEBAR_1ST		13
84*fae548d3Szrj #endif
85*fae548d3Szrj #define LEX_IS_PARALLEL_SEPARATOR	14
86*fae548d3Szrj #ifdef H_TICK_HEX
87*fae548d3Szrj #define LEX_IS_H			15
88*fae548d3Szrj #endif
89*fae548d3Szrj #define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
90*fae548d3Szrj #define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
91*fae548d3Szrj #define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
92*fae548d3Szrj #define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
93*fae548d3Szrj #define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
94*fae548d3Szrj #define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
95*fae548d3Szrj #define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
96*fae548d3Szrj 
97*fae548d3Szrj static int process_escape (int);
98*fae548d3Szrj 
99*fae548d3Szrj /* FIXME-soon: The entire lexer/parser thingy should be
100*fae548d3Szrj    built statically at compile time rather than dynamically
101*fae548d3Szrj    each and every time the assembler is run.  xoxorich.  */
102*fae548d3Szrj 
103*fae548d3Szrj void
do_scrub_begin(int m68k_mri ATTRIBUTE_UNUSED)104*fae548d3Szrj do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
105*fae548d3Szrj {
106*fae548d3Szrj   const char *p;
107*fae548d3Szrj   int c;
108*fae548d3Szrj 
109*fae548d3Szrj   lex[' '] = LEX_IS_WHITESPACE;
110*fae548d3Szrj   lex['\t'] = LEX_IS_WHITESPACE;
111*fae548d3Szrj   lex['\r'] = LEX_IS_WHITESPACE;
112*fae548d3Szrj   lex['\n'] = LEX_IS_NEWLINE;
113*fae548d3Szrj   lex[':'] = LEX_IS_COLON;
114*fae548d3Szrj 
115*fae548d3Szrj #ifdef TC_M68K
116*fae548d3Szrj   scrub_m68k_mri = m68k_mri;
117*fae548d3Szrj 
118*fae548d3Szrj   if (! m68k_mri)
119*fae548d3Szrj #endif
120*fae548d3Szrj     {
121*fae548d3Szrj       lex['"'] = LEX_IS_STRINGQUOTE;
122*fae548d3Szrj 
123*fae548d3Szrj #if ! defined (TC_HPPA)
124*fae548d3Szrj       lex['\''] = LEX_IS_ONECHAR_QUOTE;
125*fae548d3Szrj #endif
126*fae548d3Szrj 
127*fae548d3Szrj #ifdef SINGLE_QUOTE_STRINGS
128*fae548d3Szrj       lex['\''] = LEX_IS_STRINGQUOTE;
129*fae548d3Szrj #endif
130*fae548d3Szrj     }
131*fae548d3Szrj 
132*fae548d3Szrj   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133*fae548d3Szrj      in state 5 of do_scrub_chars must be changed.  */
134*fae548d3Szrj 
135*fae548d3Szrj   /* Note that these override the previous defaults, e.g. if ';' is a
136*fae548d3Szrj      comment char, then it isn't a line separator.  */
137*fae548d3Szrj   for (p = symbol_chars; *p; ++p)
138*fae548d3Szrj     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139*fae548d3Szrj 
140*fae548d3Szrj   for (c = 128; c < 256; ++c)
141*fae548d3Szrj     lex[c] = LEX_IS_SYMBOL_COMPONENT;
142*fae548d3Szrj 
143*fae548d3Szrj #ifdef tc_symbol_chars
144*fae548d3Szrj   /* This macro permits the processor to specify all characters which
145*fae548d3Szrj      may appears in an operand.  This will prevent the scrubber from
146*fae548d3Szrj      discarding meaningful whitespace in certain cases.  The i386
147*fae548d3Szrj      backend uses this to support prefixes, which can confuse the
148*fae548d3Szrj      scrubber as to whether it is parsing operands or opcodes.  */
149*fae548d3Szrj   for (p = tc_symbol_chars; *p; ++p)
150*fae548d3Szrj     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151*fae548d3Szrj #endif
152*fae548d3Szrj 
153*fae548d3Szrj   /* The m68k backend wants to be able to change comment_chars.  */
154*fae548d3Szrj #ifndef tc_comment_chars
155*fae548d3Szrj #define tc_comment_chars comment_chars
156*fae548d3Szrj #endif
157*fae548d3Szrj   for (p = tc_comment_chars; *p; p++)
158*fae548d3Szrj     lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159*fae548d3Szrj 
160*fae548d3Szrj   for (p = line_comment_chars; *p; p++)
161*fae548d3Szrj     lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162*fae548d3Szrj 
163*fae548d3Szrj #ifndef tc_line_separator_chars
164*fae548d3Szrj #define tc_line_separator_chars line_separator_chars
165*fae548d3Szrj #endif
166*fae548d3Szrj   for (p = tc_line_separator_chars; *p; p++)
167*fae548d3Szrj     lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
168*fae548d3Szrj 
169*fae548d3Szrj #ifdef tc_parallel_separator_chars
170*fae548d3Szrj   /* This macro permits the processor to specify all characters which
171*fae548d3Szrj      separate parallel insns on the same line.  */
172*fae548d3Szrj   for (p = tc_parallel_separator_chars; *p; p++)
173*fae548d3Szrj     lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
174*fae548d3Szrj #endif
175*fae548d3Szrj 
176*fae548d3Szrj   /* Only allow slash-star comments if slash is not in use.
177*fae548d3Szrj      FIXME: This isn't right.  We should always permit them.  */
178*fae548d3Szrj   if (lex['/'] == 0)
179*fae548d3Szrj     lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
180*fae548d3Szrj 
181*fae548d3Szrj #ifdef TC_M68K
182*fae548d3Szrj   if (m68k_mri)
183*fae548d3Szrj     {
184*fae548d3Szrj       lex['\''] = LEX_IS_STRINGQUOTE;
185*fae548d3Szrj       lex[';'] = LEX_IS_COMMENT_START;
186*fae548d3Szrj       lex['*'] = LEX_IS_LINE_COMMENT_START;
187*fae548d3Szrj       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
188*fae548d3Szrj 	 then it can't be used in an expression.  */
189*fae548d3Szrj       lex['!'] = LEX_IS_LINE_COMMENT_START;
190*fae548d3Szrj     }
191*fae548d3Szrj #endif
192*fae548d3Szrj 
193*fae548d3Szrj #ifdef TC_V850
194*fae548d3Szrj   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
195*fae548d3Szrj #endif
196*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
197*fae548d3Szrj   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
198*fae548d3Szrj #endif
199*fae548d3Szrj #ifdef TC_D30V
200*fae548d3Szrj   /* Must do this is we want VLIW instruction with "->" or "<-".  */
201*fae548d3Szrj   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
202*fae548d3Szrj #endif
203*fae548d3Szrj 
204*fae548d3Szrj #ifdef H_TICK_HEX
205*fae548d3Szrj   if (enable_h_tick_hex)
206*fae548d3Szrj     {
207*fae548d3Szrj       lex['h'] = LEX_IS_H;
208*fae548d3Szrj       lex['H'] = LEX_IS_H;
209*fae548d3Szrj     }
210*fae548d3Szrj #endif
211*fae548d3Szrj }
212*fae548d3Szrj 
/* Saved state of the scrubber.  These file-scope variables persist
   between calls to do_scrub_chars.  app_push and app_pop save and
   restore each of them; input_buffer itself is not saved, only the
   pending bytes that saved_input refers to.  */

/* Current state of the do_scrub_chars state machine, and the state to
   go back to once a temporary state (string, comment, out_string
   output) is finished.  See the state list at the top of
   do_scrub_chars.  */
static int state;
static int old_state;

/* Text being emitted while in state -1; do_scrub_chars outputs one
   character from it per step and returns to old_state when the
   string is used up.  */
static const char *out_string;

/* NOTE(review): presumably the scratch storage out_string points at
   for generated text -- its users lie outside the visible part of the
   file; confirm.  */
static char out_buf[20];

/* Incremented each time state 6 folds a backslash-newline inside a
   string into "\n".  NOTE(review): presumably the number of newlines
   still owed to the output so line counts stay right -- the consumer
   is not visible here; confirm.  */
static int add_newlines;

/* When non-NULL, do_scrub_chars starts reading from these
   saved_input_len bytes instead of calling its GET callback first.  */
static char *saved_input;
static size_t saved_input_len;

/* Buffer handed to the caller-supplied GET callback to fill with raw
   input.  */
static char input_buffer[32 * 1024];

/* Progress of the TC_M68K recognizer for the MRI-mode pseudo-op in
   do_scrub_chars; NULL while nothing has been matched.  */
static const char *mri_state;

/* NOTE(review): used together with mri_state, but its exact role is
   not visible in this part of the file; confirm.  */
static char mri_last_ch;
224*fae548d3Szrj 
225*fae548d3Szrj /* Data structure for saving the state of app across #include's.  Note that
226*fae548d3Szrj    app is called asynchronously to the parsing of the .include's, so our
227*fae548d3Szrj    state at the time .include is interpreted is completely unrelated.
228*fae548d3Szrj    That's why we have to save it all.  */
229*fae548d3Szrj 
/* One saved copy of the scrubber's file-scope state.  app_push fills
   an instance from the like-named static variables and app_pop copies
   it back and frees it.  */
struct app_save
{
  /* Copies of the state-machine variables.  */
  int          state;
  int          old_state;
  const char * out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  /* Heap copy of the input that was pending at push time, or NULL if
     there was none.  saved_input_len is only set (and only read) when
     saved_input is non-NULL.  */
  char *       saved_input;
  size_t       saved_input_len;
#ifdef TC_M68K
  /* Only a real variable on m68k; elsewhere scrub_m68k_mri is the
     constant 0 and needs no saving.  */
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  /* Progress of the .symver recognizer.  */
  const char * symver_state;
#endif
#ifdef TC_ARM
  char last_char;
#endif
};
251*fae548d3Szrj 
252*fae548d3Szrj char *
app_push(void)253*fae548d3Szrj app_push (void)
254*fae548d3Szrj {
255*fae548d3Szrj   struct app_save *saved;
256*fae548d3Szrj 
257*fae548d3Szrj   saved = XNEW (struct app_save);
258*fae548d3Szrj   saved->state = state;
259*fae548d3Szrj   saved->old_state = old_state;
260*fae548d3Szrj   saved->out_string = out_string;
261*fae548d3Szrj   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
262*fae548d3Szrj   saved->add_newlines = add_newlines;
263*fae548d3Szrj   if (saved_input == NULL)
264*fae548d3Szrj     saved->saved_input = NULL;
265*fae548d3Szrj   else
266*fae548d3Szrj     {
267*fae548d3Szrj       saved->saved_input = XNEWVEC (char, saved_input_len);
268*fae548d3Szrj       memcpy (saved->saved_input, saved_input, saved_input_len);
269*fae548d3Szrj       saved->saved_input_len = saved_input_len;
270*fae548d3Szrj     }
271*fae548d3Szrj #ifdef TC_M68K
272*fae548d3Szrj   saved->scrub_m68k_mri = scrub_m68k_mri;
273*fae548d3Szrj #endif
274*fae548d3Szrj   saved->mri_state = mri_state;
275*fae548d3Szrj   saved->mri_last_ch = mri_last_ch;
276*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
277*fae548d3Szrj   saved->symver_state = symver_state;
278*fae548d3Szrj #endif
279*fae548d3Szrj #ifdef TC_ARM
280*fae548d3Szrj   saved->last_char = last_char;
281*fae548d3Szrj #endif
282*fae548d3Szrj 
283*fae548d3Szrj   /* do_scrub_begin() is not useful, just wastes time.  */
284*fae548d3Szrj 
285*fae548d3Szrj   state = 0;
286*fae548d3Szrj   saved_input = NULL;
287*fae548d3Szrj   add_newlines = 0;
288*fae548d3Szrj 
289*fae548d3Szrj   return (char *) saved;
290*fae548d3Szrj }
291*fae548d3Szrj 
292*fae548d3Szrj void
app_pop(char * arg)293*fae548d3Szrj app_pop (char *arg)
294*fae548d3Szrj {
295*fae548d3Szrj   struct app_save *saved = (struct app_save *) arg;
296*fae548d3Szrj 
297*fae548d3Szrj   /* There is no do_scrub_end ().  */
298*fae548d3Szrj   state = saved->state;
299*fae548d3Szrj   old_state = saved->old_state;
300*fae548d3Szrj   out_string = saved->out_string;
301*fae548d3Szrj   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
302*fae548d3Szrj   add_newlines = saved->add_newlines;
303*fae548d3Szrj   if (saved->saved_input == NULL)
304*fae548d3Szrj     saved_input = NULL;
305*fae548d3Szrj   else
306*fae548d3Szrj     {
307*fae548d3Szrj       gas_assert (saved->saved_input_len <= sizeof (input_buffer));
308*fae548d3Szrj       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
309*fae548d3Szrj       saved_input = input_buffer;
310*fae548d3Szrj       saved_input_len = saved->saved_input_len;
311*fae548d3Szrj       free (saved->saved_input);
312*fae548d3Szrj     }
313*fae548d3Szrj #ifdef TC_M68K
314*fae548d3Szrj   scrub_m68k_mri = saved->scrub_m68k_mri;
315*fae548d3Szrj #endif
316*fae548d3Szrj   mri_state = saved->mri_state;
317*fae548d3Szrj   mri_last_ch = saved->mri_last_ch;
318*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
319*fae548d3Szrj   symver_state = saved->symver_state;
320*fae548d3Szrj #endif
321*fae548d3Szrj #ifdef TC_ARM
322*fae548d3Szrj   last_char = saved->last_char;
323*fae548d3Szrj #endif
324*fae548d3Szrj 
325*fae548d3Szrj   free (arg);
326*fae548d3Szrj }
327*fae548d3Szrj 
328*fae548d3Szrj /* @@ This assumes that \n &c are the same on host and target.  This is not
329*fae548d3Szrj    necessarily true.  */
330*fae548d3Szrj 
/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  Only b, f, n, r and t are
   translated; every other value, including both quote characters and
   EOF, is returned unchanged.  */

static int
process_escape (int ch)
{
  static const struct
  {
    char letter;	/* Character seen after the backslash.  */
    char value;		/* Character it denotes.  */
  } escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' }
  };
  size_t i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (ch == escapes[i].letter)
      return escapes[i].value;

  /* Quotes and anything unrecognized stand for themselves.  */
  return ch;
}
354*fae548d3Szrj 
355*fae548d3Szrj /* This function is called to process input characters.  The GET
356*fae548d3Szrj    parameter is used to retrieve more input characters.  GET should
357*fae548d3Szrj    set its parameter to point to a buffer, and return the length of
358*fae548d3Szrj    the buffer; it should return 0 at end of file.  The scrubbed output
359*fae548d3Szrj    characters are put into the buffer starting at TOSTART; the TOSTART
360*fae548d3Szrj    buffer is TOLEN bytes in length.  The function returns the number
361*fae548d3Szrj    of scrubbed characters put into TOSTART.  This will be TOLEN unless
362*fae548d3Szrj    end of file was seen.  This function is arranged as a state
363*fae548d3Szrj    machine, and saves its state so that it may return at any point.
364*fae548d3Szrj    This is the way the old code used to work.  */
365*fae548d3Szrj 
366*fae548d3Szrj size_t
do_scrub_chars(size_t (* get)(char *,size_t),char * tostart,size_t tolen)367*fae548d3Szrj do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
368*fae548d3Szrj {
369*fae548d3Szrj   char *to = tostart;
370*fae548d3Szrj   char *toend = tostart + tolen;
371*fae548d3Szrj   char *from;
372*fae548d3Szrj   char *fromend;
373*fae548d3Szrj   size_t fromlen;
374*fae548d3Szrj   int ch, ch2 = 0;
375*fae548d3Szrj   /* Character that started the string we're working on.  */
376*fae548d3Szrj   static char quotechar;
377*fae548d3Szrj 
378*fae548d3Szrj   /*State 0: beginning of normal line
379*fae548d3Szrj 	  1: After first whitespace on line (flush more white)
380*fae548d3Szrj 	  2: After first non-white (opcode) on line (keep 1white)
381*fae548d3Szrj 	  3: after second white on line (into operands) (flush white)
382*fae548d3Szrj 	  4: after putting out a .linefile, put out digits
383*fae548d3Szrj 	  5: parsing a string, then go to old-state
384*fae548d3Szrj 	  6: putting out \ escape in a "d string.
385*fae548d3Szrj 	  7: no longer used
386*fae548d3Szrj 	  8: no longer used
387*fae548d3Szrj 	  9: After seeing symbol char in state 3 (keep 1white after symchar)
388*fae548d3Szrj 	 10: After seeing whitespace in state 9 (keep white before symchar)
389*fae548d3Szrj 	 11: After seeing a symbol character in state 0 (eg a label definition)
390*fae548d3Szrj 	 -1: output string in out_string and go to the state in old_state
391*fae548d3Szrj 	 -2: flush text until a '*' '/' is seen, then go to state old_state
392*fae548d3Szrj #ifdef TC_V850
393*fae548d3Szrj 	 12: After seeing a dash, looking for a second dash as a start
394*fae548d3Szrj 	     of comment.
395*fae548d3Szrj #endif
396*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
397*fae548d3Szrj 	 13: After seeing a vertical bar, looking for a second
398*fae548d3Szrj 	     vertical bar as a parallel expression separator.
399*fae548d3Szrj #endif
400*fae548d3Szrj #ifdef TC_PREDICATE_START_CHAR
401*fae548d3Szrj 	 14: After seeing a predicate start character at state 0, looking
402*fae548d3Szrj 	     for a predicate end character as predicate.
403*fae548d3Szrj 	 15: After seeing a predicate start character at state 1, looking
404*fae548d3Szrj 	     for a predicate end character as predicate.
405*fae548d3Szrj #endif
406*fae548d3Szrj #ifdef TC_Z80
407*fae548d3Szrj 	 16: After seeing an 'a' or an 'A' at the start of a symbol
408*fae548d3Szrj 	 17: After seeing an 'f' or an 'F' in state 16
409*fae548d3Szrj #endif
410*fae548d3Szrj 	  */
411*fae548d3Szrj 
412*fae548d3Szrj   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
413*fae548d3Szrj      constructs like ``.loc 1 20''.  This was turning into ``.loc
414*fae548d3Szrj      120''.  States 9 and 10 ensure that a space is never dropped in
415*fae548d3Szrj      between characters which could appear in an identifier.  Ian
416*fae548d3Szrj      Taylor, ian@cygnus.com.
417*fae548d3Szrj 
418*fae548d3Szrj      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
419*fae548d3Szrj      correctly on the PA (and any other target where colons are optional).
420*fae548d3Szrj      Jeff Law, law@cs.utah.edu.
421*fae548d3Szrj 
422*fae548d3Szrj      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
423*fae548d3Szrj      get squashed into "cmp r1,r2||trap#1", with the all important space
424*fae548d3Szrj      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
425*fae548d3Szrj 
426*fae548d3Szrj   /* This macro gets the next input character.  */
427*fae548d3Szrj 
428*fae548d3Szrj #define GET()							\
429*fae548d3Szrj   (from < fromend						\
430*fae548d3Szrj    ? * (unsigned char *) (from++)				\
431*fae548d3Szrj    : (saved_input = NULL,					\
432*fae548d3Szrj       fromlen = (*get) (input_buffer, sizeof input_buffer),	\
433*fae548d3Szrj       from = input_buffer,					\
434*fae548d3Szrj       fromend = from + fromlen,					\
435*fae548d3Szrj       (fromlen == 0						\
436*fae548d3Szrj        ? EOF							\
437*fae548d3Szrj        : * (unsigned char *) (from++))))
438*fae548d3Szrj 
439*fae548d3Szrj   /* This macro pushes a character back on the input stream.  */
440*fae548d3Szrj 
441*fae548d3Szrj #define UNGET(uch) (*--from = (uch))
442*fae548d3Szrj 
443*fae548d3Szrj   /* This macro puts a character into the output buffer.  If this
444*fae548d3Szrj      character fills the output buffer, this macro jumps to the label
445*fae548d3Szrj      TOFULL.  We use this rather ugly approach because we need to
446*fae548d3Szrj      handle two different termination conditions: EOF on the input
447*fae548d3Szrj      stream, and a full output buffer.  It would be simpler if we
448*fae548d3Szrj      always read in the entire input stream before processing it, but
449*fae548d3Szrj      I don't want to make such a significant change to the assembler's
450*fae548d3Szrj      memory usage.  */
451*fae548d3Szrj 
452*fae548d3Szrj #define PUT(pch)				\
453*fae548d3Szrj   do						\
454*fae548d3Szrj     {						\
455*fae548d3Szrj       *to++ = (pch);				\
456*fae548d3Szrj       if (to >= toend)				\
457*fae548d3Szrj 	goto tofull;				\
458*fae548d3Szrj     }						\
459*fae548d3Szrj   while (0)
460*fae548d3Szrj 
461*fae548d3Szrj   if (saved_input != NULL)
462*fae548d3Szrj     {
463*fae548d3Szrj       from = saved_input;
464*fae548d3Szrj       fromend = from + saved_input_len;
465*fae548d3Szrj     }
466*fae548d3Szrj   else
467*fae548d3Szrj     {
468*fae548d3Szrj       fromlen = (*get) (input_buffer, sizeof input_buffer);
469*fae548d3Szrj       if (fromlen == 0)
470*fae548d3Szrj 	return 0;
471*fae548d3Szrj       from = input_buffer;
472*fae548d3Szrj       fromend = from + fromlen;
473*fae548d3Szrj     }
474*fae548d3Szrj 
475*fae548d3Szrj   while (1)
476*fae548d3Szrj     {
477*fae548d3Szrj       /* The cases in this switch end with continue, in order to
478*fae548d3Szrj 	 branch back to the top of this while loop and generate the
479*fae548d3Szrj 	 next output character in the appropriate state.  */
480*fae548d3Szrj       switch (state)
481*fae548d3Szrj 	{
482*fae548d3Szrj 	case -1:
483*fae548d3Szrj 	  ch = *out_string++;
484*fae548d3Szrj 	  if (*out_string == '\0')
485*fae548d3Szrj 	    {
486*fae548d3Szrj 	      state = old_state;
487*fae548d3Szrj 	      old_state = 3;
488*fae548d3Szrj 	    }
489*fae548d3Szrj 	  PUT (ch);
490*fae548d3Szrj 	  continue;
491*fae548d3Szrj 
492*fae548d3Szrj 	case -2:
493*fae548d3Szrj 	  for (;;)
494*fae548d3Szrj 	    {
495*fae548d3Szrj 	      do
496*fae548d3Szrj 		{
497*fae548d3Szrj 		  ch = GET ();
498*fae548d3Szrj 
499*fae548d3Szrj 		  if (ch == EOF)
500*fae548d3Szrj 		    {
501*fae548d3Szrj 		      as_warn (_("end of file in comment"));
502*fae548d3Szrj 		      goto fromeof;
503*fae548d3Szrj 		    }
504*fae548d3Szrj 
505*fae548d3Szrj 		  if (ch == '\n')
506*fae548d3Szrj 		    PUT ('\n');
507*fae548d3Szrj 		}
508*fae548d3Szrj 	      while (ch != '*');
509*fae548d3Szrj 
510*fae548d3Szrj 	      while ((ch = GET ()) == '*')
511*fae548d3Szrj 		;
512*fae548d3Szrj 
513*fae548d3Szrj 	      if (ch == EOF)
514*fae548d3Szrj 		{
515*fae548d3Szrj 		  as_warn (_("end of file in comment"));
516*fae548d3Szrj 		  goto fromeof;
517*fae548d3Szrj 		}
518*fae548d3Szrj 
519*fae548d3Szrj 	      if (ch == '/')
520*fae548d3Szrj 		break;
521*fae548d3Szrj 
522*fae548d3Szrj 	      UNGET (ch);
523*fae548d3Szrj 	    }
524*fae548d3Szrj 
525*fae548d3Szrj 	  state = old_state;
526*fae548d3Szrj 	  UNGET (' ');
527*fae548d3Szrj 	  continue;
528*fae548d3Szrj 
529*fae548d3Szrj 	case 4:
530*fae548d3Szrj 	  ch = GET ();
531*fae548d3Szrj 	  if (ch == EOF)
532*fae548d3Szrj 	    goto fromeof;
533*fae548d3Szrj 	  else if (ch >= '0' && ch <= '9')
534*fae548d3Szrj 	    PUT (ch);
535*fae548d3Szrj 	  else
536*fae548d3Szrj 	    {
537*fae548d3Szrj 	      while (ch != EOF && IS_WHITESPACE (ch))
538*fae548d3Szrj 		ch = GET ();
539*fae548d3Szrj 	      if (ch == '"')
540*fae548d3Szrj 		{
541*fae548d3Szrj 		  quotechar = ch;
542*fae548d3Szrj 		  state = 5;
543*fae548d3Szrj 		  old_state = 3;
544*fae548d3Szrj 		  PUT (ch);
545*fae548d3Szrj 		}
546*fae548d3Szrj 	      else
547*fae548d3Szrj 		{
548*fae548d3Szrj 		  while (ch != EOF && ch != '\n')
549*fae548d3Szrj 		    ch = GET ();
550*fae548d3Szrj 		  state = 0;
551*fae548d3Szrj 		  PUT (ch);
552*fae548d3Szrj 		}
553*fae548d3Szrj 	    }
554*fae548d3Szrj 	  continue;
555*fae548d3Szrj 
556*fae548d3Szrj 	case 5:
557*fae548d3Szrj 	  /* We are going to copy everything up to a quote character,
558*fae548d3Szrj 	     with special handling for a backslash.  We try to
559*fae548d3Szrj 	     optimize the copying in the simple case without using the
560*fae548d3Szrj 	     GET and PUT macros.  */
561*fae548d3Szrj 	  {
562*fae548d3Szrj 	    char *s;
563*fae548d3Szrj 	    ptrdiff_t len;
564*fae548d3Szrj 
565*fae548d3Szrj 	    for (s = from; s < fromend; s++)
566*fae548d3Szrj 	      {
567*fae548d3Szrj 		ch = *s;
568*fae548d3Szrj 		if (ch == '\\'
569*fae548d3Szrj 		    || ch == quotechar
570*fae548d3Szrj 		    || ch == '\n')
571*fae548d3Szrj 		  break;
572*fae548d3Szrj 	      }
573*fae548d3Szrj 	    len = s - from;
574*fae548d3Szrj 	    if (len > toend - to)
575*fae548d3Szrj 	      len = toend - to;
576*fae548d3Szrj 	    if (len > 0)
577*fae548d3Szrj 	      {
578*fae548d3Szrj 		memcpy (to, from, len);
579*fae548d3Szrj 		to += len;
580*fae548d3Szrj 		from += len;
581*fae548d3Szrj 		if (to >= toend)
582*fae548d3Szrj 		  goto tofull;
583*fae548d3Szrj 	      }
584*fae548d3Szrj 	  }
585*fae548d3Szrj 
586*fae548d3Szrj 	  ch = GET ();
587*fae548d3Szrj 	  if (ch == EOF)
588*fae548d3Szrj 	    {
589*fae548d3Szrj 	      /* This buffer is here specifically so
590*fae548d3Szrj 		 that the UNGET below will work.  */
591*fae548d3Szrj 	      static char one_char_buf[1];
592*fae548d3Szrj 
593*fae548d3Szrj 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
594*fae548d3Szrj 	      state = old_state;
595*fae548d3Szrj 	      from = fromend = one_char_buf + 1;
596*fae548d3Szrj 	      fromlen = 1;
597*fae548d3Szrj 	      UNGET ('\n');
598*fae548d3Szrj 	      PUT (quotechar);
599*fae548d3Szrj 	    }
600*fae548d3Szrj 	  else if (ch == quotechar)
601*fae548d3Szrj 	    {
602*fae548d3Szrj 	      state = old_state;
603*fae548d3Szrj 	      PUT (ch);
604*fae548d3Szrj 	    }
605*fae548d3Szrj 	  else if (TC_STRING_ESCAPES && ch == '\\')
606*fae548d3Szrj 	    {
607*fae548d3Szrj 	      state = 6;
608*fae548d3Szrj 	      PUT (ch);
609*fae548d3Szrj 	    }
610*fae548d3Szrj 	  else if (scrub_m68k_mri && ch == '\n')
611*fae548d3Szrj 	    {
612*fae548d3Szrj 	      /* Just quietly terminate the string.  This permits lines like
613*fae548d3Szrj 		   bne	label	loop if we haven't reach end yet.  */
614*fae548d3Szrj 	      state = old_state;
615*fae548d3Szrj 	      UNGET (ch);
616*fae548d3Szrj 	      PUT ('\'');
617*fae548d3Szrj 	    }
618*fae548d3Szrj 	  else
619*fae548d3Szrj 	    {
620*fae548d3Szrj 	      PUT (ch);
621*fae548d3Szrj 	    }
622*fae548d3Szrj 	  continue;
623*fae548d3Szrj 
624*fae548d3Szrj 	case 6:
625*fae548d3Szrj 	  state = 5;
626*fae548d3Szrj 	  ch = GET ();
627*fae548d3Szrj 	  switch (ch)
628*fae548d3Szrj 	    {
629*fae548d3Szrj 	      /* Handle strings broken across lines, by turning '\n' into
630*fae548d3Szrj 		 '\\' and 'n'.  */
631*fae548d3Szrj 	    case '\n':
632*fae548d3Szrj 	      UNGET ('n');
633*fae548d3Szrj 	      add_newlines++;
634*fae548d3Szrj 	      PUT ('\\');
635*fae548d3Szrj 	      continue;
636*fae548d3Szrj 
637*fae548d3Szrj 	    case EOF:
638*fae548d3Szrj 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
639*fae548d3Szrj 	      PUT (quotechar);
640*fae548d3Szrj 	      continue;
641*fae548d3Szrj 
642*fae548d3Szrj 	    case '"':
643*fae548d3Szrj 	    case '\\':
644*fae548d3Szrj 	    case 'b':
645*fae548d3Szrj 	    case 'f':
646*fae548d3Szrj 	    case 'n':
647*fae548d3Szrj 	    case 'r':
648*fae548d3Szrj 	    case 't':
649*fae548d3Szrj 	    case 'v':
650*fae548d3Szrj 	    case 'x':
651*fae548d3Szrj 	    case 'X':
652*fae548d3Szrj 	    case '0':
653*fae548d3Szrj 	    case '1':
654*fae548d3Szrj 	    case '2':
655*fae548d3Szrj 	    case '3':
656*fae548d3Szrj 	    case '4':
657*fae548d3Szrj 	    case '5':
658*fae548d3Szrj 	    case '6':
659*fae548d3Szrj 	    case '7':
660*fae548d3Szrj 	      break;
661*fae548d3Szrj 
662*fae548d3Szrj 	    default:
663*fae548d3Szrj #ifdef ONLY_STANDARD_ESCAPES
664*fae548d3Szrj 	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
665*fae548d3Szrj #endif
666*fae548d3Szrj 	      break;
667*fae548d3Szrj 	    }
668*fae548d3Szrj 	  PUT (ch);
669*fae548d3Szrj 	  continue;
670*fae548d3Szrj 
671*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
672*fae548d3Szrj 	case 13:
673*fae548d3Szrj 	  ch = GET ();
674*fae548d3Szrj 	  if (ch != '|')
675*fae548d3Szrj 	    abort ();
676*fae548d3Szrj 
677*fae548d3Szrj 	  /* Reset back to state 1 and pretend that we are parsing a
678*fae548d3Szrj 	     line from just after the first white space.  */
679*fae548d3Szrj 	  state = 1;
680*fae548d3Szrj 	  PUT ('|');
681*fae548d3Szrj #ifdef TC_TIC6X
682*fae548d3Szrj 	  /* "||^" is used for SPMASKed instructions.  */
683*fae548d3Szrj 	  ch = GET ();
684*fae548d3Szrj 	  if (ch == EOF)
685*fae548d3Szrj 	    goto fromeof;
686*fae548d3Szrj 	  else if (ch == '^')
687*fae548d3Szrj 	    PUT ('^');
688*fae548d3Szrj 	  else
689*fae548d3Szrj 	    UNGET (ch);
690*fae548d3Szrj #endif
691*fae548d3Szrj 	  continue;
692*fae548d3Szrj #endif
693*fae548d3Szrj #ifdef TC_Z80
694*fae548d3Szrj 	case 16:
695*fae548d3Szrj 	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
696*fae548d3Szrj 	  ch = GET ();
697*fae548d3Szrj 	  if (ch == 'f' || ch == 'F')
698*fae548d3Szrj 	    {
699*fae548d3Szrj 	      state = 17;
700*fae548d3Szrj 	      PUT (ch);
701*fae548d3Szrj 	    }
702*fae548d3Szrj 	  else
703*fae548d3Szrj 	    {
704*fae548d3Szrj 	      state = 9;
705*fae548d3Szrj 	      break;
706*fae548d3Szrj 	    }
707*fae548d3Szrj 	  /* Fall through.  */
708*fae548d3Szrj 	case 17:
709*fae548d3Szrj 	  /* We have seen "af" at the start of a symbol,
710*fae548d3Szrj 	     a ' here is a part of that symbol.  */
711*fae548d3Szrj 	  ch = GET ();
712*fae548d3Szrj 	  state = 9;
713*fae548d3Szrj 	  if (ch == '\'')
714*fae548d3Szrj 	    /* Change to avoid warning about unclosed string.  */
715*fae548d3Szrj 	    PUT ('`');
716*fae548d3Szrj 	  else if (ch != EOF)
717*fae548d3Szrj 	    UNGET (ch);
718*fae548d3Szrj 	  break;
719*fae548d3Szrj #endif
720*fae548d3Szrj 	}
721*fae548d3Szrj 
722*fae548d3Szrj       /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
723*fae548d3Szrj 
724*fae548d3Szrj       /* flushchar: */
725*fae548d3Szrj       ch = GET ();
726*fae548d3Szrj 
727*fae548d3Szrj #ifdef TC_PREDICATE_START_CHAR
728*fae548d3Szrj       if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
729*fae548d3Szrj 	{
730*fae548d3Szrj 	  state += 14;
731*fae548d3Szrj 	  PUT (ch);
732*fae548d3Szrj 	  continue;
733*fae548d3Szrj 	}
734*fae548d3Szrj       else if (state == 14 || state == 15)
735*fae548d3Szrj 	{
736*fae548d3Szrj 	  if (ch == TC_PREDICATE_END_CHAR)
737*fae548d3Szrj 	    {
738*fae548d3Szrj 	      state -= 14;
739*fae548d3Szrj 	      PUT (ch);
740*fae548d3Szrj 	      ch = GET ();
741*fae548d3Szrj 	    }
742*fae548d3Szrj 	  else
743*fae548d3Szrj 	    {
744*fae548d3Szrj 	      PUT (ch);
745*fae548d3Szrj 	      continue;
746*fae548d3Szrj 	    }
747*fae548d3Szrj 	}
748*fae548d3Szrj #endif
749*fae548d3Szrj 
750*fae548d3Szrj     recycle:
751*fae548d3Szrj 
752*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
753*fae548d3Szrj       /* We need to watch out for .symver directives.  See the comment later
754*fae548d3Szrj 	 in this function.  */
755*fae548d3Szrj       if (symver_state == NULL)
756*fae548d3Szrj 	{
757*fae548d3Szrj 	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
758*fae548d3Szrj 	    symver_state = symver_pseudo + 1;
759*fae548d3Szrj 	}
760*fae548d3Szrj       else
761*fae548d3Szrj 	{
762*fae548d3Szrj 	  /* We advance to the next state if we find the right
763*fae548d3Szrj 	     character.  */
764*fae548d3Szrj 	  if (ch != '\0' && (*symver_state == ch))
765*fae548d3Szrj 	    ++symver_state;
766*fae548d3Szrj 	  else if (*symver_state != '\0')
767*fae548d3Szrj 	    /* We did not get the expected character, or we didn't
768*fae548d3Szrj 	       get a valid terminating character after seeing the
769*fae548d3Szrj 	       entire pseudo-op, so we must go back to the beginning.  */
770*fae548d3Szrj 	    symver_state = NULL;
771*fae548d3Szrj 	  else
772*fae548d3Szrj 	    {
773*fae548d3Szrj 	      /* We've read the entire pseudo-op.  If this is the end
774*fae548d3Szrj 		 of the line, go back to the beginning.  */
775*fae548d3Szrj 	      if (IS_NEWLINE (ch))
776*fae548d3Szrj 		symver_state = NULL;
777*fae548d3Szrj 	    }
778*fae548d3Szrj 	}
779*fae548d3Szrj #endif /* TC_ARM && OBJ_ELF */
780*fae548d3Szrj 
781*fae548d3Szrj #ifdef TC_M68K
782*fae548d3Szrj       /* We want to have pseudo-ops which control whether we are in
783*fae548d3Szrj 	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
784*fae548d3Szrj 	 the scrubber, that means that we need a special purpose
785*fae548d3Szrj 	 recognizer here.  */
786*fae548d3Szrj       if (mri_state == NULL)
787*fae548d3Szrj 	{
788*fae548d3Szrj 	  if ((state == 0 || state == 1)
789*fae548d3Szrj 	      && ch == mri_pseudo[0])
790*fae548d3Szrj 	    mri_state = mri_pseudo + 1;
791*fae548d3Szrj 	}
792*fae548d3Szrj       else
793*fae548d3Szrj 	{
794*fae548d3Szrj 	  /* We advance to the next state if we find the right
795*fae548d3Szrj 	     character, or if we need a space character and we get any
796*fae548d3Szrj 	     whitespace character, or if we need a '0' and we get a
797*fae548d3Szrj 	     '1' (this is so that we only need one state to handle
798*fae548d3Szrj 	     ``.mri 0'' and ``.mri 1'').  */
799*fae548d3Szrj 	  if (ch != '\0'
800*fae548d3Szrj 	      && (*mri_state == ch
801*fae548d3Szrj 		  || (*mri_state == ' '
802*fae548d3Szrj 		      && lex[ch] == LEX_IS_WHITESPACE)
803*fae548d3Szrj 		  || (*mri_state == '0'
804*fae548d3Szrj 		      && ch == '1')))
805*fae548d3Szrj 	    {
806*fae548d3Szrj 	      mri_last_ch = ch;
807*fae548d3Szrj 	      ++mri_state;
808*fae548d3Szrj 	    }
809*fae548d3Szrj 	  else if (*mri_state != '\0'
810*fae548d3Szrj 		   || (lex[ch] != LEX_IS_WHITESPACE
811*fae548d3Szrj 		       && lex[ch] != LEX_IS_NEWLINE))
812*fae548d3Szrj 	    {
813*fae548d3Szrj 	      /* We did not get the expected character, or we didn't
814*fae548d3Szrj 		 get a valid terminating character after seeing the
815*fae548d3Szrj 		 entire pseudo-op, so we must go back to the
816*fae548d3Szrj 		 beginning.  */
817*fae548d3Szrj 	      mri_state = NULL;
818*fae548d3Szrj 	    }
819*fae548d3Szrj 	  else
820*fae548d3Szrj 	    {
821*fae548d3Szrj 	      /* We've read the entire pseudo-op.  mri_last_ch is
822*fae548d3Szrj 		 either '0' or '1' indicating whether to enter or
823*fae548d3Szrj 		 leave MRI mode.  */
824*fae548d3Szrj 	      do_scrub_begin (mri_last_ch == '1');
825*fae548d3Szrj 	      mri_state = NULL;
826*fae548d3Szrj 
827*fae548d3Szrj 	      /* We continue handling the character as usual.  The
828*fae548d3Szrj 		 main gas reader must also handle the .mri pseudo-op
829*fae548d3Szrj 		 to control expression parsing and the like.  */
830*fae548d3Szrj 	    }
831*fae548d3Szrj 	}
832*fae548d3Szrj #endif
833*fae548d3Szrj 
834*fae548d3Szrj       if (ch == EOF)
835*fae548d3Szrj 	{
836*fae548d3Szrj 	  if (state != 0)
837*fae548d3Szrj 	    {
838*fae548d3Szrj 	      as_warn (_("end of file not at end of a line; newline inserted"));
839*fae548d3Szrj 	      state = 0;
840*fae548d3Szrj 	      PUT ('\n');
841*fae548d3Szrj 	    }
842*fae548d3Szrj 	  goto fromeof;
843*fae548d3Szrj 	}
844*fae548d3Szrj 
845*fae548d3Szrj       switch (lex[ch])
846*fae548d3Szrj 	{
847*fae548d3Szrj 	case LEX_IS_WHITESPACE:
848*fae548d3Szrj 	  do
849*fae548d3Szrj 	    {
850*fae548d3Szrj 	      ch = GET ();
851*fae548d3Szrj 	    }
852*fae548d3Szrj 	  while (ch != EOF && IS_WHITESPACE (ch));
853*fae548d3Szrj 	  if (ch == EOF)
854*fae548d3Szrj 	    goto fromeof;
855*fae548d3Szrj 
856*fae548d3Szrj 	  if (state == 0)
857*fae548d3Szrj 	    {
858*fae548d3Szrj 	      /* Preserve a single whitespace character at the
859*fae548d3Szrj 		 beginning of a line.  */
860*fae548d3Szrj 	      state = 1;
861*fae548d3Szrj 	      UNGET (ch);
862*fae548d3Szrj 	      PUT (' ');
863*fae548d3Szrj 	      break;
864*fae548d3Szrj 	    }
865*fae548d3Szrj 
866*fae548d3Szrj #ifdef KEEP_WHITE_AROUND_COLON
867*fae548d3Szrj 	  if (lex[ch] == LEX_IS_COLON)
868*fae548d3Szrj 	    {
869*fae548d3Szrj 	      /* Only keep this white if there's no white *after* the
870*fae548d3Szrj 		 colon.  */
871*fae548d3Szrj 	      ch2 = GET ();
872*fae548d3Szrj 	      if (ch2 != EOF)
873*fae548d3Szrj 		UNGET (ch2);
874*fae548d3Szrj 	      if (!IS_WHITESPACE (ch2))
875*fae548d3Szrj 		{
876*fae548d3Szrj 		  state = 9;
877*fae548d3Szrj 		  UNGET (ch);
878*fae548d3Szrj 		  PUT (' ');
879*fae548d3Szrj 		  break;
880*fae548d3Szrj 		}
881*fae548d3Szrj 	    }
882*fae548d3Szrj #endif
883*fae548d3Szrj 	  if (IS_COMMENT (ch)
884*fae548d3Szrj 	      || ch == '/'
885*fae548d3Szrj 	      || IS_LINE_SEPARATOR (ch)
886*fae548d3Szrj 	      || IS_PARALLEL_SEPARATOR (ch))
887*fae548d3Szrj 	    {
888*fae548d3Szrj 	      if (scrub_m68k_mri)
889*fae548d3Szrj 		{
890*fae548d3Szrj 		  /* In MRI mode, we keep these spaces.  */
891*fae548d3Szrj 		  UNGET (ch);
892*fae548d3Szrj 		  PUT (' ');
893*fae548d3Szrj 		  break;
894*fae548d3Szrj 		}
895*fae548d3Szrj 	      goto recycle;
896*fae548d3Szrj 	    }
897*fae548d3Szrj 
898*fae548d3Szrj 	  /* If we're in state 2 or 11, we've seen a non-white
899*fae548d3Szrj 	     character followed by whitespace.  If the next character
900*fae548d3Szrj 	     is ':', this is whitespace after a label name which we
901*fae548d3Szrj 	     normally must ignore.  In MRI mode, though, spaces are
902*fae548d3Szrj 	     not permitted between the label and the colon.  */
903*fae548d3Szrj 	  if ((state == 2 || state == 11)
904*fae548d3Szrj 	      && lex[ch] == LEX_IS_COLON
905*fae548d3Szrj 	      && ! scrub_m68k_mri)
906*fae548d3Szrj 	    {
907*fae548d3Szrj 	      state = 1;
908*fae548d3Szrj 	      PUT (ch);
909*fae548d3Szrj 	      break;
910*fae548d3Szrj 	    }
911*fae548d3Szrj 
912*fae548d3Szrj 	  switch (state)
913*fae548d3Szrj 	    {
914*fae548d3Szrj 	    case 1:
915*fae548d3Szrj 	      /* We can arrive here if we leave a leading whitespace
916*fae548d3Szrj 		 character at the beginning of a line.  */
917*fae548d3Szrj 	      goto recycle;
918*fae548d3Szrj 	    case 2:
919*fae548d3Szrj 	      state = 3;
920*fae548d3Szrj 	      if (to + 1 < toend)
921*fae548d3Szrj 		{
922*fae548d3Szrj 		  /* Optimize common case by skipping UNGET/GET.  */
923*fae548d3Szrj 		  PUT (' ');	/* Sp after opco */
924*fae548d3Szrj 		  goto recycle;
925*fae548d3Szrj 		}
926*fae548d3Szrj 	      UNGET (ch);
927*fae548d3Szrj 	      PUT (' ');
928*fae548d3Szrj 	      break;
929*fae548d3Szrj 	    case 3:
930*fae548d3Szrj #ifndef TC_KEEP_OPERAND_SPACES
931*fae548d3Szrj 	      /* For TI C6X, we keep these spaces as they may separate
932*fae548d3Szrj 		 functional unit specifiers from operands.  */
933*fae548d3Szrj 	      if (scrub_m68k_mri)
934*fae548d3Szrj #endif
935*fae548d3Szrj 		{
936*fae548d3Szrj 		  /* In MRI mode, we keep these spaces.  */
937*fae548d3Szrj 		  UNGET (ch);
938*fae548d3Szrj 		  PUT (' ');
939*fae548d3Szrj 		  break;
940*fae548d3Szrj 		}
941*fae548d3Szrj 	      goto recycle;	/* Sp in operands */
942*fae548d3Szrj 	    case 9:
943*fae548d3Szrj 	    case 10:
944*fae548d3Szrj #ifndef TC_KEEP_OPERAND_SPACES
945*fae548d3Szrj 	      if (scrub_m68k_mri)
946*fae548d3Szrj #endif
947*fae548d3Szrj 		{
948*fae548d3Szrj 		  /* In MRI mode, we keep these spaces.  */
949*fae548d3Szrj 		  state = 3;
950*fae548d3Szrj 		  UNGET (ch);
951*fae548d3Szrj 		  PUT (' ');
952*fae548d3Szrj 		  break;
953*fae548d3Szrj 		}
954*fae548d3Szrj 	      state = 10;	/* Sp after symbol char */
955*fae548d3Szrj 	      goto recycle;
956*fae548d3Szrj 	    case 11:
957*fae548d3Szrj 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
958*fae548d3Szrj 		state = 1;
959*fae548d3Szrj 	      else
960*fae548d3Szrj 		{
961*fae548d3Szrj 		  /* We know that ch is not ':', since we tested that
962*fae548d3Szrj 		     case above.  Therefore this is not a label, so it
963*fae548d3Szrj 		     must be the opcode, and we've just seen the
964*fae548d3Szrj 		     whitespace after it.  */
965*fae548d3Szrj 		  state = 3;
966*fae548d3Szrj 		}
967*fae548d3Szrj 	      UNGET (ch);
968*fae548d3Szrj 	      PUT (' ');	/* Sp after label definition.  */
969*fae548d3Szrj 	      break;
970*fae548d3Szrj 	    default:
971*fae548d3Szrj 	      BAD_CASE (state);
972*fae548d3Szrj 	    }
973*fae548d3Szrj 	  break;
974*fae548d3Szrj 
975*fae548d3Szrj 	case LEX_IS_TWOCHAR_COMMENT_1ST:
976*fae548d3Szrj 	  ch2 = GET ();
977*fae548d3Szrj 	  if (ch2 == '*')
978*fae548d3Szrj 	    {
979*fae548d3Szrj 	      for (;;)
980*fae548d3Szrj 		{
981*fae548d3Szrj 		  do
982*fae548d3Szrj 		    {
983*fae548d3Szrj 		      ch2 = GET ();
984*fae548d3Szrj 		      if (ch2 != EOF && IS_NEWLINE (ch2))
985*fae548d3Szrj 			add_newlines++;
986*fae548d3Szrj 		    }
987*fae548d3Szrj 		  while (ch2 != EOF && ch2 != '*');
988*fae548d3Szrj 
989*fae548d3Szrj 		  while (ch2 == '*')
990*fae548d3Szrj 		    ch2 = GET ();
991*fae548d3Szrj 
992*fae548d3Szrj 		  if (ch2 == EOF || ch2 == '/')
993*fae548d3Szrj 		    break;
994*fae548d3Szrj 
995*fae548d3Szrj 		  /* This UNGET will ensure that we count newlines
996*fae548d3Szrj 		     correctly.  */
997*fae548d3Szrj 		  UNGET (ch2);
998*fae548d3Szrj 		}
999*fae548d3Szrj 
1000*fae548d3Szrj 	      if (ch2 == EOF)
1001*fae548d3Szrj 		as_warn (_("end of file in multiline comment"));
1002*fae548d3Szrj 
1003*fae548d3Szrj 	      ch = ' ';
1004*fae548d3Szrj 	      goto recycle;
1005*fae548d3Szrj 	    }
1006*fae548d3Szrj #ifdef DOUBLESLASH_LINE_COMMENTS
1007*fae548d3Szrj 	  else if (ch2 == '/')
1008*fae548d3Szrj 	    {
1009*fae548d3Szrj 	      do
1010*fae548d3Szrj 		{
1011*fae548d3Szrj 		  ch = GET ();
1012*fae548d3Szrj 		}
1013*fae548d3Szrj 	      while (ch != EOF && !IS_NEWLINE (ch));
1014*fae548d3Szrj 	      if (ch == EOF)
1015*fae548d3Szrj 		as_warn ("end of file in comment; newline inserted");
1016*fae548d3Szrj 	      state = 0;
1017*fae548d3Szrj 	      PUT ('\n');
1018*fae548d3Szrj 	      break;
1019*fae548d3Szrj 	    }
1020*fae548d3Szrj #endif
1021*fae548d3Szrj 	  else
1022*fae548d3Szrj 	    {
1023*fae548d3Szrj 	      if (ch2 != EOF)
1024*fae548d3Szrj 		UNGET (ch2);
1025*fae548d3Szrj 	      if (state == 9 || state == 10)
1026*fae548d3Szrj 		state = 3;
1027*fae548d3Szrj 	      PUT (ch);
1028*fae548d3Szrj 	    }
1029*fae548d3Szrj 	  break;
1030*fae548d3Szrj 
1031*fae548d3Szrj 	case LEX_IS_STRINGQUOTE:
1032*fae548d3Szrj 	  quotechar = ch;
1033*fae548d3Szrj 	  if (state == 10)
1034*fae548d3Szrj 	    {
1035*fae548d3Szrj 	      /* Preserve the whitespace in foo "bar".  */
1036*fae548d3Szrj 	      UNGET (ch);
1037*fae548d3Szrj 	      state = 3;
1038*fae548d3Szrj 	      PUT (' ');
1039*fae548d3Szrj 
1040*fae548d3Szrj 	      /* PUT didn't jump out.  We could just break, but we
1041*fae548d3Szrj 		 know what will happen, so optimize a bit.  */
1042*fae548d3Szrj 	      ch = GET ();
1043*fae548d3Szrj 	      old_state = 3;
1044*fae548d3Szrj 	    }
1045*fae548d3Szrj 	  else if (state == 9)
1046*fae548d3Szrj 	    old_state = 3;
1047*fae548d3Szrj 	  else
1048*fae548d3Szrj 	    old_state = state;
1049*fae548d3Szrj 	  state = 5;
1050*fae548d3Szrj 	  PUT (ch);
1051*fae548d3Szrj 	  break;
1052*fae548d3Szrj 
1053*fae548d3Szrj 	case LEX_IS_ONECHAR_QUOTE:
1054*fae548d3Szrj #ifdef H_TICK_HEX
1055*fae548d3Szrj 	  if (state == 9 && enable_h_tick_hex)
1056*fae548d3Szrj 	    {
1057*fae548d3Szrj 	      char c;
1058*fae548d3Szrj 
1059*fae548d3Szrj 	      c = GET ();
1060*fae548d3Szrj 	      as_warn ("'%c found after symbol", c);
1061*fae548d3Szrj 	      UNGET (c);
1062*fae548d3Szrj 	    }
1063*fae548d3Szrj #endif
1064*fae548d3Szrj 	  if (state == 10)
1065*fae548d3Szrj 	    {
1066*fae548d3Szrj 	      /* Preserve the whitespace in foo 'b'.  */
1067*fae548d3Szrj 	      UNGET (ch);
1068*fae548d3Szrj 	      state = 3;
1069*fae548d3Szrj 	      PUT (' ');
1070*fae548d3Szrj 	      break;
1071*fae548d3Szrj 	    }
1072*fae548d3Szrj 	  ch = GET ();
1073*fae548d3Szrj 	  if (ch == EOF)
1074*fae548d3Szrj 	    {
1075*fae548d3Szrj 	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
1076*fae548d3Szrj 	      ch = 0;
1077*fae548d3Szrj 	    }
1078*fae548d3Szrj 	  if (ch == '\\')
1079*fae548d3Szrj 	    {
1080*fae548d3Szrj 	      ch = GET ();
1081*fae548d3Szrj 	      if (ch == EOF)
1082*fae548d3Szrj 		{
1083*fae548d3Szrj 		  as_warn (_("end of file in escape character"));
1084*fae548d3Szrj 		  ch = '\\';
1085*fae548d3Szrj 		}
1086*fae548d3Szrj 	      else
1087*fae548d3Szrj 		ch = process_escape (ch);
1088*fae548d3Szrj 	    }
1089*fae548d3Szrj 	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
1090*fae548d3Szrj 
1091*fae548d3Szrj 	  /* None of these 'x constants for us.  We want 'x'.  */
1092*fae548d3Szrj 	  if ((ch = GET ()) != '\'')
1093*fae548d3Szrj 	    {
1094*fae548d3Szrj #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1095*fae548d3Szrj 	      as_warn (_("missing close quote; (assumed)"));
1096*fae548d3Szrj #else
1097*fae548d3Szrj 	      if (ch != EOF)
1098*fae548d3Szrj 		UNGET (ch);
1099*fae548d3Szrj #endif
1100*fae548d3Szrj 	    }
1101*fae548d3Szrj 	  if (strlen (out_buf) == 1)
1102*fae548d3Szrj 	    {
1103*fae548d3Szrj 	      PUT (out_buf[0]);
1104*fae548d3Szrj 	      break;
1105*fae548d3Szrj 	    }
1106*fae548d3Szrj 	  if (state == 9)
1107*fae548d3Szrj 	    old_state = 3;
1108*fae548d3Szrj 	  else
1109*fae548d3Szrj 	    old_state = state;
1110*fae548d3Szrj 	  state = -1;
1111*fae548d3Szrj 	  out_string = out_buf;
1112*fae548d3Szrj 	  PUT (*out_string++);
1113*fae548d3Szrj 	  break;
1114*fae548d3Szrj 
1115*fae548d3Szrj 	case LEX_IS_COLON:
1116*fae548d3Szrj #ifdef KEEP_WHITE_AROUND_COLON
1117*fae548d3Szrj 	  state = 9;
1118*fae548d3Szrj #else
1119*fae548d3Szrj 	  if (state == 9 || state == 10)
1120*fae548d3Szrj 	    state = 3;
1121*fae548d3Szrj 	  else if (state != 3)
1122*fae548d3Szrj 	    state = 1;
1123*fae548d3Szrj #endif
1124*fae548d3Szrj 	  PUT (ch);
1125*fae548d3Szrj 	  break;
1126*fae548d3Szrj 
1127*fae548d3Szrj 	case LEX_IS_NEWLINE:
1128*fae548d3Szrj 	  /* Roll out a bunch of newlines from inside comments, etc.  */
1129*fae548d3Szrj 	  if (add_newlines)
1130*fae548d3Szrj 	    {
1131*fae548d3Szrj 	      --add_newlines;
1132*fae548d3Szrj 	      UNGET (ch);
1133*fae548d3Szrj 	    }
1134*fae548d3Szrj 	  /* Fall through.  */
1135*fae548d3Szrj 
1136*fae548d3Szrj 	case LEX_IS_LINE_SEPARATOR:
1137*fae548d3Szrj 	  state = 0;
1138*fae548d3Szrj 	  PUT (ch);
1139*fae548d3Szrj 	  break;
1140*fae548d3Szrj 
1141*fae548d3Szrj 	case LEX_IS_PARALLEL_SEPARATOR:
1142*fae548d3Szrj 	  state = 1;
1143*fae548d3Szrj 	  PUT (ch);
1144*fae548d3Szrj 	  break;
1145*fae548d3Szrj 
1146*fae548d3Szrj #ifdef TC_V850
1147*fae548d3Szrj 	case LEX_IS_DOUBLEDASH_1ST:
1148*fae548d3Szrj 	  ch2 = GET ();
1149*fae548d3Szrj 	  if (ch2 != '-')
1150*fae548d3Szrj 	    {
1151*fae548d3Szrj 	      if (ch2 != EOF)
1152*fae548d3Szrj 		UNGET (ch2);
1153*fae548d3Szrj 	      goto de_fault;
1154*fae548d3Szrj 	    }
1155*fae548d3Szrj 	  /* Read and skip to end of line.  */
1156*fae548d3Szrj 	  do
1157*fae548d3Szrj 	    {
1158*fae548d3Szrj 	      ch = GET ();
1159*fae548d3Szrj 	    }
1160*fae548d3Szrj 	  while (ch != EOF && ch != '\n');
1161*fae548d3Szrj 
1162*fae548d3Szrj 	  if (ch == EOF)
1163*fae548d3Szrj 	    as_warn (_("end of file in comment; newline inserted"));
1164*fae548d3Szrj 
1165*fae548d3Szrj 	  state = 0;
1166*fae548d3Szrj 	  PUT ('\n');
1167*fae548d3Szrj 	  break;
1168*fae548d3Szrj #endif
1169*fae548d3Szrj #ifdef DOUBLEBAR_PARALLEL
1170*fae548d3Szrj 	case LEX_IS_DOUBLEBAR_1ST:
1171*fae548d3Szrj 	  ch2 = GET ();
1172*fae548d3Szrj 	  if (ch2 != EOF)
1173*fae548d3Szrj 	    UNGET (ch2);
1174*fae548d3Szrj 	  if (ch2 != '|')
1175*fae548d3Szrj 	    goto de_fault;
1176*fae548d3Szrj 
1177*fae548d3Szrj 	  /* Handle '||' in two states as invoking PUT twice might
1178*fae548d3Szrj 	     result in the first one jumping out of this loop.  We'd
1179*fae548d3Szrj 	     then lose track of the state and one '|' char.  */
1180*fae548d3Szrj 	  state = 13;
1181*fae548d3Szrj 	  PUT ('|');
1182*fae548d3Szrj 	  break;
1183*fae548d3Szrj #endif
1184*fae548d3Szrj 	case LEX_IS_LINE_COMMENT_START:
1185*fae548d3Szrj 	  /* FIXME-someday: The two character comment stuff was badly
1186*fae548d3Szrj 	     thought out.  On i386, we want '/' as line comment start
1187*fae548d3Szrj 	     AND we want C style comments.  hence this hack.  The
1188*fae548d3Szrj 	     whole lexical process should be reworked.  xoxorich.  */
1189*fae548d3Szrj 	  if (ch == '/')
1190*fae548d3Szrj 	    {
1191*fae548d3Szrj 	      ch2 = GET ();
1192*fae548d3Szrj 	      if (ch2 == '*')
1193*fae548d3Szrj 		{
1194*fae548d3Szrj 		  old_state = 3;
1195*fae548d3Szrj 		  state = -2;
1196*fae548d3Szrj 		  break;
1197*fae548d3Szrj 		}
1198*fae548d3Szrj 	      else if (ch2 != EOF)
1199*fae548d3Szrj 		{
1200*fae548d3Szrj 		  UNGET (ch2);
1201*fae548d3Szrj 		}
1202*fae548d3Szrj 	    }
1203*fae548d3Szrj 
1204*fae548d3Szrj 	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
1205*fae548d3Szrj 	    {
1206*fae548d3Szrj 	      int startch;
1207*fae548d3Szrj 
1208*fae548d3Szrj 	      startch = ch;
1209*fae548d3Szrj 
1210*fae548d3Szrj 	      do
1211*fae548d3Szrj 		{
1212*fae548d3Szrj 		  ch = GET ();
1213*fae548d3Szrj 		}
1214*fae548d3Szrj 	      while (ch != EOF && IS_WHITESPACE (ch));
1215*fae548d3Szrj 
1216*fae548d3Szrj 	      if (ch == EOF)
1217*fae548d3Szrj 		{
1218*fae548d3Szrj 		  as_warn (_("end of file in comment; newline inserted"));
1219*fae548d3Szrj 		  PUT ('\n');
1220*fae548d3Szrj 		  break;
1221*fae548d3Szrj 		}
1222*fae548d3Szrj 
1223*fae548d3Szrj 	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1224*fae548d3Szrj 		{
1225*fae548d3Szrj 		  /* Not a cpp line.  */
1226*fae548d3Szrj 		  while (ch != EOF && !IS_NEWLINE (ch))
1227*fae548d3Szrj 		    ch = GET ();
1228*fae548d3Szrj 		  if (ch == EOF)
1229*fae548d3Szrj 		    {
1230*fae548d3Szrj 		      as_warn (_("end of file in comment; newline inserted"));
1231*fae548d3Szrj 		      PUT ('\n');
1232*fae548d3Szrj 		    }
1233*fae548d3Szrj 		  else /* IS_NEWLINE (ch) */
1234*fae548d3Szrj 		    {
1235*fae548d3Szrj 		      /* To process non-zero add_newlines.  */
1236*fae548d3Szrj 		      UNGET (ch);
1237*fae548d3Szrj 		    }
1238*fae548d3Szrj 		  state = 0;
1239*fae548d3Szrj 		  break;
1240*fae548d3Szrj 		}
1241*fae548d3Szrj 	      /* Looks like `# 123 "filename"' from cpp.  */
1242*fae548d3Szrj 	      UNGET (ch);
1243*fae548d3Szrj 	      old_state = 4;
1244*fae548d3Szrj 	      state = -1;
1245*fae548d3Szrj 	      if (scrub_m68k_mri)
1246*fae548d3Szrj 		out_string = "\tlinefile ";
1247*fae548d3Szrj 	      else
1248*fae548d3Szrj 		out_string = "\t.linefile ";
1249*fae548d3Szrj 	      PUT (*out_string++);
1250*fae548d3Szrj 	      break;
1251*fae548d3Szrj 	    }
1252*fae548d3Szrj 
1253*fae548d3Szrj #ifdef TC_D10V
1254*fae548d3Szrj 	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1255*fae548d3Szrj 	     Trap is the only short insn that has a first operand that is
1256*fae548d3Szrj 	     neither register nor label.
1257*fae548d3Szrj 	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1258*fae548d3Szrj 	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1259*fae548d3Szrj 	     already LEX_IS_LINE_COMMENT_START.  However, it is the
1260*fae548d3Szrj 	     only character in line_comment_chars for d10v, hence we
1261*fae548d3Szrj 	     can recognize it as such.  */
1262*fae548d3Szrj 	  /* An alternative approach would be to reset the state to 1 when
1263*fae548d3Szrj 	     we see '||', '<-' or '->', but that seems to be overkill.  */
1264*fae548d3Szrj 	  if (state == 10)
1265*fae548d3Szrj 	    PUT (' ');
1266*fae548d3Szrj #endif
1267*fae548d3Szrj 	  /* We have a line comment character which is not at the
1268*fae548d3Szrj 	     start of a line.  If this is also a normal comment
1269*fae548d3Szrj 	     character, fall through.  Otherwise treat it as a default
1270*fae548d3Szrj 	     character.  */
1271*fae548d3Szrj 	  if (strchr (tc_comment_chars, ch) == NULL
1272*fae548d3Szrj 	      && (! scrub_m68k_mri
1273*fae548d3Szrj 		  || (ch != '!' && ch != '*')))
1274*fae548d3Szrj 	    goto de_fault;
1275*fae548d3Szrj 	  if (scrub_m68k_mri
1276*fae548d3Szrj 	      && (ch == '!' || ch == '*' || ch == '#')
1277*fae548d3Szrj 	      && state != 1
1278*fae548d3Szrj 	      && state != 10)
1279*fae548d3Szrj 	    goto de_fault;
1280*fae548d3Szrj 	  /* Fall through.  */
1281*fae548d3Szrj 	case LEX_IS_COMMENT_START:
1282*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
1283*fae548d3Szrj 	  /* On the ARM, `@' is the comment character.
1284*fae548d3Szrj 	     Unfortunately this is also a special character in ELF .symver
1285*fae548d3Szrj 	     directives (and .type, though we deal with those another way).
1286*fae548d3Szrj 	     So we check if this line is such a directive, and treat
1287*fae548d3Szrj 	     the character as default if so.  This is a hack.  */
1288*fae548d3Szrj 	  if ((symver_state != NULL) && (*symver_state == 0))
1289*fae548d3Szrj 	    goto de_fault;
1290*fae548d3Szrj #endif
1291*fae548d3Szrj 
1292*fae548d3Szrj #ifdef TC_ARM
1293*fae548d3Szrj 	  /* For the ARM, care is needed not to damage occurrences of \@
1294*fae548d3Szrj 	     by stripping the @ onwards.  Yuck.  */
1295*fae548d3Szrj 	  if ((to > tostart ? to[-1] : last_char) == '\\')
1296*fae548d3Szrj 	    /* Do not treat the @ as a start-of-comment.  */
1297*fae548d3Szrj 	    goto de_fault;
1298*fae548d3Szrj #endif
1299*fae548d3Szrj 
1300*fae548d3Szrj #ifdef WARN_COMMENTS
1301*fae548d3Szrj 	  if (!found_comment)
1302*fae548d3Szrj 	    found_comment_file = as_where (&found_comment);
1303*fae548d3Szrj #endif
1304*fae548d3Szrj 	  do
1305*fae548d3Szrj 	    {
1306*fae548d3Szrj 	      ch = GET ();
1307*fae548d3Szrj 	    }
1308*fae548d3Szrj 	  while (ch != EOF && !IS_NEWLINE (ch));
1309*fae548d3Szrj 	  if (ch == EOF)
1310*fae548d3Szrj 	    as_warn (_("end of file in comment; newline inserted"));
1311*fae548d3Szrj 	  state = 0;
1312*fae548d3Szrj 	  PUT ('\n');
1313*fae548d3Szrj 	  break;
1314*fae548d3Szrj 
1315*fae548d3Szrj #ifdef H_TICK_HEX
1316*fae548d3Szrj 	case LEX_IS_H:
1317*fae548d3Szrj 	  /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1318*fae548d3Szrj 	     the H' with 0x to make them gas-style hex characters.  */
1319*fae548d3Szrj 	  if (enable_h_tick_hex)
1320*fae548d3Szrj 	    {
1321*fae548d3Szrj 	      char quot;
1322*fae548d3Szrj 
1323*fae548d3Szrj 	      quot = GET ();
1324*fae548d3Szrj 	      if (quot == '\'')
1325*fae548d3Szrj 		{
1326*fae548d3Szrj 		  UNGET ('x');
1327*fae548d3Szrj 		  ch = '0';
1328*fae548d3Szrj 		}
1329*fae548d3Szrj 	      else
1330*fae548d3Szrj 		UNGET (quot);
1331*fae548d3Szrj 	    }
1332*fae548d3Szrj #endif
1333*fae548d3Szrj 	  /* Fall through.  */
1334*fae548d3Szrj 
1335*fae548d3Szrj 	case LEX_IS_SYMBOL_COMPONENT:
1336*fae548d3Szrj 	  if (state == 10)
1337*fae548d3Szrj 	    {
1338*fae548d3Szrj 	      /* This is a symbol character following another symbol
1339*fae548d3Szrj 		 character, with whitespace in between.  We skipped
1340*fae548d3Szrj 		 the whitespace earlier, so output it now.  */
1341*fae548d3Szrj 	      UNGET (ch);
1342*fae548d3Szrj 	      state = 3;
1343*fae548d3Szrj 	      PUT (' ');
1344*fae548d3Szrj 	      break;
1345*fae548d3Szrj 	    }
1346*fae548d3Szrj 
1347*fae548d3Szrj #ifdef TC_Z80
1348*fae548d3Szrj 	  /* "af'" is a symbol containing '\''.  */
1349*fae548d3Szrj 	  if (state == 3 && (ch == 'a' || ch == 'A'))
1350*fae548d3Szrj 	    {
1351*fae548d3Szrj 	      state = 16;
1352*fae548d3Szrj 	      PUT (ch);
1353*fae548d3Szrj 	      ch = GET ();
1354*fae548d3Szrj 	      if (ch == 'f' || ch == 'F')
1355*fae548d3Szrj 		{
1356*fae548d3Szrj 		  state = 17;
1357*fae548d3Szrj 		  PUT (ch);
1358*fae548d3Szrj 		  break;
1359*fae548d3Szrj 		}
1360*fae548d3Szrj 	      else
1361*fae548d3Szrj 		{
1362*fae548d3Szrj 		  state = 9;
1363*fae548d3Szrj 		  if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1364*fae548d3Szrj 		    {
1365*fae548d3Szrj 		      if (ch != EOF)
1366*fae548d3Szrj 			UNGET (ch);
1367*fae548d3Szrj 		      break;
1368*fae548d3Szrj 		    }
1369*fae548d3Szrj 		}
1370*fae548d3Szrj 	    }
1371*fae548d3Szrj #endif
1372*fae548d3Szrj 	  if (state == 3)
1373*fae548d3Szrj 	    state = 9;
1374*fae548d3Szrj 
1375*fae548d3Szrj 	  /* This is a common case.  Quickly copy CH and all the
1376*fae548d3Szrj 	     following symbol component or normal characters.  */
1377*fae548d3Szrj 	  if (to + 1 < toend
1378*fae548d3Szrj 	      && mri_state == NULL
1379*fae548d3Szrj #if defined TC_ARM && defined OBJ_ELF
1380*fae548d3Szrj 	      && symver_state == NULL
1381*fae548d3Szrj #endif
1382*fae548d3Szrj 	      )
1383*fae548d3Szrj 	    {
1384*fae548d3Szrj 	      char *s;
1385*fae548d3Szrj 	      ptrdiff_t len;
1386*fae548d3Szrj 
1387*fae548d3Szrj 	      for (s = from; s < fromend; s++)
1388*fae548d3Szrj 		{
1389*fae548d3Szrj 		  int type;
1390*fae548d3Szrj 
1391*fae548d3Szrj 		  ch2 = *(unsigned char *) s;
1392*fae548d3Szrj 		  type = lex[ch2];
1393*fae548d3Szrj 		  if (type != 0
1394*fae548d3Szrj 		      && type != LEX_IS_SYMBOL_COMPONENT)
1395*fae548d3Szrj 		    break;
1396*fae548d3Szrj 		}
1397*fae548d3Szrj 
1398*fae548d3Szrj 	      if (s > from)
1399*fae548d3Szrj 		/* Handle the last character normally, for
1400*fae548d3Szrj 		   simplicity.  */
1401*fae548d3Szrj 		--s;
1402*fae548d3Szrj 
1403*fae548d3Szrj 	      len = s - from;
1404*fae548d3Szrj 
1405*fae548d3Szrj 	      if (len > (toend - to) - 1)
1406*fae548d3Szrj 		len = (toend - to) - 1;
1407*fae548d3Szrj 
1408*fae548d3Szrj 	      if (len > 0)
1409*fae548d3Szrj 		{
1410*fae548d3Szrj 		  PUT (ch);
1411*fae548d3Szrj 		  memcpy (to, from, len);
1412*fae548d3Szrj 		  to += len;
1413*fae548d3Szrj 		  from += len;
1414*fae548d3Szrj 		  if (to >= toend)
1415*fae548d3Szrj 		    goto tofull;
1416*fae548d3Szrj 		  ch = GET ();
1417*fae548d3Szrj 		}
1418*fae548d3Szrj 	    }
1419*fae548d3Szrj 
1420*fae548d3Szrj 	  /* Fall through.  */
1421*fae548d3Szrj 	default:
1422*fae548d3Szrj 	de_fault:
1423*fae548d3Szrj 	  /* Some relatively `normal' character.  */
1424*fae548d3Szrj 	  if (state == 0)
1425*fae548d3Szrj 	    {
1426*fae548d3Szrj 	      state = 11;	/* Now seeing label definition.  */
1427*fae548d3Szrj 	    }
1428*fae548d3Szrj 	  else if (state == 1)
1429*fae548d3Szrj 	    {
1430*fae548d3Szrj 	      state = 2;	/* Ditto.  */
1431*fae548d3Szrj 	    }
1432*fae548d3Szrj 	  else if (state == 9)
1433*fae548d3Szrj 	    {
1434*fae548d3Szrj 	      if (!IS_SYMBOL_COMPONENT (ch))
1435*fae548d3Szrj 		state = 3;
1436*fae548d3Szrj 	    }
1437*fae548d3Szrj 	  else if (state == 10)
1438*fae548d3Szrj 	    {
1439*fae548d3Szrj 	      if (ch == '\\')
1440*fae548d3Szrj 		{
1441*fae548d3Szrj 		  /* Special handling for backslash: a backslash may
1442*fae548d3Szrj 		     be the beginning of a formal parameter (of a
1443*fae548d3Szrj 		     macro) following another symbol character, with
1444*fae548d3Szrj 		     whitespace in between.  If that is the case, we
1445*fae548d3Szrj 		     output a space before the parameter.  Strictly
1446*fae548d3Szrj 		     speaking, correct handling depends upon what the
1447*fae548d3Szrj 		     macro parameter expands into; if the parameter
1448*fae548d3Szrj 		     expands into something which does not start with
1449*fae548d3Szrj 		     an operand character, then we don't want to keep
1450*fae548d3Szrj 		     the space.  We don't have enough information to
1451*fae548d3Szrj 		     make the right choice, so here we are making the
1452*fae548d3Szrj 		     choice which is more likely to be correct.  */
1453*fae548d3Szrj 		  if (to + 1 >= toend)
1454*fae548d3Szrj 		    {
1455*fae548d3Szrj 		      /* If we're near the end of the buffer, save the
1456*fae548d3Szrj 		         character for the next time round.  Otherwise
1457*fae548d3Szrj 		         we'll lose our state.  */
1458*fae548d3Szrj 		      UNGET (ch);
1459*fae548d3Szrj 		      goto tofull;
1460*fae548d3Szrj 		    }
1461*fae548d3Szrj 		  *to++ = ' ';
1462*fae548d3Szrj 		}
1463*fae548d3Szrj 
1464*fae548d3Szrj 	      state = 3;
1465*fae548d3Szrj 	    }
1466*fae548d3Szrj 	  PUT (ch);
1467*fae548d3Szrj 	  break;
1468*fae548d3Szrj 	}
1469*fae548d3Szrj     }
1470*fae548d3Szrj 
1471*fae548d3Szrj   /*NOTREACHED*/
1472*fae548d3Szrj 
1473*fae548d3Szrj  fromeof:
1474*fae548d3Szrj   /* We have reached the end of the input.  */
1475*fae548d3Szrj #ifdef TC_ARM
1476*fae548d3Szrj   if (to > tostart)
1477*fae548d3Szrj     last_char = to[-1];
1478*fae548d3Szrj #endif
1479*fae548d3Szrj   return to - tostart;
1480*fae548d3Szrj 
1481*fae548d3Szrj  tofull:
1482*fae548d3Szrj   /* The output buffer is full.  Save any input we have not yet
1483*fae548d3Szrj      processed.  */
1484*fae548d3Szrj   if (fromend > from)
1485*fae548d3Szrj     {
1486*fae548d3Szrj       saved_input = from;
1487*fae548d3Szrj       saved_input_len = fromend - from;
1488*fae548d3Szrj     }
1489*fae548d3Szrj   else
1490*fae548d3Szrj     saved_input = NULL;
1491*fae548d3Szrj 
1492*fae548d3Szrj #ifdef TC_ARM
1493*fae548d3Szrj   if (to > tostart)
1494*fae548d3Szrj     last_char = to[-1];
1495*fae548d3Szrj #endif
1496*fae548d3Szrj   return to - tostart;
1497*fae548d3Szrj }
1498