xref: /dragonfly/contrib/binutils-2.27/gas/app.c (revision a9fa9459)
/* This is the Assembler Pre-Processor
   Copyright (C) 1987-2016 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */
20*a9fa9459Szrj 
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
/* App, the assembler pre-processor.  This pre-processor strips out
   excess spaces, turns single-quoted characters into a decimal
   constant, and turns the # in # <number> <filename> <garbage> into a
   .linefile.  This needs better error-handling.  */
26*a9fa9459Szrj 
27*a9fa9459Szrj #include "as.h"
28*a9fa9459Szrj 
/* When the compiler does not define __STDC__ as 1, make `const'
   expand to nothing (unless something already defined it) so the
   declarations below still compile.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* Nonzero if do_scrub_begin should classify 'h' and 'H' as LEX_IS_H.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without TC_M68K, MRI mode is a compile-time constant "off".  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
/* NULL, or a pointer into symver_pseudo marking how much of the
   pseudo-op has been matched so far.  */
static const char * symver_state;
#endif

/* Classification of each possible input byte, indexed by its
   unsigned character value.  A zero entry means "no special class".
   The table is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are classified as LEX_IS_SYMBOL_COMPONENT by
   do_scrub_begin.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Values stored in LEX.  Note that the value 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif

/* Predicates on a character's class.  C is used directly as an index
   into LEX, so it must be in the range 0..255; in particular callers
   must not pass EOF.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
95*a9fa9459Szrj 
96*a9fa9459Szrj /* FIXME-soon: The entire lexer/parser thingy should be
97*a9fa9459Szrj    built statically at compile time rather than dynamically
98*a9fa9459Szrj    each and every time the assembler is run.  xoxorich.  */
99*a9fa9459Szrj 
100*a9fa9459Szrj void
do_scrub_begin(int m68k_mri ATTRIBUTE_UNUSED)101*a9fa9459Szrj do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
102*a9fa9459Szrj {
103*a9fa9459Szrj   const char *p;
104*a9fa9459Szrj   int c;
105*a9fa9459Szrj 
106*a9fa9459Szrj   lex[' '] = LEX_IS_WHITESPACE;
107*a9fa9459Szrj   lex['\t'] = LEX_IS_WHITESPACE;
108*a9fa9459Szrj   lex['\r'] = LEX_IS_WHITESPACE;
109*a9fa9459Szrj   lex['\n'] = LEX_IS_NEWLINE;
110*a9fa9459Szrj   lex[':'] = LEX_IS_COLON;
111*a9fa9459Szrj 
112*a9fa9459Szrj #ifdef TC_M68K
113*a9fa9459Szrj   scrub_m68k_mri = m68k_mri;
114*a9fa9459Szrj 
115*a9fa9459Szrj   if (! m68k_mri)
116*a9fa9459Szrj #endif
117*a9fa9459Szrj     {
118*a9fa9459Szrj       lex['"'] = LEX_IS_STRINGQUOTE;
119*a9fa9459Szrj 
120*a9fa9459Szrj #if ! defined (TC_HPPA) && ! defined (TC_I370)
121*a9fa9459Szrj       /* I370 uses single-quotes to delimit integer, float constants.  */
122*a9fa9459Szrj       lex['\''] = LEX_IS_ONECHAR_QUOTE;
123*a9fa9459Szrj #endif
124*a9fa9459Szrj 
125*a9fa9459Szrj #ifdef SINGLE_QUOTE_STRINGS
126*a9fa9459Szrj       lex['\''] = LEX_IS_STRINGQUOTE;
127*a9fa9459Szrj #endif
128*a9fa9459Szrj     }
129*a9fa9459Szrj 
130*a9fa9459Szrj   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
131*a9fa9459Szrj      in state 5 of do_scrub_chars must be changed.  */
132*a9fa9459Szrj 
133*a9fa9459Szrj   /* Note that these override the previous defaults, e.g. if ';' is a
134*a9fa9459Szrj      comment char, then it isn't a line separator.  */
135*a9fa9459Szrj   for (p = symbol_chars; *p; ++p)
136*a9fa9459Szrj     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
137*a9fa9459Szrj 
138*a9fa9459Szrj   for (c = 128; c < 256; ++c)
139*a9fa9459Szrj     lex[c] = LEX_IS_SYMBOL_COMPONENT;
140*a9fa9459Szrj 
141*a9fa9459Szrj #ifdef tc_symbol_chars
142*a9fa9459Szrj   /* This macro permits the processor to specify all characters which
143*a9fa9459Szrj      may appears in an operand.  This will prevent the scrubber from
144*a9fa9459Szrj      discarding meaningful whitespace in certain cases.  The i386
145*a9fa9459Szrj      backend uses this to support prefixes, which can confuse the
146*a9fa9459Szrj      scrubber as to whether it is parsing operands or opcodes.  */
147*a9fa9459Szrj   for (p = tc_symbol_chars; *p; ++p)
148*a9fa9459Szrj     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
149*a9fa9459Szrj #endif
150*a9fa9459Szrj 
151*a9fa9459Szrj   /* The m68k backend wants to be able to change comment_chars.  */
152*a9fa9459Szrj #ifndef tc_comment_chars
153*a9fa9459Szrj #define tc_comment_chars comment_chars
154*a9fa9459Szrj #endif
155*a9fa9459Szrj   for (p = tc_comment_chars; *p; p++)
156*a9fa9459Szrj     lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
157*a9fa9459Szrj 
158*a9fa9459Szrj   for (p = line_comment_chars; *p; p++)
159*a9fa9459Szrj     lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
160*a9fa9459Szrj 
161*a9fa9459Szrj #ifndef tc_line_separator_chars
162*a9fa9459Szrj #define tc_line_separator_chars line_separator_chars
163*a9fa9459Szrj #endif
164*a9fa9459Szrj   for (p = tc_line_separator_chars; *p; p++)
165*a9fa9459Szrj     lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
166*a9fa9459Szrj 
167*a9fa9459Szrj #ifdef tc_parallel_separator_chars
168*a9fa9459Szrj   /* This macro permits the processor to specify all characters which
169*a9fa9459Szrj      separate parallel insns on the same line.  */
170*a9fa9459Szrj   for (p = tc_parallel_separator_chars; *p; p++)
171*a9fa9459Szrj     lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
172*a9fa9459Szrj #endif
173*a9fa9459Szrj 
174*a9fa9459Szrj   /* Only allow slash-star comments if slash is not in use.
175*a9fa9459Szrj      FIXME: This isn't right.  We should always permit them.  */
176*a9fa9459Szrj   if (lex['/'] == 0)
177*a9fa9459Szrj     lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
178*a9fa9459Szrj 
179*a9fa9459Szrj #ifdef TC_M68K
180*a9fa9459Szrj   if (m68k_mri)
181*a9fa9459Szrj     {
182*a9fa9459Szrj       lex['\''] = LEX_IS_STRINGQUOTE;
183*a9fa9459Szrj       lex[';'] = LEX_IS_COMMENT_START;
184*a9fa9459Szrj       lex['*'] = LEX_IS_LINE_COMMENT_START;
185*a9fa9459Szrj       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
186*a9fa9459Szrj 	 then it can't be used in an expression.  */
187*a9fa9459Szrj       lex['!'] = LEX_IS_LINE_COMMENT_START;
188*a9fa9459Szrj     }
189*a9fa9459Szrj #endif
190*a9fa9459Szrj 
191*a9fa9459Szrj #ifdef TC_V850
192*a9fa9459Szrj   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
193*a9fa9459Szrj #endif
194*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
195*a9fa9459Szrj   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
196*a9fa9459Szrj #endif
197*a9fa9459Szrj #ifdef TC_D30V
198*a9fa9459Szrj   /* Must do this is we want VLIW instruction with "->" or "<-".  */
199*a9fa9459Szrj   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
200*a9fa9459Szrj #endif
201*a9fa9459Szrj 
202*a9fa9459Szrj #ifdef H_TICK_HEX
203*a9fa9459Szrj   if (enable_h_tick_hex)
204*a9fa9459Szrj     {
205*a9fa9459Szrj       lex['h'] = LEX_IS_H;
206*a9fa9459Szrj       lex['H'] = LEX_IS_H;
207*a9fa9459Szrj     }
208*a9fa9459Szrj #endif
209*a9fa9459Szrj }
210*a9fa9459Szrj 
/* Saved state of the scrubber.  These persist between calls to
   do_scrub_chars and are captured and restored by app_push and
   app_pop.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once the current special state (string output,
   comment, quoted string) is finished.  */
static int old_state;
/* Text still to be emitted while in state -1.  */
static const char *out_string;
/* Small scratch buffer; saved and restored together with the rest of
   the state.  */
static char out_buf[20];
/* Incremented each time a backslash-newline inside a string is
   rewritten.  */
static int add_newlines;
/* Unconsumed input left over from the previous call, or NULL.  */
static char *saved_input;
/* Number of bytes at SAVED_INPUT.  */
static size_t saved_input_len;
/* Buffer handed to the GET callback to be filled with raw input.  */
static char input_buffer[32 * 1024];
/* NULL, or how far the .mri pseudo-op has been matched so far.  */
static const char *mri_state;
/* Saved and restored with the other MRI state; presumably the last
   character matched while recognising the .mri pseudo-op -- TODO
   confirm against do_scrub_chars.  */
static char mri_last_ch;
222*a9fa9459Szrj 
223*a9fa9459Szrj /* Data structure for saving the state of app across #include's.  Note that
224*a9fa9459Szrj    app is called asynchronously to the parsing of the .include's, so our
225*a9fa9459Szrj    state at the time .include is interpreted is completely unrelated.
226*a9fa9459Szrj    That's why we have to save it all.  */
227*a9fa9459Szrj 
228*a9fa9459Szrj struct app_save
229*a9fa9459Szrj {
230*a9fa9459Szrj   int          state;
231*a9fa9459Szrj   int          old_state;
232*a9fa9459Szrj   const char * out_string;
233*a9fa9459Szrj   char         out_buf[sizeof (out_buf)];
234*a9fa9459Szrj   int          add_newlines;
235*a9fa9459Szrj   char *       saved_input;
236*a9fa9459Szrj   size_t       saved_input_len;
237*a9fa9459Szrj #ifdef TC_M68K
238*a9fa9459Szrj   int          scrub_m68k_mri;
239*a9fa9459Szrj #endif
240*a9fa9459Szrj   const char * mri_state;
241*a9fa9459Szrj   char         mri_last_ch;
242*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
243*a9fa9459Szrj   const char * symver_state;
244*a9fa9459Szrj #endif
245*a9fa9459Szrj };
246*a9fa9459Szrj 
247*a9fa9459Szrj char *
app_push(void)248*a9fa9459Szrj app_push (void)
249*a9fa9459Szrj {
250*a9fa9459Szrj   struct app_save *saved;
251*a9fa9459Szrj 
252*a9fa9459Szrj   saved = XNEW (struct app_save);
253*a9fa9459Szrj   saved->state = state;
254*a9fa9459Szrj   saved->old_state = old_state;
255*a9fa9459Szrj   saved->out_string = out_string;
256*a9fa9459Szrj   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
257*a9fa9459Szrj   saved->add_newlines = add_newlines;
258*a9fa9459Szrj   if (saved_input == NULL)
259*a9fa9459Szrj     saved->saved_input = NULL;
260*a9fa9459Szrj   else
261*a9fa9459Szrj     {
262*a9fa9459Szrj       saved->saved_input = XNEWVEC (char, saved_input_len);
263*a9fa9459Szrj       memcpy (saved->saved_input, saved_input, saved_input_len);
264*a9fa9459Szrj       saved->saved_input_len = saved_input_len;
265*a9fa9459Szrj     }
266*a9fa9459Szrj #ifdef TC_M68K
267*a9fa9459Szrj   saved->scrub_m68k_mri = scrub_m68k_mri;
268*a9fa9459Szrj #endif
269*a9fa9459Szrj   saved->mri_state = mri_state;
270*a9fa9459Szrj   saved->mri_last_ch = mri_last_ch;
271*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
272*a9fa9459Szrj   saved->symver_state = symver_state;
273*a9fa9459Szrj #endif
274*a9fa9459Szrj 
275*a9fa9459Szrj   /* do_scrub_begin() is not useful, just wastes time.  */
276*a9fa9459Szrj 
277*a9fa9459Szrj   state = 0;
278*a9fa9459Szrj   saved_input = NULL;
279*a9fa9459Szrj   add_newlines = 0;
280*a9fa9459Szrj 
281*a9fa9459Szrj   return (char *) saved;
282*a9fa9459Szrj }
283*a9fa9459Szrj 
284*a9fa9459Szrj void
app_pop(char * arg)285*a9fa9459Szrj app_pop (char *arg)
286*a9fa9459Szrj {
287*a9fa9459Szrj   struct app_save *saved = (struct app_save *) arg;
288*a9fa9459Szrj 
289*a9fa9459Szrj   /* There is no do_scrub_end ().  */
290*a9fa9459Szrj   state = saved->state;
291*a9fa9459Szrj   old_state = saved->old_state;
292*a9fa9459Szrj   out_string = saved->out_string;
293*a9fa9459Szrj   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
294*a9fa9459Szrj   add_newlines = saved->add_newlines;
295*a9fa9459Szrj   if (saved->saved_input == NULL)
296*a9fa9459Szrj     saved_input = NULL;
297*a9fa9459Szrj   else
298*a9fa9459Szrj     {
299*a9fa9459Szrj       gas_assert (saved->saved_input_len <= sizeof (input_buffer));
300*a9fa9459Szrj       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
301*a9fa9459Szrj       saved_input = input_buffer;
302*a9fa9459Szrj       saved_input_len = saved->saved_input_len;
303*a9fa9459Szrj       free (saved->saved_input);
304*a9fa9459Szrj     }
305*a9fa9459Szrj #ifdef TC_M68K
306*a9fa9459Szrj   scrub_m68k_mri = saved->scrub_m68k_mri;
307*a9fa9459Szrj #endif
308*a9fa9459Szrj   mri_state = saved->mri_state;
309*a9fa9459Szrj   mri_last_ch = saved->mri_last_ch;
310*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
311*a9fa9459Szrj   symver_state = saved->symver_state;
312*a9fa9459Szrj #endif
313*a9fa9459Szrj 
314*a9fa9459Szrj   free (arg);
315*a9fa9459Szrj }
316*a9fa9459Szrj 
/* Translate the character following a backslash into the character
   it stands for.

   CH is the character that came after the backslash.  The letters
   b, f, n, r and t yield the corresponding control character; a
   single or double quote yields itself; every other value, including
   EOF, is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
343*a9fa9459Szrj 
344*a9fa9459Szrj /* This function is called to process input characters.  The GET
345*a9fa9459Szrj    parameter is used to retrieve more input characters.  GET should
346*a9fa9459Szrj    set its parameter to point to a buffer, and return the length of
347*a9fa9459Szrj    the buffer; it should return 0 at end of file.  The scrubbed output
348*a9fa9459Szrj    characters are put into the buffer starting at TOSTART; the TOSTART
349*a9fa9459Szrj    buffer is TOLEN bytes in length.  The function returns the number
350*a9fa9459Szrj    of scrubbed characters put into TOSTART.  This will be TOLEN unless
351*a9fa9459Szrj    end of file was seen.  This function is arranged as a state
352*a9fa9459Szrj    machine, and saves its state so that it may return at any point.
353*a9fa9459Szrj    This is the way the old code used to work.  */
354*a9fa9459Szrj 
355*a9fa9459Szrj size_t
do_scrub_chars(size_t (* get)(char *,size_t),char * tostart,size_t tolen)356*a9fa9459Szrj do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
357*a9fa9459Szrj {
358*a9fa9459Szrj   char *to = tostart;
359*a9fa9459Szrj   char *toend = tostart + tolen;
360*a9fa9459Szrj   char *from;
361*a9fa9459Szrj   char *fromend;
362*a9fa9459Szrj   size_t fromlen;
363*a9fa9459Szrj   int ch, ch2 = 0;
364*a9fa9459Szrj   /* Character that started the string we're working on.  */
365*a9fa9459Szrj   static char quotechar;
366*a9fa9459Szrj 
367*a9fa9459Szrj   /*State 0: beginning of normal line
368*a9fa9459Szrj 	  1: After first whitespace on line (flush more white)
369*a9fa9459Szrj 	  2: After first non-white (opcode) on line (keep 1white)
370*a9fa9459Szrj 	  3: after second white on line (into operands) (flush white)
371*a9fa9459Szrj 	  4: after putting out a .linefile, put out digits
372*a9fa9459Szrj 	  5: parsing a string, then go to old-state
373*a9fa9459Szrj 	  6: putting out \ escape in a "d string.
374*a9fa9459Szrj 	  7: no longer used
375*a9fa9459Szrj 	  8: no longer used
376*a9fa9459Szrj 	  9: After seeing symbol char in state 3 (keep 1white after symchar)
377*a9fa9459Szrj 	 10: After seeing whitespace in state 9 (keep white before symchar)
378*a9fa9459Szrj 	 11: After seeing a symbol character in state 0 (eg a label definition)
379*a9fa9459Szrj 	 -1: output string in out_string and go to the state in old_state
380*a9fa9459Szrj 	 -2: flush text until a '*' '/' is seen, then go to state old_state
381*a9fa9459Szrj #ifdef TC_V850
382*a9fa9459Szrj 	 12: After seeing a dash, looking for a second dash as a start
383*a9fa9459Szrj 	     of comment.
384*a9fa9459Szrj #endif
385*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
386*a9fa9459Szrj 	 13: After seeing a vertical bar, looking for a second
387*a9fa9459Szrj 	     vertical bar as a parallel expression separator.
388*a9fa9459Szrj #endif
389*a9fa9459Szrj #ifdef TC_PREDICATE_START_CHAR
390*a9fa9459Szrj 	 14: After seeing a predicate start character at state 0, looking
391*a9fa9459Szrj 	     for a predicate end character as predicate.
392*a9fa9459Szrj 	 15: After seeing a predicate start character at state 1, looking
393*a9fa9459Szrj 	     for a predicate end character as predicate.
394*a9fa9459Szrj #endif
395*a9fa9459Szrj #ifdef TC_Z80
396*a9fa9459Szrj 	 16: After seeing an 'a' or an 'A' at the start of a symbol
397*a9fa9459Szrj 	 17: After seeing an 'f' or an 'F' in state 16
398*a9fa9459Szrj #endif
399*a9fa9459Szrj 	  */
400*a9fa9459Szrj 
401*a9fa9459Szrj   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
402*a9fa9459Szrj      constructs like ``.loc 1 20''.  This was turning into ``.loc
403*a9fa9459Szrj      120''.  States 9 and 10 ensure that a space is never dropped in
404*a9fa9459Szrj      between characters which could appear in an identifier.  Ian
405*a9fa9459Szrj      Taylor, ian@cygnus.com.
406*a9fa9459Szrj 
407*a9fa9459Szrj      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
408*a9fa9459Szrj      correctly on the PA (and any other target where colons are optional).
409*a9fa9459Szrj      Jeff Law, law@cs.utah.edu.
410*a9fa9459Szrj 
411*a9fa9459Szrj      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
412*a9fa9459Szrj      get squashed into "cmp r1,r2||trap#1", with the all important space
413*a9fa9459Szrj      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
414*a9fa9459Szrj 
415*a9fa9459Szrj   /* This macro gets the next input character.  */
416*a9fa9459Szrj 
417*a9fa9459Szrj #define GET()							\
418*a9fa9459Szrj   (from < fromend						\
419*a9fa9459Szrj    ? * (unsigned char *) (from++)				\
420*a9fa9459Szrj    : (saved_input = NULL,					\
421*a9fa9459Szrj       fromlen = (*get) (input_buffer, sizeof input_buffer),	\
422*a9fa9459Szrj       from = input_buffer,					\
423*a9fa9459Szrj       fromend = from + fromlen,					\
424*a9fa9459Szrj       (fromlen == 0						\
425*a9fa9459Szrj        ? EOF							\
426*a9fa9459Szrj        : * (unsigned char *) (from++))))
427*a9fa9459Szrj 
428*a9fa9459Szrj   /* This macro pushes a character back on the input stream.  */
429*a9fa9459Szrj 
430*a9fa9459Szrj #define UNGET(uch) (*--from = (uch))
431*a9fa9459Szrj 
432*a9fa9459Szrj   /* This macro puts a character into the output buffer.  If this
433*a9fa9459Szrj      character fills the output buffer, this macro jumps to the label
434*a9fa9459Szrj      TOFULL.  We use this rather ugly approach because we need to
435*a9fa9459Szrj      handle two different termination conditions: EOF on the input
436*a9fa9459Szrj      stream, and a full output buffer.  It would be simpler if we
437*a9fa9459Szrj      always read in the entire input stream before processing it, but
438*a9fa9459Szrj      I don't want to make such a significant change to the assembler's
439*a9fa9459Szrj      memory usage.  */
440*a9fa9459Szrj 
441*a9fa9459Szrj #define PUT(pch)				\
442*a9fa9459Szrj   do						\
443*a9fa9459Szrj     {						\
444*a9fa9459Szrj       *to++ = (pch);				\
445*a9fa9459Szrj       if (to >= toend)				\
446*a9fa9459Szrj 	goto tofull;				\
447*a9fa9459Szrj     }						\
448*a9fa9459Szrj   while (0)
449*a9fa9459Szrj 
450*a9fa9459Szrj   if (saved_input != NULL)
451*a9fa9459Szrj     {
452*a9fa9459Szrj       from = saved_input;
453*a9fa9459Szrj       fromend = from + saved_input_len;
454*a9fa9459Szrj     }
455*a9fa9459Szrj   else
456*a9fa9459Szrj     {
457*a9fa9459Szrj       fromlen = (*get) (input_buffer, sizeof input_buffer);
458*a9fa9459Szrj       if (fromlen == 0)
459*a9fa9459Szrj 	return 0;
460*a9fa9459Szrj       from = input_buffer;
461*a9fa9459Szrj       fromend = from + fromlen;
462*a9fa9459Szrj     }
463*a9fa9459Szrj 
464*a9fa9459Szrj   while (1)
465*a9fa9459Szrj     {
466*a9fa9459Szrj       /* The cases in this switch end with continue, in order to
467*a9fa9459Szrj 	 branch back to the top of this while loop and generate the
468*a9fa9459Szrj 	 next output character in the appropriate state.  */
469*a9fa9459Szrj       switch (state)
470*a9fa9459Szrj 	{
471*a9fa9459Szrj 	case -1:
472*a9fa9459Szrj 	  ch = *out_string++;
473*a9fa9459Szrj 	  if (*out_string == '\0')
474*a9fa9459Szrj 	    {
475*a9fa9459Szrj 	      state = old_state;
476*a9fa9459Szrj 	      old_state = 3;
477*a9fa9459Szrj 	    }
478*a9fa9459Szrj 	  PUT (ch);
479*a9fa9459Szrj 	  continue;
480*a9fa9459Szrj 
481*a9fa9459Szrj 	case -2:
482*a9fa9459Szrj 	  for (;;)
483*a9fa9459Szrj 	    {
484*a9fa9459Szrj 	      do
485*a9fa9459Szrj 		{
486*a9fa9459Szrj 		  ch = GET ();
487*a9fa9459Szrj 
488*a9fa9459Szrj 		  if (ch == EOF)
489*a9fa9459Szrj 		    {
490*a9fa9459Szrj 		      as_warn (_("end of file in comment"));
491*a9fa9459Szrj 		      goto fromeof;
492*a9fa9459Szrj 		    }
493*a9fa9459Szrj 
494*a9fa9459Szrj 		  if (ch == '\n')
495*a9fa9459Szrj 		    PUT ('\n');
496*a9fa9459Szrj 		}
497*a9fa9459Szrj 	      while (ch != '*');
498*a9fa9459Szrj 
499*a9fa9459Szrj 	      while ((ch = GET ()) == '*')
500*a9fa9459Szrj 		;
501*a9fa9459Szrj 
502*a9fa9459Szrj 	      if (ch == EOF)
503*a9fa9459Szrj 		{
504*a9fa9459Szrj 		  as_warn (_("end of file in comment"));
505*a9fa9459Szrj 		  goto fromeof;
506*a9fa9459Szrj 		}
507*a9fa9459Szrj 
508*a9fa9459Szrj 	      if (ch == '/')
509*a9fa9459Szrj 		break;
510*a9fa9459Szrj 
511*a9fa9459Szrj 	      UNGET (ch);
512*a9fa9459Szrj 	    }
513*a9fa9459Szrj 
514*a9fa9459Szrj 	  state = old_state;
515*a9fa9459Szrj 	  UNGET (' ');
516*a9fa9459Szrj 	  continue;
517*a9fa9459Szrj 
518*a9fa9459Szrj 	case 4:
519*a9fa9459Szrj 	  ch = GET ();
520*a9fa9459Szrj 	  if (ch == EOF)
521*a9fa9459Szrj 	    goto fromeof;
522*a9fa9459Szrj 	  else if (ch >= '0' && ch <= '9')
523*a9fa9459Szrj 	    PUT (ch);
524*a9fa9459Szrj 	  else
525*a9fa9459Szrj 	    {
526*a9fa9459Szrj 	      while (ch != EOF && IS_WHITESPACE (ch))
527*a9fa9459Szrj 		ch = GET ();
528*a9fa9459Szrj 	      if (ch == '"')
529*a9fa9459Szrj 		{
530*a9fa9459Szrj 		  quotechar = ch;
531*a9fa9459Szrj 		  state = 5;
532*a9fa9459Szrj 		  old_state = 3;
533*a9fa9459Szrj 		  PUT (ch);
534*a9fa9459Szrj 		}
535*a9fa9459Szrj 	      else
536*a9fa9459Szrj 		{
537*a9fa9459Szrj 		  while (ch != EOF && ch != '\n')
538*a9fa9459Szrj 		    ch = GET ();
539*a9fa9459Szrj 		  state = 0;
540*a9fa9459Szrj 		  PUT (ch);
541*a9fa9459Szrj 		}
542*a9fa9459Szrj 	    }
543*a9fa9459Szrj 	  continue;
544*a9fa9459Szrj 
545*a9fa9459Szrj 	case 5:
546*a9fa9459Szrj 	  /* We are going to copy everything up to a quote character,
547*a9fa9459Szrj 	     with special handling for a backslash.  We try to
548*a9fa9459Szrj 	     optimize the copying in the simple case without using the
549*a9fa9459Szrj 	     GET and PUT macros.  */
550*a9fa9459Szrj 	  {
551*a9fa9459Szrj 	    char *s;
552*a9fa9459Szrj 	    ptrdiff_t len;
553*a9fa9459Szrj 
554*a9fa9459Szrj 	    for (s = from; s < fromend; s++)
555*a9fa9459Szrj 	      {
556*a9fa9459Szrj 		ch = *s;
557*a9fa9459Szrj 		if (ch == '\\'
558*a9fa9459Szrj 		    || ch == quotechar
559*a9fa9459Szrj 		    || ch == '\n')
560*a9fa9459Szrj 		  break;
561*a9fa9459Szrj 	      }
562*a9fa9459Szrj 	    len = s - from;
563*a9fa9459Szrj 	    if (len > toend - to)
564*a9fa9459Szrj 	      len = toend - to;
565*a9fa9459Szrj 	    if (len > 0)
566*a9fa9459Szrj 	      {
567*a9fa9459Szrj 		memcpy (to, from, len);
568*a9fa9459Szrj 		to += len;
569*a9fa9459Szrj 		from += len;
570*a9fa9459Szrj 		if (to >= toend)
571*a9fa9459Szrj 		  goto tofull;
572*a9fa9459Szrj 	      }
573*a9fa9459Szrj 	  }
574*a9fa9459Szrj 
575*a9fa9459Szrj 	  ch = GET ();
576*a9fa9459Szrj 	  if (ch == EOF)
577*a9fa9459Szrj 	    {
578*a9fa9459Szrj 	      /* This buffer is here specifically so
579*a9fa9459Szrj 		 that the UNGET below will work.  */
580*a9fa9459Szrj 	      static char one_char_buf[1];
581*a9fa9459Szrj 
582*a9fa9459Szrj 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
583*a9fa9459Szrj 	      state = old_state;
584*a9fa9459Szrj 	      from = fromend = one_char_buf + 1;
585*a9fa9459Szrj 	      fromlen = 1;
586*a9fa9459Szrj 	      UNGET ('\n');
587*a9fa9459Szrj 	      PUT (quotechar);
588*a9fa9459Szrj 	    }
589*a9fa9459Szrj 	  else if (ch == quotechar)
590*a9fa9459Szrj 	    {
591*a9fa9459Szrj 	      state = old_state;
592*a9fa9459Szrj 	      PUT (ch);
593*a9fa9459Szrj 	    }
594*a9fa9459Szrj #ifndef NO_STRING_ESCAPES
595*a9fa9459Szrj 	  else if (ch == '\\')
596*a9fa9459Szrj 	    {
597*a9fa9459Szrj 	      state = 6;
598*a9fa9459Szrj 	      PUT (ch);
599*a9fa9459Szrj 	    }
600*a9fa9459Szrj #endif
601*a9fa9459Szrj 	  else if (scrub_m68k_mri && ch == '\n')
602*a9fa9459Szrj 	    {
603*a9fa9459Szrj 	      /* Just quietly terminate the string.  This permits lines like
604*a9fa9459Szrj 		   bne	label	loop if we haven't reach end yet.  */
605*a9fa9459Szrj 	      state = old_state;
606*a9fa9459Szrj 	      UNGET (ch);
607*a9fa9459Szrj 	      PUT ('\'');
608*a9fa9459Szrj 	    }
609*a9fa9459Szrj 	  else
610*a9fa9459Szrj 	    {
611*a9fa9459Szrj 	      PUT (ch);
612*a9fa9459Szrj 	    }
613*a9fa9459Szrj 	  continue;
614*a9fa9459Szrj 
615*a9fa9459Szrj 	case 6:
616*a9fa9459Szrj 	  state = 5;
617*a9fa9459Szrj 	  ch = GET ();
618*a9fa9459Szrj 	  switch (ch)
619*a9fa9459Szrj 	    {
620*a9fa9459Szrj 	      /* Handle strings broken across lines, by turning '\n' into
621*a9fa9459Szrj 		 '\\' and 'n'.  */
622*a9fa9459Szrj 	    case '\n':
623*a9fa9459Szrj 	      UNGET ('n');
624*a9fa9459Szrj 	      add_newlines++;
625*a9fa9459Szrj 	      PUT ('\\');
626*a9fa9459Szrj 	      continue;
627*a9fa9459Szrj 
628*a9fa9459Szrj 	    case EOF:
629*a9fa9459Szrj 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
630*a9fa9459Szrj 	      PUT (quotechar);
631*a9fa9459Szrj 	      continue;
632*a9fa9459Szrj 
633*a9fa9459Szrj 	    case '"':
634*a9fa9459Szrj 	    case '\\':
635*a9fa9459Szrj 	    case 'b':
636*a9fa9459Szrj 	    case 'f':
637*a9fa9459Szrj 	    case 'n':
638*a9fa9459Szrj 	    case 'r':
639*a9fa9459Szrj 	    case 't':
640*a9fa9459Szrj 	    case 'v':
641*a9fa9459Szrj 	    case 'x':
642*a9fa9459Szrj 	    case 'X':
643*a9fa9459Szrj 	    case '0':
644*a9fa9459Szrj 	    case '1':
645*a9fa9459Szrj 	    case '2':
646*a9fa9459Szrj 	    case '3':
647*a9fa9459Szrj 	    case '4':
648*a9fa9459Szrj 	    case '5':
649*a9fa9459Szrj 	    case '6':
650*a9fa9459Szrj 	    case '7':
651*a9fa9459Szrj 	      break;
652*a9fa9459Szrj 
653*a9fa9459Szrj 	    default:
654*a9fa9459Szrj #ifdef ONLY_STANDARD_ESCAPES
655*a9fa9459Szrj 	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
656*a9fa9459Szrj #endif
657*a9fa9459Szrj 	      break;
658*a9fa9459Szrj 	    }
659*a9fa9459Szrj 	  PUT (ch);
660*a9fa9459Szrj 	  continue;
661*a9fa9459Szrj 
662*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
663*a9fa9459Szrj 	case 13:
664*a9fa9459Szrj 	  ch = GET ();
665*a9fa9459Szrj 	  if (ch != '|')
666*a9fa9459Szrj 	    abort ();
667*a9fa9459Szrj 
668*a9fa9459Szrj 	  /* Reset back to state 1 and pretend that we are parsing a
669*a9fa9459Szrj 	     line from just after the first white space.  */
670*a9fa9459Szrj 	  state = 1;
671*a9fa9459Szrj 	  PUT ('|');
672*a9fa9459Szrj #ifdef TC_TIC6X
673*a9fa9459Szrj 	  /* "||^" is used for SPMASKed instructions.  */
674*a9fa9459Szrj 	  ch = GET ();
675*a9fa9459Szrj 	  if (ch == EOF)
676*a9fa9459Szrj 	    goto fromeof;
677*a9fa9459Szrj 	  else if (ch == '^')
678*a9fa9459Szrj 	    PUT ('^');
679*a9fa9459Szrj 	  else
680*a9fa9459Szrj 	    UNGET (ch);
681*a9fa9459Szrj #endif
682*a9fa9459Szrj 	  continue;
683*a9fa9459Szrj #endif
684*a9fa9459Szrj #ifdef TC_Z80
685*a9fa9459Szrj 	case 16:
686*a9fa9459Szrj 	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
687*a9fa9459Szrj 	  ch = GET ();
688*a9fa9459Szrj 	  if (ch == 'f' || ch == 'F')
689*a9fa9459Szrj 	    {
690*a9fa9459Szrj 	      state = 17;
691*a9fa9459Szrj 	      PUT (ch);
692*a9fa9459Szrj 	    }
693*a9fa9459Szrj 	  else
694*a9fa9459Szrj 	    {
695*a9fa9459Szrj 	      state = 9;
696*a9fa9459Szrj 	      break;
697*a9fa9459Szrj 	    }
698*a9fa9459Szrj 	case 17:
699*a9fa9459Szrj 	  /* We have seen "af" at the start of a symbol,
700*a9fa9459Szrj 	     a ' here is a part of that symbol.  */
701*a9fa9459Szrj 	  ch = GET ();
702*a9fa9459Szrj 	  state = 9;
703*a9fa9459Szrj 	  if (ch == '\'')
704*a9fa9459Szrj 	    /* Change to avoid warning about unclosed string.  */
705*a9fa9459Szrj 	    PUT ('`');
706*a9fa9459Szrj 	  else if (ch != EOF)
707*a9fa9459Szrj 	    UNGET (ch);
708*a9fa9459Szrj 	  break;
709*a9fa9459Szrj #endif
710*a9fa9459Szrj 	}
711*a9fa9459Szrj 
712*a9fa9459Szrj       /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
713*a9fa9459Szrj 
714*a9fa9459Szrj       /* flushchar: */
715*a9fa9459Szrj       ch = GET ();
716*a9fa9459Szrj 
717*a9fa9459Szrj #ifdef TC_PREDICATE_START_CHAR
718*a9fa9459Szrj       if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
719*a9fa9459Szrj 	{
720*a9fa9459Szrj 	  state += 14;
721*a9fa9459Szrj 	  PUT (ch);
722*a9fa9459Szrj 	  continue;
723*a9fa9459Szrj 	}
724*a9fa9459Szrj       else if (state == 14 || state == 15)
725*a9fa9459Szrj 	{
726*a9fa9459Szrj 	  if (ch == TC_PREDICATE_END_CHAR)
727*a9fa9459Szrj 	    {
728*a9fa9459Szrj 	      state -= 14;
729*a9fa9459Szrj 	      PUT (ch);
730*a9fa9459Szrj 	      ch = GET ();
731*a9fa9459Szrj 	    }
732*a9fa9459Szrj 	  else
733*a9fa9459Szrj 	    {
734*a9fa9459Szrj 	      PUT (ch);
735*a9fa9459Szrj 	      continue;
736*a9fa9459Szrj 	    }
737*a9fa9459Szrj 	}
738*a9fa9459Szrj #endif
739*a9fa9459Szrj 
740*a9fa9459Szrj     recycle:
741*a9fa9459Szrj 
742*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
743*a9fa9459Szrj       /* We need to watch out for .symver directives.  See the comment later
744*a9fa9459Szrj 	 in this function.  */
745*a9fa9459Szrj       if (symver_state == NULL)
746*a9fa9459Szrj 	{
747*a9fa9459Szrj 	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
748*a9fa9459Szrj 	    symver_state = symver_pseudo + 1;
749*a9fa9459Szrj 	}
750*a9fa9459Szrj       else
751*a9fa9459Szrj 	{
752*a9fa9459Szrj 	  /* We advance to the next state if we find the right
753*a9fa9459Szrj 	     character.  */
754*a9fa9459Szrj 	  if (ch != '\0' && (*symver_state == ch))
755*a9fa9459Szrj 	    ++symver_state;
756*a9fa9459Szrj 	  else if (*symver_state != '\0')
757*a9fa9459Szrj 	    /* We did not get the expected character, or we didn't
758*a9fa9459Szrj 	       get a valid terminating character after seeing the
759*a9fa9459Szrj 	       entire pseudo-op, so we must go back to the beginning.  */
760*a9fa9459Szrj 	    symver_state = NULL;
761*a9fa9459Szrj 	  else
762*a9fa9459Szrj 	    {
763*a9fa9459Szrj 	      /* We've read the entire pseudo-op.  If this is the end
764*a9fa9459Szrj 		 of the line, go back to the beginning.  */
765*a9fa9459Szrj 	      if (IS_NEWLINE (ch))
766*a9fa9459Szrj 		symver_state = NULL;
767*a9fa9459Szrj 	    }
768*a9fa9459Szrj 	}
769*a9fa9459Szrj #endif /* TC_ARM && OBJ_ELF */
770*a9fa9459Szrj 
771*a9fa9459Szrj #ifdef TC_M68K
772*a9fa9459Szrj       /* We want to have pseudo-ops which control whether we are in
773*a9fa9459Szrj 	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
774*a9fa9459Szrj 	 the scrubber, that means that we need a special purpose
775*a9fa9459Szrj 	 recognizer here.  */
776*a9fa9459Szrj       if (mri_state == NULL)
777*a9fa9459Szrj 	{
778*a9fa9459Szrj 	  if ((state == 0 || state == 1)
779*a9fa9459Szrj 	      && ch == mri_pseudo[0])
780*a9fa9459Szrj 	    mri_state = mri_pseudo + 1;
781*a9fa9459Szrj 	}
782*a9fa9459Szrj       else
783*a9fa9459Szrj 	{
784*a9fa9459Szrj 	  /* We advance to the next state if we find the right
785*a9fa9459Szrj 	     character, or if we need a space character and we get any
786*a9fa9459Szrj 	     whitespace character, or if we need a '0' and we get a
787*a9fa9459Szrj 	     '1' (this is so that we only need one state to handle
788*a9fa9459Szrj 	     ``.mri 0'' and ``.mri 1'').  */
789*a9fa9459Szrj 	  if (ch != '\0'
790*a9fa9459Szrj 	      && (*mri_state == ch
791*a9fa9459Szrj 		  || (*mri_state == ' '
792*a9fa9459Szrj 		      && lex[ch] == LEX_IS_WHITESPACE)
793*a9fa9459Szrj 		  || (*mri_state == '0'
794*a9fa9459Szrj 		      && ch == '1')))
795*a9fa9459Szrj 	    {
796*a9fa9459Szrj 	      mri_last_ch = ch;
797*a9fa9459Szrj 	      ++mri_state;
798*a9fa9459Szrj 	    }
799*a9fa9459Szrj 	  else if (*mri_state != '\0'
800*a9fa9459Szrj 		   || (lex[ch] != LEX_IS_WHITESPACE
801*a9fa9459Szrj 		       && lex[ch] != LEX_IS_NEWLINE))
802*a9fa9459Szrj 	    {
803*a9fa9459Szrj 	      /* We did not get the expected character, or we didn't
804*a9fa9459Szrj 		 get a valid terminating character after seeing the
805*a9fa9459Szrj 		 entire pseudo-op, so we must go back to the
806*a9fa9459Szrj 		 beginning.  */
807*a9fa9459Szrj 	      mri_state = NULL;
808*a9fa9459Szrj 	    }
809*a9fa9459Szrj 	  else
810*a9fa9459Szrj 	    {
811*a9fa9459Szrj 	      /* We've read the entire pseudo-op.  mri_last_ch is
812*a9fa9459Szrj 		 either '0' or '1' indicating whether to enter or
813*a9fa9459Szrj 		 leave MRI mode.  */
814*a9fa9459Szrj 	      do_scrub_begin (mri_last_ch == '1');
815*a9fa9459Szrj 	      mri_state = NULL;
816*a9fa9459Szrj 
817*a9fa9459Szrj 	      /* We continue handling the character as usual.  The
818*a9fa9459Szrj 		 main gas reader must also handle the .mri pseudo-op
819*a9fa9459Szrj 		 to control expression parsing and the like.  */
820*a9fa9459Szrj 	    }
821*a9fa9459Szrj 	}
822*a9fa9459Szrj #endif
823*a9fa9459Szrj 
824*a9fa9459Szrj       if (ch == EOF)
825*a9fa9459Szrj 	{
826*a9fa9459Szrj 	  if (state != 0)
827*a9fa9459Szrj 	    {
828*a9fa9459Szrj 	      as_warn (_("end of file not at end of a line; newline inserted"));
829*a9fa9459Szrj 	      state = 0;
830*a9fa9459Szrj 	      PUT ('\n');
831*a9fa9459Szrj 	    }
832*a9fa9459Szrj 	  goto fromeof;
833*a9fa9459Szrj 	}
834*a9fa9459Szrj 
835*a9fa9459Szrj       switch (lex[ch])
836*a9fa9459Szrj 	{
837*a9fa9459Szrj 	case LEX_IS_WHITESPACE:
838*a9fa9459Szrj 	  do
839*a9fa9459Szrj 	    {
840*a9fa9459Szrj 	      ch = GET ();
841*a9fa9459Szrj 	    }
842*a9fa9459Szrj 	  while (ch != EOF && IS_WHITESPACE (ch));
843*a9fa9459Szrj 	  if (ch == EOF)
844*a9fa9459Szrj 	    goto fromeof;
845*a9fa9459Szrj 
846*a9fa9459Szrj 	  if (state == 0)
847*a9fa9459Szrj 	    {
848*a9fa9459Szrj 	      /* Preserve a single whitespace character at the
849*a9fa9459Szrj 		 beginning of a line.  */
850*a9fa9459Szrj 	      state = 1;
851*a9fa9459Szrj 	      UNGET (ch);
852*a9fa9459Szrj 	      PUT (' ');
853*a9fa9459Szrj 	      break;
854*a9fa9459Szrj 	    }
855*a9fa9459Szrj 
856*a9fa9459Szrj #ifdef KEEP_WHITE_AROUND_COLON
857*a9fa9459Szrj 	  if (lex[ch] == LEX_IS_COLON)
858*a9fa9459Szrj 	    {
859*a9fa9459Szrj 	      /* Only keep this white if there's no white *after* the
860*a9fa9459Szrj 		 colon.  */
861*a9fa9459Szrj 	      ch2 = GET ();
862*a9fa9459Szrj 	      if (ch2 != EOF)
863*a9fa9459Szrj 		UNGET (ch2);
864*a9fa9459Szrj 	      if (!IS_WHITESPACE (ch2))
865*a9fa9459Szrj 		{
866*a9fa9459Szrj 		  state = 9;
867*a9fa9459Szrj 		  UNGET (ch);
868*a9fa9459Szrj 		  PUT (' ');
869*a9fa9459Szrj 		  break;
870*a9fa9459Szrj 		}
871*a9fa9459Szrj 	    }
872*a9fa9459Szrj #endif
873*a9fa9459Szrj 	  if (IS_COMMENT (ch)
874*a9fa9459Szrj 	      || ch == '/'
875*a9fa9459Szrj 	      || IS_LINE_SEPARATOR (ch)
876*a9fa9459Szrj 	      || IS_PARALLEL_SEPARATOR (ch))
877*a9fa9459Szrj 	    {
878*a9fa9459Szrj 	      if (scrub_m68k_mri)
879*a9fa9459Szrj 		{
880*a9fa9459Szrj 		  /* In MRI mode, we keep these spaces.  */
881*a9fa9459Szrj 		  UNGET (ch);
882*a9fa9459Szrj 		  PUT (' ');
883*a9fa9459Szrj 		  break;
884*a9fa9459Szrj 		}
885*a9fa9459Szrj 	      goto recycle;
886*a9fa9459Szrj 	    }
887*a9fa9459Szrj 
888*a9fa9459Szrj 	  /* If we're in state 2 or 11, we've seen a non-white
889*a9fa9459Szrj 	     character followed by whitespace.  If the next character
890*a9fa9459Szrj 	     is ':', this is whitespace after a label name which we
891*a9fa9459Szrj 	     normally must ignore.  In MRI mode, though, spaces are
892*a9fa9459Szrj 	     not permitted between the label and the colon.  */
893*a9fa9459Szrj 	  if ((state == 2 || state == 11)
894*a9fa9459Szrj 	      && lex[ch] == LEX_IS_COLON
895*a9fa9459Szrj 	      && ! scrub_m68k_mri)
896*a9fa9459Szrj 	    {
897*a9fa9459Szrj 	      state = 1;
898*a9fa9459Szrj 	      PUT (ch);
899*a9fa9459Szrj 	      break;
900*a9fa9459Szrj 	    }
901*a9fa9459Szrj 
902*a9fa9459Szrj 	  switch (state)
903*a9fa9459Szrj 	    {
904*a9fa9459Szrj 	    case 1:
905*a9fa9459Szrj 	      /* We can arrive here if we leave a leading whitespace
906*a9fa9459Szrj 		 character at the beginning of a line.  */
907*a9fa9459Szrj 	      goto recycle;
908*a9fa9459Szrj 	    case 2:
909*a9fa9459Szrj 	      state = 3;
910*a9fa9459Szrj 	      if (to + 1 < toend)
911*a9fa9459Szrj 		{
912*a9fa9459Szrj 		  /* Optimize common case by skipping UNGET/GET.  */
913*a9fa9459Szrj 		  PUT (' ');	/* Sp after opco */
914*a9fa9459Szrj 		  goto recycle;
915*a9fa9459Szrj 		}
916*a9fa9459Szrj 	      UNGET (ch);
917*a9fa9459Szrj 	      PUT (' ');
918*a9fa9459Szrj 	      break;
919*a9fa9459Szrj 	    case 3:
920*a9fa9459Szrj #ifndef TC_KEEP_OPERAND_SPACES
921*a9fa9459Szrj 	      /* For TI C6X, we keep these spaces as they may separate
922*a9fa9459Szrj 		 functional unit specifiers from operands.  */
923*a9fa9459Szrj 	      if (scrub_m68k_mri)
924*a9fa9459Szrj #endif
925*a9fa9459Szrj 		{
926*a9fa9459Szrj 		  /* In MRI mode, we keep these spaces.  */
927*a9fa9459Szrj 		  UNGET (ch);
928*a9fa9459Szrj 		  PUT (' ');
929*a9fa9459Szrj 		  break;
930*a9fa9459Szrj 		}
931*a9fa9459Szrj 	      goto recycle;	/* Sp in operands */
932*a9fa9459Szrj 	    case 9:
933*a9fa9459Szrj 	    case 10:
934*a9fa9459Szrj #ifndef TC_KEEP_OPERAND_SPACES
935*a9fa9459Szrj 	      if (scrub_m68k_mri)
936*a9fa9459Szrj #endif
937*a9fa9459Szrj 		{
938*a9fa9459Szrj 		  /* In MRI mode, we keep these spaces.  */
939*a9fa9459Szrj 		  state = 3;
940*a9fa9459Szrj 		  UNGET (ch);
941*a9fa9459Szrj 		  PUT (' ');
942*a9fa9459Szrj 		  break;
943*a9fa9459Szrj 		}
944*a9fa9459Szrj 	      state = 10;	/* Sp after symbol char */
945*a9fa9459Szrj 	      goto recycle;
946*a9fa9459Szrj 	    case 11:
947*a9fa9459Szrj 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
948*a9fa9459Szrj 		state = 1;
949*a9fa9459Szrj 	      else
950*a9fa9459Szrj 		{
951*a9fa9459Szrj 		  /* We know that ch is not ':', since we tested that
952*a9fa9459Szrj 		     case above.  Therefore this is not a label, so it
953*a9fa9459Szrj 		     must be the opcode, and we've just seen the
954*a9fa9459Szrj 		     whitespace after it.  */
955*a9fa9459Szrj 		  state = 3;
956*a9fa9459Szrj 		}
957*a9fa9459Szrj 	      UNGET (ch);
958*a9fa9459Szrj 	      PUT (' ');	/* Sp after label definition.  */
959*a9fa9459Szrj 	      break;
960*a9fa9459Szrj 	    default:
961*a9fa9459Szrj 	      BAD_CASE (state);
962*a9fa9459Szrj 	    }
963*a9fa9459Szrj 	  break;
964*a9fa9459Szrj 
965*a9fa9459Szrj 	case LEX_IS_TWOCHAR_COMMENT_1ST:
966*a9fa9459Szrj 	  ch2 = GET ();
967*a9fa9459Szrj 	  if (ch2 == '*')
968*a9fa9459Szrj 	    {
969*a9fa9459Szrj 	      for (;;)
970*a9fa9459Szrj 		{
971*a9fa9459Szrj 		  do
972*a9fa9459Szrj 		    {
973*a9fa9459Szrj 		      ch2 = GET ();
974*a9fa9459Szrj 		      if (ch2 != EOF && IS_NEWLINE (ch2))
975*a9fa9459Szrj 			add_newlines++;
976*a9fa9459Szrj 		    }
977*a9fa9459Szrj 		  while (ch2 != EOF && ch2 != '*');
978*a9fa9459Szrj 
979*a9fa9459Szrj 		  while (ch2 == '*')
980*a9fa9459Szrj 		    ch2 = GET ();
981*a9fa9459Szrj 
982*a9fa9459Szrj 		  if (ch2 == EOF || ch2 == '/')
983*a9fa9459Szrj 		    break;
984*a9fa9459Szrj 
985*a9fa9459Szrj 		  /* This UNGET will ensure that we count newlines
986*a9fa9459Szrj 		     correctly.  */
987*a9fa9459Szrj 		  UNGET (ch2);
988*a9fa9459Szrj 		}
989*a9fa9459Szrj 
990*a9fa9459Szrj 	      if (ch2 == EOF)
991*a9fa9459Szrj 		as_warn (_("end of file in multiline comment"));
992*a9fa9459Szrj 
993*a9fa9459Szrj 	      ch = ' ';
994*a9fa9459Szrj 	      goto recycle;
995*a9fa9459Szrj 	    }
996*a9fa9459Szrj #ifdef DOUBLESLASH_LINE_COMMENTS
997*a9fa9459Szrj 	  else if (ch2 == '/')
998*a9fa9459Szrj 	    {
999*a9fa9459Szrj 	      do
1000*a9fa9459Szrj 		{
1001*a9fa9459Szrj 		  ch = GET ();
1002*a9fa9459Szrj 		}
1003*a9fa9459Szrj 	      while (ch != EOF && !IS_NEWLINE (ch));
1004*a9fa9459Szrj 	      if (ch == EOF)
1005*a9fa9459Szrj 		as_warn ("end of file in comment; newline inserted");
1006*a9fa9459Szrj 	      state = 0;
1007*a9fa9459Szrj 	      PUT ('\n');
1008*a9fa9459Szrj 	      break;
1009*a9fa9459Szrj 	    }
1010*a9fa9459Szrj #endif
1011*a9fa9459Szrj 	  else
1012*a9fa9459Szrj 	    {
1013*a9fa9459Szrj 	      if (ch2 != EOF)
1014*a9fa9459Szrj 		UNGET (ch2);
1015*a9fa9459Szrj 	      if (state == 9 || state == 10)
1016*a9fa9459Szrj 		state = 3;
1017*a9fa9459Szrj 	      PUT (ch);
1018*a9fa9459Szrj 	    }
1019*a9fa9459Szrj 	  break;
1020*a9fa9459Szrj 
1021*a9fa9459Szrj 	case LEX_IS_STRINGQUOTE:
1022*a9fa9459Szrj 	  quotechar = ch;
1023*a9fa9459Szrj 	  if (state == 10)
1024*a9fa9459Szrj 	    {
1025*a9fa9459Szrj 	      /* Preserve the whitespace in foo "bar".  */
1026*a9fa9459Szrj 	      UNGET (ch);
1027*a9fa9459Szrj 	      state = 3;
1028*a9fa9459Szrj 	      PUT (' ');
1029*a9fa9459Szrj 
1030*a9fa9459Szrj 	      /* PUT didn't jump out.  We could just break, but we
1031*a9fa9459Szrj 		 know what will happen, so optimize a bit.  */
1032*a9fa9459Szrj 	      ch = GET ();
1033*a9fa9459Szrj 	      old_state = 3;
1034*a9fa9459Szrj 	    }
1035*a9fa9459Szrj 	  else if (state == 9)
1036*a9fa9459Szrj 	    old_state = 3;
1037*a9fa9459Szrj 	  else
1038*a9fa9459Szrj 	    old_state = state;
1039*a9fa9459Szrj 	  state = 5;
1040*a9fa9459Szrj 	  PUT (ch);
1041*a9fa9459Szrj 	  break;
1042*a9fa9459Szrj 
1043*a9fa9459Szrj #ifndef IEEE_STYLE
1044*a9fa9459Szrj 	case LEX_IS_ONECHAR_QUOTE:
1045*a9fa9459Szrj #ifdef H_TICK_HEX
1046*a9fa9459Szrj 	  if (state == 9 && enable_h_tick_hex)
1047*a9fa9459Szrj 	    {
1048*a9fa9459Szrj 	      char c;
1049*a9fa9459Szrj 
1050*a9fa9459Szrj 	      c = GET ();
1051*a9fa9459Szrj 	      as_warn ("'%c found after symbol", c);
1052*a9fa9459Szrj 	      UNGET (c);
1053*a9fa9459Szrj 	    }
1054*a9fa9459Szrj #endif
1055*a9fa9459Szrj 	  if (state == 10)
1056*a9fa9459Szrj 	    {
1057*a9fa9459Szrj 	      /* Preserve the whitespace in foo 'b'.  */
1058*a9fa9459Szrj 	      UNGET (ch);
1059*a9fa9459Szrj 	      state = 3;
1060*a9fa9459Szrj 	      PUT (' ');
1061*a9fa9459Szrj 	      break;
1062*a9fa9459Szrj 	    }
1063*a9fa9459Szrj 	  ch = GET ();
1064*a9fa9459Szrj 	  if (ch == EOF)
1065*a9fa9459Szrj 	    {
1066*a9fa9459Szrj 	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
1067*a9fa9459Szrj 	      ch = 0;
1068*a9fa9459Szrj 	    }
1069*a9fa9459Szrj 	  if (ch == '\\')
1070*a9fa9459Szrj 	    {
1071*a9fa9459Szrj 	      ch = GET ();
1072*a9fa9459Szrj 	      if (ch == EOF)
1073*a9fa9459Szrj 		{
1074*a9fa9459Szrj 		  as_warn (_("end of file in escape character"));
1075*a9fa9459Szrj 		  ch = '\\';
1076*a9fa9459Szrj 		}
1077*a9fa9459Szrj 	      else
1078*a9fa9459Szrj 		ch = process_escape (ch);
1079*a9fa9459Szrj 	    }
1080*a9fa9459Szrj 	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
1081*a9fa9459Szrj 
1082*a9fa9459Szrj 	  /* None of these 'x constants for us.  We want 'x'.  */
1083*a9fa9459Szrj 	  if ((ch = GET ()) != '\'')
1084*a9fa9459Szrj 	    {
1085*a9fa9459Szrj #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1086*a9fa9459Szrj 	      as_warn (_("missing close quote; (assumed)"));
1087*a9fa9459Szrj #else
1088*a9fa9459Szrj 	      if (ch != EOF)
1089*a9fa9459Szrj 		UNGET (ch);
1090*a9fa9459Szrj #endif
1091*a9fa9459Szrj 	    }
1092*a9fa9459Szrj 	  if (strlen (out_buf) == 1)
1093*a9fa9459Szrj 	    {
1094*a9fa9459Szrj 	      PUT (out_buf[0]);
1095*a9fa9459Szrj 	      break;
1096*a9fa9459Szrj 	    }
1097*a9fa9459Szrj 	  if (state == 9)
1098*a9fa9459Szrj 	    old_state = 3;
1099*a9fa9459Szrj 	  else
1100*a9fa9459Szrj 	    old_state = state;
1101*a9fa9459Szrj 	  state = -1;
1102*a9fa9459Szrj 	  out_string = out_buf;
1103*a9fa9459Szrj 	  PUT (*out_string++);
1104*a9fa9459Szrj 	  break;
1105*a9fa9459Szrj #endif
1106*a9fa9459Szrj 
1107*a9fa9459Szrj 	case LEX_IS_COLON:
1108*a9fa9459Szrj #ifdef KEEP_WHITE_AROUND_COLON
1109*a9fa9459Szrj 	  state = 9;
1110*a9fa9459Szrj #else
1111*a9fa9459Szrj 	  if (state == 9 || state == 10)
1112*a9fa9459Szrj 	    state = 3;
1113*a9fa9459Szrj 	  else if (state != 3)
1114*a9fa9459Szrj 	    state = 1;
1115*a9fa9459Szrj #endif
1116*a9fa9459Szrj 	  PUT (ch);
1117*a9fa9459Szrj 	  break;
1118*a9fa9459Szrj 
1119*a9fa9459Szrj 	case LEX_IS_NEWLINE:
1120*a9fa9459Szrj 	  /* Roll out a bunch of newlines from inside comments, etc.  */
1121*a9fa9459Szrj 	  if (add_newlines)
1122*a9fa9459Szrj 	    {
1123*a9fa9459Szrj 	      --add_newlines;
1124*a9fa9459Szrj 	      UNGET (ch);
1125*a9fa9459Szrj 	    }
1126*a9fa9459Szrj 	  /* Fall through.  */
1127*a9fa9459Szrj 
1128*a9fa9459Szrj 	case LEX_IS_LINE_SEPARATOR:
1129*a9fa9459Szrj 	  state = 0;
1130*a9fa9459Szrj 	  PUT (ch);
1131*a9fa9459Szrj 	  break;
1132*a9fa9459Szrj 
1133*a9fa9459Szrj 	case LEX_IS_PARALLEL_SEPARATOR:
1134*a9fa9459Szrj 	  state = 1;
1135*a9fa9459Szrj 	  PUT (ch);
1136*a9fa9459Szrj 	  break;
1137*a9fa9459Szrj 
1138*a9fa9459Szrj #ifdef TC_V850
1139*a9fa9459Szrj 	case LEX_IS_DOUBLEDASH_1ST:
1140*a9fa9459Szrj 	  ch2 = GET ();
1141*a9fa9459Szrj 	  if (ch2 != '-')
1142*a9fa9459Szrj 	    {
1143*a9fa9459Szrj 	      if (ch2 != EOF)
1144*a9fa9459Szrj 		UNGET (ch2);
1145*a9fa9459Szrj 	      goto de_fault;
1146*a9fa9459Szrj 	    }
1147*a9fa9459Szrj 	  /* Read and skip to end of line.  */
1148*a9fa9459Szrj 	  do
1149*a9fa9459Szrj 	    {
1150*a9fa9459Szrj 	      ch = GET ();
1151*a9fa9459Szrj 	    }
1152*a9fa9459Szrj 	  while (ch != EOF && ch != '\n');
1153*a9fa9459Szrj 
1154*a9fa9459Szrj 	  if (ch == EOF)
1155*a9fa9459Szrj 	    as_warn (_("end of file in comment; newline inserted"));
1156*a9fa9459Szrj 
1157*a9fa9459Szrj 	  state = 0;
1158*a9fa9459Szrj 	  PUT ('\n');
1159*a9fa9459Szrj 	  break;
1160*a9fa9459Szrj #endif
1161*a9fa9459Szrj #ifdef DOUBLEBAR_PARALLEL
1162*a9fa9459Szrj 	case LEX_IS_DOUBLEBAR_1ST:
1163*a9fa9459Szrj 	  ch2 = GET ();
1164*a9fa9459Szrj 	  if (ch2 != EOF)
1165*a9fa9459Szrj 	    UNGET (ch2);
1166*a9fa9459Szrj 	  if (ch2 != '|')
1167*a9fa9459Szrj 	    goto de_fault;
1168*a9fa9459Szrj 
1169*a9fa9459Szrj 	  /* Handle '||' in two states as invoking PUT twice might
1170*a9fa9459Szrj 	     result in the first one jumping out of this loop.  We'd
1171*a9fa9459Szrj 	     then lose track of the state and one '|' char.  */
1172*a9fa9459Szrj 	  state = 13;
1173*a9fa9459Szrj 	  PUT ('|');
1174*a9fa9459Szrj 	  break;
1175*a9fa9459Szrj #endif
1176*a9fa9459Szrj 	case LEX_IS_LINE_COMMENT_START:
1177*a9fa9459Szrj 	  /* FIXME-someday: The two character comment stuff was badly
1178*a9fa9459Szrj 	     thought out.  On i386, we want '/' as line comment start
1179*a9fa9459Szrj 	     AND we want C style comments.  hence this hack.  The
1180*a9fa9459Szrj 	     whole lexical process should be reworked.  xoxorich.  */
1181*a9fa9459Szrj 	  if (ch == '/')
1182*a9fa9459Szrj 	    {
1183*a9fa9459Szrj 	      ch2 = GET ();
1184*a9fa9459Szrj 	      if (ch2 == '*')
1185*a9fa9459Szrj 		{
1186*a9fa9459Szrj 		  old_state = 3;
1187*a9fa9459Szrj 		  state = -2;
1188*a9fa9459Szrj 		  break;
1189*a9fa9459Szrj 		}
1190*a9fa9459Szrj 	      else
1191*a9fa9459Szrj 		{
1192*a9fa9459Szrj 		  UNGET (ch2);
1193*a9fa9459Szrj 		}
1194*a9fa9459Szrj 	    }
1195*a9fa9459Szrj 
1196*a9fa9459Szrj 	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
1197*a9fa9459Szrj 	    {
1198*a9fa9459Szrj 	      int startch;
1199*a9fa9459Szrj 
1200*a9fa9459Szrj 	      startch = ch;
1201*a9fa9459Szrj 
1202*a9fa9459Szrj 	      do
1203*a9fa9459Szrj 		{
1204*a9fa9459Szrj 		  ch = GET ();
1205*a9fa9459Szrj 		}
1206*a9fa9459Szrj 	      while (ch != EOF && IS_WHITESPACE (ch));
1207*a9fa9459Szrj 
1208*a9fa9459Szrj 	      if (ch == EOF)
1209*a9fa9459Szrj 		{
1210*a9fa9459Szrj 		  as_warn (_("end of file in comment; newline inserted"));
1211*a9fa9459Szrj 		  PUT ('\n');
1212*a9fa9459Szrj 		  break;
1213*a9fa9459Szrj 		}
1214*a9fa9459Szrj 
1215*a9fa9459Szrj 	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1216*a9fa9459Szrj 		{
1217*a9fa9459Szrj 		  /* Not a cpp line.  */
1218*a9fa9459Szrj 		  while (ch != EOF && !IS_NEWLINE (ch))
1219*a9fa9459Szrj 		    ch = GET ();
1220*a9fa9459Szrj 		  if (ch == EOF)
1221*a9fa9459Szrj 		    {
1222*a9fa9459Szrj 		      as_warn (_("end of file in comment; newline inserted"));
1223*a9fa9459Szrj 		      PUT ('\n');
1224*a9fa9459Szrj 		    }
1225*a9fa9459Szrj 		  else /* IS_NEWLINE (ch) */
1226*a9fa9459Szrj 		    {
1227*a9fa9459Szrj 		      /* To process non-zero add_newlines.  */
1228*a9fa9459Szrj 		      UNGET (ch);
1229*a9fa9459Szrj 		    }
1230*a9fa9459Szrj 		  state = 0;
1231*a9fa9459Szrj 		  break;
1232*a9fa9459Szrj 		}
1233*a9fa9459Szrj 	      /* Looks like `# 123 "filename"' from cpp.  */
1234*a9fa9459Szrj 	      UNGET (ch);
1235*a9fa9459Szrj 	      old_state = 4;
1236*a9fa9459Szrj 	      state = -1;
1237*a9fa9459Szrj 	      if (scrub_m68k_mri)
1238*a9fa9459Szrj 		out_string = "\tlinefile ";
1239*a9fa9459Szrj 	      else
1240*a9fa9459Szrj 		out_string = "\t.linefile ";
1241*a9fa9459Szrj 	      PUT (*out_string++);
1242*a9fa9459Szrj 	      break;
1243*a9fa9459Szrj 	    }
1244*a9fa9459Szrj 
1245*a9fa9459Szrj #ifdef TC_D10V
1246*a9fa9459Szrj 	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1247*a9fa9459Szrj 	     Trap is the only short insn that has a first operand that is
1248*a9fa9459Szrj 	     neither register nor label.
1249*a9fa9459Szrj 	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1250*a9fa9459Szrj 	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1251*a9fa9459Szrj 	     already LEX_IS_LINE_COMMENT_START.  However, it is the
1252*a9fa9459Szrj 	     only character in line_comment_chars for d10v, hence we
1253*a9fa9459Szrj 	     can recognize it as such.  */
1254*a9fa9459Szrj 	  /* An alternative approach would be to reset the state to 1 when
1255*a9fa9459Szrj 	     we see '||', '<-' or '->', but that seems to be overkill.  */
1256*a9fa9459Szrj 	  if (state == 10)
1257*a9fa9459Szrj 	    PUT (' ');
1258*a9fa9459Szrj #endif
1259*a9fa9459Szrj 	  /* We have a line comment character which is not at the
1260*a9fa9459Szrj 	     start of a line.  If this is also a normal comment
1261*a9fa9459Szrj 	     character, fall through.  Otherwise treat it as a default
1262*a9fa9459Szrj 	     character.  */
1263*a9fa9459Szrj 	  if (strchr (tc_comment_chars, ch) == NULL
1264*a9fa9459Szrj 	      && (! scrub_m68k_mri
1265*a9fa9459Szrj 		  || (ch != '!' && ch != '*')))
1266*a9fa9459Szrj 	    goto de_fault;
1267*a9fa9459Szrj 	  if (scrub_m68k_mri
1268*a9fa9459Szrj 	      && (ch == '!' || ch == '*' || ch == '#')
1269*a9fa9459Szrj 	      && state != 1
1270*a9fa9459Szrj 	      && state != 10)
1271*a9fa9459Szrj 	    goto de_fault;
1272*a9fa9459Szrj 	  /* Fall through.  */
1273*a9fa9459Szrj 	case LEX_IS_COMMENT_START:
1274*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
1275*a9fa9459Szrj 	  /* On the ARM, `@' is the comment character.
1276*a9fa9459Szrj 	     Unfortunately this is also a special character in ELF .symver
1277*a9fa9459Szrj 	     directives (and .type, though we deal with those another way).
1278*a9fa9459Szrj 	     So we check if this line is such a directive, and treat
1279*a9fa9459Szrj 	     the character as default if so.  This is a hack.  */
1280*a9fa9459Szrj 	  if ((symver_state != NULL) && (*symver_state == 0))
1281*a9fa9459Szrj 	    goto de_fault;
1282*a9fa9459Szrj #endif
1283*a9fa9459Szrj 
1284*a9fa9459Szrj #ifdef TC_ARM
1285*a9fa9459Szrj 	  /* For the ARM, care is needed not to damage occurrences of \@
1286*a9fa9459Szrj 	     by stripping the @ onwards.  Yuck.  */
1287*a9fa9459Szrj 	  if (to > tostart && *(to - 1) == '\\')
1288*a9fa9459Szrj 	    /* Do not treat the @ as a start-of-comment.  */
1289*a9fa9459Szrj 	    goto de_fault;
1290*a9fa9459Szrj #endif
1291*a9fa9459Szrj 
1292*a9fa9459Szrj #ifdef WARN_COMMENTS
1293*a9fa9459Szrj 	  if (!found_comment)
1294*a9fa9459Szrj 	    found_comment_file = as_where (&found_comment);
1295*a9fa9459Szrj #endif
1296*a9fa9459Szrj 	  do
1297*a9fa9459Szrj 	    {
1298*a9fa9459Szrj 	      ch = GET ();
1299*a9fa9459Szrj 	    }
1300*a9fa9459Szrj 	  while (ch != EOF && !IS_NEWLINE (ch));
1301*a9fa9459Szrj 	  if (ch == EOF)
1302*a9fa9459Szrj 	    as_warn (_("end of file in comment; newline inserted"));
1303*a9fa9459Szrj 	  state = 0;
1304*a9fa9459Szrj 	  PUT ('\n');
1305*a9fa9459Szrj 	  break;
1306*a9fa9459Szrj 
1307*a9fa9459Szrj #ifdef H_TICK_HEX
1308*a9fa9459Szrj 	case LEX_IS_H:
1309*a9fa9459Szrj 	  /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1310*a9fa9459Szrj 	     the H' with 0x to make them gas-style hex characters.  */
1311*a9fa9459Szrj 	  if (enable_h_tick_hex)
1312*a9fa9459Szrj 	    {
1313*a9fa9459Szrj 	      char quot;
1314*a9fa9459Szrj 
1315*a9fa9459Szrj 	      quot = GET ();
1316*a9fa9459Szrj 	      if (quot == '\'')
1317*a9fa9459Szrj 		{
1318*a9fa9459Szrj 		  UNGET ('x');
1319*a9fa9459Szrj 		  ch = '0';
1320*a9fa9459Szrj 		}
1321*a9fa9459Szrj 	      else
1322*a9fa9459Szrj 		UNGET (quot);
1323*a9fa9459Szrj 	    }
1324*a9fa9459Szrj 	  /* FALL THROUGH */
1325*a9fa9459Szrj #endif
1326*a9fa9459Szrj 
1327*a9fa9459Szrj 	case LEX_IS_SYMBOL_COMPONENT:
1328*a9fa9459Szrj 	  if (state == 10)
1329*a9fa9459Szrj 	    {
1330*a9fa9459Szrj 	      /* This is a symbol character following another symbol
1331*a9fa9459Szrj 		 character, with whitespace in between.  We skipped
1332*a9fa9459Szrj 		 the whitespace earlier, so output it now.  */
1333*a9fa9459Szrj 	      UNGET (ch);
1334*a9fa9459Szrj 	      state = 3;
1335*a9fa9459Szrj 	      PUT (' ');
1336*a9fa9459Szrj 	      break;
1337*a9fa9459Szrj 	    }
1338*a9fa9459Szrj 
1339*a9fa9459Szrj #ifdef TC_Z80
1340*a9fa9459Szrj 	  /* "af'" is a symbol containing '\''.  */
1341*a9fa9459Szrj 	  if (state == 3 && (ch == 'a' || ch == 'A'))
1342*a9fa9459Szrj 	    {
1343*a9fa9459Szrj 	      state = 16;
1344*a9fa9459Szrj 	      PUT (ch);
1345*a9fa9459Szrj 	      ch = GET ();
1346*a9fa9459Szrj 	      if (ch == 'f' || ch == 'F')
1347*a9fa9459Szrj 		{
1348*a9fa9459Szrj 		  state = 17;
1349*a9fa9459Szrj 		  PUT (ch);
1350*a9fa9459Szrj 		  break;
1351*a9fa9459Szrj 		}
1352*a9fa9459Szrj 	      else
1353*a9fa9459Szrj 		{
1354*a9fa9459Szrj 		  state = 9;
1355*a9fa9459Szrj 		  if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1356*a9fa9459Szrj 		    {
1357*a9fa9459Szrj 		      if (ch != EOF)
1358*a9fa9459Szrj 			UNGET (ch);
1359*a9fa9459Szrj 		      break;
1360*a9fa9459Szrj 		    }
1361*a9fa9459Szrj 		}
1362*a9fa9459Szrj 	    }
1363*a9fa9459Szrj #endif
1364*a9fa9459Szrj 	  if (state == 3)
1365*a9fa9459Szrj 	    state = 9;
1366*a9fa9459Szrj 
1367*a9fa9459Szrj 	  /* This is a common case.  Quickly copy CH and all the
1368*a9fa9459Szrj 	     following symbol component or normal characters.  */
1369*a9fa9459Szrj 	  if (to + 1 < toend
1370*a9fa9459Szrj 	      && mri_state == NULL
1371*a9fa9459Szrj #if defined TC_ARM && defined OBJ_ELF
1372*a9fa9459Szrj 	      && symver_state == NULL
1373*a9fa9459Szrj #endif
1374*a9fa9459Szrj 	      )
1375*a9fa9459Szrj 	    {
1376*a9fa9459Szrj 	      char *s;
1377*a9fa9459Szrj 	      ptrdiff_t len;
1378*a9fa9459Szrj 
1379*a9fa9459Szrj 	      for (s = from; s < fromend; s++)
1380*a9fa9459Szrj 		{
1381*a9fa9459Szrj 		  int type;
1382*a9fa9459Szrj 
1383*a9fa9459Szrj 		  ch2 = *(unsigned char *) s;
1384*a9fa9459Szrj 		  type = lex[ch2];
1385*a9fa9459Szrj 		  if (type != 0
1386*a9fa9459Szrj 		      && type != LEX_IS_SYMBOL_COMPONENT)
1387*a9fa9459Szrj 		    break;
1388*a9fa9459Szrj 		}
1389*a9fa9459Szrj 
1390*a9fa9459Szrj 	      if (s > from)
1391*a9fa9459Szrj 		/* Handle the last character normally, for
1392*a9fa9459Szrj 		   simplicity.  */
1393*a9fa9459Szrj 		--s;
1394*a9fa9459Szrj 
1395*a9fa9459Szrj 	      len = s - from;
1396*a9fa9459Szrj 
1397*a9fa9459Szrj 	      if (len > (toend - to) - 1)
1398*a9fa9459Szrj 		len = (toend - to) - 1;
1399*a9fa9459Szrj 
1400*a9fa9459Szrj 	      if (len > 0)
1401*a9fa9459Szrj 		{
1402*a9fa9459Szrj 		  PUT (ch);
1403*a9fa9459Szrj 		  memcpy (to, from, len);
1404*a9fa9459Szrj 		  to += len;
1405*a9fa9459Szrj 		  from += len;
1406*a9fa9459Szrj 		  if (to >= toend)
1407*a9fa9459Szrj 		    goto tofull;
1408*a9fa9459Szrj 		  ch = GET ();
1409*a9fa9459Szrj 		}
1410*a9fa9459Szrj 	    }
1411*a9fa9459Szrj 
1412*a9fa9459Szrj 	  /* Fall through.  */
1413*a9fa9459Szrj 	default:
1414*a9fa9459Szrj 	de_fault:
1415*a9fa9459Szrj 	  /* Some relatively `normal' character.  */
1416*a9fa9459Szrj 	  if (state == 0)
1417*a9fa9459Szrj 	    {
1418*a9fa9459Szrj 	      state = 11;	/* Now seeing label definition.  */
1419*a9fa9459Szrj 	    }
1420*a9fa9459Szrj 	  else if (state == 1)
1421*a9fa9459Szrj 	    {
1422*a9fa9459Szrj 	      state = 2;	/* Ditto.  */
1423*a9fa9459Szrj 	    }
1424*a9fa9459Szrj 	  else if (state == 9)
1425*a9fa9459Szrj 	    {
1426*a9fa9459Szrj 	      if (!IS_SYMBOL_COMPONENT (ch))
1427*a9fa9459Szrj 		state = 3;
1428*a9fa9459Szrj 	    }
1429*a9fa9459Szrj 	  else if (state == 10)
1430*a9fa9459Szrj 	    {
1431*a9fa9459Szrj 	      if (ch == '\\')
1432*a9fa9459Szrj 		{
1433*a9fa9459Szrj 		  /* Special handling for backslash: a backslash may
1434*a9fa9459Szrj 		     be the beginning of a formal parameter (of a
1435*a9fa9459Szrj 		     macro) following another symbol character, with
1436*a9fa9459Szrj 		     whitespace in between.  If that is the case, we
1437*a9fa9459Szrj 		     output a space before the parameter.  Strictly
1438*a9fa9459Szrj 		     speaking, correct handling depends upon what the
1439*a9fa9459Szrj 		     macro parameter expands into; if the parameter
1440*a9fa9459Szrj 		     expands into something which does not start with
1441*a9fa9459Szrj 		     an operand character, then we don't want to keep
1442*a9fa9459Szrj 		     the space.  We don't have enough information to
1443*a9fa9459Szrj 		     make the right choice, so here we are making the
1444*a9fa9459Szrj 		     choice which is more likely to be correct.  */
1445*a9fa9459Szrj 		  if (to + 1 >= toend)
1446*a9fa9459Szrj 		    {
1447*a9fa9459Szrj 		      /* If we're near the end of the buffer, save the
1448*a9fa9459Szrj 		         character for the next time round.  Otherwise
1449*a9fa9459Szrj 		         we'll lose our state.  */
1450*a9fa9459Szrj 		      UNGET (ch);
1451*a9fa9459Szrj 		      goto tofull;
1452*a9fa9459Szrj 		    }
1453*a9fa9459Szrj 		  *to++ = ' ';
1454*a9fa9459Szrj 		}
1455*a9fa9459Szrj 
1456*a9fa9459Szrj 	      state = 3;
1457*a9fa9459Szrj 	    }
1458*a9fa9459Szrj 	  PUT (ch);
1459*a9fa9459Szrj 	  break;
1460*a9fa9459Szrj 	}
1461*a9fa9459Szrj     }
1462*a9fa9459Szrj 
1463*a9fa9459Szrj   /*NOTREACHED*/
1464*a9fa9459Szrj 
1465*a9fa9459Szrj  fromeof:
1466*a9fa9459Szrj   /* We have reached the end of the input.  */
1467*a9fa9459Szrj   return to - tostart;
1468*a9fa9459Szrj 
1469*a9fa9459Szrj  tofull:
1470*a9fa9459Szrj   /* The output buffer is full.  Save any input we have not yet
1471*a9fa9459Szrj      processed.  */
1472*a9fa9459Szrj   if (fromend > from)
1473*a9fa9459Szrj     {
1474*a9fa9459Szrj       saved_input = from;
1475*a9fa9459Szrj       saved_input_len = fromend - from;
1476*a9fa9459Szrj     }
1477*a9fa9459Szrj   else
1478*a9fa9459Szrj     saved_input = NULL;
1479*a9fa9459Szrj 
1480*a9fa9459Szrj   return to - tostart;
1481*a9fa9459Szrj }
1482