xref: /dragonfly/contrib/binutils-2.34/gas/app.c (revision 7ff0fc30)
1 /* This is the Assembler Pre-Processor
2    Copyright (C) 1987-2020 Free Software Foundation, Inc.
3 
4    This file is part of GAS, the GNU Assembler.
5 
6    GAS is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    GAS is distributed in the hope that it will be useful, but WITHOUT
12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14    License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GAS; see the file COPYING.  If not, write to the Free
18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19    02110-1301, USA.  */
20 
21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22 /* App, the assembler pre-processor.  This pre-processor strips out
23    excess spaces, turns single-quoted characters into a decimal
24    constant, and turns the # in # <number> <filename> <garbage> into a
25    .linefile.  This needs better error-handling.  */
26 
27 #include "as.h"
28 
/* If the compiler does not announce ISO C conformance (__STDC__ != 1),
   make `const' expand to nothing, unless something has already
   defined it as a macro.  */
29 #if (__STDC__ != 1)
30 #ifndef const
31 #define const  /* empty */
32 #endif
33 #endif
34 
/* Nonzero makes do_scrub_begin put 'h' and 'H' in the LEX_IS_H class.
   NOTE(review): not static, so presumably set from outside this file
   -- confirm.  */
35 #ifdef H_TICK_HEX
36 int enable_h_tick_hex = 0;
37 #endif
38 
39 #ifdef TC_M68K
40 /* Whether we are scrubbing in m68k MRI mode.  This is different from
41    flag_m68k_mri, because the two flags will be affected by the .mri
42    pseudo-op at different times.  */
43 static int scrub_m68k_mri;
44 
45 /* The pseudo-op which switches in and out of MRI mode.  See the
46    comment in do_scrub_chars.  The recognizer there also accepts a
   '1' where this string has its final '0', so the one string covers
   both ".mri 0" and ".mri 1".  */
47 static const char mri_pseudo[] = ".mri 0";
48 #else
/* Without TC_M68K there is no variable: every test of scrub_m68k_mri
   is against the constant 0.  */
49 #define scrub_m68k_mri 0
50 #endif
51 
52 #if defined TC_ARM && defined OBJ_ELF
53 /* The pseudo-op for which we need to special-case `@' characters.
54    See the comment in do_scrub_chars.  */
55 static const char   symver_pseudo[] = ".symver";
/* Position reached in symver_pseudo by the recognizer in
   do_scrub_chars; NULL while no match is in progress.  */
56 static const char * symver_state;
57 #endif
58 #ifdef TC_ARM
/* NOTE(review): in the visible part of this file last_char is only
   saved and restored by app_push/app_pop; its use by the scrubber
   itself is not shown here -- confirm before relying on it.  */
59 static char last_char;
60 #endif
61 
/* Character-class table, indexed by the unsigned char value of an input
   character and filled in by do_scrub_begin.  Being static it starts
   out all zero; do_scrub_begin treats zero as "no class assigned".  */
62 static char lex[256];
/* Characters that do_scrub_begin marks as symbol components before the
   target-specific character lists are applied.  */
63 static const char symbol_chars[] =
64 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
65 
/* Class codes stored in lex[].  The value 7 is not assigned to any
   class.  */
66 #define LEX_IS_SYMBOL_COMPONENT		1
67 #define LEX_IS_WHITESPACE		2
68 #define LEX_IS_LINE_SEPARATOR		3
69 #define LEX_IS_COMMENT_START		4
70 #define LEX_IS_LINE_COMMENT_START	5
71 #define	LEX_IS_TWOCHAR_COMMENT_1ST	6
72 #define	LEX_IS_STRINGQUOTE		8
73 #define	LEX_IS_COLON			9
74 #define	LEX_IS_NEWLINE			10
75 #define	LEX_IS_ONECHAR_QUOTE		11
76 #ifdef TC_V850
77 #define LEX_IS_DOUBLEDASH_1ST		12
78 #endif
/* The M32R target always enables the double-bar handling.  */
79 #ifdef TC_M32R
80 #define DOUBLEBAR_PARALLEL
81 #endif
82 #ifdef DOUBLEBAR_PARALLEL
83 #define LEX_IS_DOUBLEBAR_1ST		13
84 #endif
85 #define LEX_IS_PARALLEL_SEPARATOR	14
86 #ifdef H_TICK_HEX
87 #define LEX_IS_H			15
88 #endif
/* Class tests.  C is used directly as an index into lex[], so it must
   be a valid index (0..255); in particular EOF has to be checked for
   before any of these macros is applied.  */
89 #define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
90 #define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
91 #define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
92 #define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
93 #define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
94 #define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
95 #define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
96 
97 static int process_escape (int);
98 
99 /* FIXME-soon: The entire lexer/parser thingy should be
100    built statically at compile time rather than dynamically
101    each and every time the assembler is run.  xoxorich.  */
102 
/* Fill in the lex[] character-class table consulted by do_scrub_chars.
   M68K_MRI is only examined when TC_M68K is defined; nonzero selects
   the m68k MRI syntax rules.  Besides the initial call, do_scrub_chars
   calls this again when it recognizes a .mri pseudo-op.  The table is
   not cleared on entry, so entries that no step below assigns keep
   whatever value they had before the call.  The order of the steps is
   significant, because later assignments overwrite earlier ones.  */
103 void
104 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
105 {
106   const char *p;
107   int c;
108 
109   lex[' '] = LEX_IS_WHITESPACE;
110   lex['\t'] = LEX_IS_WHITESPACE;
111   lex['\r'] = LEX_IS_WHITESPACE;
112   lex['\n'] = LEX_IS_NEWLINE;
113   lex[':'] = LEX_IS_COLON;
114 
  /* The braced block below sets the quote classes.  With TC_M68K it is
     the body of the `if' and is skipped in MRI mode (MRI quoting is
     set up near the end of this function); without TC_M68K it is a
     plain block that always runs.  */
115 #ifdef TC_M68K
116   scrub_m68k_mri = m68k_mri;
117 
118   if (! m68k_mri)
119 #endif
120     {
121       lex['"'] = LEX_IS_STRINGQUOTE;
122 
123 #if ! defined (TC_HPPA)
124       lex['\''] = LEX_IS_ONECHAR_QUOTE;
125 #endif
126 
      /* When both apply, this assignment replaces the one just above.  */
127 #ifdef SINGLE_QUOTE_STRINGS
128       lex['\''] = LEX_IS_STRINGQUOTE;
129 #endif
130     }
131 
132   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133      in state 5 of do_scrub_chars must be changed.  */
134 
135   /* Note that each step below overrides whatever class was assigned
136      before it: when a character is named in more than one list, the
     assignment made last wins.  */
137   for (p = symbol_chars; *p; ++p)
138     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139 
  /* Every byte value with the high bit set counts as a symbol
     character.  */
140   for (c = 128; c < 256; ++c)
141     lex[c] = LEX_IS_SYMBOL_COMPONENT;
142 
143 #ifdef tc_symbol_chars
144   /* This macro permits the processor to specify all characters which
145      may appear in an operand.  This will prevent the scrubber from
146      discarding meaningful whitespace in certain cases.  The i386
147      backend uses this to support prefixes, which can confuse the
148      scrubber as to whether it is parsing operands or opcodes.  */
149   for (p = tc_symbol_chars; *p; ++p)
150     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151 #endif
152 
153   /* The m68k backend wants to be able to change comment_chars.  */
154 #ifndef tc_comment_chars
155 #define tc_comment_chars comment_chars
156 #endif
157   for (p = tc_comment_chars; *p; p++)
158     lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159 
160   for (p = line_comment_chars; *p; p++)
161     lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162 
163 #ifndef tc_line_separator_chars
164 #define tc_line_separator_chars line_separator_chars
165 #endif
166   for (p = tc_line_separator_chars; *p; p++)
167     lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
168 
169 #ifdef tc_parallel_separator_chars
170   /* This macro permits the processor to specify all characters which
171      separate parallel insns on the same line.  */
172   for (p = tc_parallel_separator_chars; *p; p++)
173     lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
174 #endif
175 
176   /* Only allow slash-star comments if slash is not in use.
177      FIXME: This isn't right.  We should always permit them.  */
178   if (lex['/'] == 0)
179     lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
180 
181 #ifdef TC_M68K
  /* MRI mode: these four assignments replace whatever the steps above
     chose for the same characters.  */
182   if (m68k_mri)
183     {
184       lex['\''] = LEX_IS_STRINGQUOTE;
185       lex[';'] = LEX_IS_COMMENT_START;
186       lex['*'] = LEX_IS_LINE_COMMENT_START;
187       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
188 	 then it can't be used in an expression.  */
189       lex['!'] = LEX_IS_LINE_COMMENT_START;
190     }
191 #endif
192 
193 #ifdef TC_V850
194   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
195 #endif
196 #ifdef DOUBLEBAR_PARALLEL
197   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
198 #endif
199 #ifdef TC_D30V
200   /* Must do this if we want VLIW instruction with "->" or "<-".  */
201   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
202 #endif
203 
204 #ifdef H_TICK_HEX
  /* Done last, so 'h' and 'H' lose their symbol-component class when
     enable_h_tick_hex is set.  */
205   if (enable_h_tick_hex)
206     {
207       lex['h'] = LEX_IS_H;
208       lex['H'] = LEX_IS_H;
209     }
210 #endif
211 }
212 
213 /* Saved state of the scrubber.  do_scrub_chars is written so that it
   may return at any point, so everything it needs in order to resume
   on the next call lives in these file-scope variables.  */
/* Current state of the do_scrub_chars state machine.  */
214 static int state;
/* State to go back to once one of the temporary states (-1, -2 or 5)
   has finished.  */
215 static int old_state;
/* In state -1, the next character to be output.  */
216 static const char *out_string;
/* Receives the decimal text produced for a one-character quote;
   out_string is pointed at it.  */
217 static char out_buf[20];
/* Count of newlines consumed inside comments or continued strings that
   still have to be emitted; worked off when a newline is processed.  */
218 static int add_newlines;
/* If non-NULL, do_scrub_chars resumes reading from here (for
   saved_input_len bytes) instead of first asking for new input.  */
219 static char *saved_input;
220 static size_t saved_input_len;
/* Buffer handed to the GET callback of do_scrub_chars.  */
221 static char input_buffer[32 * 1024];
/* Position reached in mri_pseudo by the .mri recognizer in
   do_scrub_chars; NULL while no match is in progress.  */
222 static const char *mri_state;
/* Last character accepted by the .mri recognizer; '1' means MRI mode
   is to be switched on.  */
223 static char mri_last_ch;
224 
225 /* Data structure for saving the state of app across #include's.  Note that
226    app is called asynchronously to the parsing of the .include's, so our
227    state at the time .include is interpreted is completely unrelated.
228    That's why we have to save it all.  */
229 
230 struct app_save
231 {
  /* Each member holds a copy of the file-scope variable of the same
     name, taken by app_push and written back by app_pop.  */
232   int          state;
233   int          old_state;
234   const char * out_string;
235   char         out_buf[sizeof (out_buf)];
236   int          add_newlines;
  /* Heap copy of the pending input made by app_push and freed by
     app_pop, or NULL if nothing was pending.  */
237   char *       saved_input;
  /* Length of saved_input; meaningful only when saved_input is not
     NULL.  */
238   size_t       saved_input_len;
239 #ifdef TC_M68K
240   int          scrub_m68k_mri;
241 #endif
242   const char * mri_state;
243   char         mri_last_ch;
244 #if defined TC_ARM && defined OBJ_ELF
245   const char * symver_state;
246 #endif
247 #ifdef TC_ARM
248   char last_char;
249 #endif
250 };
251 
252 char *
253 app_push (void)
254 {
255   struct app_save *saved;
256 
257   saved = XNEW (struct app_save);
258   saved->state = state;
259   saved->old_state = old_state;
260   saved->out_string = out_string;
261   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
262   saved->add_newlines = add_newlines;
263   if (saved_input == NULL)
264     saved->saved_input = NULL;
265   else
266     {
267       saved->saved_input = XNEWVEC (char, saved_input_len);
268       memcpy (saved->saved_input, saved_input, saved_input_len);
269       saved->saved_input_len = saved_input_len;
270     }
271 #ifdef TC_M68K
272   saved->scrub_m68k_mri = scrub_m68k_mri;
273 #endif
274   saved->mri_state = mri_state;
275   saved->mri_last_ch = mri_last_ch;
276 #if defined TC_ARM && defined OBJ_ELF
277   saved->symver_state = symver_state;
278 #endif
279 #ifdef TC_ARM
280   saved->last_char = last_char;
281 #endif
282 
283   /* do_scrub_begin() is not useful, just wastes time.  */
284 
285   state = 0;
286   saved_input = NULL;
287   add_newlines = 0;
288 
289   return (char *) saved;
290 }
291 
292 void
293 app_pop (char *arg)
294 {
295   struct app_save *saved = (struct app_save *) arg;
296 
297   /* There is no do_scrub_end ().  */
298   state = saved->state;
299   old_state = saved->old_state;
300   out_string = saved->out_string;
301   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
302   add_newlines = saved->add_newlines;
303   if (saved->saved_input == NULL)
304     saved_input = NULL;
305   else
306     {
307       gas_assert (saved->saved_input_len <= sizeof (input_buffer));
308       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
309       saved_input = input_buffer;
310       saved_input_len = saved->saved_input_len;
311       free (saved->saved_input);
312     }
313 #ifdef TC_M68K
314   scrub_m68k_mri = saved->scrub_m68k_mri;
315 #endif
316   mri_state = saved->mri_state;
317   mri_last_ch = saved->mri_last_ch;
318 #if defined TC_ARM && defined OBJ_ELF
319   symver_state = saved->symver_state;
320 #endif
321 #ifdef TC_ARM
322   last_char = saved->last_char;
323 #endif
324 
325   free (arg);
326 }
327 
/* Translate CH, the character that followed a backslash in a
   one-character quote, into the character the escape stands for.
   Characters that are not in the table are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */

static int
process_escape (int ch)
{
  /* Each row is { escape letter, character it denotes }.  */
  static const char escapes[][2] =
    {
      { 'b', '\b' },
      { 'f', '\f' },
      { 'n', '\n' },
      { 'r', '\r' },
      { 't', '\t' },
      { '\'', '\'' },
      { '"', '\"' }
    };
  unsigned int i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (escapes[i][0] == ch)
      return escapes[i][1];

  return ch;
}
354 
355 /* This function is called to process input characters.  The GET
356    parameter is used to retrieve more input characters.  GET should
357    set its parameter to point to a buffer, and return the length of
358    the buffer; it should return 0 at end of file.  The scrubbed output
359    characters are put into the buffer starting at TOSTART; the TOSTART
360    buffer is TOLEN bytes in length.  The function returns the number
361    of scrubbed characters put into TOSTART.  This will be TOLEN unless
362    end of file was seen.  This function is arranged as a state
363    machine, and saves its state so that it may return at any point.
364    This is the way the old code used to work.  */
365 
366 size_t
367 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
368 {
369   char *to = tostart;
370   char *toend = tostart + tolen;
371   char *from;
372   char *fromend;
373   size_t fromlen;
374   int ch, ch2 = 0;
375   /* Character that started the string we're working on.  */
376   static char quotechar;
377 
378   /*State 0: beginning of normal line
379 	  1: After first whitespace on line (flush more white)
380 	  2: After first non-white (opcode) on line (keep 1white)
381 	  3: after second white on line (into operands) (flush white)
382 	  4: after putting out a .linefile, put out digits
383 	  5: parsing a string, then go to old-state
384 	  6: putting out \ escape in a "d string.
385 	  7: no longer used
386 	  8: no longer used
387 	  9: After seeing symbol char in state 3 (keep 1white after symchar)
388 	 10: After seeing whitespace in state 9 (keep white before symchar)
389 	 11: After seeing a symbol character in state 0 (eg a label definition)
390 	 -1: output string in out_string and go to the state in old_state
391 	 -2: flush text until a '*' '/' is seen, then go to state old_state
392 #ifdef TC_V850
393 	 12: After seeing a dash, looking for a second dash as a start
394 	     of comment.
395 #endif
396 #ifdef DOUBLEBAR_PARALLEL
397 	 13: After seeing a vertical bar, looking for a second
398 	     vertical bar as a parallel expression separator.
399 #endif
400 #ifdef TC_PREDICATE_START_CHAR
401 	 14: After seeing a predicate start character at state 0, looking
402 	     for a predicate end character as predicate.
403 	 15: After seeing a predicate start character at state 1, looking
404 	     for a predicate end character as predicate.
405 #endif
406 #ifdef TC_Z80
407 	 16: After seeing an 'a' or an 'A' at the start of a symbol
408 	 17: After seeing an 'f' or an 'F' in state 16
409 #endif
410 	  */
411 
412   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
413      constructs like ``.loc 1 20''.  This was turning into ``.loc
414      120''.  States 9 and 10 ensure that a space is never dropped in
415      between characters which could appear in an identifier.  Ian
416      Taylor, ian@cygnus.com.
417 
418      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
419      correctly on the PA (and any other target where colons are optional).
420      Jeff Law, law@cs.utah.edu.
421 
422      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
423      get squashed into "cmp r1,r2||trap#1", with the all important space
424      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
425 
426   /* This macro gets the next input character.  */
427 
428 #define GET()							\
429   (from < fromend						\
430    ? * (unsigned char *) (from++)				\
431    : (saved_input = NULL,					\
432       fromlen = (*get) (input_buffer, sizeof input_buffer),	\
433       from = input_buffer,					\
434       fromend = from + fromlen,					\
435       (fromlen == 0						\
436        ? EOF							\
437        : * (unsigned char *) (from++))))
438 
439   /* This macro pushes a character back on the input stream.  */
440 
441 #define UNGET(uch) (*--from = (uch))
442 
443   /* This macro puts a character into the output buffer.  If this
444      character fills the output buffer, this macro jumps to the label
445      TOFULL.  We use this rather ugly approach because we need to
446      handle two different termination conditions: EOF on the input
447      stream, and a full output buffer.  It would be simpler if we
448      always read in the entire input stream before processing it, but
449      I don't want to make such a significant change to the assembler's
450      memory usage.  */
451 
452 #define PUT(pch)				\
453   do						\
454     {						\
455       *to++ = (pch);				\
456       if (to >= toend)				\
457 	goto tofull;				\
458     }						\
459   while (0)
460 
461   if (saved_input != NULL)
462     {
463       from = saved_input;
464       fromend = from + saved_input_len;
465     }
466   else
467     {
468       fromlen = (*get) (input_buffer, sizeof input_buffer);
469       if (fromlen == 0)
470 	return 0;
471       from = input_buffer;
472       fromend = from + fromlen;
473     }
474 
475   while (1)
476     {
477       /* The cases in this switch end with continue, in order to
478 	 branch back to the top of this while loop and generate the
479 	 next output character in the appropriate state.  */
480       switch (state)
481 	{
482 	case -1:
483 	  ch = *out_string++;
484 	  if (*out_string == '\0')
485 	    {
486 	      state = old_state;
487 	      old_state = 3;
488 	    }
489 	  PUT (ch);
490 	  continue;
491 
492 	case -2:
493 	  for (;;)
494 	    {
495 	      do
496 		{
497 		  ch = GET ();
498 
499 		  if (ch == EOF)
500 		    {
501 		      as_warn (_("end of file in comment"));
502 		      goto fromeof;
503 		    }
504 
505 		  if (ch == '\n')
506 		    PUT ('\n');
507 		}
508 	      while (ch != '*');
509 
510 	      while ((ch = GET ()) == '*')
511 		;
512 
513 	      if (ch == EOF)
514 		{
515 		  as_warn (_("end of file in comment"));
516 		  goto fromeof;
517 		}
518 
519 	      if (ch == '/')
520 		break;
521 
522 	      UNGET (ch);
523 	    }
524 
525 	  state = old_state;
526 	  UNGET (' ');
527 	  continue;
528 
529 	case 4:
530 	  ch = GET ();
531 	  if (ch == EOF)
532 	    goto fromeof;
533 	  else if (ch >= '0' && ch <= '9')
534 	    PUT (ch);
535 	  else
536 	    {
537 	      while (ch != EOF && IS_WHITESPACE (ch))
538 		ch = GET ();
539 	      if (ch == '"')
540 		{
541 		  quotechar = ch;
542 		  state = 5;
543 		  old_state = 3;
544 		  PUT (ch);
545 		}
546 	      else
547 		{
548 		  while (ch != EOF && ch != '\n')
549 		    ch = GET ();
550 		  state = 0;
551 		  PUT (ch);
552 		}
553 	    }
554 	  continue;
555 
556 	case 5:
557 	  /* We are going to copy everything up to a quote character,
558 	     with special handling for a backslash.  We try to
559 	     optimize the copying in the simple case without using the
560 	     GET and PUT macros.  */
561 	  {
562 	    char *s;
563 	    ptrdiff_t len;
564 
565 	    for (s = from; s < fromend; s++)
566 	      {
567 		ch = *s;
568 		if (ch == '\\'
569 		    || ch == quotechar
570 		    || ch == '\n')
571 		  break;
572 	      }
573 	    len = s - from;
574 	    if (len > toend - to)
575 	      len = toend - to;
576 	    if (len > 0)
577 	      {
578 		memcpy (to, from, len);
579 		to += len;
580 		from += len;
581 		if (to >= toend)
582 		  goto tofull;
583 	      }
584 	  }
585 
586 	  ch = GET ();
587 	  if (ch == EOF)
588 	    {
589 	      /* This buffer is here specifically so
590 		 that the UNGET below will work.  */
591 	      static char one_char_buf[1];
592 
593 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
594 	      state = old_state;
595 	      from = fromend = one_char_buf + 1;
596 	      fromlen = 1;
597 	      UNGET ('\n');
598 	      PUT (quotechar);
599 	    }
600 	  else if (ch == quotechar)
601 	    {
602 	      state = old_state;
603 	      PUT (ch);
604 	    }
605 	  else if (TC_STRING_ESCAPES && ch == '\\')
606 	    {
607 	      state = 6;
608 	      PUT (ch);
609 	    }
610 	  else if (scrub_m68k_mri && ch == '\n')
611 	    {
612 	      /* Just quietly terminate the string.  This permits lines like
613 		   bne	label	loop if we haven't reach end yet.  */
614 	      state = old_state;
615 	      UNGET (ch);
616 	      PUT ('\'');
617 	    }
618 	  else
619 	    {
620 	      PUT (ch);
621 	    }
622 	  continue;
623 
624 	case 6:
625 	  state = 5;
626 	  ch = GET ();
627 	  switch (ch)
628 	    {
629 	      /* Handle strings broken across lines, by turning '\n' into
630 		 '\\' and 'n'.  */
631 	    case '\n':
632 	      UNGET ('n');
633 	      add_newlines++;
634 	      PUT ('\\');
635 	      continue;
636 
637 	    case EOF:
638 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
639 	      PUT (quotechar);
640 	      continue;
641 
642 	    case '"':
643 	    case '\\':
644 	    case 'b':
645 	    case 'f':
646 	    case 'n':
647 	    case 'r':
648 	    case 't':
649 	    case 'v':
650 	    case 'x':
651 	    case 'X':
652 	    case '0':
653 	    case '1':
654 	    case '2':
655 	    case '3':
656 	    case '4':
657 	    case '5':
658 	    case '6':
659 	    case '7':
660 	      break;
661 
662 	    default:
663 #ifdef ONLY_STANDARD_ESCAPES
664 	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
665 #endif
666 	      break;
667 	    }
668 	  PUT (ch);
669 	  continue;
670 
671 #ifdef DOUBLEBAR_PARALLEL
672 	case 13:
673 	  ch = GET ();
674 	  if (ch != '|')
675 	    abort ();
676 
677 	  /* Reset back to state 1 and pretend that we are parsing a
678 	     line from just after the first white space.  */
679 	  state = 1;
680 	  PUT ('|');
681 #ifdef TC_TIC6X
682 	  /* "||^" is used for SPMASKed instructions.  */
683 	  ch = GET ();
684 	  if (ch == EOF)
685 	    goto fromeof;
686 	  else if (ch == '^')
687 	    PUT ('^');
688 	  else
689 	    UNGET (ch);
690 #endif
691 	  continue;
692 #endif
693 #ifdef TC_Z80
694 	case 16:
695 	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
696 	  ch = GET ();
697 	  if (ch == 'f' || ch == 'F')
698 	    {
699 	      state = 17;
700 	      PUT (ch);
701 	    }
702 	  else
703 	    {
704 	      state = 9;
705 	      break;
706 	    }
707 	  /* Fall through.  */
708 	case 17:
709 	  /* We have seen "af" at the start of a symbol,
710 	     a ' here is a part of that symbol.  */
711 	  ch = GET ();
712 	  state = 9;
713 	  if (ch == '\'')
714 	    /* Change to avoid warning about unclosed string.  */
715 	    PUT ('`');
716 	  else if (ch != EOF)
717 	    UNGET (ch);
718 	  break;
719 #endif
720 	}
721 
722       /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
723 
724       /* flushchar: */
725       ch = GET ();
726 
727 #ifdef TC_PREDICATE_START_CHAR
728       if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
729 	{
730 	  state += 14;
731 	  PUT (ch);
732 	  continue;
733 	}
734       else if (state == 14 || state == 15)
735 	{
736 	  if (ch == TC_PREDICATE_END_CHAR)
737 	    {
738 	      state -= 14;
739 	      PUT (ch);
740 	      ch = GET ();
741 	    }
742 	  else
743 	    {
744 	      PUT (ch);
745 	      continue;
746 	    }
747 	}
748 #endif
749 
750     recycle:
751 
752 #if defined TC_ARM && defined OBJ_ELF
753       /* We need to watch out for .symver directives.  See the comment later
754 	 in this function.  */
755       if (symver_state == NULL)
756 	{
757 	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
758 	    symver_state = symver_pseudo + 1;
759 	}
760       else
761 	{
762 	  /* We advance to the next state if we find the right
763 	     character.  */
764 	  if (ch != '\0' && (*symver_state == ch))
765 	    ++symver_state;
766 	  else if (*symver_state != '\0')
767 	    /* We did not get the expected character, or we didn't
768 	       get a valid terminating character after seeing the
769 	       entire pseudo-op, so we must go back to the beginning.  */
770 	    symver_state = NULL;
771 	  else
772 	    {
773 	      /* We've read the entire pseudo-op.  If this is the end
774 		 of the line, go back to the beginning.  */
775 	      if (IS_NEWLINE (ch))
776 		symver_state = NULL;
777 	    }
778 	}
779 #endif /* TC_ARM && OBJ_ELF */
780 
781 #ifdef TC_M68K
782       /* We want to have pseudo-ops which control whether we are in
783 	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
784 	 the scrubber, that means that we need a special purpose
785 	 recognizer here.  */
786       if (mri_state == NULL)
787 	{
788 	  if ((state == 0 || state == 1)
789 	      && ch == mri_pseudo[0])
790 	    mri_state = mri_pseudo + 1;
791 	}
792       else
793 	{
794 	  /* We advance to the next state if we find the right
795 	     character, or if we need a space character and we get any
796 	     whitespace character, or if we need a '0' and we get a
797 	     '1' (this is so that we only need one state to handle
798 	     ``.mri 0'' and ``.mri 1'').  */
799 	  if (ch != '\0'
800 	      && (*mri_state == ch
801 		  || (*mri_state == ' '
802 		      && lex[ch] == LEX_IS_WHITESPACE)
803 		  || (*mri_state == '0'
804 		      && ch == '1')))
805 	    {
806 	      mri_last_ch = ch;
807 	      ++mri_state;
808 	    }
809 	  else if (*mri_state != '\0'
810 		   || (lex[ch] != LEX_IS_WHITESPACE
811 		       && lex[ch] != LEX_IS_NEWLINE))
812 	    {
813 	      /* We did not get the expected character, or we didn't
814 		 get a valid terminating character after seeing the
815 		 entire pseudo-op, so we must go back to the
816 		 beginning.  */
817 	      mri_state = NULL;
818 	    }
819 	  else
820 	    {
821 	      /* We've read the entire pseudo-op.  mri_last_ch is
822 		 either '1' or '0', indicating whether to enter or
823 		 leave MRI mode.  */
824 	      do_scrub_begin (mri_last_ch == '1');
825 	      mri_state = NULL;
826 
827 	      /* We continue handling the character as usual.  The
828 		 main gas reader must also handle the .mri pseudo-op
829 		 to control expression parsing and the like.  */
830 	    }
831 	}
832 #endif
833 
834       if (ch == EOF)
835 	{
836 	  if (state != 0)
837 	    {
838 	      as_warn (_("end of file not at end of a line; newline inserted"));
839 	      state = 0;
840 	      PUT ('\n');
841 	    }
842 	  goto fromeof;
843 	}
844 
845       switch (lex[ch])
846 	{
847 	case LEX_IS_WHITESPACE:
848 	  do
849 	    {
850 	      ch = GET ();
851 	    }
852 	  while (ch != EOF && IS_WHITESPACE (ch));
853 	  if (ch == EOF)
854 	    goto fromeof;
855 
856 	  if (state == 0)
857 	    {
858 	      /* Preserve a single whitespace character at the
859 		 beginning of a line.  */
860 	      state = 1;
861 	      UNGET (ch);
862 	      PUT (' ');
863 	      break;
864 	    }
865 
866 #ifdef KEEP_WHITE_AROUND_COLON
867 	  if (lex[ch] == LEX_IS_COLON)
868 	    {
869 	      /* Only keep this white if there's no white *after* the
870 		 colon.  */
871 	      ch2 = GET ();
872 	      if (ch2 != EOF)
873 		UNGET (ch2);
874 	      if (!IS_WHITESPACE (ch2))
875 		{
876 		  state = 9;
877 		  UNGET (ch);
878 		  PUT (' ');
879 		  break;
880 		}
881 	    }
882 #endif
883 	  if (IS_COMMENT (ch)
884 	      || ch == '/'
885 	      || IS_LINE_SEPARATOR (ch)
886 	      || IS_PARALLEL_SEPARATOR (ch))
887 	    {
888 	      if (scrub_m68k_mri)
889 		{
890 		  /* In MRI mode, we keep these spaces.  */
891 		  UNGET (ch);
892 		  PUT (' ');
893 		  break;
894 		}
895 	      goto recycle;
896 	    }
897 
898 	  /* If we're in state 2 or 11, we've seen a non-white
899 	     character followed by whitespace.  If the next character
900 	     is ':', this is whitespace after a label name which we
901 	     normally must ignore.  In MRI mode, though, spaces are
902 	     not permitted between the label and the colon.  */
903 	  if ((state == 2 || state == 11)
904 	      && lex[ch] == LEX_IS_COLON
905 	      && ! scrub_m68k_mri)
906 	    {
907 	      state = 1;
908 	      PUT (ch);
909 	      break;
910 	    }
911 
912 	  switch (state)
913 	    {
914 	    case 1:
915 	      /* We can arrive here if we leave a leading whitespace
916 		 character at the beginning of a line.  */
917 	      goto recycle;
918 	    case 2:
919 	      state = 3;
920 	      if (to + 1 < toend)
921 		{
922 		  /* Optimize common case by skipping UNGET/GET.  */
923 		  PUT (' ');	/* Sp after opco */
924 		  goto recycle;
925 		}
926 	      UNGET (ch);
927 	      PUT (' ');
928 	      break;
929 	    case 3:
930 #ifndef TC_KEEP_OPERAND_SPACES
931 	      /* For TI C6X, we keep these spaces as they may separate
932 		 functional unit specifiers from operands.  */
933 	      if (scrub_m68k_mri)
934 #endif
935 		{
936 		  /* In MRI mode, we keep these spaces.  */
937 		  UNGET (ch);
938 		  PUT (' ');
939 		  break;
940 		}
941 	      goto recycle;	/* Sp in operands */
942 	    case 9:
943 	    case 10:
944 #ifndef TC_KEEP_OPERAND_SPACES
945 	      if (scrub_m68k_mri)
946 #endif
947 		{
948 		  /* In MRI mode, we keep these spaces.  */
949 		  state = 3;
950 		  UNGET (ch);
951 		  PUT (' ');
952 		  break;
953 		}
954 	      state = 10;	/* Sp after symbol char */
955 	      goto recycle;
956 	    case 11:
957 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
958 		state = 1;
959 	      else
960 		{
961 		  /* We know that ch is not ':', since we tested that
962 		     case above.  Therefore this is not a label, so it
963 		     must be the opcode, and we've just seen the
964 		     whitespace after it.  */
965 		  state = 3;
966 		}
967 	      UNGET (ch);
968 	      PUT (' ');	/* Sp after label definition.  */
969 	      break;
970 	    default:
971 	      BAD_CASE (state);
972 	    }
973 	  break;
974 
975 	case LEX_IS_TWOCHAR_COMMENT_1ST:
976 	  ch2 = GET ();
977 	  if (ch2 == '*')
978 	    {
979 	      for (;;)
980 		{
981 		  do
982 		    {
983 		      ch2 = GET ();
984 		      if (ch2 != EOF && IS_NEWLINE (ch2))
985 			add_newlines++;
986 		    }
987 		  while (ch2 != EOF && ch2 != '*');
988 
989 		  while (ch2 == '*')
990 		    ch2 = GET ();
991 
992 		  if (ch2 == EOF || ch2 == '/')
993 		    break;
994 
995 		  /* This UNGET will ensure that we count newlines
996 		     correctly.  */
997 		  UNGET (ch2);
998 		}
999 
1000 	      if (ch2 == EOF)
1001 		as_warn (_("end of file in multiline comment"));
1002 
1003 	      ch = ' ';
1004 	      goto recycle;
1005 	    }
1006 #ifdef DOUBLESLASH_LINE_COMMENTS
1007 	  else if (ch2 == '/')
1008 	    {
1009 	      do
1010 		{
1011 		  ch = GET ();
1012 		}
1013 	      while (ch != EOF && !IS_NEWLINE (ch));
1014 	      if (ch == EOF)
1015 		as_warn ("end of file in comment; newline inserted");
1016 	      state = 0;
1017 	      PUT ('\n');
1018 	      break;
1019 	    }
1020 #endif
1021 	  else
1022 	    {
1023 	      if (ch2 != EOF)
1024 		UNGET (ch2);
1025 	      if (state == 9 || state == 10)
1026 		state = 3;
1027 	      PUT (ch);
1028 	    }
1029 	  break;
1030 
1031 	case LEX_IS_STRINGQUOTE:
1032 	  quotechar = ch;
1033 	  if (state == 10)
1034 	    {
1035 	      /* Preserve the whitespace in foo "bar".  */
1036 	      UNGET (ch);
1037 	      state = 3;
1038 	      PUT (' ');
1039 
1040 	      /* PUT didn't jump out.  We could just break, but we
1041 		 know what will happen, so optimize a bit.  */
1042 	      ch = GET ();
1043 	      old_state = 3;
1044 	    }
1045 	  else if (state == 9)
1046 	    old_state = 3;
1047 	  else
1048 	    old_state = state;
1049 	  state = 5;
1050 	  PUT (ch);
1051 	  break;
1052 
1053 	case LEX_IS_ONECHAR_QUOTE:
1054 #ifdef H_TICK_HEX
1055 	  if (state == 9 && enable_h_tick_hex)
1056 	    {
1057 	      char c;
1058 
1059 	      c = GET ();
1060 	      as_warn ("'%c found after symbol", c);
1061 	      UNGET (c);
1062 	    }
1063 #endif
1064 	  if (state == 10)
1065 	    {
1066 	      /* Preserve the whitespace in foo 'b'.  */
1067 	      UNGET (ch);
1068 	      state = 3;
1069 	      PUT (' ');
1070 	      break;
1071 	    }
1072 	  ch = GET ();
1073 	  if (ch == EOF)
1074 	    {
1075 	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
1076 	      ch = 0;
1077 	    }
1078 	  if (ch == '\\')
1079 	    {
1080 	      ch = GET ();
1081 	      if (ch == EOF)
1082 		{
1083 		  as_warn (_("end of file in escape character"));
1084 		  ch = '\\';
1085 		}
1086 	      else
1087 		ch = process_escape (ch);
1088 	    }
1089 	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
1090 
1091 	  /* None of these 'x constants for us.  We want 'x'.  */
1092 	  if ((ch = GET ()) != '\'')
1093 	    {
1094 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1095 	      as_warn (_("missing close quote; (assumed)"));
1096 #else
1097 	      if (ch != EOF)
1098 		UNGET (ch);
1099 #endif
1100 	    }
1101 	  if (strlen (out_buf) == 1)
1102 	    {
1103 	      PUT (out_buf[0]);
1104 	      break;
1105 	    }
1106 	  if (state == 9)
1107 	    old_state = 3;
1108 	  else
1109 	    old_state = state;
1110 	  state = -1;
1111 	  out_string = out_buf;
1112 	  PUT (*out_string++);
1113 	  break;
1114 
1115 	case LEX_IS_COLON:
1116 #ifdef KEEP_WHITE_AROUND_COLON
1117 	  state = 9;
1118 #else
1119 	  if (state == 9 || state == 10)
1120 	    state = 3;
1121 	  else if (state != 3)
1122 	    state = 1;
1123 #endif
1124 	  PUT (ch);
1125 	  break;
1126 
1127 	case LEX_IS_NEWLINE:
1128 	  /* Roll out a bunch of newlines from inside comments, etc.  */
1129 	  if (add_newlines)
1130 	    {
1131 	      --add_newlines;
1132 	      UNGET (ch);
1133 	    }
1134 	  /* Fall through.  */
1135 
1136 	case LEX_IS_LINE_SEPARATOR:
1137 	  state = 0;
1138 	  PUT (ch);
1139 	  break;
1140 
1141 	case LEX_IS_PARALLEL_SEPARATOR:
1142 	  state = 1;
1143 	  PUT (ch);
1144 	  break;
1145 
1146 #ifdef TC_V850
1147 	case LEX_IS_DOUBLEDASH_1ST:
1148 	  ch2 = GET ();
1149 	  if (ch2 != '-')
1150 	    {
1151 	      if (ch2 != EOF)
1152 		UNGET (ch2);
1153 	      goto de_fault;
1154 	    }
1155 	  /* Read and skip to end of line.  */
1156 	  do
1157 	    {
1158 	      ch = GET ();
1159 	    }
1160 	  while (ch != EOF && ch != '\n');
1161 
1162 	  if (ch == EOF)
1163 	    as_warn (_("end of file in comment; newline inserted"));
1164 
1165 	  state = 0;
1166 	  PUT ('\n');
1167 	  break;
1168 #endif
1169 #ifdef DOUBLEBAR_PARALLEL
1170 	case LEX_IS_DOUBLEBAR_1ST:
1171 	  ch2 = GET ();
1172 	  if (ch2 != EOF)
1173 	    UNGET (ch2);
1174 	  if (ch2 != '|')
1175 	    goto de_fault;
1176 
1177 	  /* Handle '||' in two states as invoking PUT twice might
1178 	     result in the first one jumping out of this loop.  We'd
1179 	     then lose track of the state and one '|' char.  */
1180 	  state = 13;
1181 	  PUT ('|');
1182 	  break;
1183 #endif
1184 	case LEX_IS_LINE_COMMENT_START:
1185 	  /* FIXME-someday: The two character comment stuff was badly
1186 	     thought out.  On i386, we want '/' as line comment start
1187 	     AND we want C style comments.  hence this hack.  The
1188 	     whole lexical process should be reworked.  xoxorich.  */
1189 	  if (ch == '/')
1190 	    {
1191 	      ch2 = GET ();
1192 	      if (ch2 == '*')
1193 		{
1194 		  old_state = 3;
1195 		  state = -2;
1196 		  break;
1197 		}
1198 	      else if (ch2 != EOF)
1199 		{
1200 		  UNGET (ch2);
1201 		}
1202 	    }
1203 
1204 	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
1205 	    {
1206 	      int startch;
1207 
1208 	      startch = ch;
1209 
1210 	      do
1211 		{
1212 		  ch = GET ();
1213 		}
1214 	      while (ch != EOF && IS_WHITESPACE (ch));
1215 
1216 	      if (ch == EOF)
1217 		{
1218 		  as_warn (_("end of file in comment; newline inserted"));
1219 		  PUT ('\n');
1220 		  break;
1221 		}
1222 
1223 	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1224 		{
1225 		  /* Not a cpp line.  */
1226 		  while (ch != EOF && !IS_NEWLINE (ch))
1227 		    ch = GET ();
1228 		  if (ch == EOF)
1229 		    {
1230 		      as_warn (_("end of file in comment; newline inserted"));
1231 		      PUT ('\n');
1232 		    }
1233 		  else /* IS_NEWLINE (ch) */
1234 		    {
1235 		      /* To process non-zero add_newlines.  */
1236 		      UNGET (ch);
1237 		    }
1238 		  state = 0;
1239 		  break;
1240 		}
1241 	      /* Looks like `# 123 "filename"' from cpp.  */
1242 	      UNGET (ch);
1243 	      old_state = 4;
1244 	      state = -1;
1245 	      if (scrub_m68k_mri)
1246 		out_string = "\tlinefile ";
1247 	      else
1248 		out_string = "\t.linefile ";
1249 	      PUT (*out_string++);
1250 	      break;
1251 	    }
1252 
1253 #ifdef TC_D10V
1254 	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1255 	     Trap is the only short insn that has a first operand that is
1256 	     neither register nor label.
1257 	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1258 	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1259 	     already LEX_IS_LINE_COMMENT_START.  However, it is the
1260 	     only character in line_comment_chars for d10v, hence we
1261 	     can recognize it as such.  */
1262 	  /* An alternative approach would be to reset the state to 1 when
1263 	     we see '||', '<'- or '->', but that seems to be overkill.  */
1264 	  if (state == 10)
1265 	    PUT (' ');
1266 #endif
1267 	  /* We have a line comment character which is not at the
1268 	     start of a line.  If this is also a normal comment
1269 	     character, fall through.  Otherwise treat it as a default
1270 	     character.  */
1271 	  if (strchr (tc_comment_chars, ch) == NULL
1272 	      && (! scrub_m68k_mri
1273 		  || (ch != '!' && ch != '*')))
1274 	    goto de_fault;
1275 	  if (scrub_m68k_mri
1276 	      && (ch == '!' || ch == '*' || ch == '#')
1277 	      && state != 1
1278 	      && state != 10)
1279 	    goto de_fault;
1280 	  /* Fall through.  */
1281 	case LEX_IS_COMMENT_START:
1282 #if defined TC_ARM && defined OBJ_ELF
1283 	  /* On the ARM, `@' is the comment character.
1284 	     Unfortunately this is also a special character in ELF .symver
1285 	     directives (and .type, though we deal with those another way).
1286 	     So we check if this line is such a directive, and treat
1287 	     the character as default if so.  This is a hack.  */
1288 	  if ((symver_state != NULL) && (*symver_state == 0))
1289 	    goto de_fault;
1290 #endif
1291 
1292 #ifdef TC_ARM
1293 	  /* For the ARM, care is needed not to damage occurrences of \@
1294 	     by stripping the @ onwards.  Yuck.  */
1295 	  if ((to > tostart ? to[-1] : last_char) == '\\')
1296 	    /* Do not treat the @ as a start-of-comment.  */
1297 	    goto de_fault;
1298 #endif
1299 
1300 #ifdef WARN_COMMENTS
1301 	  if (!found_comment)
1302 	    found_comment_file = as_where (&found_comment);
1303 #endif
1304 	  do
1305 	    {
1306 	      ch = GET ();
1307 	    }
1308 	  while (ch != EOF && !IS_NEWLINE (ch));
1309 	  if (ch == EOF)
1310 	    as_warn (_("end of file in comment; newline inserted"));
1311 	  state = 0;
1312 	  PUT ('\n');
1313 	  break;
1314 
1315 #ifdef H_TICK_HEX
1316 	case LEX_IS_H:
1317 	  /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1318 	     the H' with 0x to make them gas-style hex characters.  */
1319 	  if (enable_h_tick_hex)
1320 	    {
1321 	      char quot;
1322 
1323 	      quot = GET ();
1324 	      if (quot == '\'')
1325 		{
1326 		  UNGET ('x');
1327 		  ch = '0';
1328 		}
1329 	      else
1330 		UNGET (quot);
1331 	    }
1332 #endif
1333 	  /* Fall through.  */
1334 
1335 	case LEX_IS_SYMBOL_COMPONENT:
1336 	  if (state == 10)
1337 	    {
1338 	      /* This is a symbol character following another symbol
1339 		 character, with whitespace in between.  We skipped
1340 		 the whitespace earlier, so output it now.  */
1341 	      UNGET (ch);
1342 	      state = 3;
1343 	      PUT (' ');
1344 	      break;
1345 	    }
1346 
1347 #ifdef TC_Z80
1348 	  /* "af'" is a symbol containing '\''.  */
1349 	  if (state == 3 && (ch == 'a' || ch == 'A'))
1350 	    {
1351 	      state = 16;
1352 	      PUT (ch);
1353 	      ch = GET ();
1354 	      if (ch == 'f' || ch == 'F')
1355 		{
1356 		  state = 17;
1357 		  PUT (ch);
1358 		  break;
1359 		}
1360 	      else
1361 		{
1362 		  state = 9;
1363 		  if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1364 		    {
1365 		      if (ch != EOF)
1366 			UNGET (ch);
1367 		      break;
1368 		    }
1369 		}
1370 	    }
1371 #endif
1372 	  if (state == 3)
1373 	    state = 9;
1374 
1375 	  /* This is a common case.  Quickly copy CH and all the
1376 	     following symbol component or normal characters.  */
1377 	  if (to + 1 < toend
1378 	      && mri_state == NULL
1379 #if defined TC_ARM && defined OBJ_ELF
1380 	      && symver_state == NULL
1381 #endif
1382 	      )
1383 	    {
1384 	      char *s;
1385 	      ptrdiff_t len;
1386 
1387 	      for (s = from; s < fromend; s++)
1388 		{
1389 		  int type;
1390 
1391 		  ch2 = *(unsigned char *) s;
1392 		  type = lex[ch2];
1393 		  if (type != 0
1394 		      && type != LEX_IS_SYMBOL_COMPONENT)
1395 		    break;
1396 		}
1397 
1398 	      if (s > from)
1399 		/* Handle the last character normally, for
1400 		   simplicity.  */
1401 		--s;
1402 
1403 	      len = s - from;
1404 
1405 	      if (len > (toend - to) - 1)
1406 		len = (toend - to) - 1;
1407 
1408 	      if (len > 0)
1409 		{
1410 		  PUT (ch);
1411 		  memcpy (to, from, len);
1412 		  to += len;
1413 		  from += len;
1414 		  if (to >= toend)
1415 		    goto tofull;
1416 		  ch = GET ();
1417 		}
1418 	    }
1419 
1420 	  /* Fall through.  */
1421 	default:
1422 	de_fault:
1423 	  /* Some relatively `normal' character.  */
1424 	  if (state == 0)
1425 	    {
1426 	      state = 11;	/* Now seeing label definition.  */
1427 	    }
1428 	  else if (state == 1)
1429 	    {
1430 	      state = 2;	/* Ditto.  */
1431 	    }
1432 	  else if (state == 9)
1433 	    {
1434 	      if (!IS_SYMBOL_COMPONENT (ch))
1435 		state = 3;
1436 	    }
1437 	  else if (state == 10)
1438 	    {
1439 	      if (ch == '\\')
1440 		{
1441 		  /* Special handling for backslash: a backslash may
1442 		     be the beginning of a formal parameter (of a
1443 		     macro) following another symbol character, with
1444 		     whitespace in between.  If that is the case, we
1445 		     output a space before the parameter.  Strictly
1446 		     speaking, correct handling depends upon what the
1447 		     macro parameter expands into; if the parameter
1448 		     expands into something which does not start with
1449 		     an operand character, then we don't want to keep
1450 		     the space.  We don't have enough information to
1451 		     make the right choice, so here we are making the
1452 		     choice which is more likely to be correct.  */
1453 		  if (to + 1 >= toend)
1454 		    {
1455 		      /* If we're near the end of the buffer, save the
1456 		         character for the next time round.  Otherwise
1457 		         we'll lose our state.  */
1458 		      UNGET (ch);
1459 		      goto tofull;
1460 		    }
1461 		  *to++ = ' ';
1462 		}
1463 
1464 	      state = 3;
1465 	    }
1466 	  PUT (ch);
1467 	  break;
1468 	}
1469     }
1470 
1471   /*NOTREACHED*/
1472 
1473  fromeof:
1474   /* We have reached the end of the input.  */
1475 #ifdef TC_ARM
1476   if (to > tostart)
1477     last_char = to[-1];
1478 #endif
1479   return to - tostart;
1480 
1481  tofull:
1482   /* The output buffer is full.  Save any input we have not yet
1483      processed.  */
1484   if (fromend > from)
1485     {
1486       saved_input = from;
1487       saved_input_len = fromend - from;
1488     }
1489   else
1490     saved_input = NULL;
1491 
1492 #ifdef TC_ARM
1493   if (to > tostart)
1494     last_char = to[-1];
1495 #endif
1496   return to - tostart;
1497 }
1498