1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2000-2015, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   file name:  ucnvlat1.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2000feb07
14 *   created by: Markus W. Scherer
15 */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_CONVERSION
20 
21 #include "unicode/ucnv.h"
22 #include "unicode/uset.h"
23 #include "unicode/utf8.h"
24 #include "ucnv_bld.h"
25 #include "ucnv_cnv.h"
26 #include "ustr_imp.h"
27 
28 /* control optimizations according to the platform */
29 #define LATIN1_UNROLL_FROM_UNICODE 1
30 
31 /* ISO 8859-1 --------------------------------------------------------------- */
32 
33 /* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
34 U_CDECL_BEGIN
35 static void U_CALLCONV
_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs * pArgs,UErrorCode * pErrorCode)36 _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
37                             UErrorCode *pErrorCode) {
38     const uint8_t *source;
39     UChar *target;
40     int32_t targetCapacity, length;
41     int32_t *offsets;
42 
43     int32_t sourceIndex;
44 
45     /* set up the local pointers */
46     source=(const uint8_t *)pArgs->source;
47     target=pArgs->target;
48     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
49     offsets=pArgs->offsets;
50 
51     sourceIndex=0;
52 
53     /*
54      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
55      * for the minimum of the sourceLength and targetCapacity
56      */
57     length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
58     if(length<=targetCapacity) {
59         targetCapacity=length;
60     } else {
61         /* target will be full */
62         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
63         length=targetCapacity;
64     }
65 
66     if(targetCapacity>=8) {
67         /* This loop is unrolled for speed and improved pipelining. */
68         int32_t count, loops;
69 
70         loops=count=targetCapacity>>3;
71         length=targetCapacity&=0x7;
72         do {
73             target[0]=source[0];
74             target[1]=source[1];
75             target[2]=source[2];
76             target[3]=source[3];
77             target[4]=source[4];
78             target[5]=source[5];
79             target[6]=source[6];
80             target[7]=source[7];
81             target+=8;
82             source+=8;
83         } while(--count>0);
84 
85         if(offsets!=NULL) {
86             do {
87                 offsets[0]=sourceIndex++;
88                 offsets[1]=sourceIndex++;
89                 offsets[2]=sourceIndex++;
90                 offsets[3]=sourceIndex++;
91                 offsets[4]=sourceIndex++;
92                 offsets[5]=sourceIndex++;
93                 offsets[6]=sourceIndex++;
94                 offsets[7]=sourceIndex++;
95                 offsets+=8;
96             } while(--loops>0);
97         }
98     }
99 
100     /* conversion loop */
101     while(targetCapacity>0) {
102         *target++=*source++;
103         --targetCapacity;
104     }
105 
106     /* write back the updated pointers */
107     pArgs->source=(const char *)source;
108     pArgs->target=target;
109 
110     /* set offsets */
111     if(offsets!=NULL) {
112         while(length>0) {
113             *offsets++=sourceIndex++;
114             --length;
115         }
116         pArgs->offsets=offsets;
117     }
118 }
119 
120 /* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
121 static UChar32 U_CALLCONV
_Latin1GetNextUChar(UConverterToUnicodeArgs * pArgs,UErrorCode * pErrorCode)122 _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
123                     UErrorCode *pErrorCode) {
124     const uint8_t *source=(const uint8_t *)pArgs->source;
125     if(source<(const uint8_t *)pArgs->sourceLimit) {
126         pArgs->source=(const char *)(source+1);
127         return *source;
128     }
129 
130     /* no output because of empty input */
131     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
132     return 0xffff;
133 }
134 
135 /* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
136 static void U_CALLCONV
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs * pArgs,UErrorCode * pErrorCode)137 _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
138                               UErrorCode *pErrorCode) {
139     UConverter *cnv;
140     const UChar *source, *sourceLimit;
141     uint8_t *target, *oldTarget;
142     int32_t targetCapacity, length;
143     int32_t *offsets;
144 
145     UChar32 cp;
146     UChar c, max;
147 
148     int32_t sourceIndex;
149 
150     /* set up the local pointers */
151     cnv=pArgs->converter;
152     source=pArgs->source;
153     sourceLimit=pArgs->sourceLimit;
154     target=oldTarget=(uint8_t *)pArgs->target;
155     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
156     offsets=pArgs->offsets;
157 
158     if(cnv->sharedData==&_Latin1Data) {
159         max=0xff; /* Latin-1 */
160     } else {
161         max=0x7f; /* US-ASCII */
162     }
163 
164     /* get the converter state from UConverter */
165     cp=cnv->fromUChar32;
166 
167     /* sourceIndex=-1 if the current character began in the previous buffer */
168     sourceIndex= cp==0 ? 0 : -1;
169 
170     /*
171      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
172      * for the minimum of the sourceLength and targetCapacity
173      */
174     length=(int32_t)(sourceLimit-source);
175     if(length<targetCapacity) {
176         targetCapacity=length;
177     }
178 
179     /* conversion loop */
180     if(cp!=0 && targetCapacity>0) {
181         goto getTrail;
182     }
183 
184 #if LATIN1_UNROLL_FROM_UNICODE
185     /* unroll the loop with the most common case */
186     if(targetCapacity>=16) {
187         int32_t count, loops;
188         UChar u, oredChars;
189 
190         loops=count=targetCapacity>>4;
191         do {
192             oredChars=u=*source++;
193             *target++=(uint8_t)u;
194             oredChars|=u=*source++;
195             *target++=(uint8_t)u;
196             oredChars|=u=*source++;
197             *target++=(uint8_t)u;
198             oredChars|=u=*source++;
199             *target++=(uint8_t)u;
200             oredChars|=u=*source++;
201             *target++=(uint8_t)u;
202             oredChars|=u=*source++;
203             *target++=(uint8_t)u;
204             oredChars|=u=*source++;
205             *target++=(uint8_t)u;
206             oredChars|=u=*source++;
207             *target++=(uint8_t)u;
208             oredChars|=u=*source++;
209             *target++=(uint8_t)u;
210             oredChars|=u=*source++;
211             *target++=(uint8_t)u;
212             oredChars|=u=*source++;
213             *target++=(uint8_t)u;
214             oredChars|=u=*source++;
215             *target++=(uint8_t)u;
216             oredChars|=u=*source++;
217             *target++=(uint8_t)u;
218             oredChars|=u=*source++;
219             *target++=(uint8_t)u;
220             oredChars|=u=*source++;
221             *target++=(uint8_t)u;
222             oredChars|=u=*source++;
223             *target++=(uint8_t)u;
224 
225             /* were all 16 entries really valid? */
226             if(oredChars>max) {
227                 /* no, return to the first of these 16 */
228                 source-=16;
229                 target-=16;
230                 break;
231             }
232         } while(--count>0);
233         count=loops-count;
234         targetCapacity-=16*count;
235 
236         if(offsets!=NULL) {
237             oldTarget+=16*count;
238             while(count>0) {
239                 *offsets++=sourceIndex++;
240                 *offsets++=sourceIndex++;
241                 *offsets++=sourceIndex++;
242                 *offsets++=sourceIndex++;
243                 *offsets++=sourceIndex++;
244                 *offsets++=sourceIndex++;
245                 *offsets++=sourceIndex++;
246                 *offsets++=sourceIndex++;
247                 *offsets++=sourceIndex++;
248                 *offsets++=sourceIndex++;
249                 *offsets++=sourceIndex++;
250                 *offsets++=sourceIndex++;
251                 *offsets++=sourceIndex++;
252                 *offsets++=sourceIndex++;
253                 *offsets++=sourceIndex++;
254                 *offsets++=sourceIndex++;
255                 --count;
256             }
257         }
258     }
259 #endif
260 
261     /* conversion loop */
262     c=0;
263     while(targetCapacity>0 && (c=*source++)<=max) {
264         /* convert the Unicode code point */
265         *target++=(uint8_t)c;
266         --targetCapacity;
267     }
268 
269     if(c>max) {
270         cp=c;
271         if(!U_IS_SURROGATE(cp)) {
272             /* callback(unassigned) */
273         } else if(U_IS_SURROGATE_LEAD(cp)) {
274 getTrail:
275             if(source<sourceLimit) {
276                 /* test the following code unit */
277                 UChar trail=*source;
278                 if(U16_IS_TRAIL(trail)) {
279                     ++source;
280                     cp=U16_GET_SUPPLEMENTARY(cp, trail);
281                     /* this codepage does not map supplementary code points */
282                     /* callback(unassigned) */
283                 } else {
284                     /* this is an unmatched lead code unit (1st surrogate) */
285                     /* callback(illegal) */
286                 }
287             } else {
288                 /* no more input */
289                 cnv->fromUChar32=cp;
290                 goto noMoreInput;
291             }
292         } else {
293             /* this is an unmatched trail code unit (2nd surrogate) */
294             /* callback(illegal) */
295         }
296 
297         *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
298         cnv->fromUChar32=cp;
299     }
300 noMoreInput:
301 
302     /* set offsets since the start */
303     if(offsets!=NULL) {
304         size_t count=target-oldTarget;
305         while(count>0) {
306             *offsets++=sourceIndex++;
307             --count;
308         }
309     }
310 
311     if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
312         /* target is full */
313         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
314     }
315 
316     /* write back the updated pointers */
317     pArgs->source=source;
318     pArgs->target=(char *)target;
319     pArgs->offsets=offsets;
320 }
321 
322 /* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
323 static void U_CALLCONV
ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs * pFromUArgs,UConverterToUnicodeArgs * pToUArgs,UErrorCode * pErrorCode)324 ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
325                     UConverterToUnicodeArgs *pToUArgs,
326                     UErrorCode *pErrorCode) {
327     UConverter *utf8;
328     const uint8_t *source, *sourceLimit;
329     uint8_t *target;
330     int32_t targetCapacity;
331 
332     UChar32 c;
333     uint8_t b, t1;
334 
335     /* set up the local pointers */
336     utf8=pToUArgs->converter;
337     source=(uint8_t *)pToUArgs->source;
338     sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
339     target=(uint8_t *)pFromUArgs->target;
340     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
341 
342     /* get the converter state from the UTF-8 UConverter */
343     if (utf8->toULength > 0) {
344         c=(UChar32)utf8->toUnicodeStatus;
345     } else {
346         c = 0;
347     }
348     if(c!=0 && source<sourceLimit) {
349         if(targetCapacity==0) {
350             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
351             return;
352         } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
353             ++source;
354             *target++=(uint8_t)(((c&3)<<6)|t1);
355             --targetCapacity;
356 
357             utf8->toUnicodeStatus=0;
358             utf8->toULength=0;
359         } else {
360             /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
361             *pErrorCode=U_USING_DEFAULT_WARNING;
362             return;
363         }
364     }
365 
366     /*
367      * Make sure that the last byte sequence before sourceLimit is complete
368      * or runs into a lead byte.
369      * In the conversion loop compare source with sourceLimit only once
370      * per multi-byte character.
371      * For Latin-1, adjust sourceLimit only for 1 trail byte because
372      * the conversion loop handles at most 2-byte sequences.
373      */
374     if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
375         --sourceLimit;
376     }
377 
378     /* conversion loop */
379     while(source<sourceLimit) {
380         if(targetCapacity>0) {
381             b=*source++;
382             if(U8_IS_SINGLE(b)) {
383                 /* convert ASCII */
384                 *target++=(uint8_t)b;
385                 --targetCapacity;
386             } else if( /* handle U+0080..U+00FF inline */
387                        b>=0xc2 && b<=0xc3 &&
388                        (t1=(uint8_t)(*source-0x80)) <= 0x3f
389             ) {
390                 ++source;
391                 *target++=(uint8_t)(((b&3)<<6)|t1);
392                 --targetCapacity;
393             } else {
394                 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
395                 pToUArgs->source=(char *)(source-1);
396                 pFromUArgs->target=(char *)target;
397                 *pErrorCode=U_USING_DEFAULT_WARNING;
398                 return;
399             }
400         } else {
401             /* target is full */
402             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
403             break;
404         }
405     }
406 
407     /*
408      * The sourceLimit may have been adjusted before the conversion loop
409      * to stop before a truncated sequence.
410      * If so, then collect the truncated sequence now.
411      * For Latin-1, there is at most exactly one lead byte because of the
412      * smaller sourceLimit adjustment logic.
413      */
414     if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
415         utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
416         utf8->toULength=1;
417         utf8->mode=U8_COUNT_BYTES(b);
418     }
419 
420     /* write back the updated pointers */
421     pToUArgs->source=(char *)source;
422     pFromUArgs->target=(char *)target;
423 }
424 
425 static void U_CALLCONV
_Latin1GetUnicodeSet(const UConverter * cnv,const USetAdder * sa,UConverterUnicodeSet which,UErrorCode * pErrorCode)426 _Latin1GetUnicodeSet(const UConverter *cnv,
427                      const USetAdder *sa,
428                      UConverterUnicodeSet which,
429                      UErrorCode *pErrorCode) {
430     (void)cnv;
431     (void)which;
432     (void)pErrorCode;
433     sa->addRange(sa->set, 0, 0xff);
434 }
435 U_CDECL_END
436 
437 
438 static const UConverterImpl _Latin1Impl={
439     UCNV_LATIN_1,
440 
441     NULL,
442     NULL,
443 
444     NULL,
445     NULL,
446     NULL,
447 
448     _Latin1ToUnicodeWithOffsets,
449     _Latin1ToUnicodeWithOffsets,
450     _Latin1FromUnicodeWithOffsets,
451     _Latin1FromUnicodeWithOffsets,
452     _Latin1GetNextUChar,
453 
454     NULL,
455     NULL,
456     NULL,
457     NULL,
458     _Latin1GetUnicodeSet,
459 
460     NULL,
461     ucnv_Latin1FromUTF8
462 };
463 
464 static const UConverterStaticData _Latin1StaticData={
465     sizeof(UConverterStaticData),
466     "ISO-8859-1",
467     819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
468     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
469     0,
470     0,
471     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
472 };
473 
474 const UConverterSharedData _Latin1Data=
475         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
476 
477 /* US-ASCII ----------------------------------------------------------------- */
478 
479 U_CDECL_BEGIN
480 /* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
481 static void U_CALLCONV
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs * pArgs,UErrorCode * pErrorCode)482 _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
483                            UErrorCode *pErrorCode) {
484     const uint8_t *source, *sourceLimit;
485     UChar *target, *oldTarget;
486     int32_t targetCapacity, length;
487     int32_t *offsets;
488 
489     int32_t sourceIndex;
490 
491     uint8_t c;
492 
493     /* set up the local pointers */
494     source=(const uint8_t *)pArgs->source;
495     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
496     target=oldTarget=pArgs->target;
497     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
498     offsets=pArgs->offsets;
499 
500     /* sourceIndex=-1 if the current character began in the previous buffer */
501     sourceIndex=0;
502 
503     /*
504      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
505      * for the minimum of the sourceLength and targetCapacity
506      */
507     length=(int32_t)(sourceLimit-source);
508     if(length<targetCapacity) {
509         targetCapacity=length;
510     }
511 
512     if(targetCapacity>=8) {
513         /* This loop is unrolled for speed and improved pipelining. */
514         int32_t count, loops;
515         UChar oredChars;
516 
517         loops=count=targetCapacity>>3;
518         do {
519             oredChars=target[0]=source[0];
520             oredChars|=target[1]=source[1];
521             oredChars|=target[2]=source[2];
522             oredChars|=target[3]=source[3];
523             oredChars|=target[4]=source[4];
524             oredChars|=target[5]=source[5];
525             oredChars|=target[6]=source[6];
526             oredChars|=target[7]=source[7];
527 
528             /* were all 16 entries really valid? */
529             if(oredChars>0x7f) {
530                 /* no, return to the first of these 16 */
531                 break;
532             }
533             source+=8;
534             target+=8;
535         } while(--count>0);
536         count=loops-count;
537         targetCapacity-=count*8;
538 
539         if(offsets!=NULL) {
540             oldTarget+=count*8;
541             while(count>0) {
542                 offsets[0]=sourceIndex++;
543                 offsets[1]=sourceIndex++;
544                 offsets[2]=sourceIndex++;
545                 offsets[3]=sourceIndex++;
546                 offsets[4]=sourceIndex++;
547                 offsets[5]=sourceIndex++;
548                 offsets[6]=sourceIndex++;
549                 offsets[7]=sourceIndex++;
550                 offsets+=8;
551                 --count;
552             }
553         }
554     }
555 
556     /* conversion loop */
557     c=0;
558     while(targetCapacity>0 && (c=*source++)<=0x7f) {
559         *target++=c;
560         --targetCapacity;
561     }
562 
563     if(c>0x7f) {
564         /* callback(illegal); copy the current bytes to toUBytes[] */
565         UConverter *cnv=pArgs->converter;
566         cnv->toUBytes[0]=c;
567         cnv->toULength=1;
568         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
569     } else if(source<sourceLimit && target>=pArgs->targetLimit) {
570         /* target is full */
571         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
572     }
573 
574     /* set offsets since the start */
575     if(offsets!=NULL) {
576         size_t count=target-oldTarget;
577         while(count>0) {
578             *offsets++=sourceIndex++;
579             --count;
580         }
581     }
582 
583     /* write back the updated pointers */
584     pArgs->source=(const char *)source;
585     pArgs->target=target;
586     pArgs->offsets=offsets;
587 }
588 
589 /* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
590 static UChar32 U_CALLCONV
_ASCIIGetNextUChar(UConverterToUnicodeArgs * pArgs,UErrorCode * pErrorCode)591 _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
592                    UErrorCode *pErrorCode) {
593     const uint8_t *source;
594     uint8_t b;
595 
596     source=(const uint8_t *)pArgs->source;
597     if(source<(const uint8_t *)pArgs->sourceLimit) {
598         b=*source++;
599         pArgs->source=(const char *)source;
600         if(b<=0x7f) {
601             return b;
602         } else {
603             UConverter *cnv=pArgs->converter;
604             cnv->toUBytes[0]=b;
605             cnv->toULength=1;
606             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
607             return 0xffff;
608         }
609     }
610 
611     /* no output because of empty input */
612     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
613     return 0xffff;
614 }
615 
616 /* "Convert" UTF-8 to US-ASCII: Validate and copy. */
617 static void U_CALLCONV
ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs * pFromUArgs,UConverterToUnicodeArgs * pToUArgs,UErrorCode * pErrorCode)618 ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
619                    UConverterToUnicodeArgs *pToUArgs,
620                    UErrorCode *pErrorCode) {
621     const uint8_t *source, *sourceLimit;
622     uint8_t *target;
623     int32_t targetCapacity, length;
624 
625     uint8_t c;
626 
627     if(pToUArgs->converter->toULength > 0) {
628         /* no handling of partial UTF-8 characters here, fall back to pivoting */
629         *pErrorCode=U_USING_DEFAULT_WARNING;
630         return;
631     }
632 
633     /* set up the local pointers */
634     source=(const uint8_t *)pToUArgs->source;
635     sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
636     target=(uint8_t *)pFromUArgs->target;
637     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
638 
639     /*
640      * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
641      * for the minimum of the sourceLength and targetCapacity
642      */
643     length=(int32_t)(sourceLimit-source);
644     if(length<targetCapacity) {
645         targetCapacity=length;
646     }
647 
648     /* unroll the loop with the most common case */
649     if(targetCapacity>=16) {
650         int32_t count, loops;
651         uint8_t oredChars;
652 
653         loops=count=targetCapacity>>4;
654         do {
655             oredChars=*target++=*source++;
656             oredChars|=*target++=*source++;
657             oredChars|=*target++=*source++;
658             oredChars|=*target++=*source++;
659             oredChars|=*target++=*source++;
660             oredChars|=*target++=*source++;
661             oredChars|=*target++=*source++;
662             oredChars|=*target++=*source++;
663             oredChars|=*target++=*source++;
664             oredChars|=*target++=*source++;
665             oredChars|=*target++=*source++;
666             oredChars|=*target++=*source++;
667             oredChars|=*target++=*source++;
668             oredChars|=*target++=*source++;
669             oredChars|=*target++=*source++;
670             oredChars|=*target++=*source++;
671 
672             /* were all 16 entries really valid? */
673             if(oredChars>0x7f) {
674                 /* no, return to the first of these 16 */
675                 source-=16;
676                 target-=16;
677                 break;
678             }
679         } while(--count>0);
680         count=loops-count;
681         targetCapacity-=16*count;
682     }
683 
684     /* conversion loop */
685     c=0;
686     while(targetCapacity>0 && (c=*source)<=0x7f) {
687         ++source;
688         *target++=c;
689         --targetCapacity;
690     }
691 
692     if(c>0x7f) {
693         /* non-ASCII character, handle in standard converter */
694         *pErrorCode=U_USING_DEFAULT_WARNING;
695     } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
696         /* target is full */
697         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
698     }
699 
700     /* write back the updated pointers */
701     pToUArgs->source=(const char *)source;
702     pFromUArgs->target=(char *)target;
703 }
704 
705 static void U_CALLCONV
_ASCIIGetUnicodeSet(const UConverter * cnv,const USetAdder * sa,UConverterUnicodeSet which,UErrorCode * pErrorCode)706 _ASCIIGetUnicodeSet(const UConverter *cnv,
707                     const USetAdder *sa,
708                     UConverterUnicodeSet which,
709                     UErrorCode *pErrorCode) {
710     (void)cnv;
711     (void)which;
712     (void)pErrorCode;
713     sa->addRange(sa->set, 0, 0x7f);
714 }
715 U_CDECL_END
716 
717 static const UConverterImpl _ASCIIImpl={
718     UCNV_US_ASCII,
719 
720     NULL,
721     NULL,
722 
723     NULL,
724     NULL,
725     NULL,
726 
727     _ASCIIToUnicodeWithOffsets,
728     _ASCIIToUnicodeWithOffsets,
729     _Latin1FromUnicodeWithOffsets,
730     _Latin1FromUnicodeWithOffsets,
731     _ASCIIGetNextUChar,
732 
733     NULL,
734     NULL,
735     NULL,
736     NULL,
737     _ASCIIGetUnicodeSet,
738 
739     NULL,
740     ucnv_ASCIIFromUTF8
741 };
742 
743 static const UConverterStaticData _ASCIIStaticData={
744     sizeof(UConverterStaticData),
745     "US-ASCII",
746     367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
747     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
748     0,
749     0,
750     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
751 };
752 
753 const UConverterSharedData _ASCIIData=
754         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
755 
756 #endif
757