1 /******************************************************************************
2 *
3 * unicodertf.cpp - SWFilter descendant to convert UTF-8 to RTF tags
4 *
5 * $Id: unicodertf.cpp 3081 2014-03-05 19:52:08Z chrislit $
6 *
7 * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
8 * CrossWire Bible Society
9 * P. O. Box 2528
10 * Tempe, AZ 85280-2528
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation version 2.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 */
22
23 #include <stdio.h>
24 #include <unicodertf.h>
25 #include <swbuf.h>
26
27 SWORD_NAMESPACE_START
28
UnicodeRTF()29 UnicodeRTF::UnicodeRTF() {
30 }
31
32
processText(SWBuf & text,const SWKey * key,const SWModule * module)33 char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
34 {
35 const unsigned char *from;
36 char digit[10];
37 unsigned long ch;
38 signed short utf16;
39 unsigned char from2[7];
40
41 SWBuf orig = text;
42
43 from = (const unsigned char *)orig.c_str();
44
45 // -------------------------------
46 for (text = ""; *from; from++) {
47 ch = 0;
48 //case: ANSI
49 if ((*from & 128) != 128) {
50 text += *from;
51 continue;
52 }
53 //case: Invalid UTF-8 (illegal continuing byte in initial position)
54 if ((*from & 128) && ((*from & 64) != 64)) {
55 continue;
56 }
57 //case: 2+ byte codepoint
58 from2[0] = *from;
59 from2[0] <<= 1;
60 int subsequent;
61 for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
62 from2[0] <<= 1;
63 from2[subsequent] = from[subsequent];
64 from2[subsequent] &= 63;
65 ch <<= 6;
66 ch |= from2[subsequent];
67 }
68 subsequent--;
69 from2[0] <<= 1;
70 char significantFirstBits = 8 - (2+subsequent);
71
72 ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
73 from += subsequent;
74 if (ch < 0x10000) {
75 utf16 = (signed short)ch;
76 text += '\\';
77 text += 'u';
78 sprintf(digit, "%d", utf16);
79 text += digit;
80 text += '?';
81 }
82 else {
83 utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
84 text += '\\';
85 text += 'u';
86 sprintf(digit, "%d", utf16);
87 text += digit;
88 text += '?';
89 utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
90 text += '\\';
91 text += 'u';
92 sprintf(digit, "%d", utf16);
93 text += digit;
94 text += '?';
95 }
96 }
97
98 return 0;
99 }
100
101 SWORD_NAMESPACE_END
102