1 /******************************************************************************
2  *
3  *  unicodertf.cpp -	SWFilter descendant to convert UTF-8 to RTF tags
4  *
5  * $Id: unicodertf.cpp 3081 2014-03-05 19:52:08Z chrislit $
6  *
7  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
8  *	CrossWire Bible Society
9  *	P. O. Box 2528
10  *	Tempe, AZ  85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdio.h>
24 #include <unicodertf.h>
25 #include <swbuf.h>
26 
27 SWORD_NAMESPACE_START
28 
UnicodeRTF()29 UnicodeRTF::UnicodeRTF() {
30 }
31 
32 
processText(SWBuf & text,const SWKey * key,const SWModule * module)33 char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
34 {
35 	const unsigned char *from;
36 	char digit[10];
37 	unsigned long ch;
38         signed short utf16;
39 	unsigned char from2[7];
40 
41 	SWBuf orig = text;
42 
43 	from = (const unsigned char *)orig.c_str();
44 
45 	// -------------------------------
46 	for (text = ""; *from; from++) {
47 		ch = 0;
48                 //case: ANSI
49 		if ((*from & 128) != 128) {
50 			text += *from;
51 			continue;
52 		}
53                 //case: Invalid UTF-8 (illegal continuing byte in initial position)
54 		if ((*from & 128) && ((*from & 64) != 64)) {
55 			continue;
56 		}
57                 //case: 2+ byte codepoint
58 		from2[0] = *from;
59 		from2[0] <<= 1;
60 		int subsequent;
61 		for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
62 			from2[0] <<= 1;
63 			from2[subsequent] = from[subsequent];
64 			from2[subsequent] &= 63;
65 			ch <<= 6;
66 			ch |= from2[subsequent];
67 		}
68 		subsequent--;
69 		from2[0] <<= 1;
70 		char significantFirstBits = 8 - (2+subsequent);
71 
72 		ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
73 		from += subsequent;
74                 if (ch < 0x10000) {
75 				utf16 = (signed short)ch;
76 				text += '\\';
77 				text += 'u';
78 				sprintf(digit, "%d", utf16);
79 				text += digit;
80 				text += '?';
81 			 }
82 			else {
83 				utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
84 				text += '\\';
85 				text += 'u';
86 				sprintf(digit, "%d", utf16);
87 				text += digit;
88 				text += '?';
89 				utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
90 				text += '\\';
91 				text += 'u';
92 				sprintf(digit, "%d", utf16);
93 				text += digit;
94 				text += '?';
95 			}
96 	}
97 
98 	return 0;
99 }
100 
101 SWORD_NAMESPACE_END
102