1 /*
2 * Transcoder.cpp -
3 *
4 * Copyright (c) 2008 Kokosabu(MIURA Yasuyuki) <kokosabu@gmail.com>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * $Id$
30 */
31
32 #include <stdio.h>
33 #include "scheme.h"
34 #include "Object.h"
35 #include "Object-inl.h"
36 #include "SString.h"
37 #include "Symbol.h"
38 #include "BinaryOutputPort.h"
39 #include "Transcoder.h"
40 #include "UTF8Codec.h"
41
42 using namespace scheme;
43
Transcoder(Codec * codec)44 Transcoder::Transcoder(Codec* codec) :
45 beginningOfInput_(true),
46 codec_(codec),
47 eolStyle_(EolStyle(LF)), // LF means no convert.
48 errorHandlingMode_(ErrorHandlingMode(REPLACE_ERROR)),
49 lineNo_(1)
50 {
51 }
52
Transcoder(Codec * codec,EolStyle eolStyle)53 Transcoder::Transcoder(Codec* codec, EolStyle eolStyle) :
54 beginningOfInput_(true),
55 codec_(codec),
56 eolStyle_(eolStyle),
57 errorHandlingMode_(ErrorHandlingMode(REPLACE_ERROR)),
58 lineNo_(1)
59 {
60 }
61
Transcoder(Codec * codec,EolStyle eolStyle,enum ErrorHandlingMode errorHandlingMode)62 Transcoder::Transcoder(Codec* codec, EolStyle eolStyle, enum ErrorHandlingMode errorHandlingMode) :
63 beginningOfInput_(true),
64 codec_(codec),
65 eolStyle_(eolStyle),
66 errorHandlingMode_(errorHandlingMode),
67 lineNo_(1)
68 {
69 }
70
getLineNo() const71 int Transcoder::getLineNo() const
72 {
73 return lineNo_;
74 }
75
eolStyle()76 enum EolStyle Transcoder::eolStyle()
77 {
78 return eolStyle_;
79 }
80
errorHandlingMode()81 enum ErrorHandlingMode Transcoder::errorHandlingMode()
82 {
83 return errorHandlingMode_;
84 }
85
eolStyleSymbol()86 Object Transcoder::eolStyleSymbol()
87 {
88 return eolStyleToSymbol(eolStyle_);
89 }
90
errorHandlingModeSymbol()91 Object Transcoder::errorHandlingModeSymbol()
92 {
93 return errorHandlingModeToSymbol(errorHandlingMode_);
94 }
95
nativeEolStyle()96 enum EolStyle Transcoder::nativeEolStyle()
97 {
98 #if LINE_FEED_CODE_LF
99 return EolStyle(LF);
100 #elif LINE_FEED_CODE_CRLF
101 return EolStyle(CRLF);
102 #elif LINE_FEED_CODE_CR
103 return EolStyle::CR;
104 #else
105 MOSH_FATAL("not found platform native eol style\n");
106 #endif
107 }
108
eolStyleToSymbol(const enum EolStyle eolstyle)109 Object Transcoder::eolStyleToSymbol(const enum EolStyle eolstyle)
110 {
111 switch (eolstyle) {
112 case EolStyle(LF):
113 return Symbol::LF;
114 case EolStyle(CR):
115 return Symbol::CR;
116 case EolStyle(CRLF):
117 return Symbol::CRLF;
118 case EolStyle(NEL):
119 return Symbol::NEL;
120 case EolStyle(CRNEL):
121 return Symbol::CRNEL;
122 case EolStyle(LS):
123 return Symbol::LS;
124 default:
125 return Symbol::NONE;
126 }
127 }
128
errorHandlingModeToSymbol(const enum ErrorHandlingMode errorHandlingMode)129 Object Transcoder::errorHandlingModeToSymbol(const enum ErrorHandlingMode errorHandlingMode)
130 {
131 switch (errorHandlingMode) {
132 case ErrorHandlingMode(IGNORE_ERROR):
133 return Symbol::IGNORE_ERROR;
134 case ErrorHandlingMode(RAISE_ERROR):
135 return Symbol::RAISE_ERROR;
136 case ErrorHandlingMode(REPLACE_ERROR):
137 return Symbol::REPLACE_ERROR;
138 default:
139 MOSH_FATAL("not found errorHandlingMode\n");
140 }
141 return Object::Undef;
142 }
143
putString(BinaryOutputPort * port,const ucs4string & s)144 void Transcoder::putString(BinaryOutputPort* port, const ucs4string& s)
145 {
146 for (ucs4string::const_iterator it = s.begin(); it != s.end(); ++it) {
147 putChar(port, *it);
148 }
149 }
150
putChar(BinaryOutputPort * port,ucs4char c)151 void Transcoder::putChar(BinaryOutputPort* port, ucs4char c)
152 {
153 if (!buffer_.empty()) {
154 // remove 1 character
155 buffer_.erase(0, 1);
156 }
157 if (eolStyle_ == EolStyle(E_NONE)) {
158 codec_->putChar(port, c, errorHandlingMode_);
159 return;
160 } else if (c == EolStyle(LF)) {
161 switch (eolStyle_) {
162 case EolStyle(LF):
163 case EolStyle(CR):
164 case EolStyle(NEL):
165 case EolStyle(LS):
166 {
167 codec_->putChar(port, eolStyle_, errorHandlingMode_);
168 break;
169 }
170 case EolStyle(E_NONE):
171 {
172 codec_->putChar(port, c, errorHandlingMode_);
173 break;
174 }
175 case EolStyle(CRLF):
176 {
177 codec_->putChar(port, EolStyle(CR), errorHandlingMode_);
178 codec_->putChar(port, EolStyle(LF), errorHandlingMode_);
179 break;
180 }
181 case EolStyle(CRNEL):
182 {
183 codec_->putChar(port, EolStyle(CR), errorHandlingMode_);
184 codec_->putChar(port, EolStyle(NEL), errorHandlingMode_);
185 break;
186 }
187 }
188 } else {
189 codec_->putChar(port, c, errorHandlingMode_);
190 }
191 }
192
193 // int Transcoder::putChar(uint8_t* buf, ucs4char c)
194 // {
195 // return codec_->out(buf, c, errorHandlingMode_);
196 // }
197
unGetChar(ucs4char c)198 void Transcoder::unGetChar(ucs4char c)
199 {
200 if (EOF == c) {
201 return;
202 }
203 buffer_ += c;
204 if (c == EolStyle(LF)) {
205 lineNo_--;
206 }
207
208 }
209
getCharInternal(BinaryInputPort * port)210 ucs4char Transcoder::getCharInternal(BinaryInputPort* port)
211 {
212 // In the beginning of input, we have to check the BOM.
213 if (beginningOfInput_) {
214 beginningOfInput_ = false;
215 const bool checkBOM = true;
216 return codec_->getChar(port, errorHandlingMode_, checkBOM);
217 }
218 ucs4char c;
219 if (buffer_.empty()) {
220 c= codec_->getChar(port, errorHandlingMode_);
221 } else {
222 c = buffer_[buffer_.size() - 1];
223 buffer_.erase(buffer_.size() - 1, 1);
224 }
225 return c;
226 }
227
getChar(BinaryInputPort * port)228 ucs4char Transcoder::getChar(BinaryInputPort* port)
229 {
230 const ucs4char c = getCharInternal(port);
231 if (eolStyle_ == EolStyle(E_NONE)) {
232 if (c == EolStyle(LF)) {
233 lineNo_++;
234 }
235 return c;
236 }
237 switch(c) {
238 case EolStyle(LF):
239 case EolStyle(NEL):
240 case EolStyle(LS):
241 {
242 lineNo_++;
243 return EolStyle(LF);
244 }
245 case EolStyle(CR):
246 {
247 const ucs4char c2 = getCharInternal(port);
248 lineNo_++;
249 switch(c2) {
250 case EolStyle(LF):
251 case EolStyle(NEL):
252 return EolStyle(LF);
253 default:
254 unGetChar(c2);
255 return EolStyle(LF);
256 }
257 }
258 default:
259 return c;
260 }
261 }
262
getString(BinaryInputPort * port)263 ucs4string Transcoder::getString(BinaryInputPort* port)
264 {
265 ucs4string ret;
266 for (ucs4char c = getChar(port); c != EOF; c = getChar(port)) {
267 ret += c;
268 }
269 return ret;
270 }
271
validateEolStyle(Object eolStyle,EolStyle & result)272 bool Transcoder::validateEolStyle(Object eolStyle, EolStyle& result)
273 {
274 MOSH_ASSERT(eolStyle.isSymbol());
275 if (eolStyle == Symbol::LF) {
276 result = EolStyle(LF);
277 } else if (eolStyle == Symbol::CR) {
278 result = EolStyle(CR);
279 } else if (eolStyle == Symbol::CRLF) {
280 result = EolStyle(CRLF);
281 } else if (eolStyle == Symbol::NEL) {
282 result = EolStyle(NEL);
283 } else if (eolStyle == Symbol::CRNEL) {
284 result = EolStyle(CRNEL);
285 } else if (eolStyle == Symbol::LS) {
286 result = EolStyle(LS);
287 } else if (eolStyle == Symbol::NONE) {
288 result = EolStyle(E_NONE);
289 } else {
290 return false;
291 }
292 return true;
293 }
294
validateErrorHandlingMode(Object symbol,enum ErrorHandlingMode & result)295 bool Transcoder::validateErrorHandlingMode(Object symbol, enum ErrorHandlingMode& result)
296 {
297 MOSH_ASSERT(symbol.isSymbol());
298 if (symbol == Symbol::IGNORE_ERROR) {
299 result = ErrorHandlingMode(IGNORE_ERROR);
300 } else if (symbol == Symbol::RAISE_ERROR) {
301 result = ErrorHandlingMode(RAISE_ERROR);
302 } else if (symbol == Symbol::REPLACE_ERROR) {
303 result = ErrorHandlingMode(REPLACE_ERROR);
304 } else {
305 return false;
306 }
307 return true;
308 }
309
310