1# frozen_string_literal: false
2#
3# kconv.rb - Kanji Converter.
4#
5# $Id: kconv.rb 53143 2015-12-16 05:31:54Z naruse $
6#
7# ----
8#
# kconv.rb implements the Kconv module, a Kanji converter.  Additionally,
# some methods are added to the String class to allow easy conversion.
11#
12
13require 'nkf'
14
15#
16# Kanji Converter for Ruby.
17#
module Kconv
  #
  # Encoding constants.  Each one simply re-exports the NKF constant of
  # the same name.
  #

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  # Every method defined below is a module function: it can be called as
  # Kconv.name(...) and is also available as a private instance method to
  # anything that includes Kconv.
  module_function

  #
  # Generic conversion
  #

  # call-seq:
  #    Kconv.kconv(str, to_enc, from_enc=nil)   => string
  #
  # Converts <code>str</code> to <code>to_enc</code> by calling NKF.nkf.
  # <code>to_enc</code> and <code>from_enc</code> may be Kconv constants or
  # Encoding objects; each is turned into text with +to_s+ and handed to
  # NKF as an <code>--oc=</code> / <code>--ic=</code> option.  An argument
  # that is +nil+ (or +false+) contributes no option at all.
  def kconv(str, to_enc, from_enc=nil)
    input_opt  = from_enc ? " --ic=#{from_enc}" : ''
    output_opt = to_enc ? " --oc=#{to_enc}" : ''

    ::NKF.nkf(input_opt + output_opt, str)
  end

  #
  # Conversion to a fixed target encoding
  #

  # call-seq:
  #    Kconv.tojis(str)   => string
  #
  # Returns <code>str</code> converted to ISO-2022-JP.
  # No input encoding is passed to Kconv.kconv.
  def tojis(str)
    kconv(str, JIS)
  end

  # call-seq:
  #    Kconv.toeuc(str)   => string
  #
  # Returns <code>str</code> converted to EUC-JP.
  # No input encoding is passed to Kconv.kconv.
  def toeuc(str)
    kconv(str, EUC)
  end

  # call-seq:
  #    Kconv.tosjis(str)   => string
  #
  # Returns <code>str</code> converted to Shift_JIS.
  # No input encoding is passed to Kconv.kconv.
  def tosjis(str)
    kconv(str, SJIS)
  end

  # call-seq:
  #    Kconv.toutf8(str)   => string
  #
  # Returns <code>str</code> converted to UTF-8.
  # No input encoding is passed to Kconv.kconv.
  def toutf8(str)
    kconv(str, UTF8)
  end

  # call-seq:
  #    Kconv.toutf16(str)   => string
  #
  # Returns <code>str</code> converted to UTF-16.
  # No input encoding is passed to Kconv.kconv.
  def toutf16(str)
    kconv(str, UTF16)
  end

  # call-seq:
  #    Kconv.toutf32(str)   => string
  #
  # Returns <code>str</code> converted to UTF-32.
  # No input encoding is passed to Kconv.kconv.
  def toutf32(str)
    kconv(str, UTF32)
  end

  # call-seq:
  #    Kconv.tolocale(str)   => string
  #
  # Returns <code>str</code> converted to the encoding named by
  # Encoding.locale_charmap.
  def tolocale(str)
    kconv(str, Encoding.locale_charmap)
  end

  #
  # Encoding detection
  #

  # call-seq:
  #    Kconv.guess(str)   => encoding
  #
  # Returns whatever NKF.guess reports for <code>str</code>.
  def guess(str)
    ::NKF.guess(str)
  end

  #
  # Encoding predicates
  #

  # call-seq:
  #    Kconv.iseuc(str)   => true or false
  #
  # Returns whether <code>str</code> is a valid byte sequence when it is
  # interpreted as EUC-JP.  The check is made on a copy, so
  # <code>str</code> itself is left untouched.
  #
  # *Note* the return value is a boolean, never a MatchData.
  def iseuc(str)
    candidate = str.dup
    candidate.force_encoding(EUC)
    candidate.valid_encoding?
  end

  # call-seq:
  #    Kconv.issjis(str)   => true or false
  #
  # Returns whether <code>str</code> is a valid byte sequence when it is
  # interpreted as Shift_JIS.  The check is made on a copy, so
  # <code>str</code> itself is left untouched.
  def issjis(str)
    candidate = str.dup
    candidate.force_encoding(SJIS)
    candidate.valid_encoding?
  end

  # call-seq:
  #    Kconv.isjis(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> look like ISO-2022-JP.
  #
  # The whole string must consist of plain text (tab, LF, CR and the
  # bytes 0x20-0x7E) optionally interleaved with escape-sequence sections.
  # A section is any number of runs introduced by <code>ESC ( I</code> or
  # <code>ESC ( J</code> (followed by bytes 0x21-0x7E) or by
  # <code>ESC $ @</code>, <code>ESC $ B</code> or <code>ESC $ ( D</code>
  # (followed by pairs of bytes 0x21-0x7E), and every section must be
  # terminated by <code>ESC ( B</code>.  A string with no escape sequences
  # at all, including the empty string, is therefore accepted.
  #
  # Matching is done against a binary-tagged copy of <code>str</code>.
  def isjis(str)
    raw = str.dup.force_encoding('BINARY')
    position =
    /\A [\t\n\r\x20-\x7E]*
      (?:
        (?:\x1b \x28 I      [\x21-\x7E]*
          |\x1b \x28 J      [\x21-\x7E]*
          |\x1b \x24 @      (?:[\x21-\x7E]{2})*
          |\x1b \x24 B      (?:[\x21-\x7E]{2})*
          |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
        )*
        \x1b \x28 B [\t\n\r\x20-\x7E]*
      )*
     \z/nox =~ raw
    # =~ yields an index or nil; callers are promised a real boolean.
    !position.nil?
  end

  # call-seq:
  #    Kconv.isutf8(str)   => true or false
  #
  # Returns whether <code>str</code> is a valid byte sequence when it is
  # interpreted as UTF-8.  The check is made on a copy, so
  # <code>str</code> itself is left untouched.
  def isutf8(str)
    candidate = str.dup
    candidate.force_encoding(UTF8)
    candidate.valid_encoding?
  end
end
198
class String
  # call-seq:
  #    String#kconv(to_enc, from_enc=nil)   => string
  #
  # Convert <code>self</code> to <code>to_enc</code> via Kconv.kconv.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
  #
  # When <code>from_enc</code> is omitted, the receiver's own encoding is
  # used as the source encoding, unless the receiver is tagged as binary,
  # in which case no source encoding is passed on.
  def kconv(to_enc, from_enc=nil)
    # A binary tag carries no information about the real encoding, so it
    # must not be forwarded as the input encoding.
    from_enc ||= encoding unless encoding == Encoding::BINARY
    Kconv.kconv(self, to_enc, from_enc)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   => string
  #
  # Convert <code>self</code> to ISO-2022-JP (delegates to Kconv.tojis).
  def tojis
    Kconv.tojis(self)
  end

  # call-seq:
  #    String#toeuc   => string
  #
  # Convert <code>self</code> to EUC-JP (delegates to Kconv.toeuc).
  def toeuc
    Kconv.toeuc(self)
  end

  # call-seq:
  #    String#tosjis   => string
  #
  # Convert <code>self</code> to Shift_JIS (delegates to Kconv.tosjis).
  def tosjis
    Kconv.tosjis(self)
  end

  # call-seq:
  #    String#toutf8   => string
  #
  # Convert <code>self</code> to UTF-8 (delegates to Kconv.toutf8).
  def toutf8
    Kconv.toutf8(self)
  end

  # call-seq:
  #    String#toutf16   => string
  #
  # Convert <code>self</code> to UTF-16 (delegates to Kconv.toutf16).
  def toutf16
    Kconv.toutf16(self)
  end

  # call-seq:
  #    String#toutf32   => string
  #
  # Convert <code>self</code> to UTF-32 (delegates to Kconv.toutf32).
  def toutf32
    Kconv.toutf32(self)
  end

  # call-seq:
  #    String#tolocale   => string
  #
  # Convert <code>self</code> to locale encoding (delegates to Kconv.tolocale).
  def tolocale
    Kconv.tolocale(self)
  end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   => true or false
  #
  # Returns whether <code>self</code>'s encoding is EUC-JP or not,
  # as decided by Kconv.iseuc.
  def iseuc
    Kconv.iseuc(self)
  end

  # call-seq:
  #    String#issjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is Shift_JIS or not,
  # as decided by Kconv.issjis.
  def issjis
    Kconv.issjis(self)
  end

  # call-seq:
  #    String#isjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is ISO-2022-JP or not,
  # as decided by Kconv.isjis.
  def isjis
    Kconv.isjis(self)
  end

  # call-seq:
  #    String#isutf8   => true or false
  #
  # Returns whether <code>self</code>'s encoding is UTF-8 or not,
  # as decided by Kconv.isutf8.
  def isutf8
    Kconv.isutf8(self)
  end
end
284