# frozen_string_literal: false
#
# kconv.rb - Kanji Converter.
#
# $Id: kconv.rb 53143 2015-12-16 05:31:54Z naruse $
#
# ----
#
# kconv.rb implements the Kconv module for Kanji conversion. Additionally,
# some methods are added to the String class to allow easy conversion.
#

require 'nkf'

#
# Kanji Converter for Ruby.
#
# Every conversion is delegated to NKF.nkf; the encoding predicates
# (+iseuc+, +issjis+, +isutf8+, +isjis+) are implemented locally and never
# modify the string they are given.
#
module Kconv
  #
  # Public Constants
  #

  # Encoding constants (re-exported from NKF)

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Public Methods
  #

  # call-seq:
  #    Kconv.kconv(str, to_enc, from_enc=nil)
  #
  # Convert <code>str</code> to <code>to_enc</code>.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of
  # Kconv or Encoding objects.
  #
  # An <code>--ic=</code> option is passed to NKF only when
  # <code>from_enc</code> is truthy, and an <code>--oc=</code> option only
  # when <code>to_enc</code> is truthy; otherwise that choice is left to NKF.
  def kconv(str, to_enc, from_enc=nil)
    # Each option keeps its leading space so that the string handed to NKF
    # is exactly ' --ic=<from> --oc=<to>' (or a subset of it).
    opt = ''
    opt += " --ic=#{from_enc}" if from_enc
    opt += " --oc=#{to_enc}" if to_enc

    ::NKF.nkf(opt, str)
  end
  module_function :kconv

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   => string
  #
  # Convert <code>str</code> to ISO-2022-JP
  def tojis(str)
    kconv(str, JIS)
  end
  module_function :tojis

  # call-seq:
  #    Kconv.toeuc(str)   => string
  #
  # Convert <code>str</code> to EUC-JP
  def toeuc(str)
    kconv(str, EUC)
  end
  module_function :toeuc

  # call-seq:
  #    Kconv.tosjis(str)   => string
  #
  # Convert <code>str</code> to Shift_JIS
  def tosjis(str)
    kconv(str, SJIS)
  end
  module_function :tosjis

  # call-seq:
  #    Kconv.toutf8(str)   => string
  #
  # Convert <code>str</code> to UTF-8
  def toutf8(str)
    kconv(str, UTF8)
  end
  module_function :toutf8

  # call-seq:
  #    Kconv.toutf16(str)   => string
  #
  # Convert <code>str</code> to UTF-16
  def toutf16(str)
    kconv(str, UTF16)
  end
  module_function :toutf16

  # call-seq:
  #    Kconv.toutf32(str)   => string
  #
  # Convert <code>str</code> to UTF-32
  def toutf32(str)
    kconv(str, UTF32)
  end
  module_function :toutf32

  # call-seq:
  #    Kconv.tolocale(str)   => string
  #
  # Convert <code>str</code> to the locale encoding
  # (the charmap name reported by Encoding.locale_charmap).
  def tolocale(str)
    kconv(str, Encoding.locale_charmap)
  end
  module_function :tolocale

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   => encoding
  #
  # Guess input encoding by NKF.guess
  def guess(str)
    ::NKF.guess(str)
  end
  module_function :guess

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> form valid EUC-JP.
  #
  # *Note*: the return value is a boolean, not a MatchData.
  def iseuc(str)
    # Work on a copy so the caller's string keeps its own encoding.
    str.dup.force_encoding(EUC).valid_encoding?
  end
  module_function :iseuc

  # call-seq:
  #    Kconv.issjis(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> form valid Shift_JIS.
  def issjis(str)
    str.dup.force_encoding(SJIS).valid_encoding?
  end
  module_function :issjis

  # call-seq:
  #    Kconv.isjis(str)   => true or false
  #
  # Returns whether <code>str</code> matches the ISO-2022-JP byte pattern
  # below: printable ASCII runs, optionally followed by groups of escape
  # sequences (ESC ( I, ESC ( J, ESC $ @, ESC $ B, ESC $ ( D) with their
  # payload, where every group is closed by ESC ( B.
  def isjis(str)
    # The match is done against a binary copy, so the pattern sees raw bytes
    # and the caller's string is left untouched.
    /\A [\t\n\r\x20-\x7E]*
      (?:
        (?:\x1b \x28 I      [\x21-\x7E]*
          |\x1b \x28 J      [\x21-\x7E]*
          |\x1b \x24 @      (?:[\x21-\x7E]{2})*
          |\x1b \x24 B      (?:[\x21-\x7E]{2})*
          |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
        )*
        \x1b \x28 B [\t\n\r\x20-\x7E]*
      )*
     \z/nox =~ str.dup.force_encoding('BINARY') ? true : false
  end
  module_function :isjis

  # call-seq:
  #    Kconv.isutf8(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> form valid UTF-8.
  def isutf8(str)
    str.dup.force_encoding(UTF8).valid_encoding?
  end
  module_function :isutf8
end

class String
  # call-seq:
  #    String#kconv(to_enc, from_enc=nil)
  #
  # Convert <code>self</code> to <code>to_enc</code>.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of
  # Kconv or Encoding objects.
  #
  # When <code>from_enc</code> is omitted, the string's own encoding is used
  # as the source encoding, unless that encoding is ASCII-8BIT (binary), in
  # which case no source encoding is passed on.
  def kconv(to_enc, from_enc=nil)
    # A binary-tagged string says nothing about its real encoding, so it is
    # not used as a hint.
    from_enc ||= encoding unless encoding == Encoding::ASCII_8BIT
    Kconv.kconv(self, to_enc, from_enc)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   => string
  #
  # Convert <code>self</code> to ISO-2022-JP
  def tojis; Kconv.tojis(self) end

  # call-seq:
  #    String#toeuc   => string
  #
  # Convert <code>self</code> to EUC-JP
  def toeuc; Kconv.toeuc(self) end

  # call-seq:
  #    String#tosjis   => string
  #
  # Convert <code>self</code> to Shift_JIS
  def tosjis; Kconv.tosjis(self) end

  # call-seq:
  #    String#toutf8   => string
  #
  # Convert <code>self</code> to UTF-8
  def toutf8; Kconv.toutf8(self) end

  # call-seq:
  #    String#toutf16   => string
  #
  # Convert <code>self</code> to UTF-16
  def toutf16; Kconv.toutf16(self) end

  # call-seq:
  #    String#toutf32   => string
  #
  # Convert <code>self</code> to UTF-32
  def toutf32; Kconv.toutf32(self) end

  # call-seq:
  #    String#tolocale   => string
  #
  # Convert <code>self</code> to locale encoding
  def tolocale; Kconv.tolocale(self) end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   => true or false
  #
  # Returns whether <code>self</code>'s encoding is EUC-JP or not.
  def iseuc; Kconv.iseuc(self) end

  # call-seq:
  #    String#issjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is Shift_JIS or not.
  def issjis; Kconv.issjis(self) end

  # call-seq:
  #    String#isjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is ISO-2022-JP or not.
  def isjis; Kconv.isjis(self) end

  # call-seq:
  #    String#isutf8   => true or false
  #
  # Returns whether <code>self</code>'s encoding is UTF-8 or not.
  def isutf8; Kconv.isutf8(self) end
end