# -*- encoding: utf-8 -*-
require_relative 'spec_helper'
require_relative 'fixtures/encoding'

load_extension('encoding')

# Specs for the C-API encoding functions. Every example drives the function
# under test through an instance of CApiEncodingSpecs (stored in @s), which is
# presumably the Ruby-visible wrapper defined by the 'encoding' extension
# loaded above. Where the examples compare against Strings such as "UTF-8",
# the wrapper evidently reports an encoding by its name.

# Shared examples for functions that return the encoding index of an object.
# The including group names the wrapper method to call as the second argument
# to it_behaves_like; it is dispatched here through @method.
describe :rb_enc_get_index, shared: true do
  it "returns the index of the encoding of a String" do
    @s.send(@method, "string").should >= 0
  end

  it "returns the index of the encoding of a Regexp" do
    @s.send(@method, /regexp/).should >= 0
  end
end

# Shared examples for functions that set an object's encoding by index.
describe :rb_enc_set_index, shared: true do
  it "sets the object's encoding to the Encoding specified by the index" do
    obj = "abc"
    result = @s.send(@method, obj, 2)

    # Indexes should be considered implementation dependent, so the wrapper
    # returns a pair of names instead of an index:
    #   [rb_enc_find_index()->name, rb_enc_get(obj)->name]
    result.first.should == result.last
  end

  it "associates an encoding with a subclass of String" do
    str = CApiEncodingSpecs::S.new "abc"
    result = @s.send(@method, str, 1)
    result.first.should == result.last
  end

  ruby_version_is "2.6" do
    it "raises an ArgumentError for a non-encoding capable object" do
      obj = Object.new
      -> {
        @s.send(@method, obj, 1)
      }.should raise_error(ArgumentError, "cannot set encoding on non-encoding capable object")
    end
  end
end

describe "C-API Encoding function" do
  before :each do
    @s = CApiEncodingSpecs.new
  end

  ruby_version_is "2.6" do
    describe "rb_enc_alias" do
      it "creates an alias for an existing Encoding" do
        @s.rb_enc_alias("ZOMGWTFBBQ", "UTF-8").should >= 0
        Encoding.find("ZOMGWTFBBQ").name.should == "UTF-8"
      end
    end
  end

  describe "rb_enc_find" do
    it "returns the encoding of an Encoding" do
      @s.rb_enc_find("UTF-8").should == "UTF-8"
    end

    it "returns the encoding of an Encoding specified with lower case" do
      @s.rb_enc_find("utf-8").should == "UTF-8"
    end
  end

  describe "rb_enc_find_index" do
    it "returns the index of an Encoding" do
      @s.rb_enc_find_index("UTF-8").should >= 0
    end

    it "returns the index of an Encoding specified with lower case" do
      @s.rb_enc_find_index("utf-8").should >= 0
    end

    it "returns -1 for a non-existent encoding" do
      @s.rb_enc_find_index("non-existent-encoding").should == -1
    end
  end

  describe "rb_enc_from_index" do
    it "returns an Encoding" do
      # The wrapper hands back a String here (presumably the encoding's name).
      @s.rb_enc_from_index(0).should be_an_instance_of(String)
    end
  end

  describe "rb_usascii_encoding" do
    it "returns the encoding for Encoding::US_ASCII" do
      @s.rb_usascii_encoding.should == "US-ASCII"
    end
  end

  describe "rb_ascii8bit_encoding" do
    it "returns the encoding for Encoding::ASCII_8BIT" do
      @s.rb_ascii8bit_encoding.should == "ASCII-8BIT"
    end
  end

  describe "rb_utf8_encoding" do
    it "returns the encoding for Encoding::UTF_8" do
      @s.rb_utf8_encoding.should == "UTF-8"
    end
  end

  describe "rb_enc_from_encoding" do
    it "returns an Encoding instance from an encoding data structure" do
      @s.rb_enc_from_encoding("UTF-8").should == Encoding::UTF_8
    end
  end

  describe "rb_locale_encoding" do
    it "returns the encoding for the current locale" do
      @s.rb_locale_encoding.should == Encoding.find('locale').name
    end
  end

  describe "rb_filesystem_encoding" do
    it "returns the encoding for the current filesystem" do
      @s.rb_filesystem_encoding.should == Encoding.find('filesystem').name
    end
  end

  describe "rb_enc_get" do
    it "returns the encoding associated with an object" do
      str = "abc".encode Encoding::ASCII_8BIT
      @s.rb_enc_get(str).should == "ASCII-8BIT"
    end
  end

  describe "rb_obj_encoding" do
    it "returns the encoding associated with an object" do
      str = "abc".encode Encoding::ASCII_8BIT
      @s.rb_obj_encoding(str).should == Encoding::ASCII_8BIT
    end
  end

  describe "rb_enc_get_index" do
    it_behaves_like :rb_enc_get_index, :rb_enc_get_index

    it "returns the index of the encoding of a Symbol" do
      @s.rb_enc_get_index(:symbol).should >= 0
    end

    it "returns -1 as the index of nil" do
      @s.rb_enc_get_index(nil).should == -1
    end

    it "returns -1 as the index for immediates" do
      @s.rb_enc_get_index(1).should == -1
    end

    ruby_version_is "2.6" do
      it "returns -1 for an object without an encoding" do
        obj = Object.new
        @s.rb_enc_get_index(obj).should == -1
      end
    end
  end

  describe "rb_enc_set_index" do
    it_behaves_like :rb_enc_set_index, :rb_enc_set_index
  end

  describe "rb_enc_str_new" do
    it "returns a String in US-ASCII encoding when high bits are set" do
      high_byte = [0xEE].pack('C').force_encoding('utf-8')
      result = @s.rb_enc_str_new(high_byte, 1, Encoding::US_ASCII)
      result.encoding.should equal(Encoding::US_ASCII)
    end
  end

  describe "rb_enc_str_coderange" do
    describe "when the encoding is ASCII-8BIT" do
      it "returns ENC_CODERANGE_7BIT if there are no high bits set" do
        result = @s.rb_enc_str_coderange("abc".force_encoding("ascii-8bit"))
        result.should == :coderange_7bit
      end

      it "returns ENC_CODERANGE_VALID if there are high bits set" do
        high_byte = [0xEE].pack('C').force_encoding('utf-8')
        result = @s.rb_enc_str_coderange(high_byte.force_encoding("ascii-8bit"))
        result.should == :coderange_valid
      end
    end

    describe "when the encoding is UTF-8" do
      it "returns ENC_CODERANGE_7BIT if there are no high bits set" do
        result = @s.rb_enc_str_coderange("abc".force_encoding("utf-8"))
        result.should == :coderange_7bit
      end

      it "returns ENC_CODERANGE_VALID if there are high bits set in a valid string" do
        result = @s.rb_enc_str_coderange("\xE3\x81\x82".force_encoding("utf-8"))
        result.should == :coderange_valid
      end

      it "returns ENC_CODERANGE_BROKEN if there are high bits set in an invalid string" do
        result = @s.rb_enc_str_coderange([0xEE].pack('C').force_encoding("utf-8"))
        result.should == :coderange_broken
      end
    end

    describe "when the encoding is US-ASCII" do
      it "returns ENC_CODERANGE_7BIT if there are no high bits set" do
        result = @s.rb_enc_str_coderange("abc".force_encoding("us-ascii"))
        result.should == :coderange_7bit
      end

      it "returns ENC_CODERANGE_BROKEN if there are high bits set" do
        result = @s.rb_enc_str_coderange([0xEE].pack('C').force_encoding("us-ascii"))
        result.should == :coderange_broken
      end
    end
  end

  describe "ENCODING_GET" do
    it_behaves_like :rb_enc_get_index, :ENCODING_GET
  end

  describe "ENCODING_SET" do
    it_behaves_like :rb_enc_set_index, :ENCODING_SET
  end

  describe "ENC_CODERANGE_ASCIIONLY" do
    it "returns true if the object encoding is only ASCII" do
      str = "abc".force_encoding("us-ascii")
      str.valid_encoding? # make sure to set the coderange
      @s.ENC_CODERANGE_ASCIIONLY(str).should be_true
    end

    it "returns false if the object encoding is not ASCII only" do
      str = "ありがとう".force_encoding("utf-8")
      @s.ENC_CODERANGE_ASCIIONLY(str).should be_false
    end
  end

  describe "rb_to_encoding" do
    it "returns the encoding for the Encoding instance passed" do
      @s.rb_to_encoding(Encoding::BINARY).should == "ASCII-8BIT"
    end

    it "returns the correct encoding for a replicated encoding" do
      @s.rb_to_encoding(Encoding::IBM857).should == "IBM857"
    end

    it "returns the encoding when passed a String" do
      @s.rb_to_encoding("ASCII").should == "US-ASCII"
    end

    it "calls #to_str to convert the argument to a String" do
      obj = mock("rb_to_encoding Encoding name")
      obj.should_receive(:to_str).and_return("utf-8")

      @s.rb_to_encoding(obj).should == "UTF-8"
    end
  end

  describe "rb_to_encoding_index" do
    it "returns the index of the encoding for the Encoding instance passed" do
      @s.rb_to_encoding_index(Encoding::BINARY).should >= 0
    end

    it "returns the index of the encoding when passed a String" do
      @s.rb_to_encoding_index("ASCII").should >= 0
    end

    it "returns the index of the dummy encoding of an Object" do
      # NOTE(review): this relies on an encoding's C-level index matching its
      # position in Encoding.list - confirm that holds on every implementation.
      index = Encoding.list.index(Encoding::UTF_16)
      @s.rb_to_encoding_index(Encoding::UTF_16.name).should == index
    end

    it "calls #to_str to convert the argument to a String" do
      obj = mock("rb_to_encoding Encoding name")
      obj.should_receive(:to_str).and_return("utf-8")

      @s.rb_to_encoding_index(obj).should >= 0
    end
  end

  describe "rb_enc_compatible" do
    it "returns 0 if the encodings of the Strings are not compatible" do
      a = [0xff].pack('C').force_encoding "ascii-8bit"
      b = "\u3042".encode("utf-8")
      @s.rb_enc_compatible(a, b).should == 0
    end

    # Coverage here is minimal: there is not a simple way (yet?) to share the
    # specs between rb_enc_compatible and Encoding.compatible?
    it "returns the same value as Encoding.compatible? if the Strings have a compatible encoding" do
      a = "abc".force_encoding("us-ascii")
      b = "\u3042".encode("utf-8")
      @s.rb_enc_compatible(a, b).should == Encoding.compatible?(a, b)
    end
  end

  describe "rb_enc_copy" do
    before :each do
      @obj = "rb_enc_copy".encode(Encoding::US_ASCII)
    end

    it "sets the encoding of a String to that of the second argument" do
      @s.rb_enc_copy("string", @obj).encoding.should == Encoding::US_ASCII
    end

    # The Symbol is the object whose encoding would be changed, i.e. the
    # first argument; @obj (second argument) is only the encoding source.
    it "raises a RuntimeError if the first argument is a Symbol" do
      -> { @s.rb_enc_copy(:symbol, @obj) }.should raise_error(RuntimeError)
    end

    it "sets the encoding of a Regexp to that of the second argument" do
      @s.rb_enc_copy(/regexp/, @obj).encoding.should == Encoding::US_ASCII
    end
  end

  describe "rb_default_internal_encoding" do
    # Encoding.default_internal is process-global state, so save it before
    # each example and restore it afterwards.
    before :each do
      @default = Encoding.default_internal
    end

    after :each do
      Encoding.default_internal = @default
    end

    # NOTE(review): "0" in the description presumably refers to the C-level
    # NULL return, which the wrapper surfaces as nil - confirm against the
    # extension source.
    it "returns 0 if Encoding.default_internal is nil" do
      Encoding.default_internal = nil
      @s.rb_default_internal_encoding.should be_nil
    end

    it "returns the encoding for Encoding.default_internal" do
      Encoding.default_internal = "US-ASCII"
      @s.rb_default_internal_encoding.should == "US-ASCII"
      Encoding.default_internal = "UTF-8"
      @s.rb_default_internal_encoding.should == "UTF-8"
    end
  end

  describe "rb_default_external_encoding" do
    # Encoding.default_external is process-global state, so save it before
    # each example and restore it afterwards.
    before :each do
      @default = Encoding.default_external
    end

    after :each do
      Encoding.default_external = @default
    end

    it "returns the encoding for Encoding.default_external" do
      Encoding.default_external = "BINARY"
      @s.rb_default_external_encoding.should == "ASCII-8BIT"
    end
  end

  describe "rb_enc_associate" do
    it "sets the encoding of a String to the encoding" do
      @s.rb_enc_associate("string", "ASCII-8BIT").encoding.should == Encoding::ASCII_8BIT
    end

    it "raises a RuntimeError if the argument is Symbol" do
      -> { @s.rb_enc_associate(:symbol, "US-ASCII") }.should raise_error(RuntimeError)
    end

    it "sets the encoding of a Regexp to the encoding" do
      @s.rb_enc_associate(/regexp/, "ASCII-8BIT").encoding.should == Encoding::ASCII_8BIT
    end

    it "sets the encoding of a String to a default when the encoding is NULL" do
      @s.rb_enc_associate("string", nil).encoding.should == Encoding::ASCII_8BIT
    end
  end

  describe "rb_enc_associate_index" do
    it "sets the encoding of a String to the encoding" do
      index = @s.rb_enc_find_index("ASCII-8BIT")
      enc = @s.rb_enc_associate_index("string", index).encoding
      enc.should == Encoding::ASCII_8BIT
    end

    it "sets the encoding of a Regexp to the encoding" do
      index = @s.rb_enc_find_index("UTF-8")
      enc = @s.rb_enc_associate_index(/regexp/, index).encoding
      enc.should == Encoding::UTF_8
    end

    it "raises a RuntimeError if the argument is a Symbol" do
      index = @s.rb_enc_find_index("UTF-8")
      -> { @s.rb_enc_associate_index(:symbol, index) }.should raise_error(RuntimeError)
    end
  end

  describe "rb_ascii8bit_encindex" do
    it "returns an index for the ASCII-8BIT encoding" do
      @s.rb_ascii8bit_encindex().should >= 0
    end
  end

  describe "rb_utf8_encindex" do
    it "returns an index for the UTF-8 encoding" do
      @s.rb_utf8_encindex().should >= 0
    end
  end

  describe "rb_usascii_encindex" do
    it "returns an index for the US-ASCII encoding" do
      @s.rb_usascii_encindex().should >= 0
    end
  end

  describe "rb_locale_encindex" do
    it "returns an index for the locale encoding" do
      @s.rb_locale_encindex().should >= 0
    end
  end

  describe "rb_filesystem_encindex" do
    it "returns an index for the filesystem encoding" do
      @s.rb_filesystem_encindex().should >= 0
    end
  end

  describe "rb_enc_to_index" do
    it "returns an index for the encoding" do
      @s.rb_enc_to_index("UTF-8").should >= 0
    end

    it "returns a non-negative int if the encoding is not defined" do
      # Encoding indexes are an implementation detail and not guaranteed
      # across implementations.
      @s.rb_enc_to_index("FTU-81").should >= 0
    end
  end

  describe "rb_enc_nth" do
    it "returns the byte index of the given character index" do
      # "ü" occupies two bytes in UTF-8, so character 3 starts at byte 4.
      @s.rb_enc_nth("hüllo", 3).should == 4
    end
  end

  describe "rb_enc_codepoint_len" do
    it "raises ArgumentError if an empty string is given" do
      -> {
        @s.rb_enc_codepoint_len("")
      }.should raise_error(ArgumentError)
    end

    it "raises ArgumentError if an invalid byte sequence is given" do
      -> {
        @s.rb_enc_codepoint_len([0xa0, 0xa1].pack('CC').force_encoding('utf-8')) # Invalid sequence identifier
      }.should raise_error(ArgumentError)
    end

    it "returns codepoint 0x24 and length 1 for character '$'" do
      codepoint, length = @s.rb_enc_codepoint_len("$")

      codepoint.should == 0x24
      length.should == 1
    end

    it "returns codepoint 0xA2 and length 2 for character '¢'" do
      codepoint, length = @s.rb_enc_codepoint_len("¢")

      codepoint.should == 0xA2
      length.should == 2
    end

    it "returns codepoint 0x20AC and length 3 for character '€'" do
      codepoint, length = @s.rb_enc_codepoint_len("€")

      codepoint.should == 0x20AC
      length.should == 3
    end

    # U+24B62 lies outside the Basic Multilingual Plane and needs four bytes
    # in UTF-8. It is written as an escape so the character cannot be dropped
    # by tools that mishandle 4-byte sequences; an empty string here would
    # raise ArgumentError instead (see the first example of this group).
    it "returns codepoint 0x24B62 and length 4 for character '\u{24B62}'" do
      codepoint, length = @s.rb_enc_codepoint_len("\u{24B62}")

      codepoint.should == 0x24B62
      length.should == 4
    end
  end
end