1# -*- encoding: utf-8 -*-
2require_relative 'spec_helper'
3require_relative 'fixtures/encoding'
4
5load_extension('encoding')
6
# Shared examples for the APIs that report an object's encoding index.
# The examples dispatch through @s using the API name held in @method.
describe :rb_enc_get_index, shared: true do
  it "returns the index of the encoding of a String" do
    index = @s.send(@method, "string")
    index.should >= 0
  end

  it "returns the index of the encoding of a Regexp" do
    index = @s.send(@method, /regexp/)
    index.should >= 0
  end
end
16
# Shared examples for the APIs that set an object's encoding by index.
# The examples dispatch through @s using the API name held in @method.
describe :rb_enc_set_index, shared: true do
  it "sets the object's encoding to the Encoding specified by the index" do
    obj = "abc"
    result = @s.send(@method, obj, 2)

    # Indexes should be considered implementation dependent, so the call
    # hands back a pair of names to compare instead:
    #   [rb_enc_find_index()->name, rb_enc_get(obj)->name]
    result.first.should == result.last
  end

  it "associates an encoding with a subclass of String" do
    str = CApiEncodingSpecs::S.new "abc"
    result = @s.send(@method, str, 1)
    result.first.should == result.last
  end

  ruby_version_is "2.6" do
    it "raises an ArgumentError for a non-encoding capable object" do
      obj = Object.new
      # Only the raised error matters here, so the return value is not kept.
      -> {
        @s.send(@method, obj, 1)
      }.should raise_error(ArgumentError, "cannot set encoding on non-encoding capable object")
    end
  end
end
43
44describe "C-API Encoding function" do
45  before :each do
46    @s = CApiEncodingSpecs.new
47  end
48
49  ruby_version_is "2.6" do
50    describe "rb_enc_alias" do
51      it "creates an alias for an existing Encoding" do
52        @s.rb_enc_alias("ZOMGWTFBBQ", "UTF-8").should >= 0
53        Encoding.find("ZOMGWTFBBQ").name.should == "UTF-8"
54      end
55    end
56  end
57
58  describe "rb_enc_find" do
59    it "returns the encoding of an Encoding" do
60      @s.rb_enc_find("UTF-8").should == "UTF-8"
61    end
62
63    it "returns the encoding of an Encoding specified with lower case" do
64      @s.rb_enc_find("utf-8").should == "UTF-8"
65    end
66  end
67
68  describe "rb_enc_find_index" do
69    it "returns the index of an Encoding" do
70      @s.rb_enc_find_index("UTF-8").should >= 0
71    end
72
73    it "returns the index of an Encoding specified with lower case" do
74      @s.rb_enc_find_index("utf-8").should >= 0
75    end
76
77    it "returns -1 for an non existing encoding" do
78      @s.rb_enc_find_index("non-existent-encoding").should == -1
79    end
80  end
81
82  describe "rb_enc_from_index" do
83    it "returns an Encoding" do
84      @s.rb_enc_from_index(0).should be_an_instance_of(String)
85    end
86  end
87
88  describe "rb_usascii_encoding" do
89    it "returns the encoding for Encoding::US_ASCII" do
90      @s.rb_usascii_encoding.should == "US-ASCII"
91    end
92  end
93
94  describe "rb_ascii8bit_encoding" do
95    it "returns the encoding for Encoding::ASCII_8BIT" do
96      @s.rb_ascii8bit_encoding.should == "ASCII-8BIT"
97    end
98  end
99
100  describe "rb_utf8_encoding" do
101    it "returns the encoding for Encoding::UTF_8" do
102      @s.rb_utf8_encoding.should == "UTF-8"
103    end
104  end
105
106  describe "rb_enc_from_encoding" do
107    it "returns an Encoding instance from an encoding data structure" do
108      @s.rb_enc_from_encoding("UTF-8").should == Encoding::UTF_8
109    end
110  end
111
112  describe "rb_locale_encoding" do
113    it "returns the encoding for the current locale" do
114      @s.rb_locale_encoding.should == Encoding.find('locale').name
115    end
116  end
117
118  describe "rb_filesystem_encoding" do
119    it "returns the encoding for the current filesystem" do
120      @s.rb_filesystem_encoding.should == Encoding.find('filesystem').name
121    end
122  end
123
124  describe "rb_enc_get" do
125    it "returns the encoding ossociated with an object" do
126      str = "abc".encode Encoding::ASCII_8BIT
127      @s.rb_enc_get(str).should == "ASCII-8BIT"
128    end
129  end
130
131  describe "rb_obj_encoding" do
132    it "returns the encoding ossociated with an object" do
133      str = "abc".encode Encoding::ASCII_8BIT
134      @s.rb_obj_encoding(str).should == Encoding::ASCII_8BIT
135    end
136  end
137
138  describe "rb_enc_get_index" do
139    it_behaves_like :rb_enc_get_index, :rb_enc_get_index
140
141    it "returns the index of the encoding of a Symbol" do
142      @s.rb_enc_get_index(:symbol).should >= 0
143    end
144
145    it "returns -1 as the index of nil" do
146      @s.rb_enc_get_index(nil).should == -1
147    end
148
149    it "returns -1 as the index for immediates" do
150      @s.rb_enc_get_index(1).should == -1
151    end
152
153    ruby_version_is "2.6" do
154      it "returns -1 for an object without an encoding" do
155        obj = Object.new
156        @s.rb_enc_get_index(obj).should == -1
157      end
158    end
159  end
160
161  describe "rb_enc_set_index" do
162    it_behaves_like :rb_enc_set_index, :rb_enc_set_index
163  end
164
165  describe "rb_enc_str_new" do
166    it "returns a String in US-ASCII encoding when high bits are set" do
167      xEE = [0xEE].pack('C').force_encoding('utf-8')
168      result = @s.rb_enc_str_new(xEE, 1, Encoding::US_ASCII)
169      result.encoding.should equal(Encoding::US_ASCII)
170    end
171  end
172
173  describe "rb_enc_str_coderange" do
174    describe "when the encoding is ASCII-8BIT" do
175      it "returns ENC_CODERANGE_7BIT if there are no high bits set" do
176        result = @s.rb_enc_str_coderange("abc".force_encoding("ascii-8bit"))
177        result.should == :coderange_7bit
178      end
179
180      it "returns ENC_CODERANGE_VALID if there are high bits set" do
181        xEE = [0xEE].pack('C').force_encoding('utf-8')
182        result = @s.rb_enc_str_coderange(xEE.force_encoding("ascii-8bit"))
183        result.should == :coderange_valid
184      end
185    end
186
187    describe "when the encoding is UTF-8" do
188      it "returns ENC_CODERANGE_7BIT if there are no high bits set" do
189        result = @s.rb_enc_str_coderange("abc".force_encoding("utf-8"))
190        result.should == :coderange_7bit
191      end
192
193      it "returns ENC_CODERANGE_VALID if there are high bits set in a valid string" do
194        result = @s.rb_enc_str_coderange("\xE3\x81\x82".force_encoding("utf-8"))
195        result.should == :coderange_valid
196      end
197
198      it "returns ENC_CODERANGE_BROKEN if there are high bits set in an invalid string" do
199        result = @s.rb_enc_str_coderange([0xEE].pack('C').force_encoding("utf-8"))
200        result.should == :coderange_broken
201      end
202    end
203
204    describe "when the encoding is US-ASCII" do
205      it "returns ENC_CODERANGE_7BIT if there are no high bits set" do
206        result = @s.rb_enc_str_coderange("abc".force_encoding("us-ascii"))
207        result.should == :coderange_7bit
208      end
209
210      it "returns ENC_CODERANGE_BROKEN if there are high bits set" do
211        result = @s.rb_enc_str_coderange([0xEE].pack('C').force_encoding("us-ascii"))
212        result.should == :coderange_broken
213      end
214    end
215  end
216
217  describe "ENCODING_GET" do
218    it_behaves_like :rb_enc_get_index, :ENCODING_GET
219  end
220
221  describe "ENCODING_SET" do
222    it_behaves_like :rb_enc_set_index, :ENCODING_SET
223  end
224
225  describe "ENC_CODERANGE_ASCIIONLY" do
226    it "returns true if the object encoding is only ASCII" do
227      str = "abc".force_encoding("us-ascii")
228      str.valid_encoding? # make sure to set the coderange
229      @s.ENC_CODERANGE_ASCIIONLY(str).should be_true
230    end
231
232    it "returns false if the object encoding is not ASCII only" do
233      str = "ありがとう".force_encoding("utf-8")
234      @s.ENC_CODERANGE_ASCIIONLY(str).should be_false
235    end
236  end
237
238  describe "rb_to_encoding" do
239    it "returns the encoding for the Encoding instance passed" do
240      @s.rb_to_encoding(Encoding::BINARY).should == "ASCII-8BIT"
241    end
242
243    it "returns the correct encoding for a replicated encoding" do
244      @s.rb_to_encoding(Encoding::IBM857).should == "IBM857"
245    end
246
247    it "returns the encoding when passed a String" do
248      @s.rb_to_encoding("ASCII").should == "US-ASCII"
249    end
250
251    it "calls #to_str to convert the argument to a String" do
252      obj = mock("rb_to_encoding Encoding name")
253      obj.should_receive(:to_str).and_return("utf-8")
254
255      @s.rb_to_encoding(obj).should == "UTF-8"
256    end
257  end
258
259  describe "rb_to_encoding_index" do
260    it "returns the index of the encoding for the Encoding instance passed" do
261      @s.rb_to_encoding_index(Encoding::BINARY).should >= 0
262    end
263
264    it "returns the index of the encoding when passed a String" do
265      @s.rb_to_encoding_index("ASCII").should >= 0
266    end
267
268    it "returns the index of the dummy encoding of an Object" do
269      index = Encoding.list.index(Encoding::UTF_16)
270      @s.rb_to_encoding_index(Encoding::UTF_16.name).should == index
271    end
272
273    it "calls #to_str to convert the argument to a String" do
274      obj = mock("rb_to_encoding Encoding name")
275      obj.should_receive(:to_str).and_return("utf-8")
276
277      @s.rb_to_encoding_index(obj).should >= 0
278    end
279  end
280
281  describe "rb_enc_compatible" do
282    it "returns 0 if the encodings of the Strings are not compatible" do
283      a = [0xff].pack('C').force_encoding "ascii-8bit"
284      b = "\u3042".encode("utf-8")
285      @s.rb_enc_compatible(a, b).should == 0
286    end
287
288    # The coverage of this sucks, but there is not a simple way (yet?) to
289    # easily share the specs between rb_enc_compatible and
290    # Encoding.compatible?
291    it "returns the same value as Encoding.compatible? if the Strings have a compatible encoding" do
292      a = "abc".force_encoding("us-ascii")
293      b = "\u3042".encode("utf-8")
294      @s.rb_enc_compatible(a, b).should == Encoding.compatible?(a, b)
295    end
296  end
297
298  describe "rb_enc_copy" do
299    before :each do
300      @obj = "rb_enc_copy".encode(Encoding::US_ASCII)
301    end
302
303    it "sets the encoding of a String to that of the second argument" do
304      @s.rb_enc_copy("string", @obj).encoding.should == Encoding::US_ASCII
305    end
306
307    it "raises a RuntimeError if the second argument is a Symbol" do
308      lambda { @s.rb_enc_copy(:symbol, @obj) }.should raise_error(RuntimeError)
309    end
310
311    it "sets the encoding of a Regexp to that of the second argument" do
312      @s.rb_enc_copy(/regexp/, @obj).encoding.should == Encoding::US_ASCII
313    end
314  end
315
316  describe "rb_default_internal_encoding" do
317    before :each do
318      @default = Encoding.default_internal
319    end
320
321    after :each do
322      Encoding.default_internal = @default
323    end
324
325    it "returns 0 if Encoding.default_internal is nil" do
326      Encoding.default_internal = nil
327      @s.rb_default_internal_encoding.should be_nil
328    end
329
330    it "returns the encoding for Encoding.default_internal" do
331      Encoding.default_internal = "US-ASCII"
332      @s.rb_default_internal_encoding.should == "US-ASCII"
333      Encoding.default_internal = "UTF-8"
334      @s.rb_default_internal_encoding.should == "UTF-8"
335    end
336  end
337
338  describe "rb_default_external_encoding" do
339    before :each do
340      @default = Encoding.default_external
341    end
342
343    after :each do
344      Encoding.default_external = @default
345    end
346
347    it "returns the encoding for Encoding.default_external" do
348      Encoding.default_external = "BINARY"
349      @s.rb_default_external_encoding.should == "ASCII-8BIT"
350    end
351  end
352
353  describe "rb_enc_associate" do
354    it "sets the encoding of a String to the encoding" do
355      @s.rb_enc_associate("string", "ASCII-8BIT").encoding.should == Encoding::ASCII_8BIT
356    end
357
358    it "raises a RuntimeError if the argument is Symbol" do
359      lambda { @s.rb_enc_associate(:symbol, "US-ASCII") }.should raise_error(RuntimeError)
360    end
361
362    it "sets the encoding of a Regexp to the encoding" do
363      @s.rb_enc_associate(/regexp/, "ASCII-8BIT").encoding.should == Encoding::ASCII_8BIT
364    end
365
366    it "sets the encoding of a String to a default when the encoding is NULL" do
367      @s.rb_enc_associate("string", nil).encoding.should == Encoding::ASCII_8BIT
368    end
369  end
370
371  describe "rb_enc_associate_index" do
372    it "sets the encoding of a String to the encoding" do
373      index = @s.rb_enc_find_index("ASCII-8BIT")
374      enc = @s.rb_enc_associate_index("string", index).encoding
375      enc.should == Encoding::ASCII_8BIT
376    end
377
378    it "sets the encoding of a Regexp to the encoding" do
379      index = @s.rb_enc_find_index("UTF-8")
380      enc = @s.rb_enc_associate_index(/regexp/, index).encoding
381      enc.should == Encoding::UTF_8
382    end
383
384    it "sets the encoding of a Symbol to the encoding" do
385      index = @s.rb_enc_find_index("UTF-8")
386      lambda { @s.rb_enc_associate_index(:symbol, index) }.should raise_error(RuntimeError)
387    end
388  end
389
390  describe "rb_ascii8bit_encindex" do
391    it "returns an index for the ASCII-8BIT encoding" do
392      @s.rb_ascii8bit_encindex().should >= 0
393    end
394  end
395
396  describe "rb_utf8_encindex" do
397    it "returns an index for the UTF-8 encoding" do
398      @s.rb_utf8_encindex().should >= 0
399    end
400  end
401
402  describe "rb_usascii_encindex" do
403    it "returns an index for the US-ASCII encoding" do
404      @s.rb_usascii_encindex().should >= 0
405    end
406  end
407
408  describe "rb_locale_encindex" do
409    it "returns an index for the locale encoding" do
410      @s.rb_locale_encindex().should >= 0
411    end
412  end
413
414  describe "rb_filesystem_encindex" do
415    it "returns an index for the filesystem encoding" do
416      @s.rb_filesystem_encindex().should >= 0
417    end
418  end
419
420  describe "rb_enc_to_index" do
421    it "returns an index for the encoding" do
422      @s.rb_enc_to_index("UTF-8").should >= 0
423    end
424
425    it "returns a non-negative int if the encoding is not defined" do
426      # Encoding indexes are an implementation detail and not guaranteed
427      # across implementations.
428      @s.rb_enc_to_index("FTU-81").should >= 0
429    end
430  end
431
432  describe "rb_enc_nth" do
433    it "returns the byte index of the given character index" do
434      @s.rb_enc_nth("hüllo", 3).should == 4
435    end
436  end
437
438  describe "rb_enc_codepoint_len" do
439    it "raises ArgumentError if an empty string is given" do
440      lambda do
441        @s.rb_enc_codepoint_len("")
442      end.should raise_error(ArgumentError)
443    end
444
445    it "raises ArgumentError if an invalid byte sequence is given" do
446      lambda do
447        @s.rb_enc_codepoint_len([0xa0, 0xa1].pack('CC').force_encoding('utf-8')) # Invalid sequence identifier
448      end.should raise_error(ArgumentError)
449    end
450
451    it "returns codepoint 0x24 and length 1 for character '$'" do
452      codepoint, length = @s.rb_enc_codepoint_len("$")
453
454      codepoint.should == 0x24
455      length.should == 1
456    end
457
458    it "returns codepoint 0xA2 and length 2 for character '¢'" do
459      codepoint, length = @s.rb_enc_codepoint_len("¢")
460
461      codepoint.should == 0xA2
462      length.should == 2
463    end
464
465    it "returns codepoint 0x20AC and length 3 for character '€'" do
466      codepoint, length = @s.rb_enc_codepoint_len("€")
467
468      codepoint.should == 0x20AC
469      length.should == 3
470    end
471
472    it "returns codepoint 0x24B62 and length 4 for character '��'" do
473      codepoint, length = @s.rb_enc_codepoint_len("��")
474
475      codepoint.should == 0x24B62
476      length.should == 4
477    end
478  end
479end
480