1# frozen_string_literal: true
2#--
3# = uri/common.rb
4#
5# Author:: Akira Yamada <akira@ruby-lang.org>
6# Revision:: $Id: common.rb 65505 2018-11-02 17:52:33Z marcandre $
7# License::
#   You can redistribute it and/or modify it under the same terms as Ruby.
9#
10# See URI for general documentation
11#
12
13require_relative "rfc2396_parser"
14require_relative "rfc3986_parser"
15
16module URI
17  REGEXP = RFC2396_REGEXP
18  Parser = RFC2396_Parser
19  RFC3986_PARSER = RFC3986_Parser.new
20
21  # URI::Parser.new
22  DEFAULT_PARSER = Parser.new
23  DEFAULT_PARSER.pattern.each_pair do |sym, str|
24    unless REGEXP::PATTERN.const_defined?(sym)
25      REGEXP::PATTERN.const_set(sym, str)
26    end
27  end
28  DEFAULT_PARSER.regexp.each_pair do |sym, str|
29    const_set(sym, str)
30  end
31
32  module Util # :nodoc:
33    def make_components_hash(klass, array_hash)
34      tmp = {}
35      if array_hash.kind_of?(Array) &&
36          array_hash.size == klass.component.size - 1
37        klass.component[1..-1].each_index do |i|
38          begin
39            tmp[klass.component[i + 1]] = array_hash[i].clone
40          rescue TypeError
41            tmp[klass.component[i + 1]] = array_hash[i]
42          end
43        end
44
45      elsif array_hash.kind_of?(Hash)
46        array_hash.each do |key, value|
47          begin
48            tmp[key] = value.clone
49          rescue TypeError
50            tmp[key] = value
51          end
52        end
53      else
54        raise ArgumentError,
55          "expected Array of or Hash of components of #{klass} (#{klass.component[1..-1].join(', ')})"
56      end
57      tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase
58
59      return tmp
60    end
61    module_function :make_components_hash
62  end
63
64  # Module for escaping unsafe characters with codes.
65  module Escape
66    #
67    # == Synopsis
68    #
69    #   URI.escape(str [, unsafe])
70    #
71    # == Args
72    #
73    # +str+::
74    #   String to replaces in.
75    # +unsafe+::
76    #   Regexp that matches all symbols that must be replaced with codes.
77    #   By default uses <tt>UNSAFE</tt>.
78    #   When this argument is a String, it represents a character set.
79    #
80    # == Description
81    #
82    # Escapes the string, replacing all unsafe characters with codes.
83    #
84    # This method is obsolete and should not be used. Instead, use
85    # CGI.escape, URI.encode_www_form or URI.encode_www_form_component
86    # depending on your specific use case.
87    #
88    # == Usage
89    #
90    #   require 'uri'
91    #
92    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
93    #   # => "http://example.com/?a=%09%0D"
94    #
95    #   URI.unescape(enc_uri)
96    #   # => "http://example.com/?a=\t\r"
97    #
98    #   URI.escape("@?@!", "!?")
99    #   # => "@%3F@%21"
100    #
101    def escape(*arg)
102      warn "URI.escape is obsolete", uplevel: 1 if $VERBOSE
103      DEFAULT_PARSER.escape(*arg)
104    end
105    alias encode escape
106    #
107    # == Synopsis
108    #
109    #   URI.unescape(str)
110    #
111    # == Args
112    #
113    # +str+::
114    #   String to unescape.
115    #
116    # == Description
117    #
118    # This method is obsolete and should not be used. Instead, use
119    # CGI.unescape, URI.decode_www_form or URI.decode_www_form_component
120    # depending on your specific use case.
121    #
122    # == Usage
123    #
124    #   require 'uri'
125    #
126    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
127    #   # => "http://example.com/?a=%09%0D"
128    #
129    #   URI.unescape(enc_uri)
130    #   # => "http://example.com/?a=\t\r"
131    #
132    def unescape(*arg)
133      warn "URI.unescape is obsolete", uplevel: 1 if $VERBOSE
134      DEFAULT_PARSER.unescape(*arg)
135    end
136    alias decode unescape
137  end # module Escape
138
139  extend Escape
140  include REGEXP
141
142  @@schemes = {}
143  # Returns a Hash of the defined schemes.
144  def self.scheme_list
145    @@schemes
146  end
147
148  #
149  # Base class for all URI exceptions.
150  #
151  class Error < StandardError; end
152  #
153  # Not a URI.
154  #
155  class InvalidURIError < Error; end
156  #
157  # Not a URI component.
158  #
159  class InvalidComponentError < Error; end
160  #
161  # URI is valid, bad usage is not.
162  #
163  class BadURIError < Error; end
164
165  #
166  # == Synopsis
167  #
168  #   URI::split(uri)
169  #
170  # == Args
171  #
172  # +uri+::
173  #   String with URI.
174  #
175  # == Description
176  #
177  # Splits the string on following parts and returns array with result:
178  #
179  # * Scheme
180  # * Userinfo
181  # * Host
182  # * Port
183  # * Registry
184  # * Path
185  # * Opaque
186  # * Query
187  # * Fragment
188  #
189  # == Usage
190  #
191  #   require 'uri'
192  #
193  #   URI.split("http://www.ruby-lang.org/")
194  #   # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
195  #
196  def self.split(uri)
197    RFC3986_PARSER.split(uri)
198  end
199
200  #
201  # == Synopsis
202  #
203  #   URI::parse(uri_str)
204  #
205  # == Args
206  #
207  # +uri_str+::
208  #   String with URI.
209  #
210  # == Description
211  #
212  # Creates one of the URI's subclasses instance from the string.
213  #
214  # == Raises
215  #
216  # URI::InvalidURIError::
217  #   Raised if URI given is not a correct one.
218  #
219  # == Usage
220  #
221  #   require 'uri'
222  #
223  #   uri = URI.parse("http://www.ruby-lang.org/")
224  #   # => #<URI::HTTP http://www.ruby-lang.org/>
225  #   uri.scheme
226  #   # => "http"
227  #   uri.host
228  #   # => "www.ruby-lang.org"
229  #
230  # It's recommended to first ::escape the provided +uri_str+ if there are any
231  # invalid URI characters.
232  #
233  def self.parse(uri)
234    RFC3986_PARSER.parse(uri)
235  end
236
237  #
238  # == Synopsis
239  #
240  #   URI::join(str[, str, ...])
241  #
242  # == Args
243  #
244  # +str+::
245  #   String(s) to work with, will be converted to RFC3986 URIs before merging.
246  #
247  # == Description
248  #
249  # Joins URIs.
250  #
251  # == Usage
252  #
253  #   require 'uri'
254  #
255  #   URI.join("http://example.com/","main.rbx")
256  #   # => #<URI::HTTP http://example.com/main.rbx>
257  #
258  #   URI.join('http://example.com', 'foo')
259  #   # => #<URI::HTTP http://example.com/foo>
260  #
261  #   URI.join('http://example.com', '/foo', '/bar')
262  #   # => #<URI::HTTP http://example.com/bar>
263  #
264  #   URI.join('http://example.com', '/foo', 'bar')
265  #   # => #<URI::HTTP http://example.com/bar>
266  #
267  #   URI.join('http://example.com', '/foo/', 'bar')
268  #   # => #<URI::HTTP http://example.com/foo/bar>
269  #
270  def self.join(*str)
271    RFC3986_PARSER.join(*str)
272  end
273
274  #
275  # == Synopsis
276  #
277  #   URI::extract(str[, schemes][,&blk])
278  #
279  # == Args
280  #
281  # +str+::
282  #   String to extract URIs from.
283  # +schemes+::
284  #   Limit URI matching to specific schemes.
285  #
286  # == Description
287  #
288  # Extracts URIs from a string. If block given, iterates through all matched URIs.
289  # Returns nil if block given or array with matches.
290  #
291  # == Usage
292  #
293  #   require "uri"
294  #
295  #   URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
296  #   # => ["http://foo.example.com/bla", "mailto:test@example.com"]
297  #
298  def self.extract(str, schemes = nil, &block)
299    warn "URI.extract is obsolete", uplevel: 1 if $VERBOSE
300    DEFAULT_PARSER.extract(str, schemes, &block)
301  end
302
303  #
304  # == Synopsis
305  #
306  #   URI::regexp([match_schemes])
307  #
308  # == Args
309  #
310  # +match_schemes+::
311  #   Array of schemes. If given, resulting regexp matches to URIs
312  #   whose scheme is one of the match_schemes.
313  #
314  # == Description
315  #
316  # Returns a Regexp object which matches to URI-like strings.
317  # The Regexp object returned by this method includes arbitrary
318  # number of capture group (parentheses).  Never rely on it's number.
319  #
320  # == Usage
321  #
322  #   require 'uri'
323  #
324  #   # extract first URI from html_string
325  #   html_string.slice(URI.regexp)
326  #
327  #   # remove ftp URIs
328  #   html_string.sub(URI.regexp(['ftp']), '')
329  #
330  #   # You should not rely on the number of parentheses
331  #   html_string.scan(URI.regexp) do |*matches|
332  #     p $&
333  #   end
334  #
335  def self.regexp(schemes = nil)
336    warn "URI.regexp is obsolete", uplevel: 1 if $VERBOSE
337    DEFAULT_PARSER.make_regexp(schemes)
338  end
339
340  TBLENCWWWCOMP_ = {} # :nodoc:
341  256.times do |i|
342    TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
343  end
344  TBLENCWWWCOMP_[' '] = '+'
345  TBLENCWWWCOMP_.freeze
346  TBLDECWWWCOMP_ = {} # :nodoc:
347  256.times do |i|
348    h, l = i>>4, i&15
349    TBLDECWWWCOMP_[-('%%%X%X' % [h, l])] = -i.chr
350    TBLDECWWWCOMP_[-('%%%x%X' % [h, l])] = -i.chr
351    TBLDECWWWCOMP_[-('%%%X%x' % [h, l])] = -i.chr
352    TBLDECWWWCOMP_[-('%%%x%x' % [h, l])] = -i.chr
353  end
354  TBLDECWWWCOMP_['+'] = ' '
355  TBLDECWWWCOMP_.freeze
356
357  # Encodes given +str+ to URL-encoded form data.
358  #
359  # This method doesn't convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP
360  # (ASCII space) to + and converts others to %XX.
361  #
362  # If +enc+ is given, convert +str+ to the encoding before percent encoding.
363  #
364  # This is an implementation of
365  # http://www.w3.org/TR/2013/CR-html5-20130806/forms.html#url-encoded-form-data.
366  #
367  # See URI.decode_www_form_component, URI.encode_www_form.
368  def self.encode_www_form_component(str, enc=nil)
369    str = str.to_s.dup
370    if str.encoding != Encoding::ASCII_8BIT
371      if enc && enc != Encoding::ASCII_8BIT
372        str.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace)
373        str.encode!(enc, fallback: ->(x){"&#{x.ord};"})
374      end
375      str.force_encoding(Encoding::ASCII_8BIT)
376    end
377    str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
378    str.force_encoding(Encoding::US_ASCII)
379  end
380
381  # Decodes given +str+ of URL-encoded form data.
382  #
383  # This decodes + to SP.
384  #
385  # See URI.encode_www_form_component, URI.decode_www_form.
386  def self.decode_www_form_component(str, enc=Encoding::UTF_8)
387    raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/ =~ str
388    str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
389  end
390
391  # Generates URL-encoded form data from given +enum+.
392  #
393  # This generates application/x-www-form-urlencoded data defined in HTML5
394  # from given an Enumerable object.
395  #
396  # This internally uses URI.encode_www_form_component(str).
397  #
398  # This method doesn't convert the encoding of given items, so convert them
399  # before calling this method if you want to send data as other than original
400  # encoding or mixed encoding data. (Strings which are encoded in an HTML5
401  # ASCII incompatible encoding are converted to UTF-8.)
402  #
403  # This method doesn't handle files.  When you send a file, use
404  # multipart/form-data.
405  #
406  # This refers http://url.spec.whatwg.org/#concept-urlencoded-serializer
407  #
408  #    URI.encode_www_form([["q", "ruby"], ["lang", "en"]])
409  #    #=> "q=ruby&lang=en"
410  #    URI.encode_www_form("q" => "ruby", "lang" => "en")
411  #    #=> "q=ruby&lang=en"
412  #    URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en")
413  #    #=> "q=ruby&q=perl&lang=en"
414  #    URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]])
415  #    #=> "q=ruby&q=perl&lang=en"
416  #
417  # See URI.encode_www_form_component, URI.decode_www_form.
418  def self.encode_www_form(enum, enc=nil)
419    enum.map do |k,v|
420      if v.nil?
421        encode_www_form_component(k, enc)
422      elsif v.respond_to?(:to_ary)
423        v.to_ary.map do |w|
424          str = encode_www_form_component(k, enc)
425          unless w.nil?
426            str << '='
427            str << encode_www_form_component(w, enc)
428          end
429        end.join('&')
430      else
431        str = encode_www_form_component(k, enc)
432        str << '='
433        str << encode_www_form_component(v, enc)
434      end
435    end.join('&')
436  end
437
438  # Decodes URL-encoded form data from given +str+.
439  #
440  # This decodes application/x-www-form-urlencoded data
441  # and returns an array of key-value arrays.
442  #
443  # This refers http://url.spec.whatwg.org/#concept-urlencoded-parser,
444  # so this supports only &-separator, and doesn't support ;-separator.
445  #
446  #    ary = URI.decode_www_form("a=1&a=2&b=3")
447  #    ary                   #=> [['a', '1'], ['a', '2'], ['b', '3']]
448  #    ary.assoc('a').last   #=> '1'
449  #    ary.assoc('b').last   #=> '3'
450  #    ary.rassoc('a').last  #=> '2'
451  #    Hash[ary]             #=> {"a"=>"2", "b"=>"3"}
452  #
453  # See URI.decode_www_form_component, URI.encode_www_form.
454  def self.decode_www_form(str, enc=Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)
455    raise ArgumentError, "the input of #{self.name}.#{__method__} must be ASCII only string" unless str.ascii_only?
456    ary = []
457    return ary if str.empty?
458    enc = Encoding.find(enc)
459    str.b.each_line(separator) do |string|
460      string.chomp!(separator)
461      key, sep, val = string.partition('=')
462      if isindex
463        if sep.empty?
464          val = key
465          key = +''
466        end
467        isindex = false
468      end
469
470      if use__charset_ and key == '_charset_' and e = get_encoding(val)
471        enc = e
472        use__charset_ = false
473      end
474
475      key.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
476      if val
477        val.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
478      else
479        val = +''
480      end
481
482      ary << [key, val]
483    end
484    ary.each do |k, v|
485      k.force_encoding(enc)
486      k.scrub!
487      v.force_encoding(enc)
488      v.scrub!
489    end
490    ary
491  end
492
493  private
494=begin command for WEB_ENCODINGS_
495  curl https://encoding.spec.whatwg.org/encodings.json|
496  ruby -rjson -e 'H={}
497  h={
498    "shift_jis"=>"Windows-31J",
499    "euc-jp"=>"cp51932",
500    "iso-2022-jp"=>"cp50221",
501    "x-mac-cyrillic"=>"macCyrillic",
502  }
503  JSON($<.read).map{|x|x["encodings"]}.flatten.each{|x|
504    Encoding.find(n=h.fetch(n=x["name"].downcase,n))rescue next
505    x["labels"].each{|y|H[y]=n}
506  }
507  puts "{"
508  H.each{|k,v|puts %[  #{k.dump}=>#{v.dump},]}
509  puts "}"
510'
511=end
512  WEB_ENCODINGS_ = {
513    "unicode-1-1-utf-8"=>"utf-8",
514    "utf-8"=>"utf-8",
515    "utf8"=>"utf-8",
516    "866"=>"ibm866",
517    "cp866"=>"ibm866",
518    "csibm866"=>"ibm866",
519    "ibm866"=>"ibm866",
520    "csisolatin2"=>"iso-8859-2",
521    "iso-8859-2"=>"iso-8859-2",
522    "iso-ir-101"=>"iso-8859-2",
523    "iso8859-2"=>"iso-8859-2",
524    "iso88592"=>"iso-8859-2",
525    "iso_8859-2"=>"iso-8859-2",
526    "iso_8859-2:1987"=>"iso-8859-2",
527    "l2"=>"iso-8859-2",
528    "latin2"=>"iso-8859-2",
529    "csisolatin3"=>"iso-8859-3",
530    "iso-8859-3"=>"iso-8859-3",
531    "iso-ir-109"=>"iso-8859-3",
532    "iso8859-3"=>"iso-8859-3",
533    "iso88593"=>"iso-8859-3",
534    "iso_8859-3"=>"iso-8859-3",
535    "iso_8859-3:1988"=>"iso-8859-3",
536    "l3"=>"iso-8859-3",
537    "latin3"=>"iso-8859-3",
538    "csisolatin4"=>"iso-8859-4",
539    "iso-8859-4"=>"iso-8859-4",
540    "iso-ir-110"=>"iso-8859-4",
541    "iso8859-4"=>"iso-8859-4",
542    "iso88594"=>"iso-8859-4",
543    "iso_8859-4"=>"iso-8859-4",
544    "iso_8859-4:1988"=>"iso-8859-4",
545    "l4"=>"iso-8859-4",
546    "latin4"=>"iso-8859-4",
547    "csisolatincyrillic"=>"iso-8859-5",
548    "cyrillic"=>"iso-8859-5",
549    "iso-8859-5"=>"iso-8859-5",
550    "iso-ir-144"=>"iso-8859-5",
551    "iso8859-5"=>"iso-8859-5",
552    "iso88595"=>"iso-8859-5",
553    "iso_8859-5"=>"iso-8859-5",
554    "iso_8859-5:1988"=>"iso-8859-5",
555    "arabic"=>"iso-8859-6",
556    "asmo-708"=>"iso-8859-6",
557    "csiso88596e"=>"iso-8859-6",
558    "csiso88596i"=>"iso-8859-6",
559    "csisolatinarabic"=>"iso-8859-6",
560    "ecma-114"=>"iso-8859-6",
561    "iso-8859-6"=>"iso-8859-6",
562    "iso-8859-6-e"=>"iso-8859-6",
563    "iso-8859-6-i"=>"iso-8859-6",
564    "iso-ir-127"=>"iso-8859-6",
565    "iso8859-6"=>"iso-8859-6",
566    "iso88596"=>"iso-8859-6",
567    "iso_8859-6"=>"iso-8859-6",
568    "iso_8859-6:1987"=>"iso-8859-6",
569    "csisolatingreek"=>"iso-8859-7",
570    "ecma-118"=>"iso-8859-7",
571    "elot_928"=>"iso-8859-7",
572    "greek"=>"iso-8859-7",
573    "greek8"=>"iso-8859-7",
574    "iso-8859-7"=>"iso-8859-7",
575    "iso-ir-126"=>"iso-8859-7",
576    "iso8859-7"=>"iso-8859-7",
577    "iso88597"=>"iso-8859-7",
578    "iso_8859-7"=>"iso-8859-7",
579    "iso_8859-7:1987"=>"iso-8859-7",
580    "sun_eu_greek"=>"iso-8859-7",
581    "csiso88598e"=>"iso-8859-8",
582    "csisolatinhebrew"=>"iso-8859-8",
583    "hebrew"=>"iso-8859-8",
584    "iso-8859-8"=>"iso-8859-8",
585    "iso-8859-8-e"=>"iso-8859-8",
586    "iso-ir-138"=>"iso-8859-8",
587    "iso8859-8"=>"iso-8859-8",
588    "iso88598"=>"iso-8859-8",
589    "iso_8859-8"=>"iso-8859-8",
590    "iso_8859-8:1988"=>"iso-8859-8",
591    "visual"=>"iso-8859-8",
592    "csisolatin6"=>"iso-8859-10",
593    "iso-8859-10"=>"iso-8859-10",
594    "iso-ir-157"=>"iso-8859-10",
595    "iso8859-10"=>"iso-8859-10",
596    "iso885910"=>"iso-8859-10",
597    "l6"=>"iso-8859-10",
598    "latin6"=>"iso-8859-10",
599    "iso-8859-13"=>"iso-8859-13",
600    "iso8859-13"=>"iso-8859-13",
601    "iso885913"=>"iso-8859-13",
602    "iso-8859-14"=>"iso-8859-14",
603    "iso8859-14"=>"iso-8859-14",
604    "iso885914"=>"iso-8859-14",
605    "csisolatin9"=>"iso-8859-15",
606    "iso-8859-15"=>"iso-8859-15",
607    "iso8859-15"=>"iso-8859-15",
608    "iso885915"=>"iso-8859-15",
609    "iso_8859-15"=>"iso-8859-15",
610    "l9"=>"iso-8859-15",
611    "iso-8859-16"=>"iso-8859-16",
612    "cskoi8r"=>"koi8-r",
613    "koi"=>"koi8-r",
614    "koi8"=>"koi8-r",
615    "koi8-r"=>"koi8-r",
616    "koi8_r"=>"koi8-r",
617    "koi8-ru"=>"koi8-u",
618    "koi8-u"=>"koi8-u",
619    "dos-874"=>"windows-874",
620    "iso-8859-11"=>"windows-874",
621    "iso8859-11"=>"windows-874",
622    "iso885911"=>"windows-874",
623    "tis-620"=>"windows-874",
624    "windows-874"=>"windows-874",
625    "cp1250"=>"windows-1250",
626    "windows-1250"=>"windows-1250",
627    "x-cp1250"=>"windows-1250",
628    "cp1251"=>"windows-1251",
629    "windows-1251"=>"windows-1251",
630    "x-cp1251"=>"windows-1251",
631    "ansi_x3.4-1968"=>"windows-1252",
632    "ascii"=>"windows-1252",
633    "cp1252"=>"windows-1252",
634    "cp819"=>"windows-1252",
635    "csisolatin1"=>"windows-1252",
636    "ibm819"=>"windows-1252",
637    "iso-8859-1"=>"windows-1252",
638    "iso-ir-100"=>"windows-1252",
639    "iso8859-1"=>"windows-1252",
640    "iso88591"=>"windows-1252",
641    "iso_8859-1"=>"windows-1252",
642    "iso_8859-1:1987"=>"windows-1252",
643    "l1"=>"windows-1252",
644    "latin1"=>"windows-1252",
645    "us-ascii"=>"windows-1252",
646    "windows-1252"=>"windows-1252",
647    "x-cp1252"=>"windows-1252",
648    "cp1253"=>"windows-1253",
649    "windows-1253"=>"windows-1253",
650    "x-cp1253"=>"windows-1253",
651    "cp1254"=>"windows-1254",
652    "csisolatin5"=>"windows-1254",
653    "iso-8859-9"=>"windows-1254",
654    "iso-ir-148"=>"windows-1254",
655    "iso8859-9"=>"windows-1254",
656    "iso88599"=>"windows-1254",
657    "iso_8859-9"=>"windows-1254",
658    "iso_8859-9:1989"=>"windows-1254",
659    "l5"=>"windows-1254",
660    "latin5"=>"windows-1254",
661    "windows-1254"=>"windows-1254",
662    "x-cp1254"=>"windows-1254",
663    "cp1255"=>"windows-1255",
664    "windows-1255"=>"windows-1255",
665    "x-cp1255"=>"windows-1255",
666    "cp1256"=>"windows-1256",
667    "windows-1256"=>"windows-1256",
668    "x-cp1256"=>"windows-1256",
669    "cp1257"=>"windows-1257",
670    "windows-1257"=>"windows-1257",
671    "x-cp1257"=>"windows-1257",
672    "cp1258"=>"windows-1258",
673    "windows-1258"=>"windows-1258",
674    "x-cp1258"=>"windows-1258",
675    "x-mac-cyrillic"=>"macCyrillic",
676    "x-mac-ukrainian"=>"macCyrillic",
677    "chinese"=>"gbk",
678    "csgb2312"=>"gbk",
679    "csiso58gb231280"=>"gbk",
680    "gb2312"=>"gbk",
681    "gb_2312"=>"gbk",
682    "gb_2312-80"=>"gbk",
683    "gbk"=>"gbk",
684    "iso-ir-58"=>"gbk",
685    "x-gbk"=>"gbk",
686    "gb18030"=>"gb18030",
687    "big5"=>"big5",
688    "big5-hkscs"=>"big5",
689    "cn-big5"=>"big5",
690    "csbig5"=>"big5",
691    "x-x-big5"=>"big5",
692    "cseucpkdfmtjapanese"=>"cp51932",
693    "euc-jp"=>"cp51932",
694    "x-euc-jp"=>"cp51932",
695    "csiso2022jp"=>"cp50221",
696    "iso-2022-jp"=>"cp50221",
697    "csshiftjis"=>"Windows-31J",
698    "ms932"=>"Windows-31J",
699    "ms_kanji"=>"Windows-31J",
700    "shift-jis"=>"Windows-31J",
701    "shift_jis"=>"Windows-31J",
702    "sjis"=>"Windows-31J",
703    "windows-31j"=>"Windows-31J",
704    "x-sjis"=>"Windows-31J",
705    "cseuckr"=>"euc-kr",
706    "csksc56011987"=>"euc-kr",
707    "euc-kr"=>"euc-kr",
708    "iso-ir-149"=>"euc-kr",
709    "korean"=>"euc-kr",
710    "ks_c_5601-1987"=>"euc-kr",
711    "ks_c_5601-1989"=>"euc-kr",
712    "ksc5601"=>"euc-kr",
713    "ksc_5601"=>"euc-kr",
714    "windows-949"=>"euc-kr",
715    "utf-16be"=>"utf-16be",
716    "utf-16"=>"utf-16le",
717    "utf-16le"=>"utf-16le",
718  } # :nodoc:
719
720  # :nodoc:
721  # return encoding or nil
722  # http://encoding.spec.whatwg.org/#concept-encoding-get
723  def self.get_encoding(label)
724    Encoding.find(WEB_ENCODINGS_[label.to_str.strip.downcase]) rescue nil
725  end
726end # module URI
727
module Kernel

  #
  # Returns +uri+ converted to an URI object.
  #
  # A URI::Generic (or subclass) instance is returned as is. Anything that
  # String.try_convert accepts is parsed with URI.parse. Every other
  # argument raises ArgumentError.
  #
  def URI(uri)
    return uri if uri.is_a?(URI::Generic)

    str = String.try_convert(uri)
    unless str
      raise ArgumentError,
        "bad argument (expected URI object or URI string)"
    end

    URI.parse(str)
  end
  module_function :URI
end
745