# frozen_string_literal: true
#--
# = uri/common.rb
#
# Author:: Akira Yamada <akira@ruby-lang.org>
# Revision:: $Id: common.rb 65505 2018-11-02 17:52:33Z marcandre $
# License::
#   You can redistribute it and/or modify it under the same term as Ruby.
#
# See URI for general documentation
#

require_relative "rfc2396_parser"
require_relative "rfc3986_parser"

module URI
  REGEXP = RFC2396_REGEXP
  Parser = RFC2396_Parser
  RFC3986_PARSER = RFC3986_Parser.new

  # URI::Parser.new
  DEFAULT_PARSER = Parser.new
  # Publish every pattern of the default parser under REGEXP::PATTERN,
  # skipping names that are already defined there.
  DEFAULT_PARSER.pattern.each_pair do |sym, str|
    unless REGEXP::PATTERN.const_defined?(sym)
      REGEXP::PATTERN.const_set(sym, str)
    end
  end
  # Publish every regexp of the default parser as a constant of URI itself.
  DEFAULT_PARSER.regexp.each_pair do |sym, str|
    const_set(sym, str)
  end

  module Util # :nodoc:
    #
    # Builds a component Hash for +klass+ from +array_hash+.
    #
    # +array_hash+ is either
    #
    # * an Array holding one value for every entry of <tt>klass.component</tt>
    #   except the first one, in the same order, or
    # * a Hash, whose pairs are copied as they are.
    #
    # Values are cloned; a value whose +clone+ raises TypeError is stored
    # as is.  The <tt>:scheme</tt> entry is always set to the downcased last
    # segment of the name of +klass+.
    #
    # Raises ArgumentError when +array_hash+ is neither a Hash nor an Array
    # of the expected size.
    #
    def make_components_hash(klass, array_hash)
      tmp = {}
      if array_hash.kind_of?(Array) &&
          array_hash.size == klass.component.size - 1
        klass.component[1..-1].each_index do |i|
          begin
            tmp[klass.component[i + 1]] = array_hash[i].clone
          rescue TypeError
            tmp[klass.component[i + 1]] = array_hash[i]
          end
        end

      elsif array_hash.kind_of?(Hash)
        array_hash.each do |key, value|
          begin
            tmp[key] = value.clone
          rescue TypeError
            tmp[key] = value
          end
        end
      else
        raise ArgumentError,
          "expected Array of or Hash of components of #{klass} (#{klass.component[1..-1].join(', ')})"
      end
      tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase

      tmp
    end
    module_function :make_components_hash
  end

  # Module for escaping unsafe characters with codes.
  module Escape
    #
    # == Synopsis
    #
    #   URI.escape(str [, unsafe])
    #
    # == Args
    #
    # +str+::
    #   String to replace in.
    # +unsafe+::
    #   Regexp that matches all symbols that must be replaced with codes.
    #   By default uses <tt>UNSAFE</tt>.
    #   When this argument is a String, it represents a character set.
    #
    # == Description
    #
    # Escapes the string, replacing all unsafe characters with codes.
    #
    # This method is obsolete and should not be used. Instead, use
    # CGI.escape, URI.encode_www_form or URI.encode_www_form_component
    # depending on your specific use case.
    #
    # == Usage
    #
    #   require 'uri'
    #
    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
    #   # => "http://example.com/?a=%09%0D"
    #
    #   URI.unescape(enc_uri)
    #   # => "http://example.com/?a=\t\r"
    #
    #   URI.escape("@?@!", "!?")
    #   # => "@%3F@%21"
    #
    def escape(*arg)
      warn "URI.escape is obsolete", uplevel: 1 if $VERBOSE
      DEFAULT_PARSER.escape(*arg)
    end
    alias encode escape
    #
    # == Synopsis
    #
    #   URI.unescape(str)
    #
    # == Args
    #
    # +str+::
    #   String to unescape.
    #
    # == Description
    #
    # This method is obsolete and should not be used. Instead, use
    # CGI.unescape, URI.decode_www_form or URI.decode_www_form_component
    # depending on your specific use case.
    #
    # == Usage
    #
    #   require 'uri'
    #
    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
    #   # => "http://example.com/?a=%09%0D"
    #
    #   URI.unescape(enc_uri)
    #   # => "http://example.com/?a=\t\r"
    #
    def unescape(*arg)
      warn "URI.unescape is obsolete", uplevel: 1 if $VERBOSE
      DEFAULT_PARSER.unescape(*arg)
    end
    alias decode unescape
  end # module Escape

  extend Escape
  include REGEXP

  @@schemes = {}
  # Returns a Hash of the defined schemes.
  def self.scheme_list
    @@schemes
  end

  #
  # Base class for all URI exceptions.
  #
  class Error < StandardError; end
  #
  # Not a URI.
  #
  class InvalidURIError < Error; end
  #
  # Not a URI component.
  #
  class InvalidComponentError < Error; end
  #
  # URI is valid, bad usage is not.
  #
  class BadURIError < Error; end

  #
  # == Synopsis
  #
  #   URI::split(uri)
  #
  # == Args
  #
  # +uri+::
  #   String with URI.
  #
  # == Description
  #
  # Splits the string on following parts and returns array with result:
  #
  # * Scheme
  # * Userinfo
  # * Host
  # * Port
  # * Registry
  # * Path
  # * Opaque
  # * Query
  # * Fragment
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   URI.split("http://www.ruby-lang.org/")
  #   # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
  #
  def self.split(uri)
    RFC3986_PARSER.split(uri)
  end

  #
  # == Synopsis
  #
  #   URI::parse(uri_str)
  #
  # == Args
  #
  # +uri_str+::
  #   String with URI.
  #
  # == Description
  #
  # Creates one of the URI's subclasses instance from the string.
  #
  # == Raises
  #
  # URI::InvalidURIError::
  #   Raised if URI given is not a correct one.
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   uri = URI.parse("http://www.ruby-lang.org/")
  #   # => #<URI::HTTP http://www.ruby-lang.org/>
  #   uri.scheme
  #   # => "http"
  #   uri.host
  #   # => "www.ruby-lang.org"
  #
  # It's recommended to first ::escape the provided +uri_str+ if there are any
  # invalid URI characters.
  #
  def self.parse(uri)
    RFC3986_PARSER.parse(uri)
  end

  #
  # == Synopsis
  #
  #   URI::join(str[, str, ...])
  #
  # == Args
  #
  # +str+::
  #   String(s) to work with, will be converted to RFC3986 URIs before merging.
  #
  # == Description
  #
  # Joins URIs.
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   URI.join("http://example.com/","main.rbx")
  #   # => #<URI::HTTP http://example.com/main.rbx>
  #
  #   URI.join('http://example.com', 'foo')
  #   # => #<URI::HTTP http://example.com/foo>
  #
  #   URI.join('http://example.com', '/foo', '/bar')
  #   # => #<URI::HTTP http://example.com/bar>
  #
  #   URI.join('http://example.com', '/foo', 'bar')
  #   # => #<URI::HTTP http://example.com/bar>
  #
  #   URI.join('http://example.com', '/foo/', 'bar')
  #   # => #<URI::HTTP http://example.com/foo/bar>
  #
  def self.join(*str)
    RFC3986_PARSER.join(*str)
  end

  #
  # == Synopsis
  #
  #   URI::extract(str[, schemes][,&blk])
  #
  # == Args
  #
  # +str+::
  #   String to extract URIs from.
  # +schemes+::
  #   Limit URI matching to specific schemes.
  #
  # == Description
  #
  # Extracts URIs from a string. If block given, iterates through all matched URIs.
  # Returns nil if block given or array with matches.
  #
  # == Usage
  #
  #   require "uri"
  #
  #   URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
  #   # => ["http://foo.example.org/bla", "mailto:test@example.com"]
  #
  def self.extract(str, schemes = nil, &block)
    warn "URI.extract is obsolete", uplevel: 1 if $VERBOSE
    DEFAULT_PARSER.extract(str, schemes, &block)
  end

  #
  # == Synopsis
  #
  #   URI::regexp([match_schemes])
  #
  # == Args
  #
  # +match_schemes+::
  #   Array of schemes. If given, resulting regexp matches to URIs
  #   whose scheme is one of the match_schemes.
  #
  # == Description
  #
  # Returns a Regexp object which matches to URI-like strings.
  # The Regexp object returned by this method includes arbitrary
  # number of capture group (parentheses).  Never rely on its number.
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   # extract first URI from html_string
  #   html_string.slice(URI.regexp)
  #
  #   # remove ftp URIs
  #   html_string.sub(URI.regexp(['ftp']), '')
  #
  #   # You should not rely on the number of parentheses
  #   html_string.scan(URI.regexp) do |*matches|
  #     p $&
  #   end
  #
  def self.regexp(schemes = nil)
    warn "URI.regexp is obsolete", uplevel: 1 if $VERBOSE
    DEFAULT_PARSER.make_regexp(schemes)
  end

  # Maps every single-byte string to its upper-case "%XX" form; the space
  # is overridden to map to "+".
  TBLENCWWWCOMP_ = {} # :nodoc:
  256.times do |i|
    TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
  end
  TBLENCWWWCOMP_[' '] = '+'
  TBLENCWWWCOMP_.freeze
  # Maps "%XX" (all four upper/lower-case combinations of the two hex
  # digits) back to the single-byte string, and "+" to the space.
  TBLDECWWWCOMP_ = {} # :nodoc:
  256.times do |i|
    h, l = i>>4, i&15
    TBLDECWWWCOMP_[-('%%%X%X' % [h, l])] = -i.chr
    TBLDECWWWCOMP_[-('%%%x%X' % [h, l])] = -i.chr
    TBLDECWWWCOMP_[-('%%%X%x' % [h, l])] = -i.chr
    TBLDECWWWCOMP_[-('%%%x%x' % [h, l])] = -i.chr
  end
  TBLDECWWWCOMP_['+'] = ' '
  TBLDECWWWCOMP_.freeze

  # Encodes given +str+ to URL-encoded form data.
  #
  # This method doesn't convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP
  # (ASCII space) to + and converts others to %XX.
  #
  # If +enc+ is given, convert +str+ to the encoding before percent encoding.
  # A character that cannot be represented in +enc+ is replaced by its
  # decimal numeric character reference (<tt>&#NNN;</tt>), which is then
  # percent encoded like any other text.
  #
  # This is an implementation of
  # http://www.w3.org/TR/2013/CR-html5-20130806/forms.html#url-encoded-form-data.
  #
  # See URI.decode_www_form_component, URI.encode_www_form.
  def self.encode_www_form_component(str, enc=nil)
    str = str.to_s.dup
    if str.encoding != Encoding::ASCII_8BIT
      if enc && enc != Encoding::ASCII_8BIT
        str.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace)
        # "&#" followed by an interpolation: the first "#" is literal, so
        # the fallback is "&#NNN;" rather than "&NNN;".
        str.encode!(enc, fallback: ->(x){"&##{x.ord};"})
      end
      str.force_encoding(Encoding::ASCII_8BIT)
    end
    str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
    str.force_encoding(Encoding::US_ASCII)
  end

  # Decodes given +str+ of URL-encoded form data.
  #
  # This decodes + to SP.
  #
  # The decoded bytes are tagged with +enc+ (UTF-8 by default).
  #
  # Raises ArgumentError if +str+ contains a "%" that is not followed by
  # two hexadecimal digits.
  #
  # See URI.encode_www_form_component, URI.decode_www_form.
  def self.decode_www_form_component(str, enc=Encoding::UTF_8)
    raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
    str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
  end

  # Generates URL-encoded form data from given +enum+.
  #
  # This generates application/x-www-form-urlencoded data defined in HTML5
  # from given an Enumerable object.
  #
  # This internally uses URI.encode_www_form_component(str).
  #
  # This method doesn't convert the encoding of given items, so convert them
  # before calling this method if you want to send data as other than original
  # encoding or mixed encoding data. (Strings which are encoded in an HTML5
  # ASCII incompatible encoding are converted to UTF-8.)
  #
  # This method doesn't handle files.  When you send a file, use
  # multipart/form-data.
  #
  # This refers http://url.spec.whatwg.org/#concept-urlencoded-serializer
  #
  #    URI.encode_www_form([["q", "ruby"], ["lang", "en"]])
  #    #=> "q=ruby&lang=en"
  #    URI.encode_www_form("q" => "ruby", "lang" => "en")
  #    #=> "q=ruby&lang=en"
  #    URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en")
  #    #=> "q=ruby&q=perl&lang=en"
  #    URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]])
  #    #=> "q=ruby&q=perl&lang=en"
  #
  # See URI.encode_www_form_component, URI.decode_www_form.
  def self.encode_www_form(enum, enc=nil)
    enum.map do |k,v|
      if v.nil?
        # A nil value emits the bare key, without "=".
        encode_www_form_component(k, enc)
      elsif v.respond_to?(:to_ary)
        # An array-like value repeats the key once per element.
        v.to_ary.map do |w|
          str = encode_www_form_component(k, enc)
          unless w.nil?
            str << '='
            str << encode_www_form_component(w, enc)
          end
        end.join('&')
      else
        str = encode_www_form_component(k, enc)
        str << '='
        str << encode_www_form_component(v, enc)
      end
    end.join('&')
  end

  # Decodes URL-encoded form data from given +str+.
  #
  # This decodes application/x-www-form-urlencoded data
  # and returns an array of key-value arrays.
  #
  # This refers http://url.spec.whatwg.org/#concept-urlencoded-parser,
  # so this supports only &-separator, and doesn't support ;-separator.
  #
  # +enc+::
  #   Encoding (or encoding name) the decoded keys and values are tagged
  #   with; byte sequences invalid in it are scrubbed.
  # +separator+::
  #   String that separates the pairs in +str+.
  # +use__charset_+::
  #   When true, the first pair whose key is <tt>_charset_</tt> and whose
  #   value is a known encoding label replaces +enc+ for the whole result.
  # +isindex+::
  #   When true and the first item contains no "=", that item becomes the
  #   value of a pair with an empty key.
  #
  # Raises ArgumentError if +str+ is not ASCII only.
  #
  #    ary = URI.decode_www_form("a=1&a=2&b=3")
  #    ary                    #=> [['a', '1'], ['a', '2'], ['b', '3']]
  #    ary.assoc('a').last    #=> '1'
  #    ary.assoc('b').last    #=> '3'
  #    ary.rassoc('2').first  #=> 'a'
  #    Hash[ary]              #=> {"a"=>"2", "b"=>"3"}
  #
  # See URI.decode_www_form_component, URI.encode_www_form.
  def self.decode_www_form(str, enc=Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)
    raise ArgumentError, "the input of #{self.name}.#{__method__} must be ASCII only string" unless str.ascii_only?
    ary = []
    return ary if str.empty?
    enc = Encoding.find(enc)
    str.b.each_line(separator) do |string|
      string.chomp!(separator)
      key, sep, val = string.partition('=')
      if isindex
        if sep.empty?
          val = key
          key = +''
        end
        # Only the very first item is eligible for isindex handling.
        isindex = false
      end

      # The label is looked up on the still percent-encoded value; only the
      # first successful _charset_ pair is honoured.
      if use__charset_ && key == '_charset_' && (e = get_encoding(val))
        enc = e
        use__charset_ = false
      end

      key.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
      if val
        val.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
      else
        val = +''
      end

      ary << [key, val]
    end
    # Tag the strings only after the loop, because a _charset_ pair may
    # change +enc+ after earlier pairs have already been collected.
    ary.each do |k, v|
      k.force_encoding(enc)
      k.scrub!
      v.force_encoding(enc)
      v.scrub!
    end
    ary
  end

  private
=begin command for WEB_ENCODINGS_
  curl https://encoding.spec.whatwg.org/encodings.json|
  ruby -rjson -e 'H={}
  h={
    "shift_jis"=>"Windows-31J",
    "euc-jp"=>"cp51932",
    "iso-2022-jp"=>"cp50221",
    "x-mac-cyrillic"=>"macCyrillic",
  }
  JSON($<.read).map{|x|x["encodings"]}.flatten.each{|x|
    Encoding.find(n=h.fetch(n=x["name"].downcase,n))rescue next
    x["labels"].each{|y|H[y]=n}
  }
  puts "{"
  H.each{|k,v|puts %[    #{k.dump}=>#{v.dump},]}
  puts "}"
'
=end
  WEB_ENCODINGS_ = {
    "unicode-1-1-utf-8"=>"utf-8",
    "utf-8"=>"utf-8",
    "utf8"=>"utf-8",
    "866"=>"ibm866",
    "cp866"=>"ibm866",
    "csibm866"=>"ibm866",
    "ibm866"=>"ibm866",
    "csisolatin2"=>"iso-8859-2",
    "iso-8859-2"=>"iso-8859-2",
    "iso-ir-101"=>"iso-8859-2",
    "iso8859-2"=>"iso-8859-2",
    "iso88592"=>"iso-8859-2",
    "iso_8859-2"=>"iso-8859-2",
    "iso_8859-2:1987"=>"iso-8859-2",
    "l2"=>"iso-8859-2",
    "latin2"=>"iso-8859-2",
    "csisolatin3"=>"iso-8859-3",
    "iso-8859-3"=>"iso-8859-3",
    "iso-ir-109"=>"iso-8859-3",
    "iso8859-3"=>"iso-8859-3",
    "iso88593"=>"iso-8859-3",
    "iso_8859-3"=>"iso-8859-3",
    "iso_8859-3:1988"=>"iso-8859-3",
    "l3"=>"iso-8859-3",
    "latin3"=>"iso-8859-3",
    "csisolatin4"=>"iso-8859-4",
    "iso-8859-4"=>"iso-8859-4",
    "iso-ir-110"=>"iso-8859-4",
    "iso8859-4"=>"iso-8859-4",
    "iso88594"=>"iso-8859-4",
    "iso_8859-4"=>"iso-8859-4",
    "iso_8859-4:1988"=>"iso-8859-4",
    "l4"=>"iso-8859-4",
    "latin4"=>"iso-8859-4",
    "csisolatincyrillic"=>"iso-8859-5",
    "cyrillic"=>"iso-8859-5",
    "iso-8859-5"=>"iso-8859-5",
    "iso-ir-144"=>"iso-8859-5",
    "iso8859-5"=>"iso-8859-5",
    "iso88595"=>"iso-8859-5",
    "iso_8859-5"=>"iso-8859-5",
    "iso_8859-5:1988"=>"iso-8859-5",
    "arabic"=>"iso-8859-6",
    "asmo-708"=>"iso-8859-6",
    "csiso88596e"=>"iso-8859-6",
    "csiso88596i"=>"iso-8859-6",
    "csisolatinarabic"=>"iso-8859-6",
    "ecma-114"=>"iso-8859-6",
    "iso-8859-6"=>"iso-8859-6",
    "iso-8859-6-e"=>"iso-8859-6",
    "iso-8859-6-i"=>"iso-8859-6",
    "iso-ir-127"=>"iso-8859-6",
    "iso8859-6"=>"iso-8859-6",
    "iso88596"=>"iso-8859-6",
    "iso_8859-6"=>"iso-8859-6",
    "iso_8859-6:1987"=>"iso-8859-6",
    "csisolatingreek"=>"iso-8859-7",
    "ecma-118"=>"iso-8859-7",
    "elot_928"=>"iso-8859-7",
    "greek"=>"iso-8859-7",
    "greek8"=>"iso-8859-7",
    "iso-8859-7"=>"iso-8859-7",
    "iso-ir-126"=>"iso-8859-7",
    "iso8859-7"=>"iso-8859-7",
    "iso88597"=>"iso-8859-7",
    "iso_8859-7"=>"iso-8859-7",
    "iso_8859-7:1987"=>"iso-8859-7",
    "sun_eu_greek"=>"iso-8859-7",
    "csiso88598e"=>"iso-8859-8",
    "csisolatinhebrew"=>"iso-8859-8",
    "hebrew"=>"iso-8859-8",
    "iso-8859-8"=>"iso-8859-8",
    "iso-8859-8-e"=>"iso-8859-8",
    "iso-ir-138"=>"iso-8859-8",
    "iso8859-8"=>"iso-8859-8",
    "iso88598"=>"iso-8859-8",
    "iso_8859-8"=>"iso-8859-8",
    "iso_8859-8:1988"=>"iso-8859-8",
    "visual"=>"iso-8859-8",
    "csisolatin6"=>"iso-8859-10",
    "iso-8859-10"=>"iso-8859-10",
    "iso-ir-157"=>"iso-8859-10",
    "iso8859-10"=>"iso-8859-10",
    "iso885910"=>"iso-8859-10",
    "l6"=>"iso-8859-10",
    "latin6"=>"iso-8859-10",
    "iso-8859-13"=>"iso-8859-13",
    "iso8859-13"=>"iso-8859-13",
    "iso885913"=>"iso-8859-13",
    "iso-8859-14"=>"iso-8859-14",
    "iso8859-14"=>"iso-8859-14",
    "iso885914"=>"iso-8859-14",
    "csisolatin9"=>"iso-8859-15",
    "iso-8859-15"=>"iso-8859-15",
    "iso8859-15"=>"iso-8859-15",
    "iso885915"=>"iso-8859-15",
    "iso_8859-15"=>"iso-8859-15",
    "l9"=>"iso-8859-15",
    "iso-8859-16"=>"iso-8859-16",
    "cskoi8r"=>"koi8-r",
    "koi"=>"koi8-r",
    "koi8"=>"koi8-r",
    "koi8-r"=>"koi8-r",
    "koi8_r"=>"koi8-r",
    "koi8-ru"=>"koi8-u",
    "koi8-u"=>"koi8-u",
    "dos-874"=>"windows-874",
    "iso-8859-11"=>"windows-874",
    "iso8859-11"=>"windows-874",
    "iso885911"=>"windows-874",
    "tis-620"=>"windows-874",
    "windows-874"=>"windows-874",
    "cp1250"=>"windows-1250",
    "windows-1250"=>"windows-1250",
    "x-cp1250"=>"windows-1250",
    "cp1251"=>"windows-1251",
    "windows-1251"=>"windows-1251",
    "x-cp1251"=>"windows-1251",
    "ansi_x3.4-1968"=>"windows-1252",
    "ascii"=>"windows-1252",
    "cp1252"=>"windows-1252",
    "cp819"=>"windows-1252",
    "csisolatin1"=>"windows-1252",
    "ibm819"=>"windows-1252",
    "iso-8859-1"=>"windows-1252",
    "iso-ir-100"=>"windows-1252",
    "iso8859-1"=>"windows-1252",
    "iso88591"=>"windows-1252",
    "iso_8859-1"=>"windows-1252",
    "iso_8859-1:1987"=>"windows-1252",
    "l1"=>"windows-1252",
    "latin1"=>"windows-1252",
    "us-ascii"=>"windows-1252",
    "windows-1252"=>"windows-1252",
    "x-cp1252"=>"windows-1252",
    "cp1253"=>"windows-1253",
    "windows-1253"=>"windows-1253",
    "x-cp1253"=>"windows-1253",
    "cp1254"=>"windows-1254",
    "csisolatin5"=>"windows-1254",
    "iso-8859-9"=>"windows-1254",
    "iso-ir-148"=>"windows-1254",
    "iso8859-9"=>"windows-1254",
    "iso88599"=>"windows-1254",
    "iso_8859-9"=>"windows-1254",
    "iso_8859-9:1989"=>"windows-1254",
    "l5"=>"windows-1254",
    "latin5"=>"windows-1254",
    "windows-1254"=>"windows-1254",
    "x-cp1254"=>"windows-1254",
    "cp1255"=>"windows-1255",
    "windows-1255"=>"windows-1255",
    "x-cp1255"=>"windows-1255",
    "cp1256"=>"windows-1256",
    "windows-1256"=>"windows-1256",
    "x-cp1256"=>"windows-1256",
    "cp1257"=>"windows-1257",
    "windows-1257"=>"windows-1257",
    "x-cp1257"=>"windows-1257",
    "cp1258"=>"windows-1258",
    "windows-1258"=>"windows-1258",
    "x-cp1258"=>"windows-1258",
    "x-mac-cyrillic"=>"macCyrillic",
    "x-mac-ukrainian"=>"macCyrillic",
    "chinese"=>"gbk",
    "csgb2312"=>"gbk",
    "csiso58gb231280"=>"gbk",
    "gb2312"=>"gbk",
    "gb_2312"=>"gbk",
    "gb_2312-80"=>"gbk",
    "gbk"=>"gbk",
    "iso-ir-58"=>"gbk",
    "x-gbk"=>"gbk",
    "gb18030"=>"gb18030",
    "big5"=>"big5",
    "big5-hkscs"=>"big5",
    "cn-big5"=>"big5",
    "csbig5"=>"big5",
    "x-x-big5"=>"big5",
    "cseucpkdfmtjapanese"=>"cp51932",
    "euc-jp"=>"cp51932",
    "x-euc-jp"=>"cp51932",
    "csiso2022jp"=>"cp50221",
    "iso-2022-jp"=>"cp50221",
    "csshiftjis"=>"Windows-31J",
    "ms932"=>"Windows-31J",
    "ms_kanji"=>"Windows-31J",
    "shift-jis"=>"Windows-31J",
    "shift_jis"=>"Windows-31J",
    "sjis"=>"Windows-31J",
    "windows-31j"=>"Windows-31J",
    "x-sjis"=>"Windows-31J",
    "cseuckr"=>"euc-kr",
    "csksc56011987"=>"euc-kr",
    "euc-kr"=>"euc-kr",
    "iso-ir-149"=>"euc-kr",
    "korean"=>"euc-kr",
    "ks_c_5601-1987"=>"euc-kr",
    "ks_c_5601-1989"=>"euc-kr",
    "ksc5601"=>"euc-kr",
    "ksc_5601"=>"euc-kr",
    "windows-949"=>"euc-kr",
    "utf-16be"=>"utf-16be",
    "utf-16"=>"utf-16le",
    "utf-16le"=>"utf-16le",
  } # :nodoc:

  # :nodoc:
  # return encoding or nil
  # http://encoding.spec.whatwg.org/#concept-encoding-get
  def self.get_encoding(label)
    Encoding.find(WEB_ENCODINGS_[label.to_str.strip.downcase])
  rescue StandardError
    # Unknown label, label without #to_str, or a name Encoding.find does
    # not know: all are reported as "no encoding".
    nil
  end
end # module URI

module Kernel

  #
  # Returns +uri+ converted to an URI object.
  #
  # A URI::Generic is returned as is; anything String.try_convert accepts
  # is parsed with URI.parse.  Any other argument raises ArgumentError.
  #
  def URI(uri)
    if uri.is_a?(URI::Generic)
      uri
    elsif uri = String.try_convert(uri)
      URI.parse(uri)
    else
      raise ArgumentError,
        "bad argument (expected URI object or URI string)"
    end
  end
  module_function :URI
end