1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2017-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%% 21%% [RFC 3986, Chapter 2.2. Reserved Characters] 22%% 23%% reserved = gen-delims / sub-delims 24%% 25%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 26%% 27%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 28%% / "*" / "+" / "," / ";" / "=" 29%% 30%% 31%% [RFC 3986, Chapter 2.3. Unreserved Characters] 32%% 33%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 34%% 35%% 36%% [RFC 3986, Chapter 3. Syntax Components] 37%% 38%% The generic URI syntax consists of a hierarchical sequence of 39%% components referred to as the scheme, authority, path, query, and 40%% fragment. 41%% 42%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] 43%% 44%% hier-part = "//" authority path-abempty 45%% / path-absolute 46%% / path-rootless 47%% / path-empty 48%% 49%% The scheme and path components are required, though the path may be 50%% empty (no characters). When authority is present, the path must 51%% either be empty or begin with a slash ("/") character. When 52%% authority is not present, the path cannot begin with two slash 53%% characters ("//"). These restrictions result in five different ABNF 54%% rules for a path (Section 3.3), only one of which will match any 55%% given URI reference. 
56%% 57%% The following are two example URIs and their component parts: 58%% 59%% foo://example.com:8042/over/there?name=ferret#nose 60%% \_/ \______________/\_________/ \_________/ \__/ 61%% | | | | | 62%% scheme authority path query fragment 63%% | _____________________|__ 64%% / \ / \ 65%% urn:example:animal:ferret:nose 66%% 67%% 68%% [RFC 3986, Chapter 3.1. Scheme] 69%% 70%% Each URI begins with a scheme name that refers to a specification for 71%% assigning identifiers within that scheme. 72%% 73%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 74%% 75%% 76%% [RFC 3986, Chapter 3.2. Authority] 77%% 78%% Many URI schemes include a hierarchical element for a naming 79%% authority so that governance of the name space defined by the 80%% remainder of the URI is delegated to that authority (which may, in 81%% turn, delegate it further). 82%% 83%% authority = [ userinfo "@" ] host [ ":" port ] 84%% 85%% 86%% [RFC 3986, Chapter 3.2.1. User Information] 87%% 88%% The userinfo subcomponent may consist of a user name and, optionally, 89%% scheme-specific information about how to gain authorization to access 90%% the resource. The user information, if present, is followed by a 91%% commercial at-sign ("@") that delimits it from the host. 92%% 93%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 94%% 95%% 96%% [RFC 3986, Chapter 3.2.2. Host] 97%% 98%% The host subcomponent of authority is identified by an IP literal 99%% encapsulated within square brackets, an IPv4 address in dotted- 100%% decimal form, or a registered name. 101%% 102%% host = IP-literal / IPv4address / reg-name 103%% 104%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" 105%% 106%% IPvFuture = "v" 1*HEXDIG "." 
1*( unreserved / sub-delims / ":" ) 107%% 108%% IPv6address = 6( h16 ":" ) ls32 109%% / "::" 5( h16 ":" ) ls32 110%% / [ h16 ] "::" 4( h16 ":" ) ls32 111%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 112%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 113%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 114%% / [ *4( h16 ":" ) h16 ] "::" ls32 115%% / [ *5( h16 ":" ) h16 ] "::" h16 116%% / [ *6( h16 ":" ) h16 ] "::" 117%% 118%% ls32 = ( h16 ":" h16 ) / IPv4address 119%% ; least-significant 32 bits of address 120%% 121%% h16 = 1*4HEXDIG 122%% ; 16 bits of address represented in hexadecimal 123%% 124%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 125%% 126%% dec-octet = DIGIT ; 0-9 127%% / %x31-39 DIGIT ; 10-99 128%% / "1" 2DIGIT ; 100-199 129%% / "2" %x30-34 DIGIT ; 200-249 130%% / "25" %x30-35 ; 250-255 131%% 132%% reg-name = *( unreserved / pct-encoded / sub-delims ) 133%% 134%% 135%% [RFC 3986, Chapter 3.2.3. Port] 136%% 137%% The port subcomponent of authority is designated by an optional port 138%% number in decimal following the host and delimited from it by a 139%% single colon (":") character. 140%% 141%% port = *DIGIT 142%% 143%% 144%% [RFC 3986, Chapter 3.3. Path] 145%% 146%% The path component contains data, usually organized in hierarchical 147%% form, that, along with data in the non-hierarchical query component 148%% (Section 3.4), serves to identify a resource within the scope of the 149%% URI's scheme and naming authority (if any). The path is terminated 150%% by the first question mark ("?") or number sign ("#") character, or 151%% by the end of the URI.
152%% 153%% path = path-abempty ; begins with "/" or is empty 154%% / path-absolute ; begins with "/" but not "//" 155%% / path-noscheme ; begins with a non-colon segment 156%% / path-rootless ; begins with a segment 157%% / path-empty ; zero characters 158%% 159%% path-abempty = *( "/" segment ) 160%% path-absolute = "/" [ segment-nz *( "/" segment ) ] 161%% path-noscheme = segment-nz-nc *( "/" segment ) 162%% path-rootless = segment-nz *( "/" segment ) 163%% path-empty = 0<pchar> 164%% segment = *pchar 165%% segment-nz = 1*pchar 166%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 167%% ; non-zero-length segment without any colon ":" 168%% 169%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 170%% 171%% 172%% [RFC 3986, Chapter 3.4. Query] 173%% 174%% The query component contains non-hierarchical data that, along with 175%% data in the path component (Section 3.3), serves to identify a 176%% resource within the scope of the URI's scheme and naming authority 177%% (if any). The query component is indicated by the first question 178%% mark ("?") character and terminated by a number sign ("#") character 179%% or by the end of the URI. 180%% 181%% query = *( pchar / "/" / "?" ) 182%% 183%% 184%% [RFC 3986, Chapter 3.5. Fragment] 185%% 186%% The fragment identifier component of a URI allows indirect 187%% identification of a secondary resource by reference to a primary 188%% resource and additional identifying information. 189%% 190%% fragment = *( pchar / "/" / "?" ) 191%% 192%% 193%% [RFC 3986, Chapter 4.1. URI Reference] 194%% 195%% URI-reference is used to denote the most common usage of a resource 196%% identifier. 197%% 198%% URI-reference = URI / relative-ref 199%% 200%% 201%% [RFC 3986, Chapter 4.2. Relative Reference] 202%% 203%% A relative reference takes advantage of the hierarchical syntax 204%% (Section 1.2.3) to express a URI reference relative to the name space 205%% of another hierarchical URI. 
%%
%%    relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
%%
%%    relative-part = "//" authority path-abempty
%%                  / path-absolute
%%                  / path-noscheme
%%                  / path-empty
%%
%%
%% [RFC 3986, Chapter 4.3. Absolute URI]
%%
%% Some protocol elements allow only the absolute form of a URI without
%% a fragment identifier.  For example, defining a base URI for later
%% use by relative references calls for an absolute-URI syntax rule that
%% does not allow a fragment.
%%
%%    absolute-URI  = scheme ":" hier-part [ "?" query ]
%%
-module(uri_string).

%%-------------------------------------------------------------------------
%% External API
%%-------------------------------------------------------------------------
-export([allowed_characters/0,
         compose_query/1,
         compose_query/2,
         dissect_query/1,
         normalize/1,
         normalize/2,
         percent_decode/1,
         parse/1,
         recompose/1,
         resolve/2,
         resolve/3,
         transcode/2]).
-export_type([error/0,
              uri_map/0,
              uri_string/0]).


%%-------------------------------------------------------------------------
%% Internal API
%%-------------------------------------------------------------------------
-export([is_host/1, is_path/1]).  % suppress warnings


%%-------------------------------------------------------------------------
%% Macros
%%-------------------------------------------------------------------------
%% CHAR(Char): a binary holding exactly the UTF-8 encoding of Char.
-define(CHAR(Char), <<Char/utf8>>).
%% STRING_EMPTY: the empty binary.
-define(STRING_EMPTY, <<>>).
%% STRING(MatchStr): a binary made up of the binary MatchStr.
-define(STRING(MatchStr), <<MatchStr/binary>>).
%% STRING_REST(MatchStr, Rest): UTF-8 encoded MatchStr (a character or a
%% string literal) followed by the remaining binary Rest.  Usable both as
%% a pattern and as a constructor.
-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).

%% DEC2HEX(X): map an integer 0..15 to its upper-case hexadecimal digit
%% character.  The 'if' has no branch for other values, so they raise
%% an if_clause error.
-define(DEC2HEX(X),
        if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0;
           ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10
        end).
%% HEX2DEC(X): map a hexadecimal digit character ($0..$9, $A..$F or
%% $a..$f) to its integer value 0..15.  The 'if' has no branch for other
%% characters, so they raise an if_clause error.
-define(HEX2DEC(X),
        if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0;
           ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10;
           ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10
        end).


%%%=========================================================================
%%%  API
%%%=========================================================================

%%-------------------------------------------------------------------------
%% URI compliant with RFC 3986
%% ASCII %x21 - %x7A ("!" - "z") except
%%   %x22     "     double quote
%%   %x3C     <     less than
%%   %x3E     >     greater than
%%   %x5C     \     backslash
%%   %x5E     ^     caret / circumflex
%%   %x60     `     grave / accent
%%-------------------------------------------------------------------------
-type uri_string() :: iodata().
%% Error triple returned by the API functions: a reason atom plus the
%% data associated with the failure.
-type error() :: {error, atom(), term()}.


%%-------------------------------------------------------------------------
%% RFC 3986, Chapter 3. Syntax Components
%%-------------------------------------------------------------------------
%% Every key is optional ("=>"); port is the only non-chardata field.
-type uri_map() ::
  #{fragment => unicode:chardata(),
    host => unicode:chardata(),
    path => unicode:chardata(),
    port => non_neg_integer() | undefined,
    query => unicode:chardata(),
    scheme => unicode:chardata(),
    userinfo => unicode:chardata()}.


%%-------------------------------------------------------------------------
%% Normalize URIs
%%-------------------------------------------------------------------------
%% normalize/1 is normalize/2 with an empty option list.
-spec normalize(URI) -> NormalizedURI when
      URI :: uri_string() | uri_map(),
      NormalizedURI :: uri_string()
                     | error().
normalize(URIMap) ->
    normalize(URIMap, []).


-spec normalize(URI, Options) -> NormalizedURI when
      URI :: uri_string() | uri_map(),
      Options :: [return_map],
      NormalizedURI :: uri_string() | uri_map()
                     | error().
%% Normalize a URI given either as a map or as a string.
%%
%% With Options = [] the normalized map is recomposed into a string;
%% with Options = [return_map] the normalized map itself is returned.
%% String input is parsed first and a parse error is returned as is.
%% Errors thrown as {error, Atom, RestData} while normalizing are
%% converted into return values.
normalize(URIMap, []) when is_map(URIMap) ->
    try recompose(normalize_map(URIMap))
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end;
normalize(URIMap, [return_map]) when is_map(URIMap) ->
    try normalize_map(URIMap)
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end;
normalize(URIString, []) ->
    case parse(URIString) of
        Value when is_map(Value) ->
            try recompose(normalize_map(Value))
            catch
                throw:{error, Atom, RestData} -> {error, Atom, RestData}
            end;
        Error ->
            Error
    end;
normalize(URIString, [return_map]) ->
    case parse(URIString) of
        Value when is_map(Value) ->
            try normalize_map(Value)
            catch
                throw:{error, Atom, RestData} -> {error, Atom, RestData}
            end;
        Error ->
            Error
    end.


%%-------------------------------------------------------------------------
%% Parse URIs
%%-------------------------------------------------------------------------
%% Parse a URI string into a uri_map().
%%
%% Binary input yields a map with binary fields; list input is converted
%% to a binary, parsed, and the binary fields of the result are converted
%% back to lists.  Errors thrown by the parser as {error, Atom, RestData}
%% are returned as values.
-spec parse(URIString) -> URIMap when
      URIString :: uri_string(),
      URIMap :: uri_map()
              | error().
parse(URIString) when is_binary(URIString) ->
    try parse_uri_reference(URIString, #{})
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end;
parse(URIString) when is_list(URIString) ->
    %% unicode:characters_to_binary/1 returns an {error, _, _} or
    %% {incomplete, _, _} tuple for malformed character data.  Report it
    %% as an error() instead of handing the tuple to the binary parser.
    case unicode:characters_to_binary(URIString) of
        Binary when is_binary(Binary) ->
            try
                Map = parse_uri_reference(Binary, #{}),
                convert_mapfields_to_list(Map)
            catch
                throw:{error, Atom, RestData} -> {error, Atom, RestData}
            end;
        {error, _Converted, RestData} ->
            {error, invalid_input, RestData};
        {incomplete, _Converted, RestData} ->
            {error, invalid_input, RestData}
    end.


%%-------------------------------------------------------------------------
%% Recompose URIs
%%-------------------------------------------------------------------------
-spec recompose(URIMap) -> URIString when
      URIMap :: uri_map(),
      URIString :: uri_string()
                 | error().
%% Build a URI string from a URI map.
%%
%% A map rejected by is_valid_map/1 yields {error, invalid_map, Map}.
%% Otherwise the components are added one after the other, starting from
%% the accumulator 'empty': scheme, userinfo, host, port, path, query and
%% fragment.  An {error, Atom, RestData} thrown by any step is returned
%% as a value.
recompose(Map) ->
    case is_valid_map(Map) of
        true ->
            Steps = [fun update_scheme/2,
                     fun update_userinfo/2,
                     fun update_host/2,
                     fun update_port/2,
                     fun update_path/2,
                     fun update_query/2,
                     fun update_fragment/2],
            try
                lists:foldl(fun(Step, Acc) -> Step(Map, Acc) end, empty, Steps)
            catch
                throw:{error, Atom, RestData} -> {error, Atom, RestData}
            end;
        false ->
            {error, invalid_map, Map}
    end.


%%-------------------------------------------------------------------------
%% Resolve URIs
%%-------------------------------------------------------------------------
%% resolve/2 is resolve/3 with an empty option list.
-spec resolve(RefURI, BaseURI) -> TargetURI when
      RefURI :: uri_string() | uri_map(),
      BaseURI :: uri_string() | uri_map(),
      TargetURI :: uri_string()
                 | error().
resolve(URIMap, BaseURIMap) ->
    resolve(URIMap, BaseURIMap, []).


%% Resolve a reference URI against a base URI.
%%
%% A reference given as a string is parsed first; a parse error is
%% returned unchanged.  When resolve_map/2 produces a map, the result is
%% recomposed into a string for Options = [] and returned as a map for
%% Options = [return_map].  Any non-map result of resolve_map/2 is
%% returned as is.
-spec resolve(RefURI, BaseURI, Options) -> TargetURI when
      RefURI :: uri_string() | uri_map(),
      BaseURI :: uri_string() | uri_map(),
      Options :: [return_map],
      TargetURI :: uri_string() | uri_map()
                 | error().
resolve(URIMap, BaseURIMap, Options) when is_map(URIMap) ->
    case resolve_map(URIMap, BaseURIMap) of
        Resolved when is_map(Resolved) ->
            case Options of
                [] ->
                    recompose(Resolved);
                [return_map] ->
                    Resolved
            end;
        Failure ->
            Failure
    end;
resolve(URIString, BaseURIMap, Options) ->
    case parse(URIString) of
        Parsed when is_map(Parsed) ->
            resolve(Parsed, BaseURIMap, Options);
        Failure ->
            Failure
    end.


%%-------------------------------------------------------------------------
%% Transcode URIs
%%-------------------------------------------------------------------------
-spec transcode(URIString, Options) -> Result when
      URIString :: uri_string(),
      Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}],
      Result :: uri_string()
              | error().
%% Transcode a URI string.
%%
%% Options may carry {in_encoding, Enc} and {out_encoding, Enc}; both
%% default to utf8.  Binary input produces a binary encoded in the output
%% encoding, list input produces the result of transcode/4 directly.
%% An {error, Atom, RestData} thrown during input conversion or
%% transcoding is returned as a value.
transcode(URIString, Options) when is_binary(URIString) ->
    try
        InEnc = proplists:get_value(in_encoding, Options, utf8),
        OutEnc = proplists:get_value(out_encoding, Options, utf8),
        List = convert_to_list(URIString, InEnc),
        Output = transcode(List, [], InEnc, OutEnc),
        convert_to_binary(Output, utf8, OutEnc)
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end;
transcode(URIString, Options) when is_list(URIString) ->
    InEnc = proplists:get_value(in_encoding, Options, utf8),
    OutEnc = proplists:get_value(out_encoding, Options, utf8),
    %% Flattening is part of the protected section, exactly like the
    %% input conversion of the binary clause, so that an error thrown
    %% while flattening is reported as an error() value as well.
    try
        Flattened = flatten_list(URIString, InEnc),
        transcode(Flattened, [], InEnc, OutEnc)
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end.


%%-------------------------------------------------------------------------
%% Misc
%%-------------------------------------------------------------------------
%% List, per URI component, the characters in the range 0..127 that are
%% accepted by the corresponding is_*/1 predicate.
-spec allowed_characters() -> [{atom(), list()}].
allowed_characters() ->
    Input = lists:seq(0,127),
    Allowed = fun(Pred) -> lists:filter(Pred, Input) end,
    [{scheme, Allowed(fun is_scheme/1)},
     {userinfo, Allowed(fun is_userinfo/1)},
     {host, Allowed(fun is_host/1)},
     {ipv4, Allowed(fun is_ipv4/1)},
     {ipv6, Allowed(fun is_ipv6/1)},
     {regname, Allowed(fun is_reg_name/1)},
     {path, Allowed(fun is_path/1)},
     {query, Allowed(fun is_query/1)},
     {fragment, Allowed(fun is_fragment/1)},
     {reserved, Allowed(fun is_reserved/1)},
     {unreserved, Allowed(fun is_unreserved/1)}].
%% Decode percent-encoded data.
%%
%% For a map, the userinfo, host, path, query and fragment values are
%% decoded with raw_decode/1; all other values are left untouched.  When
%% decoding a field fails, {error, {invalid, {Key, {Reason, Input}}}} is
%% returned.  For a list or binary the result of raw_decode/1 is returned
%% directly.
-spec percent_decode(URI) -> Result when
      URI :: uri_string() | uri_map(),
      Result :: uri_string() |
                uri_map() |
                {error, {invalid, {atom(), {term(), term()}}}}.
percent_decode(URIMap) when is_map(URIMap)->
    DecodeField =
        fun (Key, Value) when Key =:= userinfo orelse Key =:= host orelse
                              Key =:= path orelse Key =:= query orelse
                              Key =:= fragment ->
                case raw_decode(Value) of
                    {error, Reason, Input} ->
                        throw({error, {invalid, {Key, {Reason, Input}}}});
                    Decoded ->
                        Decoded
                end;
            (_Key, Value) ->
                %% port and scheme are never decoded
                Value
        end,
    try
        maps:map(DecodeField, URIMap)
    catch
        throw:Thrown -> Thrown
    end;
percent_decode(URI) when is_list(URI); is_binary(URI) ->
    raw_decode(URI).

%%-------------------------------------------------------------------------
%% Functions for working with the query part of a URI as a list
%% of key/value pairs.
%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
%% HTML 5.0 - 4.10.22.6 URL-encoded form data - non UTF-8
%%-------------------------------------------------------------------------

%%-------------------------------------------------------------------------
%% Compose urlencoded query string from a list of unescaped key/value pairs.
%% (application/x-www-form-urlencoded encoding algorithm)
%%-------------------------------------------------------------------------
%% compose_query/1 is compose_query/2 with the option {encoding, utf8}.
-spec compose_query(QueryList) -> QueryString when
      QueryList :: [{unicode:chardata(), unicode:chardata() | true}],
      QueryString :: uri_string()
                   | error().
compose_query(QueryList) ->
    compose_query(QueryList, [{encoding, utf8}]).


-spec compose_query(QueryList, Options) -> QueryString when
      QueryList :: [{unicode:chardata(), unicode:chardata() | true}],
      Options :: [{encoding, atom()}],
      QueryString :: uri_string()
                   | error().
%% Compose a query string from key/value pairs.  An empty list yields [];
%% an {error, Atom, RestData} thrown while encoding is returned as a value.
compose_query([],_Options) ->
    [];
compose_query(List, Options) ->
    try compose_query(List, Options, false, <<>>) of
        Query -> Query
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end.
%%
%% Worker: the query is accumulated as a binary.  SawList records whether
%% any key or value was given as a list; if so the finished binary is
%% converted to a list, otherwise it is returned as a binary.
compose_query([], _Options, true, Acc) ->
    convert_to_list(Acc, utf8);
compose_query([], _Options, false, Acc) ->
    Acc;
compose_query([{Key,true}|Rest], Options, SawList, Acc) ->
    %% a pair with the value 'true' contributes the key only
    Separator = get_separator(Rest),
    EncKey = form_urlencode(Key, Options),
    SawList1 = SawList orelse is_list(Key),
    compose_query(Rest, Options, SawList1,
                  <<Acc/binary,EncKey/binary,Separator/binary>>);
compose_query([{Key,Value}|Rest], Options, SawList, Acc) ->
    Separator = get_separator(Rest),
    EncKey = form_urlencode(Key, Options),
    EncValue = form_urlencode(Value, Options),
    SawList1 = SawList orelse is_list(Key) orelse is_list(Value),
    compose_query(Rest, Options, SawList1,
                  <<Acc/binary,EncKey/binary,"=",EncValue/binary,Separator/binary>>).


%%-------------------------------------------------------------------------
%% Dissect a query string into a list of unescaped key/value pairs.
%% (application/x-www-form-urlencoded decoding algorithm)
%%-------------------------------------------------------------------------
%% Empty input yields [].  List input is first converted to a UTF-8
%% binary and dissected with the list flag set to true; any other input
%% is dissected directly with the flag set to false.  An
%% {error, Atom, RestData} thrown on the way is returned as a value.
-spec dissect_query(QueryString) -> QueryList when
      QueryString :: uri_string(),
      QueryList :: [{unicode:chardata(), unicode:chardata() | true}]
                 | error().
dissect_query(<<>>) ->
    [];
dissect_query([]) ->
    [];
dissect_query(QueryString) when is_list(QueryString) ->
    try
        dissect_query_key(convert_to_binary(QueryString, utf8, utf8),
                          true, [], <<>>, <<>>)
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end;
dissect_query(QueryString) ->
    try dissect_query_key(QueryString, false, [], <<>>, <<>>) of
        Pairs -> Pairs
    catch
        throw:{error, Atom, RestData} -> {error, Atom, RestData}
    end.
%%%========================================================================
%%% Internal functions
%%%========================================================================

%%-------------------------------------------------------------------------
%% Converts Map fields to lists: every binary value becomes a character
%% list, values of any other type are kept as they are.
%%-------------------------------------------------------------------------
convert_mapfields_to_list(Map) ->
    ToList = fun (_Key, Value) ->
                     case is_binary(Value) of
                         true -> unicode:characters_to_list(Value);
                         false -> Value
                     end
             end,
    maps:map(ToList, Map).


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 4.1. URI Reference]
%%
%% URI-reference is used to denote the most common usage of a resource
%% identifier.
%%
%%    URI-reference = URI / relative-ref
%%-------------------------------------------------------------------------
%% The empty string parses to a map with an empty path.  Otherwise the
%% input is first parsed as a URI starting with a scheme; when that
%% attempt throws a 3-tuple, it is parsed as a relative reference.
-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
parse_uri_reference(<<>>, _) ->
    #{path => <<>>};
parse_uri_reference(URIString, URI) ->
    try parse_scheme_start(URIString, URI) of
        Parsed -> Parsed
    catch
        throw:{_,_,_} ->
            parse_relative_part(URIString, URI)
    end.


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 4.2. Relative Reference]
%%
%% A relative reference takes advantage of the hierarchical syntax
%% (Section 1.2.3) to express a URI reference relative to the name space
%% of another hierarchical URI.
%%
%%    relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
%%
%%    relative-part = "//" authority path-abempty
%%                  / path-absolute
%%                  / path-noscheme
%%                  / path-empty
%%-------------------------------------------------------------------------
-spec parse_relative_part(binary(), uri_map()) -> uri_map().
%% Parse a relative reference.  Dispatches on the first character(s):
%% "//" starts an authority, "/" an absolute path, "?" a query, "#" a
%% fragment; anything else must be a valid first character of a
%% path-noscheme segment.  A path component is added to the result when
%% the parsed parts did not produce one.
parse_relative_part(?STRING_REST("//", Authority), URI) ->
    %% The leading "//" is NOT part of the authority.  Try to read a
    %% userinfo first; if that throws, read the authority from the host.
    try parse_userinfo(Authority, URI) of
        {Tail, Parsed} ->
            Userinfo = calculate_parsed_userinfo(Authority, Tail),
            WithPath = maybe_add_path(Parsed),
            WithPath#{userinfo => Userinfo}
    catch
        throw:{_,_,_} ->
            {Tail, Parsed} = parse_host(Authority, URI),
            Host = calculate_parsed_host_port(Authority, Tail),
            WithPath = maybe_add_path(Parsed),
            WithPath#{host => remove_brackets(Host)}
    end;
parse_relative_part(?STRING_REST($/, Rest), URI) ->
    %% path-absolute
    {Tail, Parsed} = parse_segment(Rest, URI),
    PathRest = calculate_parsed_part(Rest, Tail),
    Parsed#{path => ?STRING_REST($/, PathRest)};
parse_relative_part(?STRING_REST($?, Rest), URI) ->
    %% path-empty followed by ?query
    {Tail, Parsed} = parse_query(Rest, URI),
    Query = calculate_parsed_query_fragment(Rest, Tail),
    WithPath = maybe_add_path(Parsed),
    WithPath#{query => Query};
parse_relative_part(?STRING_REST($#, Rest), URI) ->
    %% path-empty followed by #fragment
    {Tail, Parsed} = parse_fragment(Rest, URI),
    Fragment = calculate_parsed_query_fragment(Rest, Tail),
    WithPath = maybe_add_path(Parsed),
    WithPath#{fragment => Fragment};
parse_relative_part(?STRING_REST(Char, Rest), URI) ->
    case is_segment_nz_nc(Char) of
        false ->
            throw({error,invalid_uri,[Char]});
        true ->
            %% path-noscheme
            {Tail, Parsed} = parse_segment_nz_nc(Rest, URI),
            PathRest = calculate_parsed_part(Rest, Tail),
            Parsed#{path => ?STRING_REST(Char, PathRest)}
    end.


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.3. Path]
%%
%% The path component contains data, usually organized in hierarchical
%% form, that, along with data in the non-hierarchical query component
%% (Section 3.4), serves to identify a resource within the scope of the
%% URI's scheme and naming authority (if any).  The path is terminated
%% by the first question mark ("?") or number sign ("#") character, or
%% by the end of the URI.
%%
%%    path          = path-abempty    ; begins with "/" or is empty
%%                  / path-absolute   ; begins with "/" but not "//"
%%                  / path-noscheme   ; begins with a non-colon segment
%%                  / path-rootless   ; begins with a segment
%%                  / path-empty      ; zero characters
%%
%%    path-abempty  = *( "/" segment )
%%    path-absolute = "/" [ segment-nz *( "/" segment ) ]
%%    path-noscheme = segment-nz-nc *( "/" segment )
%%    path-rootless = segment-nz *( "/" segment )
%%    path-empty    = 0<pchar>
%%    segment       = *pchar
%%    segment-nz    = 1*pchar
%%    segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
%%                  ; non-zero-length segment without any colon ":"
%%
%%    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
%%-------------------------------------------------------------------------

%%-------------------------------------------------------------------------
%% path-abempty
%%-------------------------------------------------------------------------
%% Consume path characters.  "/" and pchar characters are skipped, "?"
%% and "#" hand over to the query and fragment parsers and store their
%% result in the map, end of input stops the scan.  Any other character
%% is rejected with an invalid_uri throw.
-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}.
parse_segment(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI};
parse_segment(?STRING_REST($/, Rest), URI) ->
    %% next segment
    parse_segment(Rest, URI);
parse_segment(?STRING_REST($?, Rest), URI) ->
    %% ?query
    {Tail, Parsed} = parse_query(Rest, URI),
    {Rest, Parsed#{query => calculate_parsed_query_fragment(Rest, Tail)}};
parse_segment(?STRING_REST($#, Rest), URI) ->
    %% #fragment
    {Tail, Parsed} = parse_fragment(Rest, URI),
    {Rest, Parsed#{fragment => calculate_parsed_query_fragment(Rest, Tail)}};
parse_segment(?STRING_REST(Char, Rest), URI) ->
    case is_pchar(Char) of
        false -> throw({error,invalid_uri,[Char]});
        true -> parse_segment(Rest, URI)
    end.
%%-------------------------------------------------------------------------
%% path-noscheme
%%-------------------------------------------------------------------------
%% Consume the first segment of a path-noscheme (no ":" allowed).  A "/"
%% switches to parse_segment/2 for the remaining segments, "?" and "#"
%% hand over to the query and fragment parsers, end of input stops the
%% scan.  Any other character must satisfy is_segment_nz_nc/1, otherwise
%% invalid_uri is thrown.
-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}.
parse_segment_nz_nc(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI};
parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
    %% following segments may contain ":"
    parse_segment(Rest, URI);
parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
    %% ?query
    {Tail, Parsed} = parse_query(Rest, URI),
    {Rest, Parsed#{query => calculate_parsed_query_fragment(Rest, Tail)}};
parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
    %% #fragment
    {Tail, Parsed} = parse_fragment(Rest, URI),
    {Rest, Parsed#{fragment => calculate_parsed_query_fragment(Rest, Tail)}};
parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
    case is_segment_nz_nc(Char) of
        false -> throw({error,invalid_uri,[Char]});
        true -> parse_segment_nz_nc(Rest, URI)
    end.


%% Check if char is pchar: "%" (start of pct-encoded), ":", "@", an
%% unreserved character or a sub-delim.
-spec is_pchar(char()) -> boolean().
is_pchar(Char) when Char =:= $%; Char =:= $:; Char =:= $@ ->
    true;
is_pchar(Char) ->
    is_unreserved(Char) orelse is_sub_delim(Char).

%% Check if char is segment_nz_nc: same as pchar but without ":".
-spec is_segment_nz_nc(char()) -> boolean().
is_segment_nz_nc(Char) when Char =:= $%; Char =:= $@ ->
    true;
is_segment_nz_nc(Char) ->
    is_unreserved(Char) orelse is_sub_delim(Char).


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.1. Scheme]
%%
%% Each URI begins with a scheme name that refers to a specification for
%% assigning identifiers within that scheme.
%%
%%    scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
%%-------------------------------------------------------------------------
-spec parse_scheme_start(binary(), uri_map()) -> uri_map().
%% Parse a URI that starts with a scheme.  The first character must be
%% alphabetic, otherwise invalid_uri is thrown.  The resulting map always
%% has a path component.
parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
    case is_alpha(Char) of
        false ->
            throw({error,invalid_uri,[Char]});
        true ->
            {Tail, Parsed} = parse_scheme(Rest, URI),
            SchemeRest = calculate_parsed_scheme(Rest, Tail),
            WithPath = maybe_add_path(Parsed),
            WithPath#{scheme => ?STRING_REST(Char, SchemeRest)}
    end.

%% Add an empty path component if it is missing after parsing the URI.
%% According to the URI specification every URI-reference has a path
%% component, and that path may be empty.
maybe_add_path(#{path := _} = Map) ->
    Map;
maybe_add_path(Map) ->
    Map#{path => <<>>}.



%% Consume the remaining scheme characters up to the terminating ":" and
%% then parse the hier-part.  Input that ends before a ":" was seen, or
%% that contains a character not allowed in a scheme, is rejected with
%% an invalid_uri throw.
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_EMPTY, _URI) ->
    throw({error,invalid_uri,<<>>});
parse_scheme(?STRING_REST($:, Rest), URI) ->
    {_, Parsed} = parse_hier(Rest, URI),
    {Rest, Parsed};
parse_scheme(?STRING_REST(Char, Rest), URI) ->
    case is_scheme(Char) of
        false -> throw({error,invalid_uri,[Char]});
        true -> parse_scheme(Rest, URI)
    end.


%% Check if char is allowed in scheme: "+", "-", ".", a letter or a digit.
-spec is_scheme(char()) -> boolean().
is_scheme(Char) when Char =:= $+; Char =:= $-; Char =:= $. ->
    true;
is_scheme(Char) ->
    is_alpha(Char) orelse is_digit(Char).


%%-------------------------------------------------------------------------
%%    hier-part   = "//" authority path-abempty
%%                   / path-absolute
%%                   / path-rootless
%%                   / path-empty
%%-------------------------------------------------------------------------
-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}.
%% Parse the hier-part that follows "scheme:".  Dispatches on the first
%% character(s): "//" starts an authority, "/" an absolute path, "?" a
%% query, "#" a fragment, any other pchar a rootless path; empty input is
%% accepted as is.  A character that cannot start a path is rejected with
%% an invalid_uri throw.
parse_hier(?STRING_EMPTY, URI) ->
    {<<>>, URI};
parse_hier(?STRING_REST("//", Authority), URI) ->
    %% The leading "//" is NOT part of the authority.  Try to read a
    %% userinfo first; if that throws, read the authority from the host.
    try parse_userinfo(Authority, URI) of
        {Tail, Parsed} ->
            Userinfo = calculate_parsed_userinfo(Authority, Tail),
            {Authority, Parsed#{userinfo => Userinfo}}
    catch
        throw:{_,_,_} ->
            {Tail, Parsed} = parse_host(Authority, URI),
            Host = calculate_parsed_host_port(Authority, Tail),
            {Authority, Parsed#{host => remove_brackets(Host)}}
    end;
parse_hier(?STRING_REST($/, Rest), URI) ->
    %% path-absolute
    {Tail, Parsed} = parse_segment(Rest, URI),
    PathRest = calculate_parsed_part(Rest, Tail),
    {Rest, Parsed#{path => ?STRING_REST($/, PathRest)}};
parse_hier(?STRING_REST($?, Rest), URI) ->
    %% path-empty followed by ?query
    {Tail, Parsed} = parse_query(Rest, URI),
    Query = calculate_parsed_query_fragment(Rest, Tail),
    {Rest, Parsed#{query => Query}};
parse_hier(?STRING_REST($#, Rest), URI) ->
    %% path-empty followed by #fragment
    {Tail, Parsed} = parse_fragment(Rest, URI),
    Fragment = calculate_parsed_query_fragment(Rest, Tail),
    {Rest, Parsed#{fragment => Fragment}};
parse_hier(?STRING_REST(Char, Rest), URI) ->
    %% path-rootless: the first character must be a pchar (segment-nz)
    case is_pchar(Char) of
        false ->
            throw({error,invalid_uri,[Char]});
        true ->
            {Tail, Parsed} = parse_segment(Rest, URI),
            PathRest = calculate_parsed_part(Rest, Tail),
            {Rest, Parsed#{path => ?STRING_REST(Char, PathRest)}}
    end.


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.2. Authority]
%%
%% Many URI schemes include a hierarchical element for a naming
%% authority so that governance of the name space defined by the
%% remainder of the URI is delegated to that authority (which may, in
%% turn, delegate it further).
%%
%% The authority component is preceded by a double slash ("//") and is
%% terminated by the next slash ("/"), question mark ("?"), or number
%% sign ("#") character, or by the end of the URI.
%%
%%    authority   = [ userinfo "@" ] host [ ":" port ]
%%
%%
%% [RFC 3986, Chapter 3.2.1. User Information]
%%
%% The userinfo subcomponent may consist of a user name and, optionally,
%% scheme-specific information about how to gain authorization to access
%% the resource.  The user information, if present, is followed by a
%% commercial at-sign ("@") that delimits it from the host.
%%
%%    userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
%%-------------------------------------------------------------------------
%% Consume userinfo characters up to the delimiting "@", then parse the
%% host that follows and store it (brackets removed) in the map.  An "@"
%% that ends the input produces an empty host.  Input that ends without
%% an "@", or that contains a character not allowed in userinfo, is
%% rejected with an invalid_uri throw.
-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}.
parse_userinfo(?CHAR($@), URI) ->
    %% "@" is the last character: nothing is left for the host
    {?STRING_EMPTY, URI#{host => <<>>}};
parse_userinfo(?STRING_REST($@, Rest), URI) ->
    {Tail, Parsed} = parse_host(Rest, URI),
    Host = calculate_parsed_host_port(Rest, Tail),
    {Rest, Parsed#{host => remove_brackets(Host)}};
parse_userinfo(?STRING_REST(Char, Rest), URI) ->
    case is_userinfo(Char) of
        false -> throw({error,invalid_uri,[Char]});
        true -> parse_userinfo(Rest, URI)
    end;
parse_userinfo(?STRING_EMPTY, _URI) ->
    %% URI cannot end in userinfo state
    throw({error,invalid_uri,<<>>}).


%% Check if char is allowed in userinfo: "%" (start of pct-encoded), ":",
%% an unreserved character or a sub-delim.
-spec is_userinfo(char()) -> boolean().
is_userinfo(Char) when Char =:= $%; Char =:= $: ->
    true;
is_userinfo(Char) ->
    is_unreserved(Char) orelse is_sub_delim(Char).


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.2.2. Host]
%%
%% The host subcomponent of authority is identified by an IP literal
%% encapsulated within square brackets, an IPv4 address in dotted-
%% decimal form, or a registered name.
%%
%%    host        = IP-literal / IPv4address / reg-name
%%
%%    IP-literal  = "[" ( IPv6address / IPvFuture  ) "]"
%%
%%    IPvFuture   = "v" 1*HEXDIG "."
1*( unreserved / sub-delims / ":" ) 926%% 927%% IPv6address = 6( h16 ":" ) ls32 928%% / "::" 5( h16 ":" ) ls32 929%% / [ h16 ] "::" 4( h16 ":" ) ls32 930%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 931%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 932%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 933%% / [ *4( h16 ":" ) h16 ] "::" ls32 934%% / [ *5( h16 ":" ) h16 ] "::" h16 935%% / [ *6( h16 ":" ) h16 ] "::" 936%% 937%% ls32 = ( h16 ":" h16 ) / IPv4address 938%% ; least-significant 32 bits of address 939%% 940%% h16 = 1*4HEXDIG 941%% ; 16 bits of address represented in hexadecimal 942%% 943%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 944%% 945%% dec-octet = DIGIT ; 0-9 946%% / %x31-39 DIGIT ; 10-99 947%% / "1" 2DIGIT ; 100-199 948%% / "2" %x30-34 DIGIT ; 200-249 949%% / "25" %x30-35 ; 250-255 950%% 951%% reg-name = *( unreserved / pct-encoded / sub-delims ) 952%%------------------------------------------------------------------------- 953-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. 
%% Entry point for the host subcomponent. The first character decides how the
%% host is parsed:
%%   ":" "/" "?" "#" - the host is empty; continue with port, path, query or
%%                     fragment respectively
%%   "["             - IP-literal, handled by parse_ipv6_bin/3
%%   a digit         - tried as an IPv4 address first; if that throws any
%%                     3-tuple the same input is parsed as a reg-name
%%   anything else   - reg-name
%% Returns {Rest, URI1} following the same convention as the other parse_*
%% functions: Rest is the input after the delimiter that ended the host.
parse_host(?STRING_REST($:, Rest), URI) ->
    {T, URI1} = parse_port(Rest, URI),
    H = calculate_parsed_host_port(Rest, T),
    Port = get_port(H),
    {Rest, URI1#{port => Port}};
parse_host(?STRING_REST($/, Rest), URI) ->
    {T, URI1} = parse_segment(Rest, URI),  % path-abempty
    Path = calculate_parsed_part(Rest, T),
    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
parse_host(?STRING_REST($?, Rest), URI) ->
    {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
    Query = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{query => Query}};
parse_host(?STRING_REST($[, Rest), URI) ->
    parse_ipv6_bin(Rest, [], URI);
parse_host(?STRING_REST($#, Rest), URI) ->
    {T, URI1} = parse_fragment(Rest, URI),  % path-empty
    Fragment = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{fragment => Fragment}};
parse_host(?STRING_REST(Char, Rest), URI) ->
    case is_digit(Char) of
        true ->
            %% Looks like an IPv4 address; names such as "1example.com"
            %% fail IPv4 validation and are re-parsed as reg-name.
            try parse_ipv4_bin(Rest, [Char], URI)
            catch
                throw:{_,_,_} ->
                    parse_reg_name(?STRING_REST(Char, Rest), URI)
            end;
        false -> parse_reg_name(?STRING_REST(Char, Rest), URI)
    end;
parse_host(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI}.


-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
%% Consumes reg-name characters until one of the delimiters ":" "/" "?" "#"
%% or the end of input is reached, then continues with the component the
%% delimiter introduces. Returns {Rest, URI1} where Rest is the input after
%% that delimiter (or ?STRING_EMPTY at end of input).
%% Throws {error,invalid_uri,[Char]} for a character rejected by
%% is_reg_name/1.
parse_reg_name(?STRING_REST($:, Rest), URI) ->
    {T, URI1} = parse_port(Rest, URI),
    H = calculate_parsed_host_port(Rest, T),
    Port = get_port(H),
    {Rest, URI1#{port => Port}};
parse_reg_name(?STRING_REST($/, Rest), URI) ->
    {T, URI1} = parse_segment(Rest, URI),  % path-abempty
    Path = calculate_parsed_part(Rest, T),
    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
parse_reg_name(?STRING_REST($?, Rest), URI) ->
    {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
    Query = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{query => Query}};
parse_reg_name(?STRING_REST($#, Rest), URI) ->
    {T, URI1} = parse_fragment(Rest, URI),  % path-empty
    Fragment = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{fragment => Fragment}};
parse_reg_name(?STRING_REST(Char, Rest), URI) ->
    case is_reg_name(Char) of
        true -> parse_reg_name(Rest, URI);
        false -> throw({error,invalid_uri,[Char]})
    end;
parse_reg_name(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI}.

%% Check if char is allowed in reg-name
%% ("%" is accepted on its own as the start of a pct-encoded triplet)
-spec is_reg_name(char()) -> boolean().
is_reg_name($%) -> true;
is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).


-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
%% Consumes digits and dots, collecting them in reverse order in Acc. When a
%% delimiter (":" "/" "?" "#") or the end of input is reached, the collected
%% text is validated as a strict IPv4 address before parsing continues with
%% the component the delimiter introduces.
%% Throws {error,invalid_uri,_} on a character that is neither digit nor dot
%% or when validation fails; parse_host/2 catches this and retries the input
%% as reg-name.
parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
    _ = validate_ipv4_address(lists:reverse(Acc)),
    {T, URI1} = parse_port(Rest, URI),
    H = calculate_parsed_host_port(Rest, T),
    Port = get_port(H),
    {Rest, URI1#{port => Port}};
parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
    _ = validate_ipv4_address(lists:reverse(Acc)),
    {T, URI1} = parse_segment(Rest, URI),  % path-abempty
    Path = calculate_parsed_part(Rest, T),
    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
    _ = validate_ipv4_address(lists:reverse(Acc)),
    {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
    Query = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{query => Query}};
parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
    _ = validate_ipv4_address(lists:reverse(Acc)),
    {T, URI1} = parse_fragment(Rest, URI),  % path-empty
    Fragment = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{fragment => Fragment}};
parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
    case is_ipv4(Char) of
        true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
        false -> throw({error,invalid_uri,[Char]})
    end;
parse_ipv4_bin(?STRING_EMPTY, Acc, URI) ->
    _ = validate_ipv4_address(lists:reverse(Acc)),
    {?STRING_EMPTY, URI}.


%% Check if char is allowed in IPv4 addresses
-spec is_ipv4(char()) -> boolean().
is_ipv4($.) -> true;
is_ipv4(Char) -> is_digit(Char).

%% Returns Addr unchanged if inet:parse_ipv4strict_address/1 accepts it,
%% otherwise throws {error,invalid_uri,Addr}.
-spec validate_ipv4_address(list()) -> list().
validate_ipv4_address(Addr) ->
    case inet:parse_ipv4strict_address(Addr) of
        {ok, _} -> Addr;
        {error, _} -> throw({error,invalid_uri,Addr})
    end.


-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
%% Parses the inside of an IP-literal; the opening "[" has already been
%% consumed by the caller. Characters are collected (reversed) in Acc until
%% the closing "]", at which point the address is validated and parsing
%% continues in parse_ipv6_bin_end/2.
%% Throws {error,invalid_uri,_} on a disallowed character, on a failed
%% validation, or when the input ends before "]".
parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) ->
    _ = validate_ipv6_address(lists:reverse(Acc)),
    parse_ipv6_bin_end(Rest, URI);
parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) ->
    case is_ipv6(Char) of
        true -> parse_ipv6_bin(Rest, [Char|Acc], URI);
        false -> throw({error,invalid_uri,[Char]})
    end;
parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) ->
    throw({error,invalid_uri,<<>>}).

%% Check if char is allowed in IPv6 addresses
%% ("." is allowed because the last 32 bits may be written as IPv4address)
-spec is_ipv6(char()) -> boolean().
is_ipv6($:) -> true;
is_ipv6($.) -> true;
is_ipv6(Char) -> is_hex_digit(Char).


%% Handles the input that follows the closing "]" of an IP-literal: a
%% delimiter (":" "/" "?" "#") switches to the matching component parser,
%% end of input finishes parsing.
%% NOTE(review): the catch-all clause keeps consuming characters accepted by
%% is_ipv6/1 after the "]", so input such as "[::1]ab" is not rejected
%% here - confirm whether that is intended.
-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
    {T, URI1} = parse_port(Rest, URI),
    H = calculate_parsed_host_port(Rest, T),
    Port = get_port(H),
    {Rest, URI1#{port => Port}};
parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
    {T, URI1} = parse_segment(Rest, URI),  % path-abempty
    Path = calculate_parsed_part(Rest, T),
    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
    {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
    Query = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{query => Query}};
parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
    {T, URI1} = parse_fragment(Rest, URI),  % path-empty
    Fragment = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{fragment => Fragment}};
parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
    case is_ipv6(Char) of
        true -> parse_ipv6_bin_end(Rest, URI);
        false -> throw({error,invalid_uri,[Char]})
    end;
parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI}.

-spec validate_ipv6_address(list()) -> list().
%% Returns Addr unchanged if inet:parse_ipv6strict_address/1 accepts it,
%% otherwise throws {error,invalid_uri,Addr}.
validate_ipv6_address(Addr) ->
    case inet:parse_ipv6strict_address(Addr) of
        {ok, _} -> Addr;
        {error, _} -> throw({error,invalid_uri,Addr})
    end.


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.2.3. Port]
%%
%% The port subcomponent of authority is designated by an optional port
%% number in decimal following the host and delimited from it by a
%% single colon (":") character.
%%
%%    port        = *DIGIT
%%-------------------------------------------------------------------------

%% Consumes digits until "/", "?", "#" or the end of input, then continues
%% with the component the delimiter introduces. The port value itself is
%% not stored here: the caller cuts it out of its own input and converts it
%% with get_port/1.
%% Throws {error,invalid_uri,[Char]} on a non-digit character.
-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}.
parse_port(?STRING_REST($/, Rest), URI) ->
    {T, URI1} = parse_segment(Rest, URI),  % path-abempty
    Path = calculate_parsed_part(Rest, T),
    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
parse_port(?STRING_REST($?, Rest), URI) ->
    {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
    Query = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{query => Query}};
parse_port(?STRING_REST($#, Rest), URI) ->
    {T, URI1} = parse_fragment(Rest, URI),  % path-empty
    Fragment = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{fragment => Fragment}};
parse_port(?STRING_REST(Char, Rest), URI) ->
    case is_digit(Char) of
        true -> parse_port(Rest, URI);
        false -> throw({error,invalid_uri,[Char]})
    end;
parse_port(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI}.


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.4. Query]
%%
%% The query component contains non-hierarchical data that, along with
%% data in the path component (Section 3.3), serves to identify a
%% resource within the scope of the URI's scheme and naming authority
%% (if any).  The query component is indicated by the first question
%% mark ("?") character and terminated by a number sign ("#") character
%% or by the end of the URI.
%%
%%    query       = *( pchar / "/" / "?" )
%%-------------------------------------------------------------------------

%% Consumes query characters until "#" (a fragment follows) or the end of
%% input. Throws {error,invalid_uri,[Char]} on a character rejected by
%% is_query/1.
-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
parse_query(?STRING_REST($#, Rest), URI) ->
    {T, URI1} = parse_fragment(Rest, URI),
    Fragment = calculate_parsed_query_fragment(Rest, T),
    {Rest, URI1#{fragment => Fragment}};
parse_query(?STRING_REST(Char, Rest), URI) ->
    case is_query(Char) of
        true -> parse_query(Rest, URI);
        false -> throw({error,invalid_uri,[Char]})
    end;
parse_query(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI}.


%% Check if char is allowed in query
-spec is_query(char()) -> boolean().
is_query($/) -> true;
is_query($?) -> true;
is_query(Char) -> is_pchar(Char).


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.5. Fragment]
%%
%% The fragment identifier component of a URI allows indirect
%% identification of a secondary resource by reference to a primary
%% resource and additional identifying information.
%%
%%    fragment    = *( pchar / "/" / "?" )
%%-------------------------------------------------------------------------

%% Consumes fragment characters until the end of input; the fragment is the
%% last component, so there is no delimiter clause.
%% Throws {error,invalid_uri,[Char]} on a character rejected by
%% is_fragment/1.
-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}.
parse_fragment(?STRING_REST(Char, Rest), URI) ->
    case is_fragment(Char) of
        true -> parse_fragment(Rest, URI);
        false -> throw({error,invalid_uri,[Char]})
    end;
parse_fragment(?STRING_EMPTY, URI) ->
    {?STRING_EMPTY, URI}.


%% Check if char is allowed in fragment
-spec is_fragment(char()) -> boolean().
is_fragment($/) -> true;
is_fragment($?) -> true;
is_fragment(Char) -> is_pchar(Char).


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 2.2. Reserved Characters]
%%
%%   reserved    = gen-delims / sub-delims
%%
%%   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
%%
%%   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
%%               / "*" / "+" / "," / ";" / "="
%%
%%-------------------------------------------------------------------------

%% Return true if input char is reserved.
-spec is_reserved(char()) -> boolean().
%% gen-delims
is_reserved($:) -> true;
is_reserved($/) -> true;
is_reserved($?) -> true;
is_reserved($#) -> true;
is_reserved($[) -> true;
is_reserved($]) -> true;
is_reserved($@) -> true;
%% sub-delims
is_reserved($!) -> true;
is_reserved($$) -> true;
is_reserved($&) -> true;
is_reserved($') -> true;
is_reserved($() -> true;
is_reserved($)) -> true;

is_reserved($*) -> true;
is_reserved($+) -> true;
is_reserved($,) -> true;
is_reserved($;) -> true;
is_reserved($=) -> true;
is_reserved(_) -> false.


%% Check if char is sub-delim.
-spec is_sub_delim(char()) -> boolean().
is_sub_delim($!) -> true;
is_sub_delim($$) -> true;
is_sub_delim($&) -> true;
is_sub_delim($') -> true;
is_sub_delim($() -> true;
is_sub_delim($)) -> true;

is_sub_delim($*) -> true;
is_sub_delim($+) -> true;
is_sub_delim($,) -> true;
is_sub_delim($;) -> true;
is_sub_delim($=) -> true;
is_sub_delim(_) -> false.


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 2.3. Unreserved Characters]
%%
%%   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
%%
%%-------------------------------------------------------------------------
-spec is_unreserved(char()) -> boolean().
is_unreserved($-) -> true;
is_unreserved($.) -> true;
is_unreserved($_) -> true;
is_unreserved($~) -> true;
is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char).

%% True for ASCII letters A-Z and a-z only.
-spec is_alpha(char()) -> boolean().
is_alpha(C)
  when $A =< C, C =< $Z;
       $a =< C, C =< $z -> true;
is_alpha(_) -> false.

%% True for ASCII digits 0-9.
-spec is_digit(char()) -> boolean().
is_digit(C)
  when $0 =< C, C =< $9 -> true;
is_digit(_) -> false.

%% True for 0-9, a-f and A-F.
-spec is_hex_digit(char()) -> boolean().
is_hex_digit(C)
  when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true;
is_hex_digit(_) -> false.


%% Remove enclosing brackets from binary
%%
%% A binary that does not start with "[" is returned unchanged. For one that
%% does, the leading "[" is always dropped and a trailing "]" is dropped
%% when present.
%% NOTE(review): the input <<"[">> makes split_binary/2 be called with a
%% negative position; the parser clauses visible in this file never seem to
%% produce such a host - confirm before calling this from new code.
-spec remove_brackets(binary()) -> binary().
remove_brackets(<<$[/utf8, Rest/binary>>) ->
    {H,T} = split_binary(Rest, byte_size(Rest) - 1),
    case T =:= <<$]/utf8>> of
        true -> H;
        false -> Rest
    end;
remove_brackets(Addr) -> Addr.


%%-------------------------------------------------------------------------
%% Helper functions for calculating the parsed binary.
%%
%% All calculate_parsed_* functions take the Input a parser started from and
%% the Unparsed remainder returned by the next parser, and return the prefix
%% of Input that belongs to the component just parsed.
%%  - Unparsed is non-empty: the prefix ends right before the delimiter that
%%    precedes Unparsed (see get_parsed_binary/2).
%%  - Unparsed is empty: everything up to the end of Input was consumed, so
%%    the component is Input itself minus a trailing delimiter, if the last
%%    character is one of the delimiters that can end this component.
%%-------------------------------------------------------------------------
-spec calculate_parsed_scheme(binary(), binary()) -> binary().
calculate_parsed_scheme(Input, <<>>) ->
    strip_last_char(Input, [$:]);
calculate_parsed_scheme(Input, Unparsed) ->
    get_parsed_binary(Input, Unparsed).


-spec calculate_parsed_part(binary(), binary()) -> binary().
calculate_parsed_part(Input, <<>>) ->
    strip_last_char(Input, [$?,$#]);
calculate_parsed_part(Input, Unparsed) ->
    get_parsed_binary(Input, Unparsed).


-spec calculate_parsed_userinfo(binary(), binary()) -> binary().
calculate_parsed_userinfo(Input, <<>>) ->
    strip_last_char(Input, [$?,$#,$@]);
calculate_parsed_userinfo(Input, Unparsed) ->
    get_parsed_binary(Input, Unparsed).


-spec calculate_parsed_host_port(binary(), binary()) -> binary().
%% Returns the host (or port) text at the start of Input. With an empty
%% Unparsed remainder a trailing ":", "?", "#" or "/" is stripped; otherwise
%% the text before the delimiter preceding Unparsed is returned.
calculate_parsed_host_port(Input, <<>>) ->
    strip_last_char(Input, [$:,$?,$#,$/]);
calculate_parsed_host_port(Input, Unparsed) ->
    get_parsed_binary(Input, Unparsed).


%% Same as above for query and fragment; only a trailing "#" is stripped.
-spec calculate_parsed_query_fragment(binary(), binary()) -> binary().
calculate_parsed_query_fragment(Input, <<>>) ->
    strip_last_char(Input, [$#]);
calculate_parsed_query_fragment(Input, Unparsed) ->
    get_parsed_binary(Input, Unparsed).


%% Converts the textual port to an integer. An empty port (as in
%% "host:") is represented by the atom 'undefined'.
%% Throws {error,invalid_uri,B} if B is not a valid integer.
-spec get_port(binary()) -> undefined | integer().
get_port(<<>>) ->
    undefined;
get_port(B) ->
    try binary_to_integer(B)
    catch
        error:badarg ->
            throw({error, invalid_uri, B})
    end.


%% Strip last char if it is in list
%%
%% Only lists of one to four characters are supported; each length has its
%% own clause so the comparison is a plain case on the last byte.
%%
%% This function is optimized for speed: parse/1 is about 10% faster than
%% with an alternative implementation based on lists and sets.
strip_last_char(<<>>, _) -> <<>>;
strip_last_char(Input, [C0]) ->
    case binary:last(Input) of
        C0 ->
            init_binary(Input);
        _Else ->
            Input
    end;
strip_last_char(Input, [C0,C1]) ->
    case binary:last(Input) of
        C0 ->
            init_binary(Input);
        C1 ->
            init_binary(Input);
        _Else ->
            Input
    end;
strip_last_char(Input, [C0,C1,C2]) ->
    case binary:last(Input) of
        C0 ->
            init_binary(Input);
        C1 ->
            init_binary(Input);
        C2 ->
            init_binary(Input);
        _Else ->
            Input
    end;
strip_last_char(Input, [C0,C1,C2,C3]) ->
    case binary:last(Input) of
        C0 ->
            init_binary(Input);
        C1 ->
            init_binary(Input);
        C2 ->
            init_binary(Input);
        C3 ->
            init_binary(Input);
        _Else ->
            Input
    end.


%% Get parsed binary
%%
%% Returns the prefix of Input that precedes Unparsed and the one-byte
%% delimiter in front of it (byte_size_exl_head/1 accounts for that byte).
get_parsed_binary(Input, Unparsed) ->
    {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
    First.


%% Return all bytes of the binary except the last one. The binary must be non-empty.
init_binary(B) ->
    {Init, _} =
        split_binary(B, byte_size(B) - 1),
    Init.


%% Returns the number of bytes to cut from the end of the parser input in
%% get_parsed_binary/2: the size of the unparsed remainder plus one byte for
%% the delimiter in front of it, or 0 when nothing is left unparsed.
%% Used in calls to split_binary().
-spec byte_size_exl_head(binary()) -> non_neg_integer().
byte_size_exl_head(<<>>) -> 0;
byte_size_exl_head(Binary) -> byte_size(Binary) + 1.


%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 2.1. Percent-Encoding]
%%
%% A percent-encoding mechanism is used to represent a data octet in a
%% component when that octet's corresponding character is outside the
%% allowed set or is being used as a delimiter of, or within, the
%% component.  A percent-encoded octet is encoded as a character
%% triplet, consisting of the percent character "%" followed by the two
%% hexadecimal digits representing that octet's numeric value.  For
%% example, "%20" is the percent-encoding for the binary octet
%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
%% character (SP).  Section 2.4 describes when percent-encoding and
%% decoding is applied.
%%
%%    pct-encoded = "%" HEXDIG HEXDIG
%%-------------------------------------------------------------------------

%%-------------------------------------------------------------------------
%% Percent-encode
%%-------------------------------------------------------------------------

%% Only validates as scheme cannot have percent-encoded characters
%%
%% Returns Scheme unchanged when validate_scheme/1 accepts it. Throws
%% {error,invalid_scheme,Scheme} for an empty scheme or one that fails
%% validation.
-spec encode_scheme(list()|binary()) -> list() | binary().
encode_scheme([]) ->
    throw({error,invalid_scheme,""});
encode_scheme(<<>>) ->
    throw({error,invalid_scheme,<<>>});
encode_scheme(Scheme) ->
    case validate_scheme(Scheme) of
        true -> Scheme;
        false -> throw({error,invalid_scheme,Scheme})
    end.

%% Percent-encodes every character of Cs that is_userinfo/1 rejects.
-spec encode_userinfo(list()|binary()) -> list() | binary().
encode_userinfo(Cs) ->
    encode(Cs, fun is_userinfo/1).

%% Encodes a host according to its classification (see classify_host/1):
%% reg-names and IPv4 addresses are returned as is, IPv6 addresses are put
%% in brackets, everything else is percent-encoded.
-spec encode_host(list()|binary()) -> list() | binary().
encode_host(Cs) ->
    case classify_host(Cs) of
        regname -> Cs;
        ipv4 -> Cs;
        ipv6 -> bracket_ipv6(Cs);
        other -> encode(Cs, fun is_reg_name/1)
    end.

%% Percent-encodes every character of Cs that is_path/1 rejects.
-spec encode_path(list()|binary()) -> list() | binary().
encode_path(Cs) ->
    encode(Cs, fun is_path/1).

%% Percent-encodes every character of Cs that is_query/1 rejects.
-spec encode_query(list()|binary()) -> list() | binary().
encode_query(Cs) ->
    encode(Cs, fun is_query/1).

%% Percent-encodes every character of Cs that is_fragment/1 rejects.
-spec encode_fragment(list()|binary()) -> list() | binary().
encode_fragment(Cs) ->
    encode(Cs, fun is_fragment/1).

%%-------------------------------------------------------------------------
%% Helper functions for percent-decode
%%-------------------------------------------------------------------------

%% Decodes only those percent-encoded triplets that stand for unreserved
%% characters. All other triplets are kept percent-encoded, with their hex
%% digits converted to uppercase. Lists are converted to a binary, decoded
%% and converted back.
%% Throws {error,invalid_percent_encoding,_} for "%" followed by two bytes
%% that are not both hex digits, and {error,invalid_utf8,_} if the result
%% is not valid UTF-8.
-spec decode(list()|binary()) -> list() | binary().
decode(Cs) ->
    decode(Cs, <<>>).
%%
decode(L, Acc) when is_list(L) ->
    B0 = unicode:characters_to_binary(L),
    B1 = decode(B0, Acc),
    unicode:characters_to_list(B1);
decode(<<$%,C0,C1,Cs/binary>>, Acc) ->
    case is_hex_digit(C0) andalso is_hex_digit(C1) of
        true ->
            B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
            %% [2.4] When a URI is dereferenced, the components and subcomponents
            %% significant to the scheme-specific dereferencing process (if any)
            %% must be parsed and separated before the percent-encoded octets within
            %% those components can be safely decoded, as otherwise the data may be
            %% mistaken for component delimiters.  The only exception is for
            %% percent-encoded octets corresponding to characters in the unreserved
            %% set, which can be decoded at any time.
            case is_unreserved(B) of
                false ->
                    %% [2.2] Characters in the reserved set are protected from
                    %% normalization.
                    %% [2.1] For consistency, URI producers and normalizers should
                    %% use uppercase hexadecimal digits for all percent-
                    %% encodings.
                    H0 = hex_to_upper(C0),
                    H1 = hex_to_upper(C1),
                    decode(Cs, <<Acc/binary,$%,H0,H1>>);
                true ->
                    decode(Cs, <<Acc/binary, B>>)
            end;
        false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
    end;
decode(<<C,Cs/binary>>, Acc) ->
    decode(Cs, <<Acc/binary, C>>);
decode(<<>>, Acc) ->
    check_utf8(Acc).

%% Decodes every percent-encoded triplet, reserved characters included.
%% For list input thrown errors are converted to an {error,_,_} return
%% value; the binary clauses throw and leave the conversion to the caller.
-spec raw_decode(list()|binary()) -> list() | binary() | error().
raw_decode(Cs) ->
    raw_decode(Cs, <<>>).
%%
raw_decode(L, Acc) when is_list(L) ->
    try
        B0 = unicode:characters_to_binary(L),
        B1 = raw_decode(B0, Acc),
        unicode:characters_to_list(B1)
    catch
        throw:{error, Atom, RestData} ->
            {error, Atom, RestData}
    end;
raw_decode(<<$%,C0,C1,Cs/binary>>, Acc) ->
    case is_hex_digit(C0) andalso is_hex_digit(C1) of
        true ->
            B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
            raw_decode(Cs, <<Acc/binary, B>>);
        false ->
            throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
    end;
raw_decode(<<C,Cs/binary>>, Acc) ->
    raw_decode(Cs, <<Acc/binary, C>>);
raw_decode(<<>>, Acc) ->
    check_utf8(Acc).

%% Returns Cs if it is utf8 encoded.
%% Throws {error,invalid_utf8,Cs} otherwise.
check_utf8(Cs) ->
    case unicode:characters_to_list(Cs) of
        {incomplete,_,_} ->
            throw({error,invalid_utf8,Cs});
        {error,_,_} ->
            throw({error,invalid_utf8,Cs});
        _ -> Cs
    end.

%% Convert hex digit to uppercase form
%% (32 is the distance between lower and upper case letters in ASCII)
hex_to_upper(H) when $a =< H, H =< $f ->
    H - 32;
hex_to_upper(H) when $0 =< H, H =< $9;$A =< H, H =< $F->
    H;
hex_to_upper(H) ->
    throw({error,invalid_input, H}).

%% Check if char is allowed in host
-spec is_host(char()) -> boolean().
is_host($:) -> true;
is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).

%% Check if char is allowed in path
-spec is_path(char()) -> boolean().
is_path($/) -> true;
is_path(Char) -> is_pchar(Char).


%%-------------------------------------------------------------------------
%% Helper functions for percent-encode
%%-------------------------------------------------------------------------

%% Percent-encodes all characters of Component for which Fun returns false.
%% The result has the same type (list or binary) as the input; lists are
%% converted to a UTF-8 binary, encoded and converted back.
%% Throws {error,invalid_input,Rest} when the binary contains a byte
%% sequence that is not valid UTF-8; Rest starts at the offending byte.
-spec encode(list()|binary(), fun()) -> list() | binary().
encode(Component, Fun) when is_list(Component) ->
    B = unicode:characters_to_binary(Component),
    unicode:characters_to_list(encode(B, Fun, <<>>));
encode(Component, Fun) when is_binary(Component) ->
    encode(Component, Fun, <<>>).
%%
encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
    C = encode_codepoint_binary(Char, Fun),
    encode(Rest, Fun, <<Acc/binary,C/binary>>);
encode(<<Char, Rest/binary>>, _Fun, _Acc) ->
    %% Reached only when the /utf8 match above failed
    throw({error,invalid_input,<<Char,Rest/binary>>});
encode(<<>>, _Fun, Acc) ->
    Acc.


%% Returns the code point C as a one-byte binary when Fun accepts it, and
%% its percent-encoded UTF-8 representation otherwise.
-spec encode_codepoint_binary(integer(), fun()) -> binary().
encode_codepoint_binary(C, Fun) ->
    case Fun(C) of
        false -> percent_encode_binary(C);
        true -> <<C>>
    end.


%% Percent-encodes every byte of the UTF-8 representation of Code.
-spec percent_encode_binary(integer()) -> binary().
percent_encode_binary(Code) ->
    percent_encode_binary(<<Code/utf8>>, <<>>).


%% Emits "%" followed by one hex digit per nibble (A: high, B: low) for each
%% byte of the input binary.
percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) ->
    percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>);
percent_encode_binary(<<>>, Acc) ->
    Acc.


%%-------------------------------------------------------------------------
%% Returns true if every character of the scheme (list or binary) satisfies
%% is_scheme/1. An empty scheme is reported as valid here; encode_scheme/1
%% rejects it before calling this function.
%%-------------------------------------------------------------------------
validate_scheme([]) -> true;
validate_scheme([H|T]) ->
    case is_scheme(H) of
        true -> validate_scheme(T);
        false -> false
    end;
validate_scheme(<<>>) -> true;
validate_scheme(<<H, Rest/binary>>) ->
    case is_scheme(H) of
        true -> validate_scheme(Rest);
        false -> false
    end.


%%-------------------------------------------------------------------------
%% Classifies hostname into the following categories:
%% regname, ipv4 - address does not contain reserved characters to be
%%                 percent-encoded
%% ipv6 - address does not contain reserved characters but it shall be
%%        enclosed in brackets
%% other - address shall be percent-encoded
%%
%% A binary that cannot be converted to a list of code points (it is not
%% valid UTF-8) is classified as 'other'. The conversion then returns an
%% {error,_,_} or {incomplete,_,_} tuple, which none of the list based
%% classifiers below can handle; as 'other' the host reaches encode/2,
%% which reports the bad input by throwing {error,invalid_input,_}.
%%-------------------------------------------------------------------------
classify_host([]) -> other;
classify_host(Addr) when is_binary(Addr) ->
    case unicode:characters_to_list(Addr) of
        A when is_list(A) -> classify_host_ipv6(A);
        _NotUtf8 -> other
    end;
classify_host(Addr) ->
    classify_host_ipv6(Addr).

%% Classification is tried in the order ipv6, ipv4, regname.
classify_host_ipv6(Addr) ->
    case is_ipv6_address(Addr) of
        true -> ipv6;
        false -> classify_host_ipv4(Addr)
    end.

classify_host_ipv4(Addr) ->
    case is_ipv4_address(Addr) of
        true -> ipv4;
        false -> classify_host_regname(Addr)
    end.

%% 'regname' if all characters satisfy is_reg_name/1, 'other' otherwise.
classify_host_regname([]) -> regname;
classify_host_regname([H|T]) ->
    case is_reg_name(H) of
        true -> classify_host_regname(T);
        false -> other
    end.

%% True if inet:parse_ipv4strict_address/1 accepts Addr.
is_ipv4_address(Addr) ->
    case inet:parse_ipv4strict_address(Addr) of
        {ok, _} -> true;
        {error, _} -> false
    end.

%% True if inet:parse_ipv6strict_address/1 accepts Addr.
is_ipv6_address(Addr) ->
    case inet:parse_ipv6strict_address(Addr) of
        {ok, _} -> true;
        {error, _} -> false
    end.

%% Encloses an IPv6 address in square brackets, keeping the input type.
bracket_ipv6(Addr) when is_binary(Addr) ->
    concat(<<$[,Addr/binary>>,<<$]>>);
bracket_ipv6(Addr) when is_list(Addr) ->
    [$[|Addr] ++ "]".


%%-------------------------------------------------------------------------
%% Helper functions for recompose
%%-------------------------------------------------------------------------

%%-------------------------------------------------------------------------
%% Checks if input Map has valid combination of fields that can be
%% recomposed into a URI.
%%
%% The implementation is based on a decision tree that fulfills the
%% following rules:
%%   - 'path' shall always be present in the input map
%%       URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
%%       hier-part   = "//" authority path-abempty
%%                      / path-absolute
%%                      / path-rootless
%%                      / path-empty
%%   - 'host' shall be present in the input map when 'path' starts with
%%     two slashes ("//")
%%       path          = path-abempty    ; begins with "/" or is empty
%%                     / path-absolute   ; begins with "/" but not "//"
%%                     / path-noscheme   ; begins with a non-colon segment
%%                     / path-rootless   ; begins with a segment
%%                     / path-empty      ; zero characters
%%       path-abempty  = *( "/" segment )
%%       segment       = *pchar
%%   - 'host' shall be present if userinfo or port is present in input map
%%       authority   = [ userinfo "@" ] host [ ":" port ]
%%   - All fields shall be valid (scheme, userinfo, host, port, path, query
%%     or fragment).
%%
%% The three "host shall be present" rules are evaluated first and are
%% binding: when any of them applies, the map is valid only if it has a
%% 'host' key. (Chaining them with 'orelse' in front of all_fields_valid/1
%% would let a failed host check fall through to the last alternative and
%% accept the map anyway, e.g. a map with 'port' but without 'host'.)
%%-------------------------------------------------------------------------
is_valid_map(#{path := Path} = Map) ->
    HostRequired =
        starts_with_two_slash(Path)
        orelse maps:is_key(userinfo, Map)
        orelse maps:is_key(port, Map),
    case HostRequired of
        true -> is_valid_map_host(Map);
        false -> all_fields_valid(Map)
    end;
is_valid_map(#{}) ->
    false.


%% True if Map has a 'host' key and contains no unknown keys.
is_valid_map_host(Map) ->
    maps:is_key(host, Map) andalso all_fields_valid(Map).


%% True if Map contains only the keys scheme, userinfo, host, port, path,
%% query and fragment. The values are not inspected here.
all_fields_valid(Map) ->
    Fun = fun(scheme, _, Acc) -> Acc;
             (userinfo, _, Acc) -> Acc;
             (host, _, Acc) -> Acc;
             (port, _, Acc) -> Acc;
             (path, _, Acc) -> Acc;
             (query, _, Acc) -> Acc;
             (fragment, _, Acc) -> Acc;
             (_, _, _) -> false
          end,
    maps:fold(Fun, true, Map).


%% True if the path (list or binary) begins with "//".
starts_with_two_slash([$/,$/|_]) ->
    true;
starts_with_two_slash(?STRING_REST("//", _)) ->
    true;
starts_with_two_slash(_) -> false.


%% The update_* functions below each append one component of the map to the
%% URI built so far. The second argument is either that URI (list or binary)
%% or the atom 'empty' while nothing has been produced yet.

%% Returns the validated scheme followed by ":", or 'empty' if the map has
%% no scheme. The second argument is ignored.
update_scheme(#{scheme := Scheme}, _) ->
    add_colon_postfix(encode_scheme(Scheme));
update_scheme(#{}, _) ->
    empty.


%% Appends "//" and the percent-encoded userinfo.
update_userinfo(#{userinfo := Userinfo}, empty) ->
    add_auth_prefix(encode_userinfo(Userinfo));
update_userinfo(#{userinfo := Userinfo}, URI) ->
    concat(URI,add_auth_prefix(encode_userinfo(Userinfo)));
update_userinfo(#{}, empty) ->
    empty;
update_userinfo(#{}, URI) ->
    URI.


%% Appends the encoded host. With a non-empty URI the prefix is "@" when the
%% map has userinfo and "//" otherwise (see add_host_prefix/2).
update_host(#{host := Host}, empty) ->
    add_auth_prefix(encode_host(Host));
update_host(#{host := Host} = Map, URI) ->
    concat(URI,add_host_prefix(Map, encode_host(Host)));
update_host(#{}, empty) ->
    empty;
update_host(#{}, URI) ->
    URI.


%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI
%% A port of 'undefined' produces a bare ":".
update_port(#{port := undefined}, URI) ->
    concat(URI, <<":">>);
update_port(#{port := Port}, URI) ->
    concat(URI,add_colon(encode_port(Port)));
update_port(#{}, URI) ->
    URI.


%% Appends the percent-encoded path.
update_path(#{path := Path}, empty) ->
    encode_path(Path);
update_path(#{host := _, path := Path0}, URI) ->
    %% When host is present in a URI the path must begin with "/" or be empty.
    Path1 = maybe_flatten_list(Path0),
    Path = make_path_absolute(Path1),
    concat(URI,encode_path(Path));
update_path(#{path := Path}, URI) ->
    concat(URI,encode_path(Path));
update_path(#{}, empty) ->
    empty;
update_path(#{}, URI) ->
    URI.


%% Appends "?" and the percent-encoded query. In the 'empty' clause the
%% encoded query is returned without a leading "?".
update_query(#{query := Query}, empty) ->
    encode_query(Query);
update_query(#{query := Query}, URI) ->
    concat(URI,add_question_mark(encode_query(Query)));
update_query(#{}, empty) ->
    empty;
update_query(#{}, URI) ->
    URI.


%% Appends "#" and the percent-encoded fragment. Unlike the other update_*
%% functions, the (no fragment, 'empty') clause returns the empty string
%% instead of 'empty'.
update_fragment(#{fragment := Fragment}, empty) ->
    add_hashmark(encode_fragment(Fragment));
update_fragment(#{fragment := Fragment}, URI) ->
    concat(URI,add_hashmark(encode_fragment(Fragment)));
update_fragment(#{}, empty) ->
    "";
update_fragment(#{}, URI) ->
    URI.

%%-------------------------------------------------------------------------
%% Concatenates its arguments that can be lists and binaries.
%% The result is a list if at least one of its argument is a list and
%% binary otherwise.
%%-------------------------------------------------------------------------
concat(A, B) when is_binary(A), is_binary(B) ->
    <<A/binary, B/binary>>;
concat(A, B) when is_binary(A), is_list(B) ->
    unicode:characters_to_list(A) ++ B;
concat(A, B) when is_list(A) ->
    A ++ maybe_to_list(B).

%% Prepends "#" to a fragment.
add_hashmark(Comp) when is_binary(Comp) ->
    <<$#, Comp/binary>>;
add_hashmark(Comp) when is_list(Comp) ->
    [$#|Comp].

%% Prepends "?" to a query.
add_question_mark(Comp) when is_binary(Comp) ->
    <<$?, Comp/binary>>;
add_question_mark(Comp) when is_list(Comp) ->
    [$?|Comp].

%% Prepends ":" to a port. Only binaries are handled.
add_colon(Comp) when is_binary(Comp) ->
    <<$:, Comp/binary>>.

%% Appends ":" to a scheme.
add_colon_postfix(Comp) when is_binary(Comp) ->
    <<Comp/binary,$:>>;
add_colon_postfix(Comp) when is_list(Comp) ->
    Comp ++ ":".

%% Prepends the authority marker "//".
add_auth_prefix(Comp) when is_binary(Comp) ->
    <<"//", Comp/binary>>;
add_auth_prefix(Comp) when is_list(Comp) ->
    [$/,$/|Comp].

%% Prepends the separator that belongs in front of the host: "@" when the
%% map has userinfo (which then already carries the "//"), "//" otherwise.
add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
    <<$@,Host/binary>>;
add_host_prefix(#{}, Host) when is_binary(Host) ->
    <<"//",Host/binary>>;
add_host_prefix(#{userinfo := _}, Host) when is_list(Host) ->
    [$@|Host];
add_host_prefix(#{}, Host) when is_list(Host) ->
    [$/,$/|Host].

%% Converts a binary to a list of code points; other terms pass through.
maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp);
maybe_to_list(Comp) -> Comp.

%% Returns the decimal text of an integer port as a binary.
encode_port(Port) ->
    integer_to_binary(Port).

%% URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
%%
%% hier-part   = "//" authority path-abempty
%%                / path-absolute
%%                / path-rootless
%%                / path-empty
%%
%% path          = path-abempty    ; begins with "/" or is empty
%%               / path-absolute   ; begins with "/" but not "//"
%%               / path-noscheme   ; begins with a non-colon segment
%%               / path-rootless   ; begins with a segment
%%               / path-empty      ; zero characters
%%
%% Ensures a path that follows an authority is path-abempty: empty paths
%% and paths already starting with "/" are returned unchanged, any other
%% path gets a leading "/". The input type (list or binary) is preserved.
make_path_absolute(<<>>) ->
    <<>>;
make_path_absolute("") ->
    "";
make_path_absolute(<<"/",_/binary>> = Path) ->
    Path;
make_path_absolute([$/|_] = Path) ->
    Path;
make_path_absolute(Path) when is_binary(Path) ->
    concat(<<$/>>, Path);
make_path_absolute(Path) when is_list(Path) ->
    concat("/", Path).

%% Binaries pass through; anything else is converted with
%% unicode:characters_to_list/1, which flattens nested character data so
%% that make_path_absolute/1 can inspect the first character.
maybe_flatten_list(Path) when is_binary(Path) ->
    Path;
maybe_flatten_list(Path) ->
    unicode:characters_to_list(Path).
%%-------------------------------------------------------------------------
%% Helper functions for resolve
%%-------------------------------------------------------------------------

%% resolve_map(RefURIMap, BaseURI)
%% BaseURI is either an already parsed URI map or a URI string.
%%  * A reference that has its own scheme is only dot-segment normalized.
%%  * Otherwise the base must have a scheme; if it does not,
%%    {error,invalid_scheme,""} is returned.
%%  * A base that fails to parse yields the parse error unchanged.
resolve_map(#{scheme := _} = URIMap, _BaseURI) ->
    normalize_path_segment(URIMap);
resolve_map(URIMap, #{scheme := _} = BaseURIMap) ->
    PathType = resolve_path_type(URIMap),
    resolve_map(URIMap, BaseURIMap, PathType);
resolve_map(_URIMap, BaseURIMap) when is_map(BaseURIMap) ->
    {error, invalid_scheme, ""};
resolve_map(URIMap, BaseURIString) ->
    case parse(BaseURIString) of
        Parsed when is_map(Parsed) ->
            %% URIMap has no scheme here (the first clause did not match),
            %% so this re-dispatches on whether the parsed base has one.
            resolve_map(URIMap, Parsed);
        ParseError ->
            ParseError
    end.

%% Classifies the path of a URI map as empty_path, absolute_path
%% (starts with "/") or relative_path. A missing path counts as empty.
resolve_path_type(URIMap) ->
    Path = maps:get(path, URIMap, <<>>),
    case iolist_to_binary(Path) of
        <<>> -> empty_path;
        <<$/, _/bits>> -> absolute_path;
        _Other -> relative_path
    end.

%% resolve_map(RefURIMap, BaseURIMap, PathType)
%%  * Reference with a host: only the scheme is taken from the base.
%%  * Empty path: scheme, authority parts and path come from the base,
%%    and so does the query unless the reference has its own.
%%  * Absolute path: scheme and authority parts come from the base.
%%  * Relative path: as for absolute, after merging the reference path
%%    with the base path.
%% Every result except the empty-path one is dot-segment normalized.
resolve_map(#{host := _} = URI, #{scheme := Scheme}, _PathType) ->
    normalize_path_segment(URI#{scheme => Scheme});
resolve_map(URI, BaseURI, empty_path) ->
    Common = [scheme, userinfo, host, port, path],
    Inherited =
        case maps:is_key(query, URI) of
            true -> Common;
            false -> Common ++ [query]
        end,
    maps:merge(URI, maps:with(Inherited, BaseURI));
resolve_map(URI, BaseURI, absolute_path) ->
    FromBase = maps:with([scheme, userinfo, host, port], BaseURI),
    normalize_path_segment(maps:merge(URI, FromBase));
resolve_map(#{path := Path} = URI, BaseURI, relative_path) ->
    Merged = URI#{path => merge_paths(Path, BaseURI)},
    FromBase = maps:with([scheme, userinfo, host, port], BaseURI),
    normalize_path_segment(maps:merge(Merged, FromBase)).
%% Merges a relative reference path with the path of the base URI
%% (RFC 3986, 5.2.3. Merge Paths).
%%  * Base has a host and an empty path: the result is "/" ++ Path.
%%  * Otherwise everything after the last "/" of the base path is
%%    replaced by Path; a base path without any "/" yields Path itself.
%% The result has the same type (binary or list) as Path.
merge_paths(Path, BaseURI=#{path := BasePath0}) ->
    case {BaseURI, iolist_size(BasePath0)} of
        {#{host := _}, 0} ->
            merge_paths_absolute(Path);
        _ ->
            %% `trailing' splits at the last "/" of the base path.
            case string:split(BasePath0, <<$/>>, trailing) of
                [BasePath, _] when is_binary(Path) ->
                    unicode:characters_to_binary([BasePath, $/, Path]);
                [BasePath, _] when is_list(Path) ->
                    unicode:characters_to_list([BasePath, $/, Path]);
                [_] ->
                    Path
            end
    end.

%% Prefixes Path with "/", keeping its type (binary or list).
merge_paths_absolute(Path) when is_binary(Path) ->
    <<$/, Path/binary>>;
merge_paths_absolute(Path) when is_list(Path) ->
    unicode:characters_to_list([$/, Path]).


%%-------------------------------------------------------------------------
%% Helper functions for transcode
%%-------------------------------------------------------------------------

%%-------------------------------------------------------------------------
%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>).
%% 1. Convert (transcode/2) input to list form (list of unicode codepoints)
%%    "x%00%00%00%F6"
%% 2. Accumulate characters until percent-encoded segment (transcode/4).
%%    Acc = "x"
%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4)
%%    <<0,0,0,246>>
%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8):
%%    <<195,182>>
%% 5. Percent-encode out-encoded binary:
%%    <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>>
%% 6. Convert binary to list form, reverse it and append the accumulator
%%    "6B%3C%" + "x"
%% 7. Reverse Acc and return it
%%-------------------------------------------------------------------------
%% Acc holds the output produced so far in reverse order.
transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
    transcode_pct(L, Acc, <<>>, InEnc, OutEnc);
transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
    transcode(L, Acc, [], InEnc, OutEnc);
%% Empty input: nothing left to transcode. Without this clause an empty
%% list (which is what flatten_list/2 returns for empty input) ends in a
%% function_clause error instead of an empty result.
transcode([], Acc, _InEnc, _OutEnc) ->
    lists:reverse(Acc).
%%
%% transcode(Input, Acc, Pending, InEncoding, OutEncoding)
%% Pending collects, in reverse order, the plain characters read since
%% the last percent-encoded segment; Acc is the reversed output so far.
transcode([$%, _Hi, _Lo | _] = Input, Acc, Pending, InEncoding, OutEncoding) ->
    %% A percent-encoded segment starts here: flush the pending
    %% characters in front of the accumulator and switch to transcode_pct.
    transcode_pct(Input, Pending ++ Acc, <<>>, InEncoding, OutEncoding);
transcode([Char | Tail], Acc, Pending, InEncoding, OutEncoding) ->
    transcode(Tail, Acc, [Char | Pending], InEncoding, OutEncoding);
transcode([], Acc, Pending, _InEncoding, _OutEncoding) ->
    lists:reverse(Pending ++ Acc).


%% Transcode percent-encoded segment
%% Bytes collects the decoded octets of consecutive "%XX" triplets. When
%% the run of triplets ends, the octets are converted from InEncoding to
%% OutEncoding and percent-encoded again.
%% Throws {error, invalid_percent_encoding, Input} if a "%" is followed
%% by two characters that are not both hexadecimal digits.
transcode_pct([$%, Hi, Lo | Tail] = Input, Acc, Bytes, InEncoding, OutEncoding) ->
    case is_hex_digit(Hi) andalso is_hex_digit(Lo) of
        false ->
            throw({error, invalid_percent_encoding,Input});
        true ->
            Octet = ?HEX2DEC(Hi)*16+?HEX2DEC(Lo),
            transcode_pct(Tail, Acc, <<Bytes/binary, Octet>>,
                          InEncoding, OutEncoding)
    end;
transcode_pct([_ | _] = Input, Acc, Bytes, InEncoding, OutEncoding) ->
    Encoded = transcode_pct_flush(Bytes, InEncoding, OutEncoding),
    %% Acc is kept reversed, so the new output is reversed onto it.
    transcode(Input, lists:reverse(Encoded, Acc), [], InEncoding, OutEncoding);
transcode_pct([], Acc, Bytes, InEncoding, OutEncoding) ->
    Encoded = transcode_pct_flush(Bytes, InEncoding, OutEncoding),
    lists:reverse(Acc) ++ Encoded.

%% Converts the collected octets from InEncoding to OutEncoding and
%% returns their percent-encoded form as a list of characters.
transcode_pct_flush(Bytes, InEncoding, OutEncoding) ->
    OutBinary = convert_to_binary(Bytes, InEncoding, OutEncoding),
    convert_to_list(percent_encode_segment(OutBinary), utf8).


%% Convert to binary
%% Wraps unicode:characters_to_binary/3; an `error' or `incomplete'
%% result is turned into a thrown {error, invalid_input, RestData}.
convert_to_binary(Data, InEncoding, OutEncoding) ->
    case unicode:characters_to_binary(Data, InEncoding, OutEncoding) of
        {Failure, _Converted, RestData}
          when Failure =:= error; Failure =:= incomplete ->
            throw({error, invalid_input, RestData});
        Converted ->
            Converted
    end.
%% Convert to list
%% Wraps unicode:characters_to_list/2; an `error' or `incomplete'
%% result is turned into a thrown {error, invalid_input, RestData}.
convert_to_list(Data, InEncoding) ->
    case unicode:characters_to_list(Data, InEncoding) of
        {Failure, _Converted, RestData}
          when Failure =:= error; Failure =:= incomplete ->
            throw({error, invalid_input, RestData});
        Converted ->
            Converted
    end.


%% Flatten input list
%% Turns possibly deep input mixing characters, lists and binaries into
%% a flat list. Binaries are decoded from InEnc with convert_to_list/2.
%% Throws {error, invalid_input, Arg} when the remaining input is
%% neither a list cell nor the empty list.
flatten_list(Input, InEnc) ->
    flatten_list(Input, InEnc, []).
%%
%% The accumulator is built in reverse and reversed once at the end.
flatten_list([], _InEnc, Acc) ->
    lists:reverse(Acc);
flatten_list([Head | Tail], InEnc, Acc) when is_binary(Head) ->
    Chars = convert_to_list(Head, InEnc),
    flatten_list(Tail, InEnc, lists:reverse(Chars, Acc));
flatten_list([Head | Tail], InEnc, Acc) when is_list(Head) ->
    %% Splice the nested list in front of the remaining input.
    flatten_list(Head ++ Tail, InEnc, Acc);
flatten_list([Head | Tail], InEnc, Acc) ->
    flatten_list(Tail, InEnc, [Head | Acc]);
flatten_list(Arg, _InEnc, _Acc) ->
    throw({error, invalid_input, Arg}).


%% Percent-encodes a binary segment, starting from an empty accumulator.
percent_encode_segment(Segment) ->
    percent_encode_binary(Segment, <<>>).


%%-------------------------------------------------------------------------
%% Helper functions for compose_query
%%-------------------------------------------------------------------------

%% Returns separator to be used between key-value pairs:
%% an empty binary for the empty list and <<"&">> for anything else.
get_separator(Pairs) ->
    case Pairs of
        [] -> <<>>;
        _ -> <<"&">>
    end.
%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
%% HTML 5.0 - 4.10.22.6 URL-encoded form data - encoding (non UTF-8)
%%
%% form_urlencode(Chardata, [{encoding, Encoding}]) -> binary()
%%  * latin1: characters above 255 are first replaced by "&#NNN;"
%%    references (base10_encode/1), then the bytes are form-urlencoded.
%%  * utf8 | unicode: lists are converted to a UTF-8 binary first;
%%    binaries are form-urlencoded byte by byte as they are.
%% Throws {error, invalid_encoding, Encoding} for any other encoding and
%% {error, invalid_input, Cs} when Cs is neither a list nor a binary.
form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
    B = convert_to_binary(Cs, utf8, utf8),
    html5_byte_encode(base10_encode(B));
form_urlencode(Cs, [{encoding, latin1}]) when is_binary(Cs) ->
    html5_byte_encode(base10_encode(Cs));
%% The encoding alternatives are parenthesised: in a guard "," binds
%% tighter than ";", so "is_list(Cs), E =:= utf8; E =:= unicode" would
%% match ANY Cs when the encoding is `unicode'.
form_urlencode(Cs, [{encoding, Encoding}])
  when is_list(Cs), (Encoding =:= utf8 orelse Encoding =:= unicode) ->
    B = convert_to_binary(Cs, utf8, Encoding),
    html5_byte_encode(B);
form_urlencode(Cs, [{encoding, Encoding}])
  when is_binary(Cs), (Encoding =:= utf8 orelse Encoding =:= unicode) ->
    html5_byte_encode(Cs);
form_urlencode(Cs, [{encoding, Encoding}]) when is_list(Cs); is_binary(Cs) ->
    throw({error,invalid_encoding, Encoding});
form_urlencode(Cs, _) ->
    throw({error,invalid_input, Cs}).


%% For each character in the entry's name and value that cannot be expressed using
%% the selected character encoding, replace the character by a string consisting of
%% a U+0026 AMPERSAND character (&), a "#" (U+0023) character, one or more ASCII
%% digits representing the Unicode code point of the character in base ten, and
%% finally a ";" (U+003B) character.
%%
%% The input is a UTF-8 binary; code points up to 255 are emitted as a
%% single byte.
base10_encode(Cs) ->
    base10_encode(Cs, <<>>).
%%
base10_encode(<<>>, Acc) ->
    Acc;
base10_encode(<<H/utf8,T/binary>>, Acc) when H > 255 ->
    Base10 = convert_to_binary(integer_to_list(H,10), utf8, utf8),
    base10_encode(T, <<Acc/binary,"&#",Base10/binary,$;>>);
base10_encode(<<H/utf8,T/binary>>, Acc) ->
    base10_encode(T, <<Acc/binary,H>>);
%% Not valid UTF-8: report it the same way base10_decode/2 does instead
%% of failing with a function_clause error.
base10_encode(<<H,_/binary>>, _Acc) ->
    throw({error, invalid_input, [H]}).


%% Form-urlencodes a binary byte by byte, starting from an empty
%% accumulator.
html5_byte_encode(B) ->
    html5_byte_encode(B, <<>>).
%%
%% Space becomes "+", bytes accepted by is_url_char/1 are copied, and
%% every other byte becomes "%" followed by its two hex digits.
%% Throws {error, invalid_input, Rest} if the input is not a whole
%% number of bytes.
html5_byte_encode(<<>>, Encoded) ->
    Encoded;
html5_byte_encode(<<$\s, Rest/binary>>, Encoded) ->
    html5_byte_encode(Rest, <<Encoded/binary, $+>>);
html5_byte_encode(<<Byte, Rest/binary>>, Encoded) ->
    case is_url_char(Byte) of
        false ->
            %% Split the byte into its high and low nibble.
            Hi = Byte bsr 4,
            Lo = Byte band 16#0F,
            html5_byte_encode(Rest, <<Encoded/binary,$%,(?DEC2HEX(Hi)),(?DEC2HEX(Lo))>>);
        true ->
            html5_byte_encode(Rest, <<Encoded/binary, Byte>>)
    end;
html5_byte_encode(Invalid, _Encoded) ->
    throw({error,invalid_input, Invalid}).


%% Return true if input char can appear in form-urlencoded string
%% Allowed characters:
%%   0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A,
%%   0x5F, 0x61 to 0x7A
is_url_char($*) -> true;                       % 0x2A
is_url_char($-) -> true;                       % 0x2D
is_url_char($.) -> true;                       % 0x2E
is_url_char($_) -> true;                       % 0x5F
is_url_char(C) when $0 =< C, C =< $9 -> true;  % 0x30 to 0x39
is_url_char(C) when $A =< C, C =< $Z -> true;  % 0x41 to 0x5A
is_url_char(C) when $a =< C, C =< $z -> true;  % 0x61 to 0x7A
is_url_char(_) -> false.


%%-------------------------------------------------------------------------
%% Helper functions for dissect_query
%%-------------------------------------------------------------------------
%% dissect_query_key(Input, IsList, Acc, Key, Value)
%% Collects the bytes of a key until "=" is reached, then continues with
%% dissect_query_value/5. A "&#" sequence is kept as part of the key.
%% If the key is ended by "&" or by the end of input while the value is
%% still <<>>, the value is set to the atom `true'.
dissect_query_key(<<>> = Input, IsList, Pairs, Key, <<>>) ->
    dissect_query_value(Input, IsList, Pairs, Key, true);
dissect_query_key(<<$=, Rest/binary>>, IsList, Pairs, Key, Value) ->
    dissect_query_value(Rest, IsList, Pairs, Key, Value);
dissect_query_key(<<"&#", Rest/binary>>, IsList, Pairs, Key, Value) ->
    dissect_query_key(Rest, IsList, Pairs, <<Key/binary, "&#">>, Value);
dissect_query_key(<<$&, _/binary>> = Input, IsList, Pairs, Key, <<>>) ->
    %% The "&" is left in the input; dissect_query_value/5 consumes it.
    dissect_query_value(Input, IsList, Pairs, Key, true);
dissect_query_key(<<Byte, Rest/binary>>, IsList, Pairs, Key, Value) ->
    dissect_query_key(Rest, IsList, Pairs, <<Key/binary, Byte>>, Value).
%% dissect_query_value(Input, IsList, Acc, Key, Value)
%% Collects the bytes of a value until "&" or the end of input, decodes
%% the finished key-value pair and either continues with the next key
%% or returns the list of pairs in input order.
dissect_query_value(<<$&, Rest/binary>>, IsList, Pairs, Key, Value) ->
    Pair = decode_query_pair(IsList, Key, Value),
    dissect_query_key(Rest, IsList, [Pair | Pairs], <<>>, <<>>);
dissect_query_value(<<Byte, Rest/binary>>, IsList, Pairs, Key, Value) ->
    dissect_query_value(Rest, IsList, Pairs, Key, <<Value/binary, Byte>>);
dissect_query_value(<<>>, IsList, Pairs, Key, Value) ->
    Pair = decode_query_pair(IsList, Key, Value),
    lists:reverse([Pair | Pairs]).

%% Decodes the key first and then the value of one query pair.
decode_query_pair(IsList, Key, Value) ->
    DecodedKey = form_urldecode(IsList, Key),
    DecodedValue = form_urldecode(IsList, Value),
    {DecodedKey, DecodedValue}.

%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
%% HTML 5.0 - 4.10.22.6 URL-encoded form data - decoding (non UTF-8)
%%
%% form_urldecode/2 has two roles, told apart by its first argument:
%%  * form_urldecode(IsList, Encoded): entry point. The atom `true' as
%%    data is returned as is; otherwise the binary is decoded and
%%    returned as a list (IsList =:= true) or a binary (false).
%%  * form_urldecode(Binary, Acc): worker loop. "+" becomes a space,
%%    "%XX" becomes the byte XX, other UTF-8 characters are copied.
%% Throws {error, invalid_percent_encoding, Rest} for a "%" followed by
%% two characters that are not both hex digits, and
%% {error, invalid_character, [Byte]} for a byte that is not valid UTF-8.
form_urldecode(_, true) ->
    true;
form_urldecode(true, Encoded) ->
    convert_to_list(base10_decode(form_urldecode(Encoded, <<>>)), utf8);
form_urldecode(false, Encoded) ->
    base10_decode(form_urldecode(Encoded, <<>>));
form_urldecode(<<>>, Decoded) ->
    Decoded;
form_urldecode(<<$+, Rest/binary>>, Decoded) ->
    form_urldecode(Rest, <<Decoded/binary, $\s>>);
form_urldecode(<<$%, Hi, Lo, Rest/binary>> = Input, Decoded) ->
    case is_hex_digit(Hi) andalso is_hex_digit(Lo) of
        false ->
            throw({error, invalid_percent_encoding, convert_to_list(Input, utf8)});
        true ->
            Byte = ?HEX2DEC(Hi)*16+?HEX2DEC(Lo),
            form_urldecode(Rest, <<Decoded/binary, Byte>>)
    end;
form_urldecode(<<Char/utf8, Rest/binary>>, Decoded) ->
    form_urldecode(Rest, <<Decoded/binary, Char/utf8>>);
form_urldecode(<<Byte, _/binary>>, _Decoded) ->
    throw({error, invalid_character, [Byte]}).

%% Replaces every "&#NNN;" reference in a UTF-8 binary with the UTF-8
%% encoding of code point NNN (see base10_decode_unicode/2).
%% Throws {error, invalid_input, [Byte]} for a byte that is not valid
%% UTF-8.
base10_decode(Encoded) ->
    base10_decode(Encoded, <<>>).
%
base10_decode(<<>>, Decoded) ->
    Decoded;
base10_decode(<<"&#", Rest/binary>>, Decoded) ->
    base10_decode_unicode(Rest, Decoded);
base10_decode(<<Char/utf8, Rest/binary>>, Decoded) ->
    base10_decode(Rest, <<Decoded/binary, Char/utf8>>);
base10_decode(<<Byte, _/binary>>, _Decoded) ->
    throw({error, invalid_input, [Byte]}).
%% Decodes the "NNN;" part of a "&#NNN;" reference (the "&#" has already
%% been consumed), appends the code point NNN to Acc as UTF-8 and hands
%% the rest of the input back to base10_decode/2.
%% Throws {error, invalid_input, Data} when
%%  * a character other than a decimal digit or ";" is found,
%%  * the input ends before the terminating ";",
%%  * NNN cannot be encoded as UTF-8 (surrogate range or > 16#10FFFF).
base10_decode_unicode(B, Acc) ->
    base10_decode_unicode(B, 0, Acc).
%%
base10_decode_unicode(<<H/utf8,T/binary>>, Codepoint, Acc) when $0 =< H, H =< $9 ->
    Res = Codepoint * 10 + (H - $0),
    base10_decode_unicode(T, Res, Acc);
%% Only 0..16#D7FF and 16#E000..16#10FFFF can be written with a utf8
%% binary segment; anything else would raise badarg.
base10_decode_unicode(<<$;,T/binary>>, Codepoint, Acc)
  when Codepoint < 16#D800;
       Codepoint > 16#DFFF, Codepoint =< 16#10FFFF ->
    base10_decode(T, <<Acc/binary,Codepoint/utf8>>);
base10_decode_unicode(<<$;,_/binary>>, Codepoint, _) ->
    throw({error, invalid_input, integer_to_list(Codepoint)});
base10_decode_unicode(<<H,_/binary>>, _, _) ->
    throw({error, invalid_input, [H]});
%% Unterminated reference, e.g. "&#12" at the very end of the input.
base10_decode_unicode(<<>>, _, _) ->
    throw({error, invalid_input, []}).


%%-------------------------------------------------------------------------
%% Helper functions for normalize
%%-------------------------------------------------------------------------

%% Applies the normalization steps to a URI map in this order: case,
%% percent-encoding, scheme-based and finally path segment normalization.
normalize_map(URIMap) ->
    normalize_path_segment(
      normalize_scheme_based(
        normalize_percent_encoding(
          normalize_case(URIMap)))).


%% 6.2.2.1.  Case Normalization
%% Lowercases the scheme and the host, whichever of them are present.
normalize_case(#{scheme := Scheme, host := Host} = Map) ->
    Map#{scheme => to_lower(Scheme),
         host => to_lower(Host)};
normalize_case(#{host := Host} = Map) ->
    Map#{host => to_lower(Host)};
normalize_case(#{scheme := Scheme} = Map) ->
    Map#{scheme => to_lower(Scheme)};
normalize_case(#{} = Map) ->
    Map.


%% 6.2.2.2.  Percent-Encoding Normalization
%% Runs decode/1 on userinfo, host, path, query and fragment; all other
%% values (port and scheme) are left untouched.
normalize_percent_encoding(Map) ->
    Fun = fun (K,V) when K =:= userinfo; K =:= host; K =:= path;
                         K =:= query; K =:= fragment ->
                  decode(V);
              %% Handle port and scheme
              (_,V) ->
                  V
          end,
    maps:map(Fun, Map).


%% Lowercases the ASCII letters A-Z. Lists are converted to a UTF-8
%% binary, processed byte by byte and converted back; all other bytes
%% are copied unchanged.
to_lower(Cs) when is_list(Cs) ->
    B = convert_to_binary(Cs, utf8, utf8),
    convert_to_list(to_lower(B), utf8);
to_lower(Cs) when is_binary(Cs) ->
    to_lower(Cs, <<>>).
%%
to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z ->
    %% $a - $A =:= 32
    to_lower(Cs, <<Acc/binary,(C + 32)>>);
to_lower(<<C,Cs/binary>>, Acc) ->
    to_lower(Cs, <<Acc/binary,C>>);
to_lower(<<>>, Acc) ->
    Acc.


%% 6.2.2.3.
%% Path Segment Normalization
%% 5.2.4.  Remove Dot Segments
%%
%% Removes the "." and ".." segments from the path of a URI map.
%% A map without a path is returned unchanged (the scheme-based
%% normalization treats the path as optional as well); previously the
%% missing path was looked up with the default `undefined', which
%% remove_dot_segments/1 cannot handle.
normalize_path_segment(#{path := Path} = Map) ->
    Map#{path := remove_dot_segments(Path)};
normalize_path_segment(#{} = Map) ->
    Map.


%% Removes dot segments from a path. A list is converted to a UTF-8
%% binary, processed and converted back, so the result has the same
%% type as the input.
remove_dot_segments(Path) when is_binary(Path) ->
    remove_dot_segments(Path, <<>>);
remove_dot_segments(Path) when is_list(Path) ->
    B = convert_to_binary(Path, utf8, utf8),
    B1 = remove_dot_segments(B, <<>>),
    convert_to_list(B1, utf8).
%%
%% remove_dot_segments(Input, Output) follows the steps of
%% RFC 3986, 5.2.4; Output is the path built so far.
remove_dot_segments(<<>>, Output) ->
    Output;
%% A leading "../" or "./" is dropped.
remove_dot_segments(<<"../",T/binary>>, Output) ->
    remove_dot_segments(T, Output);
remove_dot_segments(<<"./",T/binary>>, Output) ->
    remove_dot_segments(T, Output);
%% "/./" and a final "/." collapse to "/".
remove_dot_segments(<<"/./",T/binary>>, Output) ->
    remove_dot_segments(<<$/,T/binary>>, Output);
remove_dot_segments(<<"/.">>, Output) ->
    remove_dot_segments(<<$/>>, Output);
%% "/../" and a final "/.." collapse to "/" and also remove the last
%% segment already written to Output.
remove_dot_segments(<<"/../",T/binary>>, Output) ->
    Out1 = remove_last_segment(Output),
    remove_dot_segments(<<$/,T/binary>>, Out1);
remove_dot_segments(<<"/..">>, Output) ->
    Out1 = remove_last_segment(Output),
    remove_dot_segments(<<$/>>, Out1);
%% An input consisting only of "." or ".." is dropped.
remove_dot_segments(<<$.>>, Output) ->
    remove_dot_segments(<<>>, Output);
remove_dot_segments(<<"..">>, Output) ->
    remove_dot_segments(<<>>, Output);
%% Otherwise move the first path segment from Input to Output.
remove_dot_segments(Input, Output) ->
    {First, Rest} = first_path_segment(Input),
    remove_dot_segments(Rest, <<Output/binary,First/binary>>).


%% Splits a non-empty Input into {First, Rest}, where First is the
%% first byte (a leading "/", if there is one) followed by all bytes up
%% to, but not including, the next "/".
first_path_segment(Input) ->
    F = first_path_segment(Input, <<>>),
    split_binary(Input, byte_size(F)).
%%
first_path_segment(<<$/,T/binary>>, Acc) ->
    first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>);
first_path_segment(<<C,T/binary>>, Acc) ->
    first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
%% Copies bytes from the input to the accumulator until the next "/" or
%% the end of the input, and returns the accumulator.
first_path_segment_end(Input, Segment) ->
    case Input of
        <<>> ->
            Segment;
        <<$/, _/binary>> ->
            Segment;
        <<Byte, Rest/binary>> ->
            first_path_segment_end(Rest, <<Segment/binary, Byte>>)
    end.


%% Removes the last segment of the output path together with the "/"
%% in front of it. Returns <<>> if the path contains no "/".
remove_last_segment(<<>>) ->
    <<>>;
remove_last_segment(Output) ->
    InitSize = byte_size(Output) - 1,
    case Output of
        <<Init:InitSize/binary, $/>> ->
            Init;
        <<Init:InitSize/binary, _Byte>> ->
            remove_last_segment(Init)
    end.


%% RFC 3986, 6.2.3.  Scheme-Based Normalization
%% Looks up scheme, port and path (each `undefined' when absent) and
%% dispatches on the scheme.
normalize_scheme_based(Map) ->
    normalize_scheme_based(Map,
                           maps:get(scheme, Map, undefined),
                           maps:get(port, Map, undefined),
                           maps:get(path, Map, undefined)).
%%
%% The scheme may be given as a list or as a binary. Maps with any
%% other scheme are returned unchanged.
normalize_scheme_based(Map, Scheme, Port, Path) ->
    case Scheme of
        S when S =:= "http"; S =:= <<"http">> ->
            normalize_http(Map, Port, Path);
        S when S =:= "https"; S =:= <<"https">> ->
            normalize_https(Map, Port, Path);
        S when S =:= "ftp"; S =:= <<"ftp">> ->
            normalize_ftp(Map, Port);
        S when S =:= "ssh"; S =:= <<"ssh">> ->
            normalize_ssh_sftp(Map, Port);
        S when S =:= "sftp"; S =:= <<"sftp">> ->
            normalize_ssh_sftp(Map, Port);
        S when S =:= "tftp"; S =:= <<"tftp">> ->
            normalize_tftp(Map, Port);
        _Other ->
            Map
    end.


%% http: normalize the port against 80, then the path.
normalize_http(Map, Port, Path) ->
    normalize_http_path(normalize_port(Map, Port, 80), Path).


%% https: normalize the port against 443, then the path.
normalize_https(Map, Port, Path) ->
    normalize_http_path(normalize_port(Map, Port, 443), Path).
%% ftp: normalize the port against 21.
normalize_ftp(Map, Port) ->
    normalize_port(Map, Port, 21).


%% ssh and sftp: normalize the port against 22.
normalize_ssh_sftp(Map, Port) ->
    normalize_port(Map, Port, 22).


%% tftp: normalize the port against 69.
normalize_tftp(Map, Port) ->
    normalize_port(Map, Port, 69).


%% Removes the port from the map when it is exactly the given default;
%% otherwise the map is returned unchanged.
normalize_port(Map, Default, Default) ->
    maps:remove(port, Map);
normalize_port(Map, _Port, _Default) ->
    Map.


%% Replaces an empty path ("" or <<>>) by "/" of the same type; any
%% other path leaves the map unchanged.
normalize_http_path(Map, "") ->
    Map#{path => "/"};
normalize_http_path(Map, <<>>) ->
    Map#{path => <<"/">>};
normalize_http_path(Map, _Path) ->
    Map.