=head1 NAME

Locale::SubCountry - Convert state, province, county etc. names to/from ISO 3166-2 codes, get all states in a country

=head1 SYNOPSIS

    use Locale::SubCountry;

    my $fr = Locale::SubCountry->new('France');
    if ( not $fr )
    {
        die "Invalid country or code: France\n";
    }
    else
    {
        print($fr->country,"\n");        # France
        print($fr->country_code,"\n");   # FR
        print($fr->country_number,"\n"); # 250

        if ( $fr->has_sub_countries )
        {
            print($fr->code('Hautes-Alpes '),"\n");       # 05
            print($fr->full_name('03'),"\n");             # Allier
            my $upper_case = 1;
            print($fr->full_name('02',$upper_case),"\n"); # AISNE
            print($fr->level('02'),"\n");                 # Metropolitan department
            print($fr->level('A'),"\n");                  # Metropolitan region
            print($fr->level('BL'),"\n");                 # Overseas territorial collectivity
            print($fr->levels,"\n");                      # Metropolitan region => 22, Metropolitan department => 96 ...

            my @fr_names = $fr->all_full_names;                     # Ain, Aisne, Allier...
            my @fr_codes = $fr->all_codes;                          # 01, 02, 03 ...
            my %fr_names_keyed_by_code = $fr->code_full_name_hash;  # 01 => Ain...
            my %fr_codes_keyed_by_name = $fr->full_name_code_hash;  # Ain => 01 ...

            foreach my $code ( sort keys %fr_names_keyed_by_code )
            {
                printf("%-3s : %s\n",$code,$fr_names_keyed_by_code{$code});
            }
        }
    }

    # Methods for fetching all country codes and names in the world

    my $world = Locale::SubCountry::World->new();
    my @all_countries     = $world->all_full_names;
    my @all_country_codes = $world->all_codes;

    my %all_countries_keyed_by_name = $world->full_name_code_hash;
    my %all_country_keyed_by_code   = $world->code_full_name_hash;


=head1 DESCRIPTION

This module allows you to convert the full name for a country's administrative
region to the code commonly used for postal addressing. The reverse look up
can also be done.

Lists of sub country codes are useful for web applications that require a valid
state, county etc to be entered as part of a user's location.

Sub countries are termed as states in the US and Australia, provinces
in Canada and counties in the UK and Ireland. Other terms include region,
department, city and territory. Countries such as France have several
levels of sub countries, such as Metropolitan department, Metropolitan region etc.

Names and ISO 3166-2 codes for all sub countries in a country can be
returned as either a hash or an array.

Names and ISO 3166-1 codes for all countries in the world can be
returned as either a hash or an array. This in turn can be used to
fetch every sub country from every country (see examples/demo.pl).

Sub country codes are defined in "ISO 3166-2,
Codes for the representation of names of countries and their subdivisions".


=head1 METHODS

Note that the following methods duplicate some of the functionality of the
Locale::Country module (part of the Locale::Codes bundle). They are provided
here because you may need to first access the list of countries and
ISO 3166-1 codes, before fetching their sub country data. If you only need
access to country data, then Locale::Country should be used.

Note also that the following method names are used for both world objects and
sub country objects (interface polymorphism for the technically minded). To
avoid confusion, make sure that your chosen method is acting on the correct
type of object.

    all_codes
    all_full_names
    code_full_name_hash
    full_name_code_hash


=head2 Locale::SubCountry::World->new()

The C<new> method creates an instance of a world country object. This must be
called before any of the following methods are invoked. The method takes no
arguments.


=head2 full_name_code_hash (for world objects)

Given a world object, returns a hash of full name/code pairs for every country,
keyed by country name.

=head2 code_full_name_hash (for world objects)

Given a world object, returns a hash of code/full name pairs for every country,
keyed by country code.


=head2 all_full_names (for world objects)

Given a world object, returns an array of all country full names,
sorted alphabetically.

=head2 all_codes (for world objects)

Given a world object, returns an array of all country ISO 3166-1 codes,
sorted alphabetically.


=head2 Locale::SubCountry->new()

The C<new> method creates an instance of a sub country object. This must be
called before any of the following methods are invoked. The method takes a
single argument, the name of the country that contains the sub country
that you want to work with. It may be specified either by the ISO 3166-1
alpha-2 code or the full name. For example:

    AF - Afghanistan
    AL - Albania
    DZ - Algeria
    AO - Angola
    AR - Argentina
    AM - Armenia
    AU - Australia
    AT - Austria


If the code is specified, such as 'AU', the format may be in capitals or lower case.
If the full name is specified, such as 'Australia', the format must be in title case.
If a country name or code is specified that the module doesn't recognise, it will
issue a warning and return undef.

=head2 country

Returns the current country name of a sub country object. The format is in title case,
such as 'United Kingdom'.

=head2 country_code

Given a sub country object, returns the alpha-2 ISO 3166-1 code of the country,
such as 'GB'.

=head2 country_number

Given a sub country object, returns the numeric ISO 3166-1 code of the country,
such as '250' for France.


=head2 code

Given a sub country object, the C<code> method takes the full name of a sub
country and returns the sub country's alpha-2 ISO 3166-2 code. The full name can appear
in mixed case.
Full stops (periods) and surplus spaces are ignored; only the single space
used to separate words in sub country names such as "New South Wales" is kept.
The code is returned as a capitalised string, or "unknown" if no match is found.
If the country has no sub countries, returns undef.

=head2 full_name

Given a sub country object, the C<full_name> method takes the alpha-2 ISO 3166-2 code
of a sub country and returns the sub country's full name. The code can appear
in mixed case. Full stops (periods) and surplus spaces are ignored. The
full name is returned as a title cased string, such as "South Australia",
or "unknown" if no match is found.

If an optional argument is supplied and set to a true value, the full name is
returned as an upper cased string.

=head2 level

Given a sub country object, the C<level> method takes the alpha-2 ISO 3166-2 code
of a sub country and returns the sub country's level. Examples are city,
province, state and district; the level usually relates to a region's size.
The level is returned as a string, or "unknown" if no match is found.


=head2 has_sub_countries

Given a sub country object, the C<has_sub_countries> method returns 1 if the
current country has sub countries, or 0 if it does not. Some small countries
such as "New Caledonia" do not have sub countries.


=head2 full_name_code_hash (for sub country objects)

Given a sub country object, returns a hash of all full name/code pairs,
keyed by sub country name. If the country has no sub countries, returns undef.

=head2 code_full_name_hash (for sub country objects)

Given a sub country object, returns a hash of all code/full name pairs,
keyed by sub country code. If the country has no sub countries, returns undef.


=head2 all_full_names (for sub country objects)

Given a sub country object, returns an array of all sub country full names,
sorted alphabetically. If the country has no sub countries, returns undef.

=head2 all_codes (for sub country objects)

Given a sub country object, returns an array of all sub country alpha-2 ISO 3166-2 codes,
sorted alphabetically. If the country has no sub countries, returns undef.


=head1 SEE ALSO

All codes have been downloaded from the latest version of the Debian Salsa project
L<https://salsa.debian.org/iso-codes-team/iso-codes/>,
using the files iso_3166-1.json and iso_3166-2.json.

L<Locale::Country>, L<Lingua::EN::AddressParse>,
L<Geo::StreetAddress::US>, L<Geo::PostalAddress>, L<Geo::IP>,
L<WWW::Scraper::Wikipedia::ISO3166> for obtaining ISO 3166-2 data

ISO 3166-1 Codes for the representation of names of countries and their
subdivisions - Part 1: Country codes

ISO 3166-2 Codes for the representation of names of countries and their
subdivisions - Part 2: Country subdivision code


=head1 LIMITATIONS

The ISO 3166-2 standard romanizes the names of provinces and regions in non-latin
script areas, such as Russia and South Korea. One Romanization is given for each
province name. For Russia, the BGN (1947) Romanization is used.

Several sub country names have more than one code, and may not return
the correct code for that sub country. These entries are usually duplicated
because the name represents two different types of sub country, such as a
province and a geographical unit. Examples are:

    AZERBAIJAN : Lankaran; LA (the Municipality), LAN (the Rayon) [see note]
    AZERBAIJAN : Saki; SA,SAK [see note]
    AZERBAIJAN : Susa; SS,SUS
    AZERBAIJAN : Yevlax; YE,YEV
    LAOS       : Vientiane; VI (the Province), VT (the Prefecture)
    MOZAMBIQUE : Maputo; MPM (City),L (Province)

Note: these names are spelt with diacritic characters (such as two dots above
some of the 'a' characters). This causes utf8 errors on some versions
of Perl, so they are omitted here.
See the Locale::SubCountry::Codes module
for correct spelling.


=head1 AUTHOR

Locale::SubCountry was written by Kim Ryan <kimryan at cpan dot org>.

=head1 COPYRIGHT AND LICENCE

This software is Copyright (c) 2018 by Kim Ryan.

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.


=head1 CREDITS

Ron Savage for many corrections to the data

Terrence Brannon produced Locale::US, which was the starting point for
this module.


=head1 COPYRIGHT AND LICENSE

Copyright (c) 2019 Kim Ryan. All rights reserved.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

#-------------------------------------------------------------------------------

package Locale::SubCountry::World;
use strict;
use warnings;
use locale;
use Exporter;
use JSON;
use Locale::SubCountry::Codes;

#-------------------------------------------------------------------------------


our $VERSION = '2.07';

# All the methods of the 'world' class are defined here. They read the
# package wide hash %Locale::SubCountry::country, which is always referred to
# by its fully qualified name and so is reachable from this package without
# any inheritance or importing.


#-------------------------------------------------------------------------------
# Constructor for a Locale::SubCountry::World object. Takes no arguments apart
# from the class name; the object holds no data of its own and only serves as
# an invocant for the world level look up methods.

sub new
{
    my ($class) = @_;

    return( bless( {}, $class ) );
}

#-------------------------------------------------------------------------------
# Returns a hash of code/name pairs for all countries, keyed by country code.

sub code_full_name_hash
{
    # The invocant carries no state: all data lives in the package wide
    # %Locale::SubCountry::country table that is built when the file is loaded.
    return( %{ $Locale::SubCountry::country{_code_keyed} || {} } );
}
#-------------------------------------------------------------------------------
# Returns a hash of name/code pairs for all countries, keyed by country name.

sub full_name_code_hash
{
    return( %{ $Locale::SubCountry::country{_full_name_keyed} || {} } );
}
#-------------------------------------------------------------------------------
# Returns sorted array of all country full names

sub all_full_names
{
    return( sort keys %{ $Locale::SubCountry::country{_full_name_keyed} || {} } );
}
#-------------------------------------------------------------------------------
# Returns sorted array of all two letter country codes

sub all_codes
{
    return( sort keys %{ $Locale::SubCountry::country{_code_keyed} || {} } );
}

#-------------------------------------------------------------------------------

package Locale::SubCountry;
our $VERSION = '2.07';

#-------------------------------------------------------------------------------
# Initialization code which will be run first to create global data structure.
# Read in the list of abbreviations and full names defined in the
# Locale::SubCountry::Codes package
#
# Two package wide hashes are filled in:
#
#   %Locale::SubCountry::country
#       {_code_keyed}{<alpha-2 code>}   => country name
#       {_full_name_keyed}{<name>}      => alpha-2 code
#       {<name>}{_numeric}              => numeric country code
#
#   %Locale::SubCountry::subcountry
#       {<country code>}{_code_keyed}{<sub country code>} => sub country name
#       {<country code>}{_full_name_keyed}{<name>}        => sub country code
#       {<country code>}{<sub country code>}{_level}      => level (type)
#       {<country code>}{_levels}{<level>}                => number of occurrences

{

    unless ( $Locale::SubCountry::Codes::JSON )
    {
        die "Could not locate Locale::SubCountry::Codes::JSON variable";
    }

    # Get all the data from the Locale::SubCountry::Codes package and place into a structure

    # Note: decode() dies on badly formed JSON data
    my $json_text = $Locale::SubCountry::Codes::JSON;
    my $json = JSON->new->allow_nonref;

    my $all_codes_ref = $json->decode($json_text);

    # allow_nonref lets a bare scalar through, so make sure we really have the
    # top level object before indexing into it.
    unless ( ref($all_codes_ref) eq 'HASH' )
    {
        die "Locale::SubCountry::Codes::JSON does not contain a JSON object";
    }

    foreach my $country_ref ( @{ $all_codes_ref->{'3166-1'} } )
    {
        # Create doubly indexed hash, keyed by country code and full name.
        # The user can supply either form to create a new sub_country
        # object, and the object's properties will hold both the country's
        # name and its code.

        my $name    = $country_ref->{name};
        my $alpha_2 = $country_ref->{alpha_2};

        $Locale::SubCountry::country{_code_keyed}{$alpha_2}   = $name;
        $Locale::SubCountry::country{_full_name_keyed}{$name} = $alpha_2;

        # Get numeric code for country, such as Australia = '036'
        $Locale::SubCountry::country{$name}{_numeric} = $country_ref->{numeric};
    }


    foreach my $sub_country_ref ( @{ $all_codes_ref->{'3166-2'} } )
    {
        # The code has the form <country code>-<sub country code>
        my ($country_code,$sub_country_code) = split(/\-/,$sub_country_ref->{code});

        # An entry without both halves of the code cannot be indexed
        next unless ( defined $country_code and defined $sub_country_code );

        my $sub_country_name = $sub_country_ref->{name};
        my $level            = $sub_country_ref->{type};

        my $table_ref = ( $Locale::SubCountry::subcountry{$country_code} ||= {} );

        $table_ref->{_code_keyed}{$sub_country_code}      = $sub_country_name;
        $table_ref->{_full_name_keyed}{$sub_country_name} = $sub_country_code;
        $table_ref->{$sub_country_code}{_level}           = $level;

        # Record level occurrence in a country
        $table_ref->{_levels}{$level}++;
    }
}

#-------------------------------------------------------------------------------
# Create new instance of a sub country object
#
# Parameter: the country, either as a two letter ISO 3166-1 code (any case)
# or as the full country name (title case, exactly as in the look up data).
# Returns the new object. If the country is not recognised (or not supplied)
# a warning is issued and nothing is returned: undef in scalar context, the
# empty list in list context.

sub new
{
    my $class = shift;
    my ($country_or_code) = @_;

    unless ( defined $country_or_code )
    {
        warn "Invalid country name: no country name or code was supplied";
        return;
    }

    my ($country,$country_code);

    # Country may be supplied either as a two letter code, or the full name
    if ( length($country_or_code) == 2 )
    {
        $country_or_code = uc($country_or_code); # lower case codes may be used, so fold to upper case
        if ( $Locale::SubCountry::country{_code_keyed}{$country_or_code} )
        {
            $country_code = $country_or_code;
            # set country to its full name
            $country = $Locale::SubCountry::country{_code_keyed}{$country_code};
        }
        else
        {
            warn "Invalid country code: $country_or_code chosen";
            return;
        }
    }
    else
    {
        if ( $Locale::SubCountry::country{_full_name_keyed}{$country_or_code} )
        {
            $country = $country_or_code;
            $country_code = $Locale::SubCountry::country{_full_name_keyed}{$country};
        }
        else
        {
            warn "Invalid country name: $country_or_code chosen, names must be in title case";
            return;
        }
    }

    my $sub_country = {
        _country      => $country,
        _country_code => $country_code,
        _numeric      => $Locale::SubCountry::country{$country}{_numeric},
    };

    return( bless($sub_country,$class) );
}

#-------------------------------------------------------------------------------
# Returns the current country's name of the sub country object

sub country
{
    my $sub_country = shift;
    return( $sub_country->{_country} );
}
#-------------------------------------------------------------------------------
# Returns the current country's alpha2 code of the sub country object

sub country_code
{
    my $sub_country = shift;
    return( $sub_country->{_country_code} );
}

#-------------------------------------------------------------------------------
# Returns the current country's numeric code of the sub country object

sub country_number
{
    my $sub_country = shift;
    return( $sub_country->{_numeric} );
}

#-------------------------------------------------------------------------------
# Given the full name for a sub country, return the ISO 3166-2 code
#
# Returns nothing if the country has no sub countries, and the string
# 'unknown' if the name is missing or cannot be matched.

sub code
{
    my $sub_country = shift;
    my ($full_name) = @_;

    unless ( $sub_country->has_sub_countries )
    {
        # this country has no sub countries
        return;
    }

    # A missing name can never match; bail out before it causes
    # uninitialized value warnings in the look ups below.
    return('unknown') unless ( defined $full_name );

    $full_name = _clean($full_name);

    my $code_for_ref =
        $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_full_name_keyed};

    my $code = $code_for_ref->{$full_name};

    # If a code wasn't found, it could be because the user's capitalization
    # does not match the one in the look up data of this module. For example,
    # the user may have supplied the sub country "Ag R" (in Turkey) but the
    # ISO standard defines the spelling as "Ag r".

    unless ( defined $code )
    {
        # For every sub country, compare upper cased full name supplied by user
        # to upper cased full name from lookup hash. Stop at the first match
        # and use the code stored against the correctly cased name.

        my $wanted = uc($full_name);
        foreach my $current_name ( $sub_country->all_full_names )
        {
            if ( $wanted eq uc($current_name) )
            {
                $code = $code_for_ref->{$current_name};
                last;
            }
        }
    }

    return( defined $code ? $code : 'unknown' );
}

#-------------------------------------------------------------------------------
# Given the alpha-2 ISO 3166-2 code for a sub country, return the full name.
# Parameters are the code and a flag, which if set to true
# will cause the full name to be uppercased.
# Returns the string 'unknown' if the code is missing or is not defined for
# the current country (including countries that have no sub countries).

sub full_name
{
    my $sub_country = shift;
    my ($code,$uc_name) = @_;

    return('unknown') unless ( defined $code );

    $code = uc( _clean($code) );

    # Walk the table one level at a time. A chained look up such as
    # $table{$country}{_code_keyed}{$code} would autovivify the intermediate
    # hashes for a country without sub countries, after which
    # has_sub_countries would wrongly report that it has some.
    my $country_ref = $Locale::SubCountry::subcountry{$sub_country->{_country_code}};
    my $name_for_ref = $country_ref ? $country_ref->{_code_keyed} : undef;
    my $full_name = $name_for_ref ? $name_for_ref->{$code} : undef;

    return('unknown') unless ( $full_name );

    return( $uc_name ? uc($full_name) : $full_name );
}

#-------------------------------------------------------------------------------
# Given the alpha-2 ISO 3166-2 code for a sub country, return the level,
# being one of state, province, overseas territory, city, council etc
# The code may be given in any case, as for full_name. Returns the string
# 'unknown' if the code is missing or no level is recorded for it.
sub level
{
    my $sub_country = shift;
    my ($code) = @_;

    return('unknown') unless ( defined $code );

    $code = uc( _clean($code) );

    # Step through the table without creating entries for unknown codes
    my $country_ref = $Locale::SubCountry::subcountry{$sub_country->{_country_code}};
    my $entry_ref = $country_ref ? $country_ref->{$code} : undef;
    my $level = ( ref($entry_ref) eq 'HASH' ) ? $entry_ref->{_level} : undef;

    return( $level ? $level : 'unknown' );
}
#-------------------------------------------------------------------------------
# Given a sub country object, return a hash of all the levels and their totals
# Such as Australia: State => 6, Territory => 2
# A country without sub countries yields an empty list.

sub levels
{
    my $sub_country = shift;

    my $country_ref = $Locale::SubCountry::subcountry{$sub_country->{_country_code}};
    my $levels_ref = $country_ref ? $country_ref->{_levels} : undef;

    # Dereferencing a missing entry would be fatal under strict refs
    return unless ( $levels_ref );

    return( %{ $levels_ref } );
}

#-------------------------------------------------------------------------------
# Returns 1 if the current country has sub countries, otherwise 0.

sub has_sub_countries
{
    my $sub_country = shift;

    # Fetch the country's entry on its own first. Testing the nested
    # {_code_keyed} key directly would autovivify an empty entry in the
    # package wide table for every country that has no sub countries.
    my $country_ref = $Locale::SubCountry::subcountry{$sub_country->{_country_code}};

    return( ( $country_ref and $country_ref->{_code_keyed} ) ? 1 : 0 );
}
#-------------------------------------------------------------------------------
# Returns a hash of code/full name pairs, keyed by sub country code.
# If the country has no sub countries, returns nothing: undef in scalar
# context and the empty list (so an empty hash) in list context.

sub code_full_name_hash
{
    my $sub_country = shift;

    return unless ( $sub_country->has_sub_countries );

    return( %{ $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_code_keyed} } );
}
#-------------------------------------------------------------------------------
# Returns a hash of name/code pairs, keyed by sub country name.
# If the country has no sub countries, returns nothing: undef in scalar
# context and the empty list (so an empty hash) in list context.

sub full_name_code_hash
{
    my $sub_country = shift;

    return unless ( $sub_country->has_sub_countries );

    return( %{ $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_full_name_keyed} || {} } );
}
#-------------------------------------------------------------------------------
# Returns sorted array of all sub country full names for the current country
# If the country has no sub countries, returns nothing: undef in scalar
# context and the empty list in list context.

sub all_full_names
{
    my $sub_country = shift;

    return unless ( $sub_country->has_sub_countries );

    my %code_for = $sub_country->full_name_code_hash;
    return( sort keys %code_for );
}
#-------------------------------------------------------------------------------
# Returns sorted array of all sub country alpha-2 ISO 3166-2 codes for the
# current country. If the country has no sub countries, returns nothing:
# undef in scalar context and the empty list in list context.

sub all_codes
{
    my $sub_country = shift;

    return unless ( $sub_country->has_sub_countries );

    my %name_for = $sub_country->code_full_name_hash;
    return( sort keys %name_for );
}


#-------------------------------------------------------------------------------
# Private helper: normalise a sub country name or code supplied by the user
# before it is used as a look up key.
#
# Parameter: the string to tidy up.
# Returns the string with all full stops removed, every run of white space
# (spaces, tabs, new lines) squeezed to a single space and any leading or
# trailing space removed. An undefined value is passed back unchanged.

sub _clean
{
    my ($input_string) = @_;

    return($input_string) unless ( defined $input_string );

    # remove dots
    $input_string =~ s/\.//g;

    # squeeze repeating white space down to one space
    $input_string =~ s/\s+/ /g;

    # remove any remaining leading or trailing space
    $input_string =~ s/^ //;
    $input_string =~ s/ $//;

    return($input_string);
}

# A module must end with a true value. A plain expression is used rather than
# return(), which is only valid when the file is loaded via require/do/use.
1;