1=head1 NAME
2
3Locale::SubCountry - Convert state, province, county etc. names to/from ISO 3166-2 codes, get all states in a country
4
5=head1 SYNOPSIS
6
7    use Locale::SubCountry;
8
9    my $fr = Locale::SubCountry->new('France');
10    if ( not $fr )
11    {
12        die "Invalid country or code: France\n";
13    }
14    else
15    {
16        print($fr->country,"\n");        # France
17        print($fr->country_code,"\n");   # FR
18        print($fr->country_number,"\n"); # 250
19
20        if (  $fr->has_sub_countries )
21        {
22            print($fr->code('Hautes-Alpes '),"\n");       # 05
23            print($fr->full_name('03'),"\n");             # Allier
24            my $upper_case = 1;
25            print($fr->full_name('02',$upper_case),"\n"); # AINSE
26            print($fr->level('02'),"\n");                 # Metropolitan department
27            print($fr->level('A'),"\n");                  # Metropolitan region
28            print($fr->level('BL'),"\n");                 # Overseas territorial collectivity
29            print($fr->levels,"\n");                      # Metropolitan region => 22, Metropolitan department => 96 ...
30
31            my @fr_names  = $fr->all_full_names;    # Ain, Ainse, Allier...
32            my @fr_codes   = $fr->all_codes;        # 01, 02, 03 ...
33            my %fr_names_keyed_by_code  = $fr->code_full_name_hash;  # 01 => Ain...
34            my %fr_codes_keyed_by_name  = $fr->full_name_code_hash;  # Ain => 01 ...
35
36            foreach my $code ( sort keys %fr_names_keyed_by_code )
37            {
38               printf("%-3s : %s\n",$code,$fr_names_keyed_by_code{$code});
39            }
40        }
41    }
42
43    # Methods for fetching all country codes and names in the world
44
45    my $world = Locale::SubCountry::World->new();
46    my @all_countries     = $world->all_full_names;
47    my @all_country_codes = $world->all_codes;
48
49    my %all_countries_keyed_by_name = $world->full_name_code_hash;
50    my %all_country_keyed_by_code   = $world->code_full_name_hash;
51
52
53=head1 DESCRIPTION
54
55This module allows you to convert the full name for a country's administrative
56region to the code commonly used for postal addressing. The reverse look up
57can also be done.
58
59Lists of sub country codes are useful for web applications that require a valid
60state, county etc to be entered as part of a users location.
61
62Sub countries are termed as states in the US and Australia, provinces
63in Canada and counties in the UK and Ireland. Other terms include region,
64department, city and territory. Countries such as France have several
65levels of sub countries, such as Metropolitan department, Metropolitan region etc.
66
67Names and ISO 3166-2 codes for all sub countries in a country can be
68returned as either a hash or an array.
69
70Names and ISO 3166-1 codes for all countries in the world can be
71returned as either a hash or an array. This in turn can be used to
72fetch every sub country from every country (see examples/demo.pl).
73
74Sub country codes are defined in "ISO 3166-2,
75Codes for the representation of names of countries and their subdivisions".
76
77
78=head1 METHODS
79
80Note that the following methods duplicate some of the functionality of the
81Locale::Country module (part of the Locale::Codes bundle). They are provided
82here because you may need to first access the list of  countries and
83ISO 3166-1 codes, before fetching their sub country data. If you only need
84access to country data, then Locale::Country should be used.
85
86Note also the following method names are also used for sub country objects.
87(interface polymorphism for the technically minded). To avoid confusion, make
88sure that your chosen method is acting on the correct type of object.
89
90    all_codes
91    all_full_names
92    code_full_name_hash
93    full_name_code_hash
94
95
96=head2  Locale::SubCountry::World->new()
97
98The C<new> method creates an instance of a world country object. This must be
99called before any of the following methods are invoked. The method takes no
100arguments.
101
102
103=head2 full_name_code_hash (for world objects)
104
105Given a world object, returns a hash of full name/code pairs for every country,
106keyed by country name.
107
108=head2 code_full_name_hash  for world objects)
109
110Given a world object, returns a hash of full name/code pairs for every country,
111keyed by country code.
112
113
114=head2 all_full_names (for world objects)
115
116Given a world object, returns an array of all country full names,
117sorted alphabetically.
118
119=head2 all_codes (for world objects)
120
121Given a world object, returns an array of all country ISO 3166-1 codes,
122sorted alphabetically.
123
124
125=head2 Locale::SubCountry->new()
126
127The C<new> method creates an instance of a sub country object. This must be
128called before any of the following methods are invoked. The method takes a
129single argument, the name of the country that contains the sub country
130that you want to work with. It may be specified either by the ISO 3166-1
131alpha-2  code or the full name. For example:
132
133    AF - Afghanistan
134    AL - Albania
135    DZ - Algeria
136    AO - Angola
137    AR - Argentina
138    AM - Armenia
139    AU - Australia
140    AT - Austria
141
142
143If the code is specified, such as 'AU'  the format may be in capitals or lower case
144If the full name is specified, such as 'Australia', the format must be in title case
145If a country name or code is specified that the module doesn't recognised, it will issue a warning.
146
147=head2 country
148
149Returns the current country name of a sub country object. The format is in title case,
150such as 'United Kingdom'
151
152=head2 country_code
153
154Given a sub country object, returns the alpha-2 ISO 3166-1 code of the country,
155such as 'GB'
156
157
158=head2 code
159
160Given a sub country object, the C<code> method takes the full name of a sub
161country and returns the sub country's alpha-2 ISO 3166-2 code. The full name can appear
162in mixed case. All white space and non alphabetic characters are ignored, except
163the single space used to separate sub country names such as "New South Wales".
164The code is returned as a capitalised string, or "unknown" if no match is found.
165
166=head2 full_name
167
168Given a sub country object, the C<full_name> method takes the alpha-2 ISO 3166-2 code
169of a sub country and returns the sub country's full name. The code can appear
170in mixed case. All white space and non alphabetic characters are ignored. The
171full name is returned as a title cased string, such as "South Australia".
172
173If an optional argument is supplied and set to a true value, the full name is
174returned as an upper cased string.
175
176=head2 level
177
178Given a sub country object, the C<level> method takes the alpha-2 ISO 3166-2 code
179of a sub country and returns the sub country's level . Examples are city,
180province,state and district, and usually relates to the a regions size.
181The level is returned as a  string, or "unknown" if no match is found.
182
183
184=head2 has_sub_countries
185
186Given a sub country object, the C<has_sub_countries> method returns 1 if the
187current country has sub countries, or 0 if it does not. Some small countries
188such as New Caledonia" do not have sub countries.
189
190
191=head2 full_name_code_hash  (for sub country objects)
192
193Given a sub country object, returns a hash of all full name/code pairs,
194keyed by sub country name. If the country has no sub countries, returns undef.
195
196=head2 code_full_name_hash  (for sub country objects)
197
198Given a sub country object, returns a hash of all code/full name pairs,
199keyed by sub country code. If the country has no sub countries, returns undef.
200
201
202=head2 all_full_names  (for sub country objects)
203
204Given a sub country object, returns an array of all sub country full names,
205sorted alphabetically. If the country has no sub countries, returns undef.
206
207=head2 all_codes  (for sub country objects)
208
209Given a sub country object, returns an array of all sub country alpha-2 ISO 3166-2 codes.
210If the country has no sub countries, returns undef.
211
212
213=head1 SEE ALSO
214
215All codes have been downloaded from the latest version of the Debian Salsa project
216L<https://salsa.debian.org/iso-codes-team/iso-codes/>
217and then files iso_3166-1.json, iso_3166-2.json
218
219L<Locale::Country>,L<Lingua::EN::AddressParse>,
220L<Geo::StreetAddress::US>,L<Geo::PostalAddress>,L<Geo::IP>
221L<WWW::Scraper::Wikipedia::ISO3166> for obtaining ISO 3166-2 data
222
223ISO 3166-1 Codes for the representation of names of countries and their
224subdivisions - Part 1: Country codes
225
226ISO 3166-2 Codes for the representation of names of countries and their
227subdivisions - Part 2: Country subdivision code
228
229
230=head1 LIMITATIONS
231
232The ISO 3166-2 standard romanizes the names of provinces and regions in non-latin
233script areas, such as Russia and South Korea. One Romanisation is given for each
234province name. For Russia, the BGN (1947) Romanization is used.
235
236Several sub country names have more than one code, and may not return
237the correct code for that sub country. These entries are usually duplicated
238because the name represents two different types of sub country, such as a
239province and a geographical unit. Examples are:
240
241    AZERBAIJAN : Lankaran; LA (the Municipality), LAN (the Rayon) [see note]
242    AZERBAIJAN : Saki; SA,SAK [see note]
243    AZERBAIJAN : Susa; SS,SUS
244    AZERBAIJAN : Yevlax; YE,YEV
245    LAOS       : Vientiane VI the Vientiane, VT the Prefecture
246    MOZAMBIQUE : Maputo; MPM (City),L (Province)
247
248Note: these names are spelt with  diacrtic characters (such as two dots above
249some of the 'a' characters). This causes utf8 errors on some versions
250of Perl, so they are omitted here. See the Locale::SubCountry::Codes module
251for correct spelling
252
253
254=head1 AUTHOR
255
256Locale::SubCountry was written by Kim Ryan <kimryan at cpan dot org>.
257
258=head1 COPYRIGHT AND LICENCE
259
260This software is Copyright (c) 2018 by Kim Ryan.
261
262This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
263
264
265=head1 CREDITS
266
267Ron Savage for many corrections to the data
268
269Terrence Brannon produced Locale::US, which was the starting point for
270this module.
271
272
273=head1 COPYRIGHT AND LICENSE
274
275Copyright (c) 2019 Kim Ryan. All rights reserved.
276
277This library is free software; you can redistribute it and/or modify
278it under the same terms as Perl itself.
279
280=cut
281
282#-------------------------------------------------------------------------------
283
284package Locale::SubCountry::World;
285use strict;
286use warnings;
287use locale;
288use Exporter;
289use JSON;
290use Locale::SubCountry::Codes;
291
292#-------------------------------------------------------------------------------
293
294
295our $VERSION = '2.07';
296
297# Define all the methods for the 'world' class here. Note that because the
298# name space inherits from the Locale::SubCountry name space, the
299# package wide variables $SubCountry::country and $Locale::SubCountry::subcountry are
300# accessible.
301
302
303#-------------------------------------------------------------------------------
304# Create new instance of a SubCountry::World object
305
306sub new
307{
308    my $class = shift;
309
310    my $world = {};
311    bless($world,$class);
312    return($world);
313}
314
315#-------------------------------------------------------------------------------
316# Returns a hash of code/name pairs for all countries, keyed by  country code.
317
318sub code_full_name_hash
319{
320    my $world = shift;
321    return(  %{ $Locale::SubCountry::country{_code_keyed} } );
322}
323#-------------------------------------------------------------------------------
324# Returns a hash of name/code pairs for all countries, keyed by country name.
325
326sub full_name_code_hash
327{
328    my $world = shift;
329    return( %{ $Locale::SubCountry::country{_full_name_keyed} } );
330}
331#-------------------------------------------------------------------------------
332# Returns sorted array of all country full names
333
334sub all_full_names
335{
336    my $world = shift;
337    return ( sort keys %{ $Locale::SubCountry::country{_full_name_keyed} });
338}
339#-------------------------------------------------------------------------------
340# Returns sorted array of all two letter country codes
341
342sub all_codes
343{
344    my $world = shift;
345    return ( sort keys %{ $Locale::SubCountry::country{_code_keyed} });
346}
347
348#-------------------------------------------------------------------------------
349
350package Locale::SubCountry;
351our $VERSION = '2.07';
352
353#-------------------------------------------------------------------------------
354# Initialization code which will be run first to create global data structure.
355# Read in the list of abbreviations and full names defined in the
356# Locale::SubCountry::Codes package
357
358{
359
360    unless ( $Locale::SubCountry::Codes::JSON )
361    {
362      die "Could not locate Locale::SubCountry::Codes::JSON variable";
363    }
364
365    # Get all the data from the Locale::SubCountry::Codes package and place into a structure
366
367    # Note: will fail on badly formed JSON data
368    my $json_text = $Locale::SubCountry::Codes::JSON;
369    my $json = JSON->new->allow_nonref;
370
371    my $all_codes_ref = $json->decode($json_text);
372
373
374    foreach my $country_ref ( @{ $all_codes_ref->{'3166-1'} })
375    {
376        # Create doubly indexed hash, keyed by country code and full name.
377        # The user can supply either form to create a new sub_country
378        # object, and the objects properties will hold both the countries
379        # name and it's code.
380
381        $Locale::SubCountry::country{_code_keyed}{$country_ref->{alpha_2}} = $country_ref->{name};
382        $Locale::SubCountry::country{_full_name_keyed}{$country_ref->{name}} = $country_ref->{alpha_2};
383
384        # Get numeric code for country, such as Australia = '036'
385        $Locale::SubCountry::country{$country_ref->{name}}{_numeric }= $country_ref->{numeric};
386    }
387
388
389    foreach my $sub_country_ref ( @{ $all_codes_ref->{'3166-2'} })
390    {
391        my ($country_code,$sub_country_code) = split(/\-/,$sub_country_ref->{code});
392        my $sub_country_name =  $sub_country_ref->{name};
393
394        $Locale::SubCountry::subcountry{$country_code}{_code_keyed}{$sub_country_code} = $sub_country_name;
395        $Locale::SubCountry::subcountry{$country_code}{_full_name_keyed}{$sub_country_name} = $sub_country_code;
396        $Locale::SubCountry::subcountry{$country_code}{$sub_country_code}{_level} = $sub_country_ref->{type};
397
398        # Record  level occurence in a country
399        $Locale::SubCountry::subcountry{$country_code}{_levels}{$sub_country_ref->{type}}++;
400
401    }
402}
403
404#-------------------------------------------------------------------------------
405# Create new instance of a sub country object
406
407sub new
408{
409    my $class = shift;
410    my ($country_or_code) = @_;
411
412    my ($country,$country_code);
413
414    # Country may be supplied either as a two letter code, or the full name
415    if ( length($country_or_code) == 2 )
416    {
417        $country_or_code = uc($country_or_code); # lower case codes may be used, so fold to upper case
418        if ( $Locale::SubCountry::country{_code_keyed}{$country_or_code} )
419        {
420            $country_code = $country_or_code;
421            # set country to it's full name
422            $country = $Locale::SubCountry::country{_code_keyed}{$country_code};
423         }
424        else
425        {
426          warn "Invalid country code: $country_or_code chosen";
427          return(undef);
428        }
429    }
430    else
431    {
432        if ( $Locale::SubCountry::country{_full_name_keyed}{$country_or_code} )
433        {
434            $country = $country_or_code;
435            $country_code = $Locale::SubCountry::country{_full_name_keyed}{$country};
436        }
437        else
438        {
439            warn "Invalid country name: $country_or_code chosen, names must be in title case";
440            return(undef);
441
442        }
443    }
444
445    my $sub_country = {};
446    bless($sub_country,$class);
447    $sub_country->{_country} = $country;
448    $sub_country->{_country_code} = $country_code;
449    $sub_country->{_numeric} = $Locale::SubCountry::country{$country}{_numeric};
450
451
452    return($sub_country);
453}
454
455#-------------------------------------------------------------------------------
456# Returns the current country's name of the sub country object
457
458sub country
459{
460    my $sub_country = shift;
461    return( $sub_country->{_country} );
462}
463#-------------------------------------------------------------------------------
464# Returns the current country's alpha2 code of the sub country object
465
466sub country_code
467{
468    my $sub_country = shift;
469    return( $sub_country->{_country_code} );
470}
471
472#-------------------------------------------------------------------------------
473# Returns the current country's numeric code of the sub country object
474
475sub country_number
476{
477    my $sub_country = shift;
478    return( $sub_country->{_numeric} );
479}
480
481#-------------------------------------------------------------------------------
482# Given the full name for a sub country, return the ISO 3166-2 code
483
484sub code
485{
486    my $sub_country = shift;
487    my ($full_name) = @_;
488
489    unless ( $sub_country->has_sub_countries )
490    {
491        # this country has no sub countries
492        return;
493    }
494
495    my $orig = $full_name;
496
497    $full_name = _clean($full_name);
498
499    my $code = $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_full_name_keyed}{$full_name};
500
501    # If a code wasn't found, it could be because the user's capitalization
502    # does not match the one in the look up data of this module. For example,
503    # the user may have supplied the sub country "Ag R" (in Turkey) but the
504    # ISO standard defines the spelling as "Ag r".
505
506    unless ( defined $code )
507    {
508        # For every sub country, compare upper cased full name supplied by user
509        # to upper cased full name from lookup hash. If they match, return the
510        # correctly cased full name from the lookup hash.
511
512        my @all_names = $sub_country->all_full_names;
513        my $current_name;
514        foreach $current_name ( @all_names )
515        {
516            if ( uc($full_name) eq uc($current_name) )
517            {
518                $code = $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_full_name_keyed}{$current_name};
519            }
520        }
521    }
522
523    if ( defined $code )
524    {
525        return($code);
526    }
527    else
528    {
529        return('unknown');
530    }
531}
532
533#-------------------------------------------------------------------------------
534# Given the alpha-2 ISO 3166-2 code for a sub country, return the full name.
535# Parameters are the code and a flag, which if set to true
536# will cause the full name to be uppercased
537
538sub full_name
539{
540    my $sub_country = shift;
541    my ($code,$uc_name) = @_;
542
543    unless ( $sub_country->has_sub_countries )
544    {
545        # this country has no sub countries
546        # return;
547    }
548
549    $code = _clean($code);
550    $code = uc($code);
551
552    my $full_name =
553        $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_code_keyed}{$code};
554    if ( $uc_name )
555    {
556        $full_name = uc($full_name);
557    }
558
559    if ( $full_name )
560    {
561        return($full_name);
562    }
563    else
564    {
565        return('unknown');
566    }
567}
568
569#-------------------------------------------------------------------------------
570# Given the alpha-2 ISO 3166-2 code for a sub country, return the level,
571# being one of state, province, overseas territory, city, council etc
572sub level
573{
574    my $sub_country = shift;
575    my ($code) = @_;
576
577    $code = _clean($code);
578
579    my $level = $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{$code}{_level};
580
581    if ( $level )
582    {
583        return($level);
584    }
585    else
586    {
587        return('unknown');
588    }
589}
590#-------------------------------------------------------------------------------
591# Given a sub country object, return a hash of all the levels and their totals
592# Such as Australia: State => 6, Territory => 2
593
594sub levels
595{
596    my $sub_country = shift;
597
598    return( %{ $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_levels} });
599
600}
601
602#-------------------------------------------------------------------------------
603# Returns 1 if the current country has sub countries. otherwise 0.
604
605sub has_sub_countries
606{
607    my $sub_country = shift;
608    if ( $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_code_keyed} )
609    {
610        return(1);
611    }
612    else
613    {
614        return(0);
615    }
616}
617#-------------------------------------------------------------------------------
618# Returns a hash of code/full name pairs, keyed by sub country code.
619
620sub code_full_name_hash
621{
622    my $sub_country = shift;
623    if ( $sub_country->has_sub_countries )
624    {
625        return( %{ $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_code_keyed} } );
626    }
627    else
628    {
629        return(undef);
630    }
631}
632#-------------------------------------------------------------------------------
633# Returns a hash of name/code pairs, keyed by sub country name.
634
635sub full_name_code_hash
636{
637    my $sub_country = shift;
638    if ( $sub_country->has_sub_countries )
639    {
640        return( %{ $Locale::SubCountry::subcountry{$sub_country->{_country_code}}{_full_name_keyed} } );
641    }
642    else
643    {
644        return(undef);
645    }
646}
647#-------------------------------------------------------------------------------
648# Returns sorted array of all sub country full names for the current country
649
650sub all_full_names
651{
652    my $sub_country = shift;
653    if ( $sub_country->full_name_code_hash )
654    {
655        my %all_full_names = $sub_country->full_name_code_hash;
656        if ( %all_full_names )
657        {
658            return( sort keys %all_full_names );
659        }
660    }
661    else
662    {
663        return(undef);
664    }
665}
666#-------------------------------------------------------------------------------
667# Returns array of all sub country alpha-2  ISO 3166-2 codes for the current country
668
669sub all_codes
670{
671    my $sub_country = shift;
672
673    if ( $sub_country->code_full_name_hash )
674    {
675        my %all_codes = $sub_country->code_full_name_hash;
676        return( sort keys %all_codes );
677    }
678    else
679    {
680        return(undef);
681    }
682}
683
684#-------------------------------------------------------------------------------
685sub _clean
686{
687    my ($input_string) = @_;
688
689    if ( $input_string =~ /[\. ]/ )
690    {
691        # remove dots
692        $input_string =~ s/\.//go;
693
694        # remove repeating spaces
695        $input_string =~ s/  +/ /go;
696
697        # remove any remaining leading or trailing space
698        $input_string =~ s/^ //;
699        $input_string =~ s/ $//;
700    }
701
702    return($input_string);
703}
704
705return(1);
706