1 /*
2    mkvmerge -- utility for splicing together matroska files
3    from component media subtypes
4 
5    Distributed under the GPL v2
6    see the file COPYING for details
7    or visit http://www.gnu.org/copyleft/gpl.html
8 
9    BCP 47 language tags
10 
11    Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13 
14 #pragma once
15 
16 #include "common/common_pch.h"
17 
18 namespace mtx::bcp47 {
19 
// A BCP 47 language tag kept as its individual components (language,
// extended language subtags, script, region, variants, extensions and
// private use). This header only declares the interface; all member
// functions are defined elsewhere.
class language_c {
public:
  // A single extension entry: an identifier together with the list of
  // extension strings that belong to it.
  struct extension_t {
    std::string identifier;
    std::vector<std::string> extensions;

    extension_t(std::string const &identifier_, std::vector<std::string> const &extensions_);

    // String representation of this entry (exact layout is up to the implementation).
    std::string format() const noexcept;

    bool operator ==(extension_t const &other) const noexcept;
    bool operator !=(extension_t const &other) const noexcept;
  };

protected:
  // One flag per tag component, all value-initialized to false. Handed to
  // matches_prefix() as its "restrictions" argument.
  struct prefix_restrictions_t {
    bool language{};
    bool extended_language_subtags{};
    bool script{};
    bool region{};
    bool variants{};
  };

  // --- components of the tag ---
  std::string m_language;                               // shortest ISO 639 code or reserved or registered language subtag
  std::vector<std::string> m_extended_language_subtags; // selected ISO 639 codes
  std::string m_script;                                 // ISO 15924 code
  std::string m_region;                                 // either ISO 3166-1 code or UN M.49 code
  std::vector<std::string> m_variants;                  // registered variants
  std::vector<extension_t> m_extensions;
  std::vector<std::string> m_private_use;

  // --- validity state; a default-constructed object is not valid ---
  bool m_valid{false};
  std::string m_parser_error;

  // Both are mutable so that const member functions may update them.
  // NOTE(review): presumably a cache for the formatted tag — confirm in the implementation.
  mutable std::string m_formatted;
  mutable bool m_formatted_up_to_date{};

  // Class-wide flag; see disable() / is_disabled() below.
  static bool ms_disabled;

public:
  void clear() noexcept;

  // --- ISO 639 (language code) queries ---
  bool has_valid_iso639_code() const noexcept;
  bool has_valid_iso639_2_code() const noexcept;
  std::string get_iso639_alpha_3_code() const noexcept;
  std::string get_iso639_2_alpha_3_code_or(std::string const &value_if_invalid) const noexcept;

  // --- ISO 3166-1 / top level domain (country code) queries ---
  bool has_valid_iso3166_1_alpha_2_or_top_level_domain_country_code() const noexcept;
  std::string get_iso3166_1_alpha_2_code() const noexcept;
  std::string get_top_level_domain_country_code() const noexcept;

  // --- string representations and status ---
  // The meaning of `force` is defined by the implementation; it defaults to false.
  std::string dump() const noexcept;
  std::string format(bool force = false) const noexcept;
  std::string format_long(bool force = false) const noexcept;
  bool is_valid() const noexcept;
  std::string const &get_error() const noexcept;

  // --- comparison and matching ---
  bool operator ==(language_c const &other) const noexcept;
  bool operator !=(language_c const &other) const noexcept;

  bool matches(language_c const &match) const noexcept;
  language_c find_best_match(std::vector<language_c> const &potential_matches) const noexcept;

  // --- setters; each returns a language_c reference so calls can be chained ---
  language_c &set_valid(bool valid);
  language_c &set_language(std::string const &language);
  language_c &set_extended_language_subtags(std::vector<std::string> const &extended_language_subtags);
  language_c &set_script(std::string const &script);
  language_c &set_region(std::string const &region);
  language_c &set_variants(std::vector<std::string> const &variants);
  language_c &set_extensions(std::vector<extension_t> const &extensions);
  language_c &set_private_use(std::vector<std::string> const &private_use);

  language_c &add_extension(extension_t const &extensions);

  // --- getters; all return const references, none hand out copies ---
  std::string const &get_language() const noexcept;
  std::vector<std::string> const &get_extended_language_subtags() const noexcept;
  std::string const &get_script() const noexcept;
  std::string const &get_region() const noexcept;
  std::vector<std::string> const &get_variants() const noexcept;
  std::vector<extension_t> const &get_extensions() const noexcept;
  std::vector<std::string> const &get_private_use() const noexcept;

protected:
  std::string format_internal(bool force) const noexcept;

  // --- parsing helpers, one per kind of tag component; each reports success as bool ---
  bool parse_language(std::string const &code);
  bool parse_extensions(std::string const &str);
  bool parse_script(std::string const &code);
  bool parse_region(std::string const &code);
  bool parse_extlangs_or_variants(std::string const &str, bool is_extlangs);

  // --- validation helpers; the bool parameter chooses between extended
  //     language subtags and variants ---
  bool validate_extensions();
  bool validate_extlangs_or_variants(bool is_extlangs);
  bool validate_one_extlang_or_variant(std::size_t extlang_or_variant_index, bool is_extlang);
  bool matches_prefix(language_c const &prefix, std::size_t extlang_or_variant_index, bool is_extlang, prefix_restrictions_t const &restrictions) const noexcept;

public:
  // Builds a language_c from a textual tag. The result is returned by value;
  // is_valid() and get_error() are the declared way to inspect the outcome.
  static language_c parse(std::string const &language);

  // Class-wide switch backed by ms_disabled.
  // NOTE(review): what exactly gets disabled is not visible in this header — confirm in the implementation.
  static void disable();
  static bool is_disabled();
};
118 
// Initialization hook for this module; the definition lives outside this header.
// NOTE(review): the name suggests it sets up the regular expressions used when
// parsing tags — confirm against the implementation before relying on that.
void init_re();
120 
121 inline std::ostream &
122 operator<<(std::ostream &out,
123            language_c::extension_t const &extension) {
124   out << extension.format();
125   return out;
126 }
127 
128 inline std::ostream &
129 operator <<(std::ostream &out,
130             language_c const &language) {
131   out << language.format();
132   return out;
133 }
134 
135 inline bool
136 operator<(language_c const &a,
137           language_c const &b) {
138   return a.format() < b.format();
139 }
140 
141 } // namespace mtx::bcp47
142 
143 namespace std {
144 
145 template<>
146 struct hash<mtx::bcp47::language_c> {
147   std::size_t operator()(mtx::bcp47::language_c const &key) const {
148     return std::hash<std::string>()(key.format());
149   }
150 };
151 
152 } // namespace mtx::bcp47
153