1 /*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5 */
6
7 #include "airportdb.h"
8 #include "airportdb_p.h"
9 #include "airportdb_data.cpp"
10 #include "airportnametokenizer_p.h"
11 #include "stringutil.h"
12
13 #include <QDebug>
14 #include <QTimeZone>
15
16 #include <algorithm>
17 #include <cstring>
18
19 namespace KItinerary {
20 namespace KnowledgeDb {
21
22 static_assert(alignof(Airport) <= sizeof(Airport), "Airport struct alignment too big!");
23
operator <(const Airport & lhs,IataCode rhs)24 static bool operator<(const Airport &lhs, IataCode rhs)
25 {
26 return lhs.iataCode < rhs;
27 }
28
coordinateForAirport(IataCode iataCode)29 Coordinate coordinateForAirport(IataCode iataCode)
30 {
31 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
32 if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
33 return {};
34 }
35
36 return (*it).coordinate;
37 }
38
timezoneForAirport(IataCode iataCode)39 QTimeZone timezoneForAirport(IataCode iataCode)
40 {
41 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
42 if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
43 return {};
44 }
45
46 return KnowledgeDb::timezoneForLocation((*it).coordinate.latitude, (*it).coordinate.longitude, (*it).country.toString());
47 }
48
countryForAirport(IataCode iataCode)49 KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
50 {
51 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
52 if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
53 return {};
54 }
55
56 return (*it).country;
57 }
58
// Normalizes a single name fragment via StringUtil::normalize() and expands
// the abbreviation "intl" to "international".
static QString normalizeFragment(const QString &s)
{
    QString normalized = StringUtil::normalize(s);
    if (normalized != QLatin1String("intl")) {
        return normalized;
    }

    // resolve abbreviations
    return QStringLiteral("international");
}
69
applyTransliterations(QStringList & fragments)70 static void applyTransliterations(QStringList &fragments)
71 {
72 // note that the output has the corresponding diacritic markers already stripped,
73 // as StringUtil::normalize has already been applied to fragments
74 // similarly, the input is already case-folded
75 for (auto &fragment : fragments) {
76 fragment.replace(QLatin1String("ae"), QLatin1String("a"));
77 fragment.replace(QLatin1String("oe"), QLatin1String("o"));
78 fragment.replace(QLatin1String("ue"), QLatin1String("u"));
79 }
80 }
81
82 // HACK to work around MSVC string length limit
name1_string_table(uint32_t offset)83 static const char* name1_string_table(uint32_t offset)
84 {
85 if (offset < sizeof(name1_string_table_0)) {
86 return name1_string_table_0 + offset;
87 }
88 return name1_string_table_1 + (offset - sizeof(name1_string_table_0));
89 }
90
iataCodeForUniqueFragment(const QString & s)91 static IataCode iataCodeForUniqueFragment(const QString &s)
92 {
93 const auto it = std::lower_bound(std::begin(name1_string_index), std::end(name1_string_index), s.toUtf8(), [](const Name1Index &lhs, const QByteArray &rhs) {
94 const auto cmp = strncmp(name1_string_table(lhs.offset()), rhs.constData(), std::min<int>(lhs.length, rhs.size()));
95 if (cmp == 0) {
96 return lhs.length < rhs.size();
97 }
98 return cmp < 0;
99 });
100 if (it == std::end(name1_string_index) || it->length != s.toUtf8().size() || strncmp(name1_string_table(it->offset()), s.toUtf8().constData(), it->length) != 0) {
101 return {};
102 }
103 return airport_table[it->iataIndex].iataCode;
104 }
105
iataCodeForUniqueFragment(const QStringList & fragments,std::vector<IataCode> & codes)106 static void iataCodeForUniqueFragment(const QStringList &fragments, std::vector<IataCode> &codes)
107 {
108 for (const auto &s : fragments) {
109 const auto foundCode = iataCodeForUniqueFragment(s);
110 if (!foundCode.isValid()) {
111 continue;
112 }
113
114 auto it = std::lower_bound(codes.begin(), codes.end(), foundCode);
115 if (it == codes.end() || (*it) != foundCode) {
116 codes.insert(it, foundCode);
117 }
118 }
119 }
120
iataCodeForNonUniqueFragments(const QStringList & fragments,std::vector<IataCode> & codes)121 static void iataCodeForNonUniqueFragments(const QStringList &fragments, std::vector<IataCode> &codes)
122 {
123 // we didn't find a unique name fragment, try the non-unique index
124 QSet<uint16_t> iataIdxs;
125 for (const auto &s : fragments) {
126 const auto it = std::lower_bound(std::begin(nameN_string_index), std::end(nameN_string_index), s.toUtf8(), [](const NameNIndex &lhs, const QByteArray &rhs) {
127 const auto cmp = strncmp(nameN_string_table + lhs.strOffset, rhs.constData(), std::min<int>(lhs.strLength, rhs.size()));
128 if (cmp == 0) {
129 return lhs.strLength < rhs.size();
130 }
131 return cmp < 0;
132 });
133 if (it == std::end(nameN_string_index) || it->strLength != s.toUtf8().size() || strncmp(nameN_string_table + it->strOffset, s.toUtf8().constData(), it->strLength) != 0) {
134 continue;
135 }
136
137 // TODO we can do this in-place in codes
138 QSet<uint16_t> candidates;
139 candidates.reserve(it->iataCount);
140 for (auto i = 0; i < it->iataCount; ++i) {
141 candidates.insert(nameN_iata_table[it->iataOffset + i]);
142 }
143 if (iataIdxs.isEmpty()) { // first round
144 iataIdxs = candidates;
145 continue;
146 }
147
148 // ignore the imprecisely used "international" if it results in an empty set here
149 if (s == QLatin1String("international") && !iataIdxs.intersects(candidates)) {
150 continue;
151 }
152
153 iataIdxs &= candidates;
154 if (iataIdxs.isEmpty()) {
155 break;
156 }
157 }
158
159 std::transform(iataIdxs.begin(), iataIdxs.end(), std::back_inserter(codes), [](const auto idx) { return airport_table[idx].iataCode; });
160 std::sort(codes.begin(), codes.end());
161 }
162
iataCodeForIataCodeFragment(const QStringList & fragments)163 static IataCode iataCodeForIataCodeFragment(const QStringList &fragments)
164 {
165 IataCode code;
166 for (const auto &s : fragments) {
167 if (s.size() != 3) {
168 continue;
169 }
170 if (!std::all_of(s.begin(), s.end(), [](const auto c) { return c.isUpper(); })) {
171 continue;
172 }
173 const IataCode searchCode{s};
174 if (code.isValid() && searchCode != code) {
175 return {};
176 }
177 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), searchCode);
178 if (it != std::end(airport_table) && (*it).iataCode == searchCode) {
179 code = searchCode;
180 }
181 // check that this is only a IATA code, not also a (conflicting) name fragment
182 const auto uniqueFragmentCode = iataCodeForUniqueFragment(normalizeFragment(s));
183 if (uniqueFragmentCode.isValid() && code.isValid() && uniqueFragmentCode != code) {
184 return {};
185 }
186 }
187 return code;
188 }
189
iataCodeForNameFragments(const QStringList & fragments,std::vector<IataCode> & codes)190 static void iataCodeForNameFragments(const QStringList &fragments, std::vector<IataCode> &codes)
191 {
192 iataCodeForUniqueFragment(fragments, codes);
193 if (!codes.empty()) {
194 return;
195 }
196 iataCodeForNonUniqueFragments(fragments, codes);
197 }
198
splitToFragments(QStringView name)199 static QStringList splitToFragments(QStringView name)
200 {
201 AirportNameTokenizer tokenizer(name);
202 return tokenizer.toStringList();
203 }
204
205 }
206
iataCodesFromName(QStringView name)207 std::vector<KnowledgeDb::IataCode> KnowledgeDb::iataCodesFromName(QStringView name)
208 {
209 const auto fragments = splitToFragments(name);
210 QStringList normalizedFragments;
211 normalizedFragments.reserve(fragments.size());
212 std::transform(fragments.begin(), fragments.end(), std::back_inserter(normalizedFragments), [](const auto &s) { return normalizeFragment(s); });
213
214 std::vector<IataCode> codes;
215 std::vector<IataCode> candidates;
216 iataCodeForNameFragments(normalizedFragments, codes);
217
218 // try again, with alternative translitarations of e.g. umlauts replaced
219 applyTransliterations(normalizedFragments);
220 iataCodeForNameFragments(normalizedFragments, candidates);
221 if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
222 codes = std::move(candidates);
223 }
224
225 // check if the name contained the IATA code as disambiguation already
226 const auto code = iataCodeForIataCodeFragment(fragments);
227 if (code.isValid() && std::find(codes.begin(), codes.end(), code) != codes.end()) {
228 return {code};
229 }
230
231 // attempt to cut off possibly confusing fancy terminal names
232 auto it = std::find(normalizedFragments.begin(), normalizedFragments.end(), QStringLiteral("terminal"));
233 if (it != normalizedFragments.end()) {
234 normalizedFragments.erase(it, normalizedFragments.end());
235 candidates.clear();
236 iataCodeForNameFragments(normalizedFragments, candidates);
237 if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
238 codes = std::move(candidates);
239 }
240 }
241 return codes;
242 }
243
244 }
245