1 /*
2    SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3 
4    SPDX-License-Identifier: LGPL-2.0-or-later
5 */
6 
7 #include "airportdb.h"
8 #include "airportdb_p.h"
9 #include "airportdb_data.cpp"
10 #include "airportnametokenizer_p.h"
11 #include "stringutil.h"
12 
13 #include <QDebug>
14 #include <QTimeZone>
15 
16 #include <algorithm>
17 #include <cstring>
18 
19 namespace KItinerary {
20 namespace KnowledgeDb {
21 
22 static_assert(alignof(Airport) <= sizeof(Airport), "Airport struct alignment too big!");
23 
operator <(const Airport & lhs,IataCode rhs)24 static bool operator<(const Airport &lhs, IataCode rhs)
25 {
26     return lhs.iataCode < rhs;
27 }
28 
coordinateForAirport(IataCode iataCode)29 Coordinate coordinateForAirport(IataCode iataCode)
30 {
31     const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
32     if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
33         return {};
34     }
35 
36     return (*it).coordinate;
37 }
38 
timezoneForAirport(IataCode iataCode)39 QTimeZone timezoneForAirport(IataCode iataCode)
40 {
41     const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
42     if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
43         return {};
44     }
45 
46     return KnowledgeDb::timezoneForLocation((*it).coordinate.latitude, (*it).coordinate.longitude, (*it).country.toString());
47 }
48 
countryForAirport(IataCode iataCode)49 KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
50 {
51     const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
52     if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
53         return {};
54     }
55 
56     return (*it).country;
57 }
58 
/** Normalizes a single name fragment for the index lookups.
 *  The fragment is passed through StringUtil::normalize(), and the abbreviation
 *  "intl" is expanded to "international" afterwards.
 *  @param s the raw name fragment.
 *  @returns the normalized (and possibly expanded) fragment.
 */
static QString normalizeFragment(const QString &s)
{
    const auto normalized = StringUtil::normalize(s);
    // anything but the known abbreviation is passed through as-is
    if (normalized != QLatin1String("intl")) {
        return normalized;
    }

    return QStringLiteral("international");
}
69 
applyTransliterations(QStringList & fragments)70 static void applyTransliterations(QStringList &fragments)
71 {
72     // note that the output has the corresponding diacritic markers already stripped,
73     // as StringUtil::normalize has already been applied to fragments
74     // similarly, the input is already case-folded
75     for (auto &fragment : fragments) {
76         fragment.replace(QLatin1String("ae"), QLatin1String("a"));
77         fragment.replace(QLatin1String("oe"), QLatin1String("o"));
78         fragment.replace(QLatin1String("ue"), QLatin1String("u"));
79     }
80 }
81 
82 // HACK to work around MSVC string length limit
name1_string_table(uint32_t offset)83 static const char* name1_string_table(uint32_t offset)
84 {
85     if (offset < sizeof(name1_string_table_0)) {
86         return name1_string_table_0 + offset;
87     }
88     return name1_string_table_1 + (offset - sizeof(name1_string_table_0));
89 }
90 
iataCodeForUniqueFragment(const QString & s)91 static IataCode iataCodeForUniqueFragment(const QString &s)
92 {
93     const auto it = std::lower_bound(std::begin(name1_string_index), std::end(name1_string_index), s.toUtf8(), [](const Name1Index &lhs, const QByteArray &rhs) {
94         const auto cmp = strncmp(name1_string_table(lhs.offset()), rhs.constData(), std::min<int>(lhs.length, rhs.size()));
95         if (cmp == 0) {
96             return lhs.length < rhs.size();
97         }
98         return cmp < 0;
99     });
100     if (it == std::end(name1_string_index) || it->length != s.toUtf8().size() || strncmp(name1_string_table(it->offset()), s.toUtf8().constData(), it->length) != 0) {
101         return {};
102     }
103     return airport_table[it->iataIndex].iataCode;
104 }
105 
iataCodeForUniqueFragment(const QStringList & fragments,std::vector<IataCode> & codes)106 static void iataCodeForUniqueFragment(const QStringList &fragments, std::vector<IataCode> &codes)
107 {
108     for (const auto &s : fragments) {
109         const auto foundCode = iataCodeForUniqueFragment(s);
110         if (!foundCode.isValid()) {
111             continue;
112         }
113 
114         auto it = std::lower_bound(codes.begin(), codes.end(), foundCode);
115         if (it == codes.end() || (*it) != foundCode) {
116             codes.insert(it, foundCode);
117         }
118     }
119 }
120 
iataCodeForNonUniqueFragments(const QStringList & fragments,std::vector<IataCode> & codes)121 static void iataCodeForNonUniqueFragments(const QStringList &fragments, std::vector<IataCode> &codes)
122 {
123     // we didn't find a unique name fragment, try the non-unique index
124     QSet<uint16_t> iataIdxs;
125     for (const auto &s : fragments) {
126         const auto it = std::lower_bound(std::begin(nameN_string_index), std::end(nameN_string_index), s.toUtf8(), [](const NameNIndex &lhs, const QByteArray &rhs) {
127                 const auto cmp = strncmp(nameN_string_table + lhs.strOffset, rhs.constData(), std::min<int>(lhs.strLength, rhs.size()));
128                 if (cmp == 0) {
129                     return lhs.strLength < rhs.size();
130                 }
131                 return cmp < 0;
132             });
133         if (it == std::end(nameN_string_index) || it->strLength != s.toUtf8().size() || strncmp(nameN_string_table + it->strOffset, s.toUtf8().constData(), it->strLength) != 0) {
134             continue;
135         }
136 
137         // TODO we can do this in-place in codes
138         QSet<uint16_t> candidates;
139         candidates.reserve(it->iataCount);
140         for (auto i = 0; i < it->iataCount; ++i) {
141             candidates.insert(nameN_iata_table[it->iataOffset + i]);
142         }
143         if (iataIdxs.isEmpty()) { // first round
144             iataIdxs = candidates;
145             continue;
146         }
147 
148         // ignore the imprecisely used "international" if it results in an empty set here
149         if (s == QLatin1String("international") && !iataIdxs.intersects(candidates)) {
150             continue;
151         }
152 
153         iataIdxs &= candidates;
154         if (iataIdxs.isEmpty()) {
155             break;
156         }
157     }
158 
159     std::transform(iataIdxs.begin(), iataIdxs.end(), std::back_inserter(codes), [](const auto idx) { return airport_table[idx].iataCode; });
160     std::sort(codes.begin(), codes.end());
161 }
162 
iataCodeForIataCodeFragment(const QStringList & fragments)163 static IataCode iataCodeForIataCodeFragment(const QStringList &fragments)
164 {
165     IataCode code;
166     for (const auto &s : fragments) {
167         if (s.size() != 3) {
168             continue;
169         }
170         if (!std::all_of(s.begin(), s.end(), [](const auto c) { return c.isUpper(); })) {
171             continue;
172         }
173         const IataCode searchCode{s};
174         if (code.isValid() && searchCode != code) {
175             return {};
176         }
177         const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), searchCode);
178         if (it != std::end(airport_table) && (*it).iataCode == searchCode) {
179             code = searchCode;
180         }
181         // check that this is only a IATA code, not also a (conflicting) name fragment
182         const auto uniqueFragmentCode = iataCodeForUniqueFragment(normalizeFragment(s));
183         if (uniqueFragmentCode.isValid() && code.isValid() && uniqueFragmentCode != code) {
184             return {};
185         }
186     }
187     return code;
188 }
189 
iataCodeForNameFragments(const QStringList & fragments,std::vector<IataCode> & codes)190 static void iataCodeForNameFragments(const QStringList &fragments, std::vector<IataCode> &codes)
191 {
192     iataCodeForUniqueFragment(fragments, codes);
193     if (!codes.empty()) {
194         return;
195     }
196     iataCodeForNonUniqueFragments(fragments, codes);
197 }
198 
splitToFragments(QStringView name)199 static QStringList splitToFragments(QStringView name)
200 {
201     AirportNameTokenizer tokenizer(name);
202     return tokenizer.toStringList();
203 }
204 
205 }
206 
iataCodesFromName(QStringView name)207 std::vector<KnowledgeDb::IataCode> KnowledgeDb::iataCodesFromName(QStringView name)
208 {
209     const auto fragments = splitToFragments(name);
210     QStringList normalizedFragments;
211     normalizedFragments.reserve(fragments.size());
212     std::transform(fragments.begin(), fragments.end(), std::back_inserter(normalizedFragments), [](const auto &s) { return normalizeFragment(s); });
213 
214     std::vector<IataCode> codes;
215     std::vector<IataCode> candidates;
216     iataCodeForNameFragments(normalizedFragments, codes);
217 
218     // try again, with alternative translitarations of e.g. umlauts replaced
219     applyTransliterations(normalizedFragments);
220     iataCodeForNameFragments(normalizedFragments, candidates);
221     if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
222         codes = std::move(candidates);
223     }
224 
225     // check if the name contained the IATA code as disambiguation already
226     const auto code = iataCodeForIataCodeFragment(fragments);
227     if (code.isValid() && std::find(codes.begin(), codes.end(), code) != codes.end()) {
228         return {code};
229     }
230 
231     // attempt to cut off possibly confusing fancy terminal names
232     auto it = std::find(normalizedFragments.begin(), normalizedFragments.end(), QStringLiteral("terminal"));
233     if (it != normalizedFragments.end()) {
234         normalizedFragments.erase(it, normalizedFragments.end());
235         candidates.clear();
236         iataCodeForNameFragments(normalizedFragments, candidates);
237         if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
238             codes = std::move(candidates);
239         }
240     }
241     return codes;
242 }
243 
244 }
245