// Copyright (C) 2018 ycmd contributors
//
// This file is part of ycmd.
//
// ycmd is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ycmd is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ycmd. If not, see <http://www.gnu.org/licenses/>.
#ifndef CODE_POINT_H_3W0LNCLY
#define CODE_POINT_H_3W0LNCLY
#include
#include
#include
namespace YouCompleteMe {
// See
// http://www.unicode.org/reports/tr29/tr29-37.html#Grapheme_Cluster_Break_Property_Values
// NOTE: The properties must take the same value as the ones defined in the
// update_unicode.py script.
enum class BreakProperty : uint8_t {
  // Names follow the Grapheme_Cluster_Break property values of UAX #29. Every
  // numeric value is spelled out because it has to stay identical to the one
  // used by the update_unicode.py script.
  OTHER              = 0,   // no more specific property
  CR                 = 1,   // carriage return
  LF                 = 2,   // line feed
  CONTROL            = 3,
  EXTEND             = 4,
  ZWJ                = 5,   // zero width joiner
  REGIONAL_INDICATOR = 6,
  PREPEND            = 7,
  SPACINGMARK        = 8,
  // Hangul syllable types.
  L                  = 9,
  V                  = 10,
  T                  = 11,
  LV                 = 12,
  LVT                = 13,
  // Values 14 to 17 are not declared in this enum; do not renumber the entry
  // below to close the gap.
  EXTPICT            = 18   // Extended_Pictographic
};
// This is the structure used to store the data in the Unicode table. See the
// CodePoint class for a description of the members.
struct RawCodePoint {
  // Plain aggregate: the member order below is the order used by aggregate
  // initialization, so it must not be changed.

  // Textual forms. std::string_view does not own its characters, so these
  // only refer to storage that lives elsewhere.
  std::string_view original;
  std::string_view normal;
  std::string_view folded_case;
  std::string_view swapped_case;

  // Classification flags.
  bool is_letter;
  bool is_punctuation;
  bool is_uppercase;

  // Numeric properties. NOTE(review): break_property presumably holds the
  // numeric value of a BreakProperty enumerator -- confirm against the table
  // generator.
  uint8_t break_property;
  uint8_t combining_class;
};
// This class represents a UTF-8 code point. It takes a UTF-8 encoded string
// corresponding to a UTF-8 code point and computes the following properties
// from a Unicode table:
// - the UTF-8 code point itself;
// - its normalized version: two code points (or sequence of code points)
// represent the same character if they have identical normalized version;
// - its case-folded version: identical to the normalized version if the code
// point is caseless;
// - its case-swapped version: lowercase if the code point is uppercase,
// uppercase if the code point is lowercase, identical to the normalized
// version if the code point is caseless;
// - if the code point is a letter;
// - if the code point is a punctuation;
// - if the code point is in uppercase: false if the code point has no
// uppercase version;
// - its breaking property: used to split a word into characters.
// - its combining class: used to sort a sequence of code points according to
// the Canonical Ordering algorithm (see
// https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G49591).
class CodePoint {
public:
  // Builds a code point from its UTF-8 encoding. The definition lives outside
  // of this header.
  YCM_EXPORT explicit CodePoint( std::string_view code_point );

  // Move-only type: moving is allowed, copying is disabled.
  CodePoint( CodePoint&& ) = default;
  CodePoint& operator=( CodePoint&& ) = default;
  CodePoint( const CodePoint& ) = delete;
  CodePoint& operator=( const CodePoint& ) = delete;

  // The three accessors below return their string by value, i.e. each call
  // hands back a copy of the stored member.

  // Normalized version of the code point.
  std::string Normal() const {
    return normal_;
  }

  // Case-folded version of the code point.
  std::string FoldedCase() const {
    return folded_case_;
  }

  // Case-swapped version of the code point.
  std::string SwappedCase() const {
    return swapped_case_;
  }

  // True if the code point is a letter.
  bool IsLetter() const {
    return is_letter_;
  }

  // True if the code point is a punctuation.
  bool IsPunctuation() const {
    return is_punctuation_;
  }

  // True if the code point is in uppercase.
  bool IsUppercase() const {
    return is_uppercase_;
  }

  // Breaking property of the code point.
  BreakProperty GetBreakProperty() const {
    return break_property_;
  }

  // Combining class of the code point.
  uint8_t CombiningClass() const {
    return combining_class_;
  }

  // Ordering looks at the combining class only: two code points with the same
  // combining class are equivalent under this comparison whatever their text.
  bool operator< ( const CodePoint &other ) const {
    return CombiningClass() < other.CombiningClass();
  }

private:
  // Builds a code point from a raw table entry. Not reachable from outside
  // the class; defined outside of this header.
  explicit CodePoint( RawCodePoint&& code_point );

  // Keep the declaration order: it is also the member initialization order.
  std::string normal_;
  std::string folded_case_;
  std::string swapped_case_;
  bool is_letter_;
  bool is_punctuation_;
  bool is_uppercase_;
  BreakProperty break_property_;
  uint8_t combining_class_;
};
// Sequence of pointers to immutable CodePoint objects. The vector only holds
// raw pointers. NOTE(review): the pointed-to CodePoint objects are presumably
// owned elsewhere and must outlive the sequence -- confirm in the
// implementation file.
using CodePointSequence = std::vector< const CodePoint * >;
// Split a UTF-8 encoded string into UTF-8 code points.
// Returns one entry per code point of |text|.
// NOTE(review): presumably throws UnicodeDecodeError when |text| is not valid
// UTF-8 -- confirm in the implementation file.
YCM_EXPORT CodePointSequence BreakIntoCodePoints( std::string_view text );
// Thrown when an error occurs while decoding a UTF-8 string.
struct YCM_EXPORT UnicodeDecodeError : std::runtime_error {
using std::runtime_error::runtime_error;
const char* what() const noexcept override;
};
} // namespace YouCompleteMe
#endif /* end of include guard: CODE_POINT_H_3W0LNCLY */