// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_

#include <stddef.h>

#include <string>
#include <string_view>
#include <vector>

namespace base {

// A helper class and associated data structures to adjust offsets into a
// string in response to various adjustments one might do to that string
// (e.g., eliminating a range).  For details on offsets, see the comments by
// the AdjustOffsets() function below.
class OffsetAdjuster {
 public:
  // A single recorded alteration to a string.
  // NOTE(review): judging by the field names, this presumably says that
  // |original_length| characters starting at |original_offset| in the
  // original string became |output_length| characters in the output string;
  // confirm against the implementation file.
  struct Adjustment {
    Adjustment(size_t original_offset,
               size_t original_length,
               size_t output_length);

    size_t original_offset;
    size_t original_length;
    size_t output_length;
  };
  using Adjustments = std::vector<Adjustment>;

  // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
  // recorded in |adjustments|.  Adjusted offsets greater than |limit| will be
  // set to std::u16string::npos.
  //
  // Offsets represent insertion/selection points between characters: if |src|
  // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
  // end of the string.  Valid input offsets range from 0 to |src_len|.  On
  // exit, each offset will have been modified to point at the same logical
  // position in the output string.  If an offset cannot be successfully
  // adjusted (e.g., because it points into the middle of a multibyte
  // sequence), it will be set to std::u16string::npos.
  static void AdjustOffsets(const Adjustments& adjustments,
                            std::vector<size_t>* offsets_for_adjustment,
                            size_t limit = std::u16string::npos);

  // Adjusts the single |offset| to reflect the adjustments recorded in
  // |adjustments|.
  static void AdjustOffset(const Adjustments& adjustments,
                           size_t* offset,
                           size_t limit = std::u16string::npos);

  // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
  // of the adjustments recorded in |adjustments|.  In other words, the offsets
  // provided represent offsets into an adjusted string and the caller wants
  // to know the offsets they correspond to in the original string.  If an
  // offset cannot be successfully unadjusted (e.g., because it points into
  // the middle of a multibyte sequence), it will be set to
  // std::u16string::npos.
  static void UnadjustOffsets(const Adjustments& adjustments,
                              std::vector<size_t>* offsets_for_unadjustment);

  // Adjusts the single |offset| to reflect the reverse of the adjustments
  // recorded in |adjustments|.
  static void UnadjustOffset(const Adjustments& adjustments, size_t* offset);

  // Combines two sequential sets of adjustments, storing the combined revised
  // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
  // string was altered in some way, with the alterations recorded as
  // adjustments in |first_adjustments|.  Then suppose the resulting string is
  // further altered, with the alterations recorded as adjustments stored in
  // |adjustments_on_adjusted_string|, with the offsets recorded in these
  // adjustments being with respect to the intermediate string.  This function
  // combines the two sets of adjustments into one, storing the result in
  // |adjustments_on_adjusted_string|, whose offsets are correct with respect
  // to the original string.
  //
  // Assumes both parameters are sorted by increasing offset.
  //
  // WARNING: Only supports |first_adjustments| that involve collapsing ranges
  // of text, not expanding ranges.
  static void MergeSequentialAdjustments(
      const Adjustments& first_adjustments,
      Adjustments* adjustments_on_adjusted_string);
};

// Like the conversions in utf_string_conversions.h, but also fills in an
// |adjustments| parameter that reflects the alterations done to the string.
// |adjustments| may be null.
bool UTF8ToUTF16WithAdjustments(const char* src,
                                size_t src_len,
                                std::u16string* output,
                                base::OffsetAdjuster::Adjustments* adjustments);
std::u16string UTF8ToUTF16WithAdjustments(
    const std::string_view& utf8,
    base::OffsetAdjuster::Adjustments* adjustments);

// As above, but instead internally examines the adjustments and applies them
// to |offsets_for_adjustment|.  Input offsets greater than the length of the
// input string will be set to std::u16string::npos.  See comments by
// AdjustOffsets().
std::u16string UTF8ToUTF16AndAdjustOffsets(
    const std::string_view& utf8,
    std::vector<size_t>* offsets_for_adjustment);
std::string UTF16ToUTF8AndAdjustOffsets(
    const std::u16string_view& utf16,
    std::vector<size_t>* offsets_for_adjustment);

}  // namespace base

#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_