1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
6 #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
7 
8 #include <stddef.h>
9 
10 #include <string>
11 #include <string_view>
12 #include <vector>
13 
14 namespace base {
15 
16 // A helper class and associated data structures to adjust offsets into a
17 // string in response to various adjustments one might do to that string
18 // (e.g., eliminating a range).  For details on offsets, see the comments by
19 // the AdjustOffsets() function below.
class OffsetAdjuster {
 public:
  // A single recorded adjustment.
  // NOTE(review): going by the field names, this says that |original_length|
  // characters starting at |original_offset| in the original string became
  // |output_length| characters in the output -- confirm against the
  // implementation.
  struct Adjustment {
    Adjustment(size_t original_offset, size_t original_length,
               size_t output_length);

    size_t original_offset;
    size_t original_length;
    size_t output_length;
  };
  using Adjustments = std::vector<Adjustment>;

  // Rewrites every entry of |offsets_for_adjustment| so that it reflects the
  // changes recorded in |adjustments|.  Any adjusted offset that ends up
  // greater than |limit| is replaced with std::u16string::npos.
  //
  // An offset denotes an insertion/selection point between characters.  For
  // the string "abcd", 0 is the point before 'a', 2 is the point between 'b'
  // and 'c', and 4 is the end of the string.  Valid input offsets therefore
  // run from 0 through the length of the string, inclusive.  On return each
  // offset points at the same logical position in the output string.  An
  // offset that cannot be adjusted (e.g., one that points into the middle of
  // a multibyte sequence) is replaced with std::u16string::npos.
  static void AdjustOffsets(const Adjustments& adjustments,
                            std::vector<size_t>* offsets_for_adjustment,
                            size_t limit = std::u16string::npos);

  // Single-offset form of AdjustOffsets(): rewrites |offset| to reflect the
  // changes recorded in |adjustments|.
  static void AdjustOffset(const Adjustments& adjustments,
                           size_t* offset,
                           size_t limit = std::u16string::npos);

  // The inverse of AdjustOffsets().  The entries of
  // |offsets_for_unadjustment| are offsets into the adjusted string; each is
  // rewritten to the offset it corresponds to in the original string.  An
  // offset that cannot be unadjusted (e.g., one that points into the middle
  // of a multibyte sequence) is replaced with std::u16string::npos.
  static void UnadjustOffsets(const Adjustments& adjustments,
                              std::vector<size_t>* offsets_for_unadjustment);

  // Single-offset form of UnadjustOffsets(): rewrites |offset| to reflect the
  // reverse of the changes recorded in |adjustments|.
  static void UnadjustOffset(const Adjustments& adjustments, size_t* offset);

  // Folds two sequential sets of adjustments into one.
  //
  // Scenario: a string is altered, and those alterations are recorded in
  // |first_adjustments|.  The resulting (intermediate) string is then altered
  // again, and those alterations are stored in
  // |adjustments_on_adjusted_string|, with offsets expressed relative to the
  // intermediate string.  This function combines both sets and writes the
  // result back into |adjustments_on_adjusted_string|, whose offsets are then
  // correct relative to the original string.
  //
  // Both parameters are assumed to be sorted by increasing offset.
  //
  // WARNING: |first_adjustments| may only describe collapsing ranges of text;
  // expanding ranges are not supported.
  static void MergeSequentialAdjustments(
      const Adjustments& first_adjustments,
      Adjustments* adjustments_on_adjusted_string);
};
87 
// Like the conversions in utf_string_conversions.h, but also fills in an
// |adjustments| parameter that reflects the alterations done to the string.
// |adjustments| may be null.
//
// This overload takes the UTF-8 input as a pointer/length pair (|src|,
// |src_len|) and has a separate |output| string parameter.
// NOTE(review): the bool result presumably reports whether the conversion
// succeeded, as with the utf_string_conversions.h counterparts -- confirm
// against the implementation.
bool UTF8ToUTF16WithAdjustments(const char* src,
                                size_t src_len,
                                std::u16string* output,
                                base::OffsetAdjuster::Adjustments* adjustments);
// Overload that takes the UTF-8 input as a string view and returns a
// std::u16string by value instead of using an output parameter.
std::u16string UTF8ToUTF16WithAdjustments(
    const std::string_view& utf8,
    base::OffsetAdjuster::Adjustments* adjustments);
// Like UTF8ToUTF16WithAdjustments(), but instead internally examines the
// adjustments and applies them to |offsets_for_adjustment|.  Input offsets
// greater than the length of the input string will be set to
// std::u16string::npos.  See comments by OffsetAdjuster::AdjustOffsets().
std::u16string UTF8ToUTF16AndAdjustOffsets(
    const std::string_view& utf8,
    std::vector<size_t>* offsets_for_adjustment);
// Takes UTF-16 input and returns a std::string, with the same
// |offsets_for_adjustment| in/out parameter as UTF8ToUTF16AndAdjustOffsets().
// NOTE(review): presumably the UTF-16 -> UTF-8 mirror of
// UTF8ToUTF16AndAdjustOffsets(), following the same npos convention --
// confirm against the implementation.
std::string UTF16ToUTF8AndAdjustOffsets(
    const std::u16string_view& utf16,
    std::vector<size_t>* offsets_for_adjustment);
108 
109 }  // namespace base
110 
111 #endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
112