1 /*
2  * This source file is part of libRocket, the HTML/CSS Interface Middleware
3  *
4  * For the latest information, see http://www.librocket.com
5  *
6  * Copyright (c) 2008-2010 CodePoint Ltd, Shift Technology Ltd
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24  * THE SOFTWARE.
25  *
26  */
27 
28 #include "precompiled.h"
29 #include "UnicodeRange.h"
30 
31 namespace Rocket {
32 namespace Core {
33 
UnicodeRange()34 UnicodeRange::UnicodeRange()
35 {
36 	min_codepoint = UINT_MAX;
37 	max_codepoint = UINT_MAX;
38 }
39 
UnicodeRange(int _min_codepoint,int _max_codepoint)40 UnicodeRange::UnicodeRange(int _min_codepoint, int _max_codepoint)
41 {
42 	min_codepoint = _min_codepoint;
43 	max_codepoint = _max_codepoint;
44 
45 	ROCKET_ASSERT(min_codepoint <= max_codepoint);
46 }
47 
48 // Initialises the range from a unicode range in string form.
Initialise(const String & unicode_range)49 bool UnicodeRange::Initialise(const String& unicode_range)
50 {
51 	// Check for a 'U+' at the start.
52 	if (unicode_range.Length() < 2 ||
53 		unicode_range[0] != 'U' ||
54 		unicode_range[1] != '+')
55 		return false;
56 
57 	// Check if there's a '-' sign; if so, we've got a range.
58 	String::size_type separator_index = unicode_range.Find("-", 2);
59 	if (separator_index != String::npos)
60 	{
61 		const char* end = unicode_range.CString() + separator_index;
62 		min_codepoint = strtoul(unicode_range.CString() + 2, (char **) &end, 16);
63 
64 		end = unicode_range.CString() + unicode_range.Length();
65 		max_codepoint = strtoul(unicode_range.CString() + separator_index + 1, (char **) &end, 16);
66 
67 		return min_codepoint <= max_codepoint;
68 	}
69 
70 	// No range! Check if we have any wildcards.
71 	String::size_type wildcard_index = unicode_range.Find("?", 2);
72 	if (wildcard_index != String::npos)
73 	{
74 		String range_min(unicode_range.CString() + 2, unicode_range.CString() + wildcard_index);
75 		String range_max(range_min);
76 
77 		for (String::size_type i = 0; i < unicode_range.Length() - wildcard_index; ++i)
78 		{
79 			range_min += "0";
80 			range_max += "F";
81 		}
82 
83 		const char* end = range_min.CString() + range_min.Length();
84 		min_codepoint = strtoul(range_min.CString(), (char**) &end, 16);
85 		end = range_max.CString() + range_max.Length();
86 		max_codepoint = strtoul(range_max.CString(), (char**) &end, 16);
87 
88 		return true;
89 	}
90 
91 	const char* end = unicode_range.CString() + unicode_range.Length();
92 	min_codepoint = strtoul(unicode_range.CString() + 2, (char**) &end, 16);
93 	max_codepoint = min_codepoint;
94 
95 	return true;
96 }
97 
98 // Builds up a list of unicode ranges from a comma-separated list of unicode ranges in string form.
BuildList(UnicodeRangeList & list,const String & unicode_range)99 bool UnicodeRange::BuildList(UnicodeRangeList& list, const String& unicode_range)
100 {
101 	StringList unicode_ranges;
102 	StringUtilities::ExpandString(unicode_ranges, unicode_range);
103 
104 	for (size_t i = 0; i < unicode_ranges.size(); ++i)
105 	{
106 		UnicodeRange range;
107 		if (!range.Initialise(unicode_ranges[i]))
108 			return false;
109 
110 		list.push_back(range);
111 	}
112 
113 	// Collapse contiguous ranges.
114 	for (size_t i = 0; i < list.size(); ++i)
115 	{
116 		size_t j = i + 1;
117 		while (j < list.size())
118 		{
119 			if (list[i].IsContiguous(list[j]))
120 			{
121 				list[i] = list[i].Join(list[j]);
122 				list.erase(list.begin() + j);
123 			}
124 			else
125 				++j;
126 		}
127 	}
128 
129 	return !list.empty();
130 }
131 
132 // Returns true if this range is wholly contained within another range.
IsContained(const UnicodeRange & rhs)133 bool UnicodeRange::IsContained(const UnicodeRange& rhs)
134 {
135 	return rhs.min_codepoint <= min_codepoint &&
136 		   rhs.max_codepoint >= max_codepoint;
137 }
138 
139 // Returns true if this range is wholly contained within another range list.
IsContained(const UnicodeRangeList & rhs)140 bool UnicodeRange::IsContained(const UnicodeRangeList& rhs)
141 {
142 	for (size_t i = 0; i < rhs.size(); ++i)
143 	{
144 		if (IsContained(rhs[i]))
145 			return true;
146 	}
147 
148 	return false;
149 }
150 
151 // Returns true if this range is contained or contiguous with another range.
IsContiguous(const UnicodeRange & rhs)152 bool UnicodeRange::IsContiguous(const UnicodeRange& rhs)
153 {
154 	return (min_codepoint >= rhs.min_codepoint && min_codepoint <= ((rhs.max_codepoint == 0xFFFFFFFF) ? rhs.max_codepoint : rhs.max_codepoint + 1)) ||
155 		   (max_codepoint >= ((rhs.min_codepoint == 0) ? 0 : rhs.min_codepoint - 1) && max_codepoint <= rhs.max_codepoint);
156 }
157 
158 // Joins this range with another that it is contiguous with.
Join(const UnicodeRange & rhs)159 UnicodeRange UnicodeRange::Join(const UnicodeRange& rhs)
160 {
161 	return UnicodeRange(Math::Min(min_codepoint, rhs.min_codepoint),
162 						   Math::Max(max_codepoint, rhs.max_codepoint));
163 }
164 
165 }
166 }
167