1 /*************************************************************************
2 ** ToUnicodeMap.cpp                                                     **
3 **                                                                      **
4 ** This file is part of dvisvgm -- the DVI to SVG converter             **
5 ** Copyright (C) 2005-2015 Martin Gieseking <martin.gieseking@uos.de>   **
6 **                                                                      **
7 ** This program is free software; you can redistribute it and/or        **
8 ** modify it under the terms of the GNU General Public License as       **
9 ** published by the Free Software Foundation; either version 3 of       **
10 ** the License, or (at your option) any later version.                  **
11 **                                                                      **
12 ** This program is distributed in the hope that it will be useful, but  **
13 ** WITHOUT ANY WARRANTY; without even the implied warranty of           **
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the         **
15 ** GNU General Public License for more details.                         **
16 **                                                                      **
17 ** You should have received a copy of the GNU General Public License    **
18 ** along with this program; if not, see <http://www.gnu.org/licenses/>. **
19 *************************************************************************/
20 
21 #include <limits>
22 #include "ToUnicodeMap.h"
23 #include "Unicode.h"
24 
25 using namespace std;
26 
27 
28 /** Adds valid but random mappings for all missing character indexes.
29  *  If a font's cmap table doensn't provide unicode mappings for all
30  *  glyphs in the font, it's necessary to fill the gaps in order to
31  *  handle all characters correctly. This functions assumes that the
32  *  characters are numbered from 1 to maxIndex.
33  *  @param[in] maxIndex largest character index to consider
34  *  @return true on success */
addMissingMappings(UInt32 maxIndex)35 bool ToUnicodeMap::addMissingMappings (UInt32 maxIndex) {
36 	bool success=true;
37 	// collect unicode points already in assigned
38 	NumericRanges<UInt32> codepoints;
39 	for (size_t i=0; i < size() && success; i++)
40 		codepoints.addRange(rangeAt(i).minval(), rangeAt(i).maxval());
41 	// fill unmapped ranges
42 	if (empty()) // no unicode mapping present at all?
43 		success = fillRange(1, maxIndex, 1, codepoints, true);
44 	else {   // (partial) unicode mapping present?
45 		success = fillRange(1, rangeAt(0).min()-1, rangeAt(0).minval()-1, codepoints, false);
46 		for (size_t i=0; i < size()-1 && success; i++)
47 			success = fillRange(rangeAt(i).max()+1, rangeAt(i+1).min()-1, rangeAt(i).maxval()+1, codepoints, true);
48 		if (success)
49 			success = fillRange(rangeAt(size()-1).max()+1, maxIndex, rangeAt(size()-1).maxval()+1, codepoints, true);
50 	}
51 	return success;
52 }
53 
54 
55 /** Checks if a given codepoint is valid and unused. Otherwise, try to find an alternative.
56  * @param[in,out] ucp codepoint to fix
57  * @param[in] used_codepoints codepoints already in use
58  * @param[in] ascending if true, increase ucp to look for valid/unused codepoints
59  * @return true on success */
fix_codepoint(UInt32 & ucp,const NumericRanges<UInt32> & used_codepoints,bool ascending)60 static bool fix_codepoint (UInt32 &ucp, const NumericRanges<UInt32> &used_codepoints, bool ascending) {
61 	UInt32 start = ucp;
62 	while (!Unicode::isValidCodepoint(ucp) && used_codepoints.valueExists(ucp)) {
63 		if (ascending)
64 			ucp = (ucp == numeric_limits<UInt32>::max()) ? 0 : ucp+1;
65 		else
66 			ucp = (ucp == 0) ? numeric_limits<UInt32>::max() : ucp-1;
67 		if (ucp == start) // no free unicode point found
68 			return false;
69 	}
70 	return true;
71 }
72 
73 
is_less_or_equal(UInt32 a,UInt32 b)74 static bool is_less_or_equal (UInt32 a, UInt32 b) {return a <= b;}
is_greater_or_equal(UInt32 a,UInt32 b)75 static bool is_greater_or_equal (UInt32 a, UInt32 b) {return a >= b;}
76 
77 
78 /** Adds index to unicode mappings for a given range of character indexes.
79  *  @param[in] minIndex lower bound of range to fill
80  *  @param[in] maxIndex upper bound of range to fill
81  *  @param[in] ucp first unicode point to add (if possible)
82  *  @param[in,out] used_ucps unicode points already in use
83  *  @param[in] ascending if true, fill range from lower to upper bound
84  *  @return true on success */
fillRange(UInt32 minIndex,UInt32 maxIndex,UInt32 ucp,NumericRanges<UInt32> & used_ucps,bool ascending)85 bool ToUnicodeMap::fillRange (UInt32 minIndex, UInt32 maxIndex, UInt32 ucp, NumericRanges<UInt32> &used_ucps, bool ascending) {
86 	if (minIndex <= maxIndex) {
87 		UInt32 first=minIndex, last=maxIndex;
88 		int inc=1;
89 		bool (*cmp)(UInt32, UInt32) = is_less_or_equal;
90 		if (!ascending) {
91 			swap(first, last);
92 			inc = -1;
93 			cmp = is_greater_or_equal;
94 		}
95 		for (UInt32 i=first; cmp(i, last); i += inc) {
96 			if (!fix_codepoint(ucp, used_ucps, ascending))
97 				return false;
98 			else {
99 				addRange(i, i, ucp);
100 				used_ucps.addRange(ucp);
101 				ucp += inc;  // preferred unicode point for the next character of the current range
102 			}
103 		}
104 	}
105 	return true;
106 }