1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 #include "ortools/util/fp_utils.h"
15
16 #include <limits.h>
17 #include <stdint.h>
18
19 #include <algorithm>
20 #include <cmath>
21 #include <limits>
22 #include <utility>
23 #include <vector>
24
25 #include "absl/base/casts.h"
26 #include "absl/base/internal/endian.h"
27 #include "ortools/base/integral_types.h"
28 #include "ortools/base/logging.h"
29 #include "ortools/util/bitset.h"
30
31 namespace operations_research {
32
33 namespace {
34
ReorderAndCapTerms(double * min,double * max)35 void ReorderAndCapTerms(double* min, double* max) {
36 if (*min > *max) std::swap(*min, *max);
37 if (*min > 0.0) *min = 0.0;
38 if (*max < 0.0) *max = 0.0;
39 }
40
41 template <bool use_bounds>
ComputeScalingErrors(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,double scaling_factor,double * max_relative_coeff_error,double * max_scaled_sum_error)42 void ComputeScalingErrors(const std::vector<double>& input,
43 const std::vector<double>& lb,
44 const std::vector<double>& ub, double scaling_factor,
45 double* max_relative_coeff_error,
46 double* max_scaled_sum_error) {
47 double max_error = 0.0;
48 double min_error = 0.0;
49 *max_relative_coeff_error = 0.0;
50 const int size = input.size();
51 for (int i = 0; i < size; ++i) {
52 const double x = input[i];
53 if (x == 0.0) continue;
54 const double scaled = x * scaling_factor;
55
56 if (scaled == 0.0) {
57 *max_relative_coeff_error = std::numeric_limits<double>::infinity();
58 } else {
59 *max_relative_coeff_error = std::max(
60 *max_relative_coeff_error, std::abs(std::round(scaled) / scaled - 1));
61 }
62
63 const double error = std::round(scaled) - scaled;
64 const double error_lb = (use_bounds ? error * lb[i] : -error);
65 const double error_ub = (use_bounds ? error * ub[i] : error);
66 max_error += std::max(error_lb, error_ub);
67 min_error += std::min(error_lb, error_ub);
68 }
69 *max_scaled_sum_error = std::max(std::abs(max_error), std::abs(min_error));
70 }
71
72 template <bool use_bounds>
GetBestScalingOfDoublesToInt64(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,int64_t max_absolute_sum,double * scaling_factor)73 void GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
74 const std::vector<double>& lb,
75 const std::vector<double>& ub,
76 int64_t max_absolute_sum,
77 double* scaling_factor) {
78 const double kInfinity = std::numeric_limits<double>::infinity();
79
80 // We start by initializing the returns value to the "error" state.
81 *scaling_factor = 0;
82
83 // Abort in the "error" state if max_absolute_sum doesn't make sense.
84 if (max_absolute_sum < 0) return;
85
86 // Our scaling scaling_factor will be 2^factor_exponent.
87 //
88 // TODO(user): Consider using a non-power of two factor if the error can't be
89 // zero? Note however that using power of two has the extra advantage that
90 // subsequent int64_t -> double -> scaled back to int64_t will loose no extra
91 // information.
92 int factor_exponent = 0;
93 uint64_t sum_min = 0; // negated.
94 uint64_t sum_max = 0;
95 bool recompute_sum = false;
96 bool is_first_value = true;
97 const int msb = MostSignificantBitPosition64(max_absolute_sum);
98 const int size = input.size();
99 for (int i = 0; i < size; ++i) {
100 const double x = input[i];
101 double min_term = use_bounds ? x * lb[i] : -x;
102 double max_term = use_bounds ? x * ub[i] : x;
103 ReorderAndCapTerms(&min_term, &max_term);
104
105 // If min_term or max_term is not finite, then abort in the "error" state.
106 if (!(min_term > -kInfinity && max_term < kInfinity)) return;
107
108 // A value of zero can just be skipped (and needs to because the code below
109 // doesn't handle it correctly).
110 if (min_term == 0.0 && max_term == 0.0) continue;
111
112 // Compute the greatest candidate such that
113 // round(fabs(c).2^candidate) <= max_absolute_sum.
114 const double c = std::max(-min_term, max_term);
115 int candidate = msb - ilogb(c);
116 if (std::round(ldexp(std::abs(c), candidate)) > max_absolute_sum) {
117 --candidate;
118 }
119 DCHECK_LE(std::abs(static_cast<int64_t>(round(ldexp(c, candidate)))),
120 max_absolute_sum);
121
122 // Update factor_exponent which is the min of all the candidates.
123 if (is_first_value || candidate < factor_exponent) {
124 is_first_value = false;
125 factor_exponent = candidate;
126 recompute_sum = true;
127 } else {
128 // Update the sum of absolute values of the numbers seen so far.
129 sum_min -=
130 static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
131 sum_max +=
132 static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
133 if (sum_min > max_absolute_sum || sum_max > max_absolute_sum) {
134 factor_exponent--;
135 recompute_sum = true;
136 }
137 }
138
139 // TODO(user): This is not super efficient, but note that in practice we
140 // will only scan the vector of values about log(size) times. It is possible
141 // to maintain an upper bound on the abs_sum in linear time instead, but the
142 // code and corner cases are a lot more involved. Also, we currently only
143 // use this code in situations where its run-time is negligeable compared to
144 // the rest.
145 while (recompute_sum) {
146 sum_min = 0;
147 sum_max = 0;
148 for (int j = 0; j <= i; ++j) {
149 const double x = input[j];
150 double min_term = use_bounds ? x * lb[j] : -x;
151 double max_term = use_bounds ? x * ub[j] : x;
152 ReorderAndCapTerms(&min_term, &max_term);
153 sum_min -=
154 static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
155 sum_max +=
156 static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
157 }
158 if (sum_min > max_absolute_sum || sum_max > max_absolute_sum) {
159 factor_exponent--;
160 } else {
161 recompute_sum = false;
162 }
163 }
164 }
165 *scaling_factor = ldexp(1.0, factor_exponent);
166 }
167
168 } // namespace
169
ComputeScalingErrors(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,double scaling_factor,double * max_relative_coeff_error,double * max_scaled_sum_error)170 void ComputeScalingErrors(const std::vector<double>& input,
171 const std::vector<double>& lb,
172 const std::vector<double>& ub, double scaling_factor,
173 double* max_relative_coeff_error,
174 double* max_scaled_sum_error) {
175 ComputeScalingErrors<true>(input, lb, ub, scaling_factor,
176 max_relative_coeff_error, max_scaled_sum_error);
177 }
178
GetBestScalingOfDoublesToInt64(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,int64_t max_absolute_sum)179 double GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
180 const std::vector<double>& lb,
181 const std::vector<double>& ub,
182 int64_t max_absolute_sum) {
183 double scaling_factor;
184 GetBestScalingOfDoublesToInt64<true>(input, lb, ub, max_absolute_sum,
185 &scaling_factor);
186 return scaling_factor;
187 }
188
GetBestScalingOfDoublesToInt64(const std::vector<double> & input,int64_t max_absolute_sum,double * scaling_factor,double * max_relative_coeff_error)189 void GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
190 int64_t max_absolute_sum,
191 double* scaling_factor,
192 double* max_relative_coeff_error) {
193 double max_scaled_sum_error;
194 GetBestScalingOfDoublesToInt64<false>(input, {}, {}, max_absolute_sum,
195 scaling_factor);
196 ComputeScalingErrors<false>(input, {}, {}, *scaling_factor,
197 max_relative_coeff_error, &max_scaled_sum_error);
198 }
199
ComputeGcdOfRoundedDoubles(const std::vector<double> & x,double scaling_factor)200 int64_t ComputeGcdOfRoundedDoubles(const std::vector<double>& x,
201 double scaling_factor) {
202 int64_t gcd = 0;
203 for (int i = 0; i < x.size() && gcd != 1; ++i) {
204 int64_t value = std::abs(std::round(x[i] * scaling_factor));
205 DCHECK_GE(value, 0);
206 if (value == 0) continue;
207 if (gcd == 0) {
208 gcd = value;
209 continue;
210 }
211 // GCD(gcd, value) = GCD(value, gcd % value);
212 while (value != 0) {
213 const int64_t r = gcd % value;
214 gcd = value;
215 value = r;
216 }
217 }
218 DCHECK_GE(gcd, 0);
219 return gcd > 0 ? gcd : 1;
220 }
221
fast_ilogb(double value)222 int fast_ilogb(double value) {
223 static_assert(CHAR_BIT == 8);
224 static_assert(sizeof(double) == 8);
225 // Get little-endian bit-representation of the floating point value.
226 const uint64_t bit_rep =
227 absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
228 return static_cast<int>((bit_rep >> 52) & 0x7FF) - 1023;
229 }
230
fast_scalbn_inplace(double & mutable_value,int exponent)231 void fast_scalbn_inplace(double& mutable_value, int exponent) {
232 mutable_value = fast_scalbn(mutable_value, exponent);
233 }
234
fast_scalbn(double value,int exponent)235 double fast_scalbn(double value, int exponent) {
236 if (value == 0.0) return 0.0;
237 uint64_t bit_rep =
238 absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
239 // Binary representation is: (sign-bit)(11 exponent bits)(52 mantissa bits)
240 constexpr uint64_t kExponentMask(0x7FF0000000000000);
241 // This addition relies on the fact that signed numbers are written in
242 // two-s complement, and is correct as long as the sum does not
243 // overflow/underflow the result.
244 const uint64_t value_exponent =
245 (bit_rep + (static_cast<uint64_t>(exponent) << 52)) & kExponentMask;
246 bit_rep &= ~kExponentMask;
247 bit_rep |= value_exponent;
248 return absl::bit_cast<double>(absl::little_endian::ToHost64(bit_rep));
249 }
250
251 } // namespace operations_research
252