1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #include "ortools/util/fp_utils.h"
15 
16 #include <limits.h>
17 #include <stdint.h>
18 
19 #include <algorithm>
20 #include <cmath>
21 #include <limits>
22 #include <utility>
23 #include <vector>
24 
25 #include "absl/base/casts.h"
26 #include "absl/base/internal/endian.h"
27 #include "ortools/base/integral_types.h"
28 #include "ortools/base/logging.h"
29 #include "ortools/util/bitset.h"
30 
31 namespace operations_research {
32 
33 namespace {
34 
ReorderAndCapTerms(double * min,double * max)35 void ReorderAndCapTerms(double* min, double* max) {
36   if (*min > *max) std::swap(*min, *max);
37   if (*min > 0.0) *min = 0.0;
38   if (*max < 0.0) *max = 0.0;
39 }
40 
41 template <bool use_bounds>
ComputeScalingErrors(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,double scaling_factor,double * max_relative_coeff_error,double * max_scaled_sum_error)42 void ComputeScalingErrors(const std::vector<double>& input,
43                           const std::vector<double>& lb,
44                           const std::vector<double>& ub, double scaling_factor,
45                           double* max_relative_coeff_error,
46                           double* max_scaled_sum_error) {
47   double max_error = 0.0;
48   double min_error = 0.0;
49   *max_relative_coeff_error = 0.0;
50   const int size = input.size();
51   for (int i = 0; i < size; ++i) {
52     const double x = input[i];
53     if (x == 0.0) continue;
54     const double scaled = x * scaling_factor;
55 
56     if (scaled == 0.0) {
57       *max_relative_coeff_error = std::numeric_limits<double>::infinity();
58     } else {
59       *max_relative_coeff_error = std::max(
60           *max_relative_coeff_error, std::abs(std::round(scaled) / scaled - 1));
61     }
62 
63     const double error = std::round(scaled) - scaled;
64     const double error_lb = (use_bounds ? error * lb[i] : -error);
65     const double error_ub = (use_bounds ? error * ub[i] : error);
66     max_error += std::max(error_lb, error_ub);
67     min_error += std::min(error_lb, error_ub);
68   }
69   *max_scaled_sum_error = std::max(std::abs(max_error), std::abs(min_error));
70 }
71 
72 template <bool use_bounds>
GetBestScalingOfDoublesToInt64(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,int64_t max_absolute_sum,double * scaling_factor)73 void GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
74                                     const std::vector<double>& lb,
75                                     const std::vector<double>& ub,
76                                     int64_t max_absolute_sum,
77                                     double* scaling_factor) {
78   const double kInfinity = std::numeric_limits<double>::infinity();
79 
80   // We start by initializing the returns value to the "error" state.
81   *scaling_factor = 0;
82 
83   // Abort in the "error" state if max_absolute_sum doesn't make sense.
84   if (max_absolute_sum < 0) return;
85 
86   // Our scaling scaling_factor will be 2^factor_exponent.
87   //
88   // TODO(user): Consider using a non-power of two factor if the error can't be
89   // zero? Note however that using power of two has the extra advantage that
90   // subsequent int64_t -> double -> scaled back to int64_t will loose no extra
91   // information.
92   int factor_exponent = 0;
93   uint64_t sum_min = 0;  // negated.
94   uint64_t sum_max = 0;
95   bool recompute_sum = false;
96   bool is_first_value = true;
97   const int msb = MostSignificantBitPosition64(max_absolute_sum);
98   const int size = input.size();
99   for (int i = 0; i < size; ++i) {
100     const double x = input[i];
101     double min_term = use_bounds ? x * lb[i] : -x;
102     double max_term = use_bounds ? x * ub[i] : x;
103     ReorderAndCapTerms(&min_term, &max_term);
104 
105     // If min_term or max_term is not finite, then abort in the "error" state.
106     if (!(min_term > -kInfinity && max_term < kInfinity)) return;
107 
108     // A value of zero can just be skipped (and needs to because the code below
109     // doesn't handle it correctly).
110     if (min_term == 0.0 && max_term == 0.0) continue;
111 
112     // Compute the greatest candidate such that
113     // round(fabs(c).2^candidate) <= max_absolute_sum.
114     const double c = std::max(-min_term, max_term);
115     int candidate = msb - ilogb(c);
116     if (std::round(ldexp(std::abs(c), candidate)) > max_absolute_sum) {
117       --candidate;
118     }
119     DCHECK_LE(std::abs(static_cast<int64_t>(round(ldexp(c, candidate)))),
120               max_absolute_sum);
121 
122     // Update factor_exponent which is the min of all the candidates.
123     if (is_first_value || candidate < factor_exponent) {
124       is_first_value = false;
125       factor_exponent = candidate;
126       recompute_sum = true;
127     } else {
128       // Update the sum of absolute values of the numbers seen so far.
129       sum_min -=
130           static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
131       sum_max +=
132           static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
133       if (sum_min > max_absolute_sum || sum_max > max_absolute_sum) {
134         factor_exponent--;
135         recompute_sum = true;
136       }
137     }
138 
139     // TODO(user): This is not super efficient, but note that in practice we
140     // will only scan the vector of values about log(size) times. It is possible
141     // to maintain an upper bound on the abs_sum in linear time instead, but the
142     // code and corner cases are a lot more involved. Also, we currently only
143     // use this code in situations where its run-time is negligeable compared to
144     // the rest.
145     while (recompute_sum) {
146       sum_min = 0;
147       sum_max = 0;
148       for (int j = 0; j <= i; ++j) {
149         const double x = input[j];
150         double min_term = use_bounds ? x * lb[j] : -x;
151         double max_term = use_bounds ? x * ub[j] : x;
152         ReorderAndCapTerms(&min_term, &max_term);
153         sum_min -=
154             static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
155         sum_max +=
156             static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
157       }
158       if (sum_min > max_absolute_sum || sum_max > max_absolute_sum) {
159         factor_exponent--;
160       } else {
161         recompute_sum = false;
162       }
163     }
164   }
165   *scaling_factor = ldexp(1.0, factor_exponent);
166 }
167 
168 }  // namespace
169 
ComputeScalingErrors(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,double scaling_factor,double * max_relative_coeff_error,double * max_scaled_sum_error)170 void ComputeScalingErrors(const std::vector<double>& input,
171                           const std::vector<double>& lb,
172                           const std::vector<double>& ub, double scaling_factor,
173                           double* max_relative_coeff_error,
174                           double* max_scaled_sum_error) {
175   ComputeScalingErrors<true>(input, lb, ub, scaling_factor,
176                              max_relative_coeff_error, max_scaled_sum_error);
177 }
178 
GetBestScalingOfDoublesToInt64(const std::vector<double> & input,const std::vector<double> & lb,const std::vector<double> & ub,int64_t max_absolute_sum)179 double GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
180                                       const std::vector<double>& lb,
181                                       const std::vector<double>& ub,
182                                       int64_t max_absolute_sum) {
183   double scaling_factor;
184   GetBestScalingOfDoublesToInt64<true>(input, lb, ub, max_absolute_sum,
185                                        &scaling_factor);
186   return scaling_factor;
187 }
188 
GetBestScalingOfDoublesToInt64(const std::vector<double> & input,int64_t max_absolute_sum,double * scaling_factor,double * max_relative_coeff_error)189 void GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
190                                     int64_t max_absolute_sum,
191                                     double* scaling_factor,
192                                     double* max_relative_coeff_error) {
193   double max_scaled_sum_error;
194   GetBestScalingOfDoublesToInt64<false>(input, {}, {}, max_absolute_sum,
195                                         scaling_factor);
196   ComputeScalingErrors<false>(input, {}, {}, *scaling_factor,
197                               max_relative_coeff_error, &max_scaled_sum_error);
198 }
199 
ComputeGcdOfRoundedDoubles(const std::vector<double> & x,double scaling_factor)200 int64_t ComputeGcdOfRoundedDoubles(const std::vector<double>& x,
201                                    double scaling_factor) {
202   int64_t gcd = 0;
203   for (int i = 0; i < x.size() && gcd != 1; ++i) {
204     int64_t value = std::abs(std::round(x[i] * scaling_factor));
205     DCHECK_GE(value, 0);
206     if (value == 0) continue;
207     if (gcd == 0) {
208       gcd = value;
209       continue;
210     }
211     // GCD(gcd, value) = GCD(value, gcd % value);
212     while (value != 0) {
213       const int64_t r = gcd % value;
214       gcd = value;
215       value = r;
216     }
217   }
218   DCHECK_GE(gcd, 0);
219   return gcd > 0 ? gcd : 1;
220 }
221 
fast_ilogb(double value)222 int fast_ilogb(double value) {
223   static_assert(CHAR_BIT == 8);
224   static_assert(sizeof(double) == 8);
225   // Get little-endian bit-representation of the floating point value.
226   const uint64_t bit_rep =
227       absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
228   return static_cast<int>((bit_rep >> 52) & 0x7FF) - 1023;
229 }
230 
fast_scalbn_inplace(double & mutable_value,int exponent)231 void fast_scalbn_inplace(double& mutable_value, int exponent) {
232   mutable_value = fast_scalbn(mutable_value, exponent);
233 }
234 
fast_scalbn(double value,int exponent)235 double fast_scalbn(double value, int exponent) {
236   if (value == 0.0) return 0.0;
237   uint64_t bit_rep =
238       absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
239   // Binary representation is: (sign-bit)(11 exponent bits)(52 mantissa bits)
240   constexpr uint64_t kExponentMask(0x7FF0000000000000);
241   // This addition relies on the fact that signed numbers are written in
242   // two-s complement, and is correct as long as the sum does not
243   // overflow/underflow the result.
244   const uint64_t value_exponent =
245       (bit_rep + (static_cast<uint64_t>(exponent) << 52)) & kExponentMask;
246   bit_rep &= ~kExponentMask;
247   bit_rep |= value_exponent;
248   return absl::bit_cast<double>(absl::little_endian::ToHost64(bit_rep));
249 }
250 
251 }  // namespace operations_research
252