1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file defines static methods and classes for comparing Protocol
36 // Messages.
37 //
38 // Aug. 2008: Added Unknown Fields Comparison for messages.
39 // Aug. 2009: Added different options to compare repeated fields.
40 // Apr. 2010: Moved field comparison to FieldComparator
41 // Sep. 2020: Added option to output map keys in path
42 
43 #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
44 #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
45 
46 #include <functional>
47 #include <map>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <vector>
52 
53 #include <google/protobuf/descriptor.h>  // FieldDescriptor
54 #include <google/protobuf/message.h>     // Message
55 #include <google/protobuf/unknown_field_set.h>
56 #include <google/protobuf/util/field_comparator.h>
57 
58 // Always include as last one, otherwise it can break compilation
59 #include <google/protobuf/port_def.inc>
60 
61 namespace google {
62 namespace protobuf {
63 
64 class DynamicMessageFactory;
65 class FieldDescriptor;
66 
67 namespace io {
68 class ZeroCopyOutputStream;
69 class Printer;
70 }  // namespace io
71 
72 namespace util {
73 
74 class DefaultFieldComparator;
75 class FieldContext;  // declared below MessageDifferencer
76 
77 // Defines a collection of field descriptors.
78 // In case of internal google codebase we are using absl::FixedArray instead
79 // of vector. It significantly speeds up proto comparison (by ~30%) by
80 // reducing the number of malloc/free operations
81 typedef std::vector<const FieldDescriptor*> FieldDescriptorArray;
82 
83 // A basic differencer that can be used to determine
84 // the differences between two specified Protocol Messages. If any differences
85 // are found, the Compare method will return false, and any differencer reporter
86 // specified via ReportDifferencesTo will have its reporting methods called (see
87 // below for implementation of the report). Based off of the original
88 // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h
89 // (Thanks Todd!).
90 //
91 // MessageDifferencer REQUIRES that compared messages be the same type, defined
92 // as messages that share the same descriptor.  If not, the behavior of this
93 // class is undefined.
94 //
95 // People disagree on what MessageDifferencer should do when asked to compare
96 // messages with different descriptors.  Some people think it should always
97 // return false.  Others expect it to try to look for similar fields and
98 // compare them anyway -- especially if the descriptors happen to be identical.
99 // If we chose either of these behaviors, some set of people would find it
100 // surprising, and could end up writing code expecting the other behavior
101 // without realizing their error.  Therefore, we forbid that usage.
102 //
103 // This class is implemented based on the proto2 reflection. The performance
104 // should be good enough for normal usages. However, for places where the
105 // performance is extremely sensitive, there are several alternatives:
106 // - Comparing serialized string
107 // Downside: false negatives (there are messages that are the same but their
108 // serialized strings are different).
109 // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin)
110 // Downside: more generated code; maintenance overhead for the additional rule
111 // (must be in sync with the original proto_library).
112 //
113 // Note on handling of google.protobuf.Any: MessageDifferencer automatically
114 // unpacks Any::value into a Message and compares its individual fields.
115 // Messages encoded in a repeated Any cannot be compared using TreatAsMap.
116 //
117 // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to
118 // guard it with a lock to use the same MessageDifferencer instance from
119 // multiple threads. Note that it's fine to call static comparison methods
120 // (like MessageDifferencer::Equals) concurrently, but it's not recommended for
121 // performance critical code as it leads to extra allocations.
122 class PROTOBUF_EXPORT MessageDifferencer {
123  public:
124   // Determines whether the supplied messages are equal. Equality is defined as
125   // all fields within the two messages being set to the same value. Primitive
126   // fields and strings are compared by value while embedded messages/groups
127   // are compared as if via a recursive call. Use Compare() with IgnoreField()
128   // if some fields should be ignored in the comparison. Use Compare() with
129   // TreatAsSet() if there are repeated fields where ordering does not matter.
130   //
131   // This method REQUIRES that the two messages have the same
132   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
133   static bool Equals(const Message& message1, const Message& message2);
134 
135   // Determines whether the supplied messages are equivalent. Equivalency is
136   // defined as all fields within the two messages having the same value. This
137   // differs from the Equals method above in that fields with default values
138   // are considered set to said value automatically. For details on how default
139   // values are defined for each field type, see:
140   // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional.
141   // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare()
142   // if some fields should be ignored in the comparison.
143   //
144   // This method REQUIRES that the two messages have the same
145   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
146   static bool Equivalent(const Message& message1, const Message& message2);
147 
148   // Determines whether the supplied messages are approximately equal.
149   // Approximate equality is defined as all fields within the two messages
150   // being approximately equal.  Primitive (non-float) fields and strings are
151   // compared by value, floats are compared using MathUtil::AlmostEquals() and
152   // embedded messages/groups are compared as if via a recursive call. Use
153   // IgnoreField() and Compare() if some fields should be ignored in the
154   // comparison.
155   //
156   // This method REQUIRES that the two messages have the same
157   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
158   static bool ApproximatelyEquals(const Message& message1,
159                                   const Message& message2);
160 
161   // Determines whether the supplied messages are approximately equivalent.
162   // Approximate equivalency is defined as all fields within the two messages
163   // being approximately equivalent. As in
164   // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and
165   // strings are compared by value, floats are compared using
166   // MathUtil::AlmostEquals() and embedded messages/groups are compared as if
167   // via a recursive call. However, fields with default values are considered
168   // set to said value, as per MessageDiffencer::Equivalent. Use IgnoreField()
169   // and Compare() if some fields should be ignored in the comparison.
170   //
171   // This method REQUIRES that the two messages have the same
172   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
173   static bool ApproximatelyEquivalent(const Message& message1,
174                                       const Message& message2);
175 
176   // Identifies an individual field in a message instance.  Used for field_path,
177   // below.
178   struct SpecificField {
179     // For known fields, "field" is filled in and "unknown_field_number" is -1.
180     // For unknown fields, "field" is NULL, "unknown_field_number" is the field
181     // number, and "unknown_field_type" is its type.
182     const FieldDescriptor* field = nullptr;
183     int unknown_field_number = -1;
184     UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT;
185 
186     // If this a repeated field, "index" is the index within it.  For unknown
187     // fields, this is the index of the field among all unknown fields of the
188     // same field number and type.
189     int index = -1;
190 
191     // If "field" is a repeated field which is being treated as a map or
192     // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates
193     // the index the position to which the element has moved.  If the element
194     // has not moved, "new_index" will have the same value as "index".
195     int new_index = -1;
196 
197     // If "field" is a map field, point to the map entry.
198     const Message* map_entry1 = nullptr;
199     const Message* map_entry2 = nullptr;
200 
201     // For unknown fields, these are the pointers to the UnknownFieldSet
202     // containing the unknown fields. In certain cases (e.g. proto1's
203     // MessageSet, or nested groups of unknown fields), these may differ from
204     // the messages' internal UnknownFieldSets.
205     const UnknownFieldSet* unknown_field_set1 = nullptr;
206     const UnknownFieldSet* unknown_field_set2 = nullptr;
207 
208     // For unknown fields, these are the index of the field within the
209     // UnknownFieldSets. One or the other will be -1 when
210     // reporting an addition or deletion.
211     int unknown_field_index1 = -1;
212     int unknown_field_index2 = -1;
213   };
214 
215   // Class for processing Any deserialization.  This logic is used by both the
216   // MessageDifferencer and StreamReporter classes.
217   class UnpackAnyField {
218    private:
219     std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_;
220 
221    public:
222     UnpackAnyField() = default;
223     ~UnpackAnyField() = default;
224     // If "any" is of type google.protobuf.Any, extract its payload using
225     // DynamicMessageFactory and store in "data".
226     bool UnpackAny(const Message& any, std::unique_ptr<Message>* data);
227   };
228 
229   // Abstract base class from which all MessageDifferencer
230   // reporters derive. The five Report* methods below will be called when
231   // a field has been added, deleted, modified, moved, or matched. The third
232   // argument is a vector of FieldDescriptor pointers which describes the chain
233   // of fields that was taken to find the current field. For example, for a
234   // field found in an embedded message, the vector will contain two
235   // FieldDescriptors. The first will be the field of the embedded message
236   // itself and the second will be the actual field in the embedded message
237   // that was added/deleted/modified.
238   // Fields will be reported in PostTraversalOrder.
239   // For example, given following proto, if both baz and quux are changed.
240   // foo {
241   //   bar {
242   //     baz: 1
243   //     quux: 2
244   //   }
245   // }
246   // ReportModified will be invoked with following order:
247   // 1. foo.bar.baz or foo.bar.quux
248   // 2. foo.bar.quux or foo.bar.baz
249   // 2. foo.bar
250   // 3. foo
251   class PROTOBUF_EXPORT Reporter {
252    public:
253     Reporter();
254     virtual ~Reporter();
255 
256     // Reports that a field has been added into Message2.
257     virtual void ReportAdded(const Message& message1, const Message& message2,
258                              const std::vector<SpecificField>& field_path) = 0;
259 
260     // Reports that a field has been deleted from Message1.
261     virtual void ReportDeleted(
262         const Message& message1, const Message& message2,
263         const std::vector<SpecificField>& field_path) = 0;
264 
265     // Reports that the value of a field has been modified.
266     virtual void ReportModified(
267         const Message& message1, const Message& message2,
268         const std::vector<SpecificField>& field_path) = 0;
269 
270     // Reports that a repeated field has been moved to another location.  This
271     // only applies when using TreatAsSet or TreatAsMap()  -- see below. Also
272     // note that for any given field, ReportModified and ReportMoved are
273     // mutually exclusive. If a field has been both moved and modified, then
274     // only ReportModified will be called.
ReportMoved(const Message &,const Message &,const std::vector<SpecificField> &)275     virtual void ReportMoved(
276         const Message& /* message1 */, const Message& /* message2 */,
277         const std::vector<SpecificField>& /* field_path */) {}
278 
279     // Reports that two fields match. Useful for doing side-by-side diffs.
280     // This function is mutually exclusive with ReportModified and ReportMoved.
281     // Note that you must call set_report_matches(true) before calling Compare
282     // to make use of this function.
ReportMatched(const Message &,const Message &,const std::vector<SpecificField> &)283     virtual void ReportMatched(
284         const Message& /* message1 */, const Message& /* message2 */,
285         const std::vector<SpecificField>& /* field_path */) {}
286 
287     // Reports that two fields would have been compared, but the
288     // comparison has been skipped because the field was marked as
289     // 'ignored' using IgnoreField().  This function is mutually
290     // exclusive with all the other Report() functions.
291     //
292     // The contract of ReportIgnored is slightly different than the
293     // other Report() functions, in that |field_path.back().index| is
294     // always equal to -1, even if the last field is repeated. This is
295     // because while the other Report() functions indicate where in a
296     // repeated field the action (Addition, Deletion, etc...)
297     // happened, when a repeated field is 'ignored', the differencer
298     // simply calls ReportIgnored on the repeated field as a whole and
299     // moves on without looking at its individual elements.
300     //
301     // Furthermore, ReportIgnored() does not indicate whether the
302     // fields were in fact equal or not, as Compare() does not inspect
303     // these fields at all. It is up to the Reporter to decide whether
304     // the fields are equal or not (perhaps with a second call to
305     // Compare()), if it cares.
ReportIgnored(const Message &,const Message &,const std::vector<SpecificField> &)306     virtual void ReportIgnored(
307         const Message& /* message1 */, const Message& /* message2 */,
308         const std::vector<SpecificField>& /* field_path */) {}
309 
310     // Report that an unknown field is ignored. (see comment above).
311     // Note this is a different function since the last SpecificField in field
312     // path has a null field.  This could break existing Reporter.
ReportUnknownFieldIgnored(const Message &,const Message &,const std::vector<SpecificField> &)313     virtual void ReportUnknownFieldIgnored(
314         const Message& /* message1 */, const Message& /* message2 */,
315         const std::vector<SpecificField>& /* field_path */) {}
316 
317    private:
318     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Reporter);
319   };
320 
321   // MapKeyComparator is used to determine if two elements have the same key
322   // when comparing elements of a repeated field as a map.
323   class PROTOBUF_EXPORT MapKeyComparator {
324    public:
325     MapKeyComparator();
326     virtual ~MapKeyComparator();
327 
IsMatch(const Message &,const Message &,const std::vector<SpecificField> &)328     virtual bool IsMatch(
329         const Message& /* message1 */, const Message& /* message2 */,
330         const std::vector<SpecificField>& /* parent_fields */) const {
331       GOOGLE_CHECK(false) << "IsMatch() is not implemented.";
332       return false;
333     }
334 
335    private:
336     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapKeyComparator);
337   };
338 
339   // Abstract base class from which all IgnoreCriteria derive.
340   // By adding IgnoreCriteria more complex ignore logic can be implemented.
341   // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared
342   // field IsIgnored is called on each added IgnoreCriteria until one returns
343   // true or all return false.
344   // IsIgnored is called for fields where at least one side has a value.
345   class PROTOBUF_EXPORT IgnoreCriteria {
346    public:
347     IgnoreCriteria();
348     virtual ~IgnoreCriteria();
349 
350     // Returns true if the field should be ignored.
351     virtual bool IsIgnored(
352         const Message& /* message1 */, const Message& /* message2 */,
353         const FieldDescriptor* /* field */,
354         const std::vector<SpecificField>& /* parent_fields */) = 0;
355 
356     // Returns true if the unknown field should be ignored.
357     // Note: This will be called for unknown fields as well in which case
358     //       field.field will be null.
IsUnknownFieldIgnored(const Message &,const Message &,const SpecificField &,const std::vector<SpecificField> &)359     virtual bool IsUnknownFieldIgnored(
360         const Message& /* message1 */, const Message& /* message2 */,
361         const SpecificField& /* field */,
362         const std::vector<SpecificField>& /* parent_fields */) {
363       return false;
364     }
365   };
366 
367   // To add a Reporter, construct default here, then use ReportDifferencesTo or
368   // ReportDifferencesToString.
369   explicit MessageDifferencer();
370 
371   ~MessageDifferencer();
372 
373   enum MessageFieldComparison {
374     EQUAL,       // Fields must be present in both messages
375                  // for the messages to be considered the same.
376     EQUIVALENT,  // Fields with default values are considered set
377                  // for comparison purposes even if not explicitly
378                  // set in the messages themselves.  Unknown fields
379                  // are ignored.
380   };
381 
382   enum Scope {
383     FULL,    // All fields of both messages are considered in the comparison.
384     PARTIAL  // Only fields present in the first message are considered; fields
385              // set only in the second message will be skipped during
386              // comparison.
387   };
388 
389   // DEPRECATED. Use FieldComparator::FloatComparison instead.
390   enum FloatComparison {
391     EXACT,       // Floats and doubles are compared exactly.
392     APPROXIMATE  // Floats and doubles are compared using the
393                  // MathUtil::AlmostEquals method.
394   };
395 
396   enum RepeatedFieldComparison {
397     AS_LIST,  // Repeated fields are compared in order.  Differing values at
398               // the same index are reported using ReportModified().  If the
399               // repeated fields have different numbers of elements, the
400               // unpaired elements are reported using ReportAdded() or
401               // ReportDeleted().
402     AS_SET,   // Treat all the repeated fields as sets.
403               // See TreatAsSet(), as below.
404     AS_SMART_LIST,  // Similar to AS_SET, but preserve the order and find the
405                     // longest matching sequence from the first matching
406                     // element. To use an optimal solution, call
407                     // SetMatchIndicesForSmartListCallback() to pass it in.
408     AS_SMART_SET,   // Similar to AS_SET, but match elements with fewest diffs.
409   };
410 
411   // The elements of the given repeated field will be treated as a set for
412   // diffing purposes, so different orderings of the same elements will be
413   // considered equal.  Elements which are present on both sides of the
414   // comparison but which have changed position will be reported with
415   // ReportMoved().  Elements which only exist on one side or the other are
416   // reported with ReportAdded() and ReportDeleted() regardless of their
417   // positions.  ReportModified() is never used for this repeated field.  If
418   // the only differences between the compared messages is that some fields
419   // have been moved, then the comparison returns true.
420   //
421   // Note that despite the name of this method, this is really
422   // comparison as multisets: if one side of the comparison has a duplicate
423   // in the repeated field but the other side doesn't, this will count as
424   // a mismatch.
425   //
426   // If the scope of comparison is set to PARTIAL, then in addition to what's
427   // above, extra values added to repeated fields of the second message will
428   // not cause the comparison to fail.
429   //
430   // Note that set comparison is currently O(k * n^2) (where n is the total
431   // number of elements, and k is the average size of each element). In theory
432   // it could be made O(n * k) with a more complex hashing implementation. Feel
433   // free to contribute one if the current implementation is too slow for you.
434   // If partial matching is also enabled, the time complexity will be O(k * n^2
435   // + n^3) in which n^3 is the time complexity of the maximum matching
436   // algorithm.
437   //
438   // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
439   void TreatAsSet(const FieldDescriptor* field);
440   void TreatAsSmartSet(const FieldDescriptor* field);
441 
442   // The elements of the given repeated field will be treated as a list for
443   // diffing purposes, so different orderings of the same elements will NOT be
444   // considered equal.
445   //
446   // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
447   void TreatAsList(const FieldDescriptor* field);
448   // Note that the complexity is similar to treating as SET.
449   void TreatAsSmartList(const FieldDescriptor* field);
450 
451   // The elements of the given repeated field will be treated as a map for
452   // diffing purposes, with |key| being the map key.  Thus, elements with the
453   // same key will be compared even if they do not appear at the same index.
454   // Differences are reported similarly to TreatAsSet(), except that
455   // ReportModified() is used to report elements with the same key but
456   // different values.  Note that if an element is both moved and modified,
457   // only ReportModified() will be called.  As with TreatAsSet, if the only
458   // differences between the compared messages is that some fields have been
459   // moved, then the comparison returns true. See TreatAsSet for notes on
460   // performance.
461   //
462   // REQUIRES:  field->is_repeated()
463   // REQUIRES:  field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
464   // REQUIRES:  key->containing_type() == field->message_type()
465   void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key);
466   // Same as TreatAsMap except that this method will use multiple fields as
467   // the key in comparison. All specified fields in 'key_fields' should be
468   // present in the compared elements. Two elements will be treated as having
469   // the same key iff they have the same value for every specified field. There
470   // are two steps in the comparison process. The first one is key matching.
471   // Every element from one message will be compared to every element from
472   // the other message. Only fields in 'key_fields' are compared in this step
473   // to decide if two elements have the same key. The second step is value
474   // comparison. Those pairs of elements with the same key (with equal value
475   // for every field in 'key_fields') will be compared in this step.
476   // Time complexity of the first step is O(s * m * n ^ 2) where s is the
477   // average size of the fields specified in 'key_fields', m is the number of
478   // fields in 'key_fields' and n is the number of elements. If partial
479   // matching is enabled, an extra O(n^3) will be incured by the maximum
480   // matching algorithm. The second step is O(k * n) where k is the average
481   // size of each element.
482   void TreatAsMapWithMultipleFieldsAsKey(
483       const FieldDescriptor* field,
484       const std::vector<const FieldDescriptor*>& key_fields);
485   // Same as TreatAsMapWithMultipleFieldsAsKey, except that each of the field
486   // do not necessarily need to be a direct subfield. Each element in
487   // key_field_paths indicate a path from the message being compared, listing
488   // successive subfield to reach the key field.
489   //
490   // REQUIRES:
491   //   for key_field_path in key_field_paths:
492   //     key_field_path[0]->containing_type() == field->message_type()
493   //     for i in [0, key_field_path.size() - 1):
494   //       key_field_path[i+1]->containing_type() ==
495   //           key_field_path[i]->message_type()
496   //       key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
497   //       !key_field_path[i]->is_repeated()
498   void TreatAsMapWithMultipleFieldPathsAsKey(
499       const FieldDescriptor* field,
500       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
501 
502   // Uses a custom MapKeyComparator to determine if two elements have the same
503   // key when comparing a repeated field as a map.
504   // The caller is responsible to delete the key_comparator.
505   // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the
506   // first key matching step. Rather than comparing some specified fields, it
507   // will invoke the IsMatch method of the given 'key_comparator' to decide if
508   // two elements have the same key.
509   void TreatAsMapUsingKeyComparator(const FieldDescriptor* field,
510                                     const MapKeyComparator* key_comparator);
511 
512   // Initiates and returns a new instance of MultipleFieldsMapKeyComparator.
513   MapKeyComparator* CreateMultipleFieldsMapKeyComparator(
514       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
515 
516   // Add a custom ignore criteria that is evaluated in addition to the
517   // ignored fields added with IgnoreField.
518   // Takes ownership of ignore_criteria.
519   void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria);
520 
521   // Indicates that any field with the given descriptor should be
522   // ignored for the purposes of comparing two messages. This applies
523   // to fields nested in the message structure as well as top level
524   // ones. When the MessageDifferencer encounters an ignored field,
525   // ReportIgnored is called on the reporter, if one is specified.
526   //
527   // The only place where the field's 'ignored' status is not applied is when
528   // it is being used as a key in a field passed to TreatAsMap or is one of
529   // the fields passed to TreatAsMapWithMultipleFieldsAsKey.
530   // In this case it is compared in key matching but after that it's ignored
531   // in value comparison.
532   void IgnoreField(const FieldDescriptor* field);
533 
534   // Sets the field comparator used to determine differences between protocol
535   // buffer fields. By default it's set to a DefaultFieldComparator instance.
536   // MessageDifferencer doesn't take ownership over the passed object.
537   // Note that this method must be called before Compare for the comparator to
538   // be used.
539   void set_field_comparator(FieldComparator* comparator);
540 #ifdef PROTOBUF_FUTURE_BREAKING_CHANGES
541   void set_field_comparator(DefaultFieldComparator* comparator);
542 #endif  // PROTOBUF_FUTURE_BREAKING_CHANGES
543 
544   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
545   // Sets the fraction and margin for the float comparison of a given field.
546   // Uses MathUtil::WithinFractionOrMargin to compare the values.
547   // NOTE: this method does nothing if differencer's field comparator has been
548   //       set to a custom object.
549   //
550   // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or
551   //           field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT
552   // REQUIRES: float_comparison_ == APPROXIMATE
553   void SetFractionAndMargin(const FieldDescriptor* field, double fraction,
554                             double margin);
555 
556   // Sets the type of comparison (as defined in the MessageFieldComparison
557   // enumeration above) that is used by this differencer when determining how
558   // to compare fields in messages.
559   void set_message_field_comparison(MessageFieldComparison comparison);
560 
561   // Tells the differencer whether or not to report matches. This method must
562   // be called before Compare. The default for a new differencer is false.
set_report_matches(bool report_matches)563   void set_report_matches(bool report_matches) {
564     report_matches_ = report_matches;
565   }
566 
567   // Tells the differencer whether or not to report moves (in a set or map
568   // repeated field). This method must be called before Compare. The default for
569   // a new differencer is true.
set_report_moves(bool report_moves)570   void set_report_moves(bool report_moves) { report_moves_ = report_moves; }
571 
572   // Tells the differencer whether or not to report ignored values. This method
573   // must be called before Compare. The default for a new differencer is true.
set_report_ignores(bool report_ignores)574   void set_report_ignores(bool report_ignores) {
575     report_ignores_ = report_ignores;
576   }
577 
578   // Sets the scope of the comparison (as defined in the Scope enumeration
579   // above) that is used by this differencer when determining which fields to
580   // compare between the messages.
581   void set_scope(Scope scope);
582 
583   // Returns the current scope used by this differencer.
584   Scope scope();
585 
586   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
587   // Sets the type of comparison (as defined in the FloatComparison enumeration
588   // above) that is used by this differencer when comparing float (and double)
589   // fields in messages.
590   // NOTE: this method does nothing if differencer's field comparator has been
591   //       set to a custom object.
592   void set_float_comparison(FloatComparison comparison);
593 
594   // Sets the type of comparison for repeated field (as defined in the
595   // RepeatedFieldComparison enumeration above) that is used by this
596   // differencer when compare repeated fields in messages.
597   void set_repeated_field_comparison(RepeatedFieldComparison comparison);
598 
599   // Returns the current repeated field comparison used by this differencer.
600   RepeatedFieldComparison repeated_field_comparison();
601 
602   // Compares the two specified messages, returning true if they are the same,
603   // false otherwise. If this method returns false, any changes between the
604   // two messages will be reported if a Reporter was specified via
605   // ReportDifferencesTo (see also ReportDifferencesToString).
606   //
607   // This method REQUIRES that the two messages have the same
608   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
609   bool Compare(const Message& message1, const Message& message2);
610 
611   // Same as above, except comparing only the list of fields specified by the
612   // two vectors of FieldDescriptors.
613   bool CompareWithFields(
614       const Message& message1, const Message& message2,
615       const std::vector<const FieldDescriptor*>& message1_fields,
616       const std::vector<const FieldDescriptor*>& message2_fields);
617 
618   // Automatically creates a reporter that will output the differences
619   // found (if any) to the specified output string pointer. Note that this
620   // method must be called before Compare.
621   void ReportDifferencesToString(std::string* output);
622 
623   // Tells the MessageDifferencer to report differences via the specified
624   // reporter. Note that this method must be called before Compare for
625   // the reporter to be used. It is the responsibility of the caller to delete
626   // this object.
627   // If the provided pointer equals NULL, the MessageDifferencer stops reporting
628   // differences to any previously set reporters or output strings.
629   void ReportDifferencesTo(Reporter* reporter);
630 
631   // An implementation of the MessageDifferencer Reporter that outputs
632   // any differences found in human-readable form to the supplied
633   // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter
634   // *must* be '$'.
635   //
636   // WARNING: this reporter does not necessarily flush its output until it is
637   // destroyed. As a result, it is not safe to assume the output is valid or
638   // complete until after you destroy the reporter. For example, if you use a
639   // StreamReporter to write to a StringOutputStream, the target string may
640   // contain uninitialized data until the reporter is destroyed.
641   class PROTOBUF_EXPORT StreamReporter : public Reporter {
642    public:
643     explicit StreamReporter(io::ZeroCopyOutputStream* output);
644     explicit StreamReporter(io::Printer* printer);  // delimiter '$'
645     ~StreamReporter() override;
646 
647     // When set to true, the stream reporter will also output aggregates nodes
648     // (i.e. messages and groups) whose subfields have been modified. When
649     // false, will only report the individual subfields. Defaults to false.
set_report_modified_aggregates(bool report)650     void set_report_modified_aggregates(bool report) {
651       report_modified_aggregates_ = report;
652     }
653 
654     // The following are implementations of the methods described above.
655 
656     void ReportAdded(const Message& message1, const Message& message2,
657                      const std::vector<SpecificField>& field_path) override;
658 
659     void ReportDeleted(const Message& message1, const Message& message2,
660                        const std::vector<SpecificField>& field_path) override;
661 
662     void ReportModified(const Message& message1, const Message& message2,
663                         const std::vector<SpecificField>& field_path) override;
664 
665     void ReportMoved(const Message& message1, const Message& message2,
666                      const std::vector<SpecificField>& field_path) override;
667 
668     void ReportMatched(const Message& message1, const Message& message2,
669                        const std::vector<SpecificField>& field_path) override;
670 
671     void ReportIgnored(const Message& message1, const Message& message2,
672                        const std::vector<SpecificField>& field_path) override;
673 
674     void ReportUnknownFieldIgnored(
675         const Message& message1, const Message& message2,
676         const std::vector<SpecificField>& field_path) override;
677 
678     // Messages that are being compared must be provided to StreamReporter prior
679     // to processing
680     void SetMessages(const Message& message1, const Message& message2);
681 
682    protected:
683     // Prints the specified path of fields to the buffer.
684     virtual void PrintPath(const std::vector<SpecificField>& field_path,
685                            bool left_side);
686 
687     // Prints the value of fields to the buffer.  left_side is true if the
688     // given message is from the left side of the comparison, false if it
689     // was the right.  This is relevant only to decide whether to follow
690     // unknown_field_index1 or unknown_field_index2 when an unknown field
691     // is encountered in field_path.
692     virtual void PrintValue(const Message& message,
693                             const std::vector<SpecificField>& field_path,
694                             bool left_side);
695 
696     // Prints the specified path of unknown fields to the buffer.
697     virtual void PrintUnknownFieldValue(const UnknownField* unknown_field);
698 
699     // Just print a string
700     void Print(const std::string& str);
701 
702    private:
703     // helper function for PrintPath that contains logic for printing maps
704     void PrintMapKey(bool left_side, const SpecificField& specific_field);
705 
706     io::Printer* printer_;
707     bool delete_printer_;
708     bool report_modified_aggregates_;
709     const Message* message1_;
710     const Message* message2_;
711     MessageDifferencer::UnpackAnyField unpack_any_field_;
712     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StreamReporter);
713   };
714 
715  private:
716   friend class SimpleFieldComparator;
717 
718   // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator.
719   // Implementation of this class needs to do field value comparison which
720   // relies on some private methods of MessageDifferencer. That's why this
721   // class is declared as a nested class of MessageDifferencer.
722   class MultipleFieldsMapKeyComparator;
723 
724   // A MapKeyComparator for use with map_entries.
725   class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator {
726    public:
727     explicit MapEntryKeyComparator(MessageDifferencer* message_differencer);
728     bool IsMatch(
729         const Message& message1, const Message& message2,
730         const std::vector<SpecificField>& parent_fields) const override;
731 
732    private:
733     MessageDifferencer* message_differencer_;
734   };
735 
736   // Returns true if field1's number() is less than field2's.
737   static bool FieldBefore(const FieldDescriptor* field1,
738                           const FieldDescriptor* field2);
739 
740   // Retrieve all the set fields, including extensions.
741   FieldDescriptorArray RetrieveFields(const Message& message,
742                                       bool base_message);
743 
744   // Combine the two lists of fields into the combined_fields output vector.
745   // All fields present in both lists will always be included in the combined
746   // list.  Fields only present in one of the lists will only appear in the
747   // combined list if the corresponding fields_scope option is set to FULL.
748   FieldDescriptorArray CombineFields(const FieldDescriptorArray& fields1,
749                                      Scope fields1_scope,
750                                      const FieldDescriptorArray& fields2,
751                                      Scope fields2_scope);
752 
753   // Internal version of the Compare method which performs the actual
754   // comparison. The parent_fields vector is a vector containing field
755   // descriptors of all fields accessed to get to this comparison operation
756   // (i.e. if the current message is an embedded message, the parent_fields
757   // vector will contain the field that has this embedded message).
758   bool Compare(const Message& message1, const Message& message2,
759                std::vector<SpecificField>* parent_fields);
760 
761   // Compares all the unknown fields in two messages.
762   bool CompareUnknownFields(const Message& message1, const Message& message2,
763                             const UnknownFieldSet&, const UnknownFieldSet&,
764                             std::vector<SpecificField>* parent_fields);
765 
766   // Compares the specified messages for the requested field lists. The field
767   // lists are modified depending on comparison settings, and then passed to
768   // CompareWithFieldsInternal.
769   bool CompareRequestedFieldsUsingSettings(
770       const Message& message1, const Message& message2,
771       const FieldDescriptorArray& message1_fields,
772       const FieldDescriptorArray& message2_fields,
773       std::vector<SpecificField>* parent_fields);
774 
775   // Compares the specified messages with the specified field lists.
776   bool CompareWithFieldsInternal(const Message& message1,
777                                  const Message& message2,
778                                  const FieldDescriptorArray& message1_fields,
779                                  const FieldDescriptorArray& message2_fields,
780                                  std::vector<SpecificField>* parent_fields);
781 
782   // Compares the repeated fields, and report the error.
783   bool CompareRepeatedField(const Message& message1, const Message& message2,
784                             const FieldDescriptor* field,
785                             std::vector<SpecificField>* parent_fields);
786 
787   // Compares map fields, and report the error.
788   bool CompareMapField(const Message& message1, const Message& message2,
789                        const FieldDescriptor* field,
790                        std::vector<SpecificField>* parent_fields);
791 
792   // Helper for CompareRepeatedField and CompareMapField: compares and reports
793   // differences element-wise. This is the implementation for non-map fields,
794   // and can also compare map fields by using the underlying representation.
795   bool CompareRepeatedRep(const Message& message1, const Message& message2,
796                           const FieldDescriptor* field,
797                           std::vector<SpecificField>* parent_fields);
798 
799   // Helper for CompareMapField: compare the map fields using map reflection
800   // instead of sync to repeated.
801   bool CompareMapFieldByMapReflection(const Message& message1,
802                                       const Message& message2,
803                                       const FieldDescriptor* field,
804                                       std::vector<SpecificField>* parent_fields,
805                                       DefaultFieldComparator* comparator);
806 
807   // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields.
808   bool CompareFieldValue(const Message& message1, const Message& message2,
809                          const FieldDescriptor* field, int index1, int index2);
810 
811   // Compares the specified field on the two messages, returning
812   // true if they are the same, false otherwise. For repeated fields,
813   // this method only compares the value in the specified index. This method
814   // uses Compare functions to recurse into submessages.
815   // The parent_fields vector is used in calls to a Reporter instance calls.
816   // It can be NULL, in which case the MessageDifferencer will create new
817   // list of parent messages if it needs to recursively compare the given field.
818   // To avoid confusing users you should not set it to NULL unless you modified
819   // Reporter to handle the change of parent_fields correctly.
820   bool CompareFieldValueUsingParentFields(
821       const Message& message1, const Message& message2,
822       const FieldDescriptor* field, int index1, int index2,
823       std::vector<SpecificField>* parent_fields);
824 
825   // Compares the specified field on the two messages, returning comparison
826   // result, as returned by appropriate FieldComparator.
827   FieldComparator::ComparisonResult GetFieldComparisonResult(
828       const Message& message1, const Message& message2,
829       const FieldDescriptor* field, int index1, int index2,
830       const FieldContext* field_context);
831 
832   // Check if the two elements in the repeated field are match to each other.
833   // if the key_comprator is NULL, this function returns true when the two
834   // elements are equal.
835   bool IsMatch(const FieldDescriptor* repeated_field,
836                const MapKeyComparator* key_comparator, const Message* message1,
837                const Message* message2,
838                const std::vector<SpecificField>& parent_fields,
839                Reporter* reporter, int index1, int index2);
840 
841   // Returns true when this repeated field has been configured to be treated
842   // as a Set / SmartSet / SmartList.
843   bool IsTreatedAsSet(const FieldDescriptor* field);
844   bool IsTreatedAsSmartSet(const FieldDescriptor* field);
845 
846   bool IsTreatedAsSmartList(const FieldDescriptor* field);
847   // When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList
848   // by default to find the longest matching sequence from the first matching
849   // element. The callback takes two vectors showing the matching indices from
850   // the other vector, where -1 means an unmatch.
851   void SetMatchIndicesForSmartListCallback(
852       std::function<void(std::vector<int>*, std::vector<int>*)> callback);
853 
854   // Returns true when this repeated field is to be compared as a subset, ie.
855   // has been configured to be treated as a set or map and scope is set to
856   // PARTIAL.
857   bool IsTreatedAsSubset(const FieldDescriptor* field);
858 
859   // Returns true if this field is to be ignored when this
860   // MessageDifferencer compares messages.
861   bool IsIgnored(const Message& message1, const Message& message2,
862                  const FieldDescriptor* field,
863                  const std::vector<SpecificField>& parent_fields);
864 
865   // Returns true if this unknown field is to be ignored when this
866   // MessageDifferencer compares messages.
867   bool IsUnknownFieldIgnored(const Message& message1, const Message& message2,
868                              const SpecificField& field,
869                              const std::vector<SpecificField>& parent_fields);
870 
871   // Returns MapKeyComparator* when this field has been configured to be treated
872   // as a map or its is_map() return true.  If not, returns NULL.
873   const MapKeyComparator* GetMapKeyComparator(
874       const FieldDescriptor* field) const;
875 
876   // Attempts to match indices of a repeated field, so that the contained values
877   // match. Clears output vectors and sets their values to indices of paired
878   // messages, ie. if message1[0] matches message2[1], then match_list1[0] == 1
879   // and match_list2[1] == 0. The unmatched indices are indicated by -1.
880   // Assumes the repeated field is not treated as a simple list.
881   // This method returns false if the match failed. However, it doesn't mean
882   // that the comparison succeeds when this method returns true (you need to
883   // double-check in this case).
884   bool MatchRepeatedFieldIndices(
885       const Message& message1, const Message& message2,
886       const FieldDescriptor* repeated_field,
887       const MapKeyComparator* key_comparator,
888       const std::vector<SpecificField>& parent_fields,
889       std::vector<int>* match_list1, std::vector<int>* match_list2);
890 
891   // Checks if index is equal to new_index in all the specific fields.
892   static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields);
893 
894   // CHECKs that the given repeated field can be compared according to
895   // new_comparison.
896   void CheckRepeatedFieldComparisons(
897       const FieldDescriptor* field,
898       const RepeatedFieldComparison& new_comparison);
899 
900   // Defines a map between field descriptors and their MapKeyComparators.
901   // Used for repeated fields when they are configured as TreatAsMap.
902   typedef std::map<const FieldDescriptor*, const MapKeyComparator*>
903       FieldKeyComparatorMap;
904 
905   // Defines a set to store field descriptors.  Used for repeated fields when
906   // they are configured as TreatAsSet.
907   typedef std::set<const FieldDescriptor*> FieldSet;
908   typedef std::map<const FieldDescriptor*, RepeatedFieldComparison> FieldMap;
909 
910   Reporter* reporter_;
911   DefaultFieldComparator default_field_comparator_;
912   MessageFieldComparison message_field_comparison_;
913   Scope scope_;
914   RepeatedFieldComparison repeated_field_comparison_;
915 
916   FieldMap repeated_field_comparisons_;
917   // Keeps track of MapKeyComparators that are created within
918   // MessageDifferencer. These MapKeyComparators should be deleted
919   // before MessageDifferencer is destroyed.
920   // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't
921   // store the supplied FieldDescriptors directly. Instead, a new
922   // MapKeyComparator is created for comparison purpose.
923   std::vector<MapKeyComparator*> owned_key_comparators_;
924   FieldKeyComparatorMap map_field_key_comparator_;
925   MapEntryKeyComparator map_entry_key_comparator_;
926   std::vector<IgnoreCriteria*> ignore_criteria_;
927   // Reused multiple times in RetrieveFields to avoid extra allocations
928   std::vector<const FieldDescriptor*> tmp_message_fields_;
929 
930   FieldSet ignored_fields_;
931 
932   union {
933     DefaultFieldComparator* default_impl;
934     FieldComparator* base;
935   } field_comparator_ = {&default_field_comparator_};
936   enum { kFCDefault, kFCBase } field_comparator_kind_ = kFCDefault;
937 
938   bool report_matches_;
939   bool report_moves_;
940   bool report_ignores_;
941 
942   std::string* output_string_;
943 
944   // Callback to post-process the matched indices to support SMART_LIST.
945   std::function<void(std::vector<int>*, std::vector<int>*)>
946       match_indices_for_smart_list_callback_;
947 
948   MessageDifferencer::UnpackAnyField unpack_any_field_;
949   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageDifferencer);
950 };
951 
952 // This class provides extra information to the FieldComparator::Compare
953 // function.
954 class PROTOBUF_EXPORT FieldContext {
955  public:
FieldContext(std::vector<MessageDifferencer::SpecificField> * parent_fields)956   explicit FieldContext(
957       std::vector<MessageDifferencer::SpecificField>* parent_fields)
958       : parent_fields_(parent_fields) {}
959 
parent_fields()960   std::vector<MessageDifferencer::SpecificField>* parent_fields() const {
961     return parent_fields_;
962   }
963 
964  private:
965   std::vector<MessageDifferencer::SpecificField>* parent_fields_;
966 };
967 
968 }  // namespace util
969 }  // namespace protobuf
970 }  // namespace google
971 
972 #include <google/protobuf/port_undef.inc>
973 
974 #endif  // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
975