1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 
5 #include "precomp.hpp"
6 
7 #include <map>
8 #include <set>
9 #include <string>
10 #include <tuple>
11 #include <unordered_map>
12 #include <vector>
13 #include <utility>
14 #include <limits>
15 #include <algorithm>
16 
17 #include "opencv2/tracking/tracking_by_matching.hpp"
18 #include "opencv2/core/check.hpp"
19 #include "kuhn_munkres.hpp"
20 
// Local aliases over OpenCV's assertion macros.
// TBM_CHECK forwards a boolean condition to CV_Assert.
#define TBM_CHECK(cond) CV_Assert(cond)

// Binary comparison checks: both operands are forwarded to the matching
// CV_Check* macro together with a fixed "Assertion error:" message.
#define TBM_CHECK_EQ(lhs, rhs) CV_CheckEQ(lhs, rhs, "Assertion error:")
#define TBM_CHECK_NE(lhs, rhs) CV_CheckNE(lhs, rhs, "Assertion error:")
#define TBM_CHECK_LT(lhs, rhs) CV_CheckLT(lhs, rhs, "Assertion error:")
#define TBM_CHECK_GT(lhs, rhs) CV_CheckGT(lhs, rhs, "Assertion error:")
#define TBM_CHECK_LE(lhs, rhs) CV_CheckLE(lhs, rhs, "Assertion error:")
#define TBM_CHECK_GE(lhs, rhs) CV_CheckGE(lhs, rhs, "Assertion error:")
29 
30 namespace cv {
31 namespace detail {
32 inline namespace tracking {
33 
34 using namespace tbm;
35 
CosDistance(const cv::Size & descriptor_size)36 CosDistance::CosDistance(const cv::Size &descriptor_size)
37     : descriptor_size_(descriptor_size) {
38     TBM_CHECK(descriptor_size.area() != 0);
39 }
40 
compute(const cv::Mat & descr1,const cv::Mat & descr2)41 float CosDistance::compute(const cv::Mat &descr1, const cv::Mat &descr2) {
42     TBM_CHECK(!descr1.empty());
43     TBM_CHECK(!descr2.empty());
44     TBM_CHECK(descr1.size() == descriptor_size_);
45     TBM_CHECK(descr2.size() == descriptor_size_);
46 
47     double xy = descr1.dot(descr2);
48     double xx = descr1.dot(descr1);
49     double yy = descr2.dot(descr2);
50     double norm = sqrt(xx * yy) + 1e-6;
51     return 0.5f * static_cast<float>(1.0 - xy / norm);
52 }
53 
compute(const std::vector<cv::Mat> & descrs1,const std::vector<cv::Mat> & descrs2)54 std::vector<float> CosDistance::compute(const std::vector<cv::Mat> &descrs1,
55                                         const std::vector<cv::Mat> &descrs2) {
56     TBM_CHECK(descrs1.size() != 0);
57     TBM_CHECK(descrs1.size() == descrs2.size());
58 
59     std::vector<float> distances(descrs1.size(), 1.f);
60     for (size_t i = 0; i < descrs1.size(); i++) {
61         distances.at(i) = compute(descrs1.at(i), descrs2.at(i));
62     }
63 
64     return distances;
65 }
66 
67 
compute(const cv::Mat & descr1,const cv::Mat & descr2)68 float MatchTemplateDistance::compute(const cv::Mat &descr1,
69                                      const cv::Mat &descr2) {
70     TBM_CHECK(!descr1.empty() && !descr2.empty());
71     TBM_CHECK_EQ(descr1.size(), descr2.size());
72     TBM_CHECK_EQ(descr1.type(), descr2.type());
73     cv::Mat res;
74     cv::matchTemplate(descr1, descr2, res, type_);
75     TBM_CHECK(res.size() == cv::Size(1, 1));
76     float dist = res.at<float>(0, 0);
77     return scale_ * dist + offset_;
78 }
79 
compute(const std::vector<cv::Mat> & descrs1,const std::vector<cv::Mat> & descrs2)80 std::vector<float> MatchTemplateDistance::compute(const std::vector<cv::Mat> &descrs1,
81                                                   const std::vector<cv::Mat> &descrs2) {
82     std::vector<float> result;
83     for (size_t i = 0; i < descrs1.size(); i++) {
84         result.push_back(compute(descrs1[i], descrs2[i]));
85     }
86     return result;
87 }
88 
89 namespace {
// Returns the centre of a rectangle. Each coordinate is computed in floating
// point and then converted to int.
cv::Point Center(const cv::Rect& rect) {
    const int cx = static_cast<int>(rect.x + rect.width * .5);
    const int cy = static_cast<int>(rect.y + rect.height * .5);
    return cv::Point(cx, cy);
}
93 
Centers(const TrackedObjects & detections)94 std::vector<cv::Point> Centers(const TrackedObjects &detections) {
95     std::vector<cv::Point> centers(detections.size());
96     for (size_t i = 0; i < detections.size(); i++) {
97         centers[i] = Center(detections[i].rect);
98     }
99     return centers;
100 }
101 
// True when val lies in the closed interval [min, max].
// Any NaN operand makes the result false, because every comparison fails.
inline bool IsInRange(float val, float min, float max) {
    if (!(min <= val)) {
        return false;
    }
    return val <= max;
}
105 
IsInRange(float val,cv::Vec2f range)106 inline bool IsInRange(float val, cv::Vec2f range) {
107     return IsInRange(val, range[0], range[1]);
108 }
109 
GenRandomColors(int colors_num)110 std::vector<cv::Scalar> GenRandomColors(int colors_num) {
111     std::vector<cv::Scalar> colors(colors_num);
112     for (int i = 0; i < colors_num; i++) {
113         colors[i] = cv::Scalar(static_cast<uchar>(255. * rand() / RAND_MAX),  // NOLINT
114                                static_cast<uchar>(255. * rand() / RAND_MAX),  // NOLINT
115                                static_cast<uchar>(255. * rand() / RAND_MAX));  // NOLINT
116     }
117     return colors;
118 }
119 
120 ///
121 /// \brief Draws a polyline on a frame.
122 /// \param[in] polyline Vector of points (polyline).
123 /// \param[in] color Color (BGR).
124 /// \param[in,out] image Frame.
125 /// \param[in] lwd Line width.
126 ///
DrawPolyline(const std::vector<cv::Point> & polyline,const cv::Scalar & color,CV_OUT cv::Mat & image,int lwd=5)127 void DrawPolyline(const std::vector<cv::Point>& polyline,
128                   const cv::Scalar& color, CV_OUT cv::Mat& image,
129                   int lwd = 5) {
130     TBM_CHECK(!image.empty());
131     TBM_CHECK_EQ(image.type(), CV_8UC3);
132     TBM_CHECK_GT(lwd, 0);
133     TBM_CHECK_LT(lwd, 20);
134 
135     for (size_t i = 1; i < polyline.size(); i++) {
136         cv::line(image, polyline[i - 1], polyline[i], color, lwd);
137     }
138 }
139 
ValidateParams(const TrackerParams & p)140 void ValidateParams(const TrackerParams &p) {
141     TBM_CHECK_GE(p.min_track_duration, static_cast<size_t>(500));
142     TBM_CHECK_LE(p.min_track_duration, static_cast<size_t>(10000));
143 
144     TBM_CHECK_LE(p.forget_delay, static_cast<size_t>(10000));
145 
146     TBM_CHECK_GE(p.aff_thr_fast, 0.0f);
147     TBM_CHECK_LE(p.aff_thr_fast, 1.0f);
148 
149     TBM_CHECK_GE(p.aff_thr_strong, 0.0f);
150     TBM_CHECK_LE(p.aff_thr_strong, 1.0f);
151 
152     TBM_CHECK_GE(p.shape_affinity_w, 0.0f);
153     TBM_CHECK_LE(p.shape_affinity_w, 100.0f);
154 
155     TBM_CHECK_GE(p.motion_affinity_w, 0.0f);
156     TBM_CHECK_LE(p.motion_affinity_w, 100.0f);
157 
158     TBM_CHECK_GE(p.time_affinity_w, 0.0f);
159     TBM_CHECK_LE(p.time_affinity_w, 100.0f);
160 
161     TBM_CHECK_GE(p.min_det_conf, 0.0f);
162     TBM_CHECK_LE(p.min_det_conf, 1.0f);
163 
164     TBM_CHECK_GE(p.bbox_aspect_ratios_range[0], 0.0f);
165     TBM_CHECK_LE(p.bbox_aspect_ratios_range[1], 10.0f);
166     TBM_CHECK_LT(p.bbox_aspect_ratios_range[0], p.bbox_aspect_ratios_range[1]);
167 
168     TBM_CHECK_GE(p.bbox_heights_range[0], 10.0f);
169     TBM_CHECK_LE(p.bbox_heights_range[1], 1080.0f);
170     TBM_CHECK_LT(p.bbox_heights_range[0], p.bbox_heights_range[1]);
171 
172     TBM_CHECK_GE(p.predict, 0);
173     TBM_CHECK_LE(p.predict, 10000);
174 
175     TBM_CHECK_GE(p.strong_affinity_thr, 0.0f);
176     TBM_CHECK_LE(p.strong_affinity_thr, 1.0f);
177 
178     TBM_CHECK_GE(p.reid_thr, 0.0f);
179     TBM_CHECK_LE(p.reid_thr, 1.0f);
180 
181 
182     if (p.max_num_objects_in_track > 0) {
183         int min_required_track_length = static_cast<int>(p.forget_delay);
184         TBM_CHECK_GE(p.max_num_objects_in_track, min_required_track_length);
185         TBM_CHECK_LE(p.max_num_objects_in_track, 10000);
186     }
187 }
188 
189 }  // anonymous namespace
190 
191 ///
192 /// \brief Tracker-by-Matching algorithm implementation.
193 ///
194 /// This class is implementation of tracking-by-matching system. It uses two
195 /// different appearance measures to compute affinity between bounding boxes:
196 /// some fast descriptor and some strong descriptor. Each time the assignment
197 /// problem is solved. The assignment problem in our case is how to establish
198 /// correspondence between existing tracklets and recently detected objects.
199 /// First step is to compute an affinity matrix between tracklets and
200 /// detections. The affinity equals to
201 ///       appearance_affinity * motion_affinity * shape_affinity.
202 /// Where appearance is 1 - distance(tracklet_fast_dscr, detection_fast_dscr).
203 /// Second step is to solve the assignment problem using Kuhn-Munkres
204 /// algorithm. If correspondence between some tracklet and detection is
205 /// established with low confidence (affinity) then the strong descriptor is
206 /// used to determine if there is correspondence between tracklet and detection.
207 ///
208 class TrackerByMatching: public ITrackerByMatching {
209 public:
210     using Descriptor = std::shared_ptr<IImageDescriptor>;
211     using Distance = std::shared_ptr<IDescriptorDistance>;
212 
213     ///
214     /// \brief Constructor that creates an instance of the tracker with
215     /// parameters.
216     /// \param[in] params - the tracker parameters.
217     ///
218     explicit TrackerByMatching(const TrackerParams &params = TrackerParams());
~TrackerByMatching()219     virtual ~TrackerByMatching() {}
220 
221     ///
222     /// \brief Process given frame.
223     /// \param[in] frame Colored image (CV_8UC3).
224     /// \param[in] detections Detected objects on the frame.
225     /// \param[in] timestamp Timestamp must be positive and measured in
226     /// milliseconds
227     ///
228     void process(const cv::Mat &frame, const TrackedObjects &detections,
229                  uint64_t timestamp) override;
230 
231     ///
232     /// \brief Pipeline parameters getter.
233     /// \return Parameters of pipeline.
234     ///
235     const TrackerParams &params() const override;
236 
237     ///
238     /// \brief Pipeline parameters setter.
239     /// \param[in] params Parameters of pipeline.
240     ///
241     void setParams(const TrackerParams &params) override;
242 
243     ///
244     /// \brief Fast descriptor getter.
245     /// \return Fast descriptor used in pipeline.
246     ///
247     const Descriptor &descriptorFast() const override;
248 
249     ///
250     /// \brief Fast descriptor setter.
251     /// \param[in] val Fast descriptor used in pipeline.
252     ///
253     void setDescriptorFast(const Descriptor &val) override;
254 
255     ///
256     /// \brief Strong descriptor getter.
257     /// \return Strong descriptor used in pipeline.
258     ///
259     const Descriptor &descriptorStrong() const override;
260 
261     ///
262     /// \brief Strong descriptor setter.
263     /// \param[in] val Strong descriptor used in pipeline.
264     ///
265     void setDescriptorStrong(const Descriptor &val) override;
266 
267     ///
268     /// \brief Fast distance getter.
269     /// \return Fast distance used in pipeline.
270     ///
271     const Distance &distanceFast() const override;
272 
273     ///
274     /// \brief Fast distance setter.
275     /// \param[in] val Fast distance used in pipeline.
276     ///
277     void setDistanceFast(const Distance &val) override;
278 
279     ///
280     /// \brief Strong distance getter.
281     /// \return Strong distance used in pipeline.
282     ///
283     const Distance &distanceStrong() const override;
284 
285     ///
286     /// \brief Strong distance setter.
287     /// \param[in] val Strong distance used in pipeline.
288     ///
289     void setDistanceStrong(const Distance &val) override;
290 
291     ///
292     /// \brief Returns number of counted people.
293     /// \return a number of counted people.
294     ///
295     size_t count() const override;
296 
297     ///
298     /// \brief Get active tracks to draw
299     /// \return Active tracks.
300     ///
301     std::unordered_map<size_t, std::vector<cv::Point> > getActiveTracks() const override;
302 
303     ///
304     /// \brief Get tracked detections.
305     /// \return Tracked detections.
306     ///
307     TrackedObjects trackedDetections() const override;
308 
309     ///
310     /// \brief Draws active tracks on a given frame.
311     /// \param[in] frame Colored image (CV_8UC3).
312     /// \return Colored image with drawn active tracks.
313     ///
314     cv::Mat drawActiveTracks(const cv::Mat &frame) override;
315 
316     ///
317     /// \brief Print confusion matrices of data association classifiers.
318     /// It works only in case of loaded detection logs instead of native
319     /// detectors.
320     ///
321     void PrintConfusionMatrices() const;
322 
323     ///
324     /// \brief isTrackForgotten returns true if track is forgotten.
325     /// \param id Track ID.
326     /// \return true if track is forgotten.
327     ///
328     bool isTrackForgotten(size_t id) const override;
329 
330     ///
331     /// \brief tracks Returns all tracks including forgotten (lost too many frames
332     /// ago).
333     /// \return Set of tracks {id, track}.
334     ///
335     const std::unordered_map<size_t, Track> &tracks() const override;
336 
337     ///
338     /// \brief isTrackValid Checks whether track is valid (duration > threshold).
339     /// \param track_id Index of checked track.
340     /// \return True if track duration exceeds some predefined value.
341     ///
342     bool isTrackValid(size_t track_id) const override;
343 
344     ///
345     /// \brief dropForgottenTracks Removes tracks from memory that were lost too
346     /// many frames ago.
347     ///
348     void dropForgottenTracks() override;
349 
350     ///
351     /// \brief dropForgottenTrack Check that the track was lost too many frames
352     /// ago
353     /// and removes it frm memory.
354     ///
355     void dropForgottenTrack(size_t track_id) override;
356 
357 private:
358     struct Match {
359         int frame_idx1;
360         int frame_idx2;
361         cv::Rect rect1;
362         cv::Rect rect2;
363         cv::Rect pr_rect1;
364         bool pr_label;
365         bool gt_label;
366 
Matchcv::detail::tracking::TrackerByMatching::Match367         Match() {}
368 
Matchcv::detail::tracking::TrackerByMatching::Match369         Match(const TrackedObject &a, const cv::Rect &a_pr_rect,
370               const TrackedObject &b, bool pr_label)
371             : frame_idx1(a.frame_idx),
372             frame_idx2(b.frame_idx),
373             rect1(a.rect),
374             rect2(b.rect),
375             pr_rect1(a_pr_rect),
376             pr_label(pr_label),
377             gt_label(a.object_id == b.object_id) {
378                 CV_Assert(frame_idx1 != frame_idx2);
379             }
380     };
381 
382 
383     const ObjectTracks all_tracks(bool valid_only) const;
384     // Returns shape affinity.
385     static float ShapeAffinity(float w, const cv::Rect &trk, const cv::Rect &det);
386 
387     // Returns motion affinity.
388     static float MotionAffinity(float w, const cv::Rect &trk,
389                                 const cv::Rect &det);
390 
391     // Returns time affinity.
392     static float TimeAffinity(float w, const float &trk, const float &det);
393 
394     cv::Rect PredictRect(size_t id, size_t k, size_t s) const;
395 
396     cv::Rect PredictRectSmoothed(size_t id, size_t k, size_t s) const;
397 
398     cv::Rect PredictRectSimple(size_t id, size_t k, size_t s) const;
399 
400     void SolveAssignmentProblem(
401         const std::set<size_t> &track_ids, const TrackedObjects &detections,
402         const std::vector<cv::Mat> &descriptors,
403         CV_OUT std::set<size_t>& unmatched_tracks,
404         CV_OUT std::set<size_t>& unmatched_detections,
405         CV_OUT std::set<std::tuple<size_t, size_t, float>>& matches);
406 
407     void ComputeFastDesciptors(const cv::Mat &frame,
408                                const TrackedObjects &detections,
409                                CV_OUT std::vector<cv::Mat>& desriptors);
410 
411     void ComputeDissimilarityMatrix(const std::set<size_t> &active_track_ids,
412                                     const TrackedObjects &detections,
413                                     const std::vector<cv::Mat> &fast_descriptors,
414                                     CV_OUT cv::Mat& dissimilarity_matrix);
415 
416     std::vector<float> ComputeDistances(
417         const cv::Mat &frame,
418         const TrackedObjects& detections,
419         const std::vector<std::pair<size_t, size_t>> &track_and_det_ids,
420         CV_OUT std::map<size_t, cv::Mat>& det_id_to_descriptor);
421 
422     std::map<size_t, std::pair<bool, cv::Mat>> StrongMatching(
423         const cv::Mat &frame,
424         const TrackedObjects& detections,
425         const std::vector<std::pair<size_t, size_t>> &track_and_det_ids);
426 
427     std::vector<std::pair<size_t, size_t>> GetTrackToDetectionIds(
428         const std::set<std::tuple<size_t, size_t, float>> &matches);
429 
430     float AffinityFast(const cv::Mat &descriptor1, const TrackedObject &obj1,
431                        const cv::Mat &descriptor2, const TrackedObject &obj2);
432 
433     float Affinity(const TrackedObject &obj1, const TrackedObject &obj2);
434 
435     void AddNewTrack(const cv::Mat &frame, const TrackedObject &detection,
436                      const cv::Mat &fast_descriptor,
437                      const cv::Mat &descriptor_strong = cv::Mat());
438 
439     void AddNewTracks(const cv::Mat &frame, const TrackedObjects &detections,
440                       const std::vector<cv::Mat> &descriptors_fast);
441 
442     void AddNewTracks(const cv::Mat &frame, const TrackedObjects &detections,
443                       const std::vector<cv::Mat> &descriptors_fast,
444                       const std::set<size_t> &ids);
445 
446     void AppendToTrack(const cv::Mat &frame, size_t track_id,
447                        const TrackedObject &detection,
448                        const cv::Mat &descriptor_fast,
449                        const cv::Mat &descriptor_strong);
450 
451     bool EraseTrackIfBBoxIsOutOfFrame(size_t track_id);
452 
453     bool EraseTrackIfItWasLostTooManyFramesAgo(size_t track_id);
454 
455     bool UpdateLostTrackAndEraseIfItsNeeded(size_t track_id);
456 
457     void UpdateLostTracks(const std::set<size_t> &track_ids);
458 
459     static cv::Mat ConfusionMatrix(const std::vector<Match> &matches);
460 
461     const std::set<size_t> &active_track_ids() const;
462 
463     // Returns decisions made by heuristic based on fast distance/descriptor and
464     // shape, motion and time affinity.
465     const std::vector<Match> & base_classifier_matches() const;
466 
467     // Returns decisions made by heuristic based on strong distance/descriptor
468     // and
469     // shape, motion and time affinity.
470     const std::vector<Match> &reid_based_classifier_matches() const;
471 
472     // Returns decisions made by strong distance/descriptor affinity.
473     const std::vector<Match> &reid_classifier_matches() const;
474 
475     TrackedObjects FilterDetections(const TrackedObjects &detections) const;
476     bool isTrackForgotten(const Track &track) const;
477 
478     // Parameters of the pipeline.
479     TrackerParams params_;
480 
481     // Indexes of active tracks.
482     std::set<size_t> active_track_ids_;
483 
484     // Descriptor fast (base classifer).
485     Descriptor descriptor_fast_;
486 
487     // Distance fast (base classifer).
488     Distance distance_fast_;
489 
490     // Descriptor strong (reid classifier).
491     Descriptor descriptor_strong_;
492 
493     // Distance strong (reid classifier).
494     Distance distance_strong_;
495 
496     // All tracks.
497     std::unordered_map<size_t, Track> tracks_;
498 
499     // Previous frame image.
500     cv::Size prev_frame_size_;
501 
502     struct pair_hash {
operator ()cv::detail::tracking::TrackerByMatching::pair_hash503         std::size_t operator()(const std::pair<size_t, size_t> &p) const {
504             CV_Assert(p.first < 1e6 && p.second < 1e6);
505             return static_cast<size_t>(p.first * 1e6 + p.second);
506         }
507     };
508 
509     // Distance between current active tracks.
510     std::unordered_map<std::pair<size_t, size_t>, float, pair_hash> tracks_dists_;
511 
512     // Whether collect matches and compute confusion matrices for
513     // track-detection
514     // association task (base classifier, reid-based classifier,
515     // reid-classiifer).
516     bool collect_matches_;
517 
518     // This vector contains decisions made by
519     // fast_apperance-motion-shape affinity model.
520     std::vector<Match> base_classifier_matches_;
521 
522     // This vector contains decisions made by
523     // strong_apperance(cnn-reid)-motion-shape affinity model.
524     std::vector<Match> reid_based_classifier_matches_;
525 
526     // This vector contains decisions made by
527     // strong_apperance(cnn-reid) affinity model only.
528     std::vector<Match> reid_classifier_matches_;
529 
530     // Number of all current tracks.
531     size_t tracks_counter_;
532 
533     // Number of dropped valid tracks.
534     size_t valid_tracks_counter_;
535 
536     cv::Size frame_size_;
537 
538     std::vector<cv::Scalar> colors_;
539 
540     uint64_t prev_timestamp_;
541 };
542 
createTrackerByMatching(const TrackerParams & params)543 cv::Ptr<ITrackerByMatching> cv::tbm::createTrackerByMatching(const TrackerParams &params)
544 {
545     ITrackerByMatching* ptr = new TrackerByMatching(params);
546     return cv::Ptr<ITrackerByMatching>(ptr);
547 }
548 
TrackerParams()549 TrackerParams::TrackerParams()
550     : min_track_duration(1000),
551     forget_delay(150),
552     aff_thr_fast(0.8f),
553     aff_thr_strong(0.75f),
554     shape_affinity_w(0.5f),
555     motion_affinity_w(0.2f),
556     time_affinity_w(0.0f),
557     min_det_conf(0.1f),
558     bbox_aspect_ratios_range(0.666f, 5.0f),
559     bbox_heights_range(40.f, 1000.f),
560     predict(25),
561     strong_affinity_thr(0.2805f),
562     reid_thr(0.61f),
563     drop_forgotten_tracks(true),
564     max_num_objects_in_track(300) {}
565 
566 // Returns confusion matrix as:
567 //   |tp fn|
568 //   |fp tn|
ConfusionMatrix(const std::vector<Match> & matches)569 cv::Mat TrackerByMatching::ConfusionMatrix(const std::vector<Match> &matches) {
570     const bool kNegative = false;
571     cv::Mat conf_mat(2, 2, CV_32F, cv::Scalar(0));
572     for (const auto &m : matches) {
573         conf_mat.at<float>(m.gt_label == kNegative, m.pr_label == kNegative)++;
574     }
575 
576     return conf_mat;
577 }
578 
TrackerByMatching(const TrackerParams & params)579 TrackerByMatching::TrackerByMatching(const TrackerParams &params)
580     : params_(params),
581     descriptor_strong_(nullptr),
582     distance_strong_(nullptr),
583     collect_matches_(true),
584     tracks_counter_(0),
585     valid_tracks_counter_(0),
586     frame_size_(0, 0),
587     prev_timestamp_(std::numeric_limits<uint64_t>::max()) {
588         ValidateParams(params);
589     }
590 
591 // Pipeline parameters getter.
params() const592 const TrackerParams &TrackerByMatching::params() const { return params_; }
593 
594 // Pipeline parameters setter.
setParams(const TrackerParams & params)595 void TrackerByMatching::setParams(const TrackerParams &params) {
596     ValidateParams(params);
597     params_ = params;
598 }
599 
600 // Descriptor fast getter.
descriptorFast() const601 const TrackerByMatching::Descriptor &TrackerByMatching::descriptorFast() const {
602     return descriptor_fast_;
603 }
604 
605 // Descriptor fast setter.
setDescriptorFast(const Descriptor & val)606 void TrackerByMatching::setDescriptorFast(const Descriptor &val) {
607     descriptor_fast_ = val;
608 }
609 
610 // Descriptor strong getter.
descriptorStrong() const611 const TrackerByMatching::Descriptor &TrackerByMatching::descriptorStrong() const {
612     return descriptor_strong_;
613 }
614 
615 // Descriptor strong setter.
setDescriptorStrong(const Descriptor & val)616 void TrackerByMatching::setDescriptorStrong(const Descriptor &val) {
617     descriptor_strong_ = val;
618 }
619 
620 // Distance fast getter.
distanceFast() const621 const TrackerByMatching::Distance &TrackerByMatching::distanceFast() const { return distance_fast_; }
622 
623 // Distance fast setter.
setDistanceFast(const Distance & val)624 void TrackerByMatching::setDistanceFast(const Distance &val) { distance_fast_ = val; }
625 
626 // Distance strong getter.
distanceStrong() const627 const TrackerByMatching::Distance &TrackerByMatching::distanceStrong() const { return distance_strong_; }
628 
629 // Distance strong setter.
setDistanceStrong(const Distance & val)630 void TrackerByMatching::setDistanceStrong(const Distance &val) { distance_strong_ = val; }
631 
632 // Returns all tracks including forgotten (lost too many frames ago).
633 const std::unordered_map<size_t, Track> &
tracks() const634 TrackerByMatching::tracks() const {
635     return tracks_;
636 }
637 
638 // Returns indexes of active tracks only.
active_track_ids() const639 const std::set<size_t> &TrackerByMatching::active_track_ids() const {
640     return active_track_ids_;
641 }
642 
643 
644 // Returns decisions made by heuristic based on fast distance/descriptor and
645 // shape, motion and time affinity.
646 const std::vector<TrackerByMatching::Match> &
base_classifier_matches() const647 TrackerByMatching::base_classifier_matches() const {
648     return base_classifier_matches_;
649 }
650 
651 // Returns decisions made by heuristic based on strong distance/descriptor
652 // and
653 // shape, motion and time affinity.
reid_based_classifier_matches() const654 const std::vector<TrackerByMatching::Match> &TrackerByMatching::reid_based_classifier_matches() const {
655     return reid_based_classifier_matches_;
656 }
657 
658 // Returns decisions made by strong distance/descriptor affinity.
reid_classifier_matches() const659 const std::vector<TrackerByMatching::Match> &TrackerByMatching::reid_classifier_matches() const {
660     return reid_classifier_matches_;
661 }
662 
// Keeps only the detections that pass all three filters, in input order:
//  - confidence strictly above params_.min_det_conf;
//  - height / width inside params_.bbox_aspect_ratios_range;
//  - height inside params_.bbox_heights_range.
TrackedObjects TrackerByMatching::FilterDetections(
    const TrackedObjects &detections) const {
    TrackedObjects kept;
    for (const auto &det : detections) {
        const float aspect_ratio = static_cast<float>(det.rect.height) / det.rect.width;

        if (!(det.confidence > params_.min_det_conf)) {
            continue;
        }
        if (!IsInRange(aspect_ratio, params_.bbox_aspect_ratios_range)) {
            continue;
        }
        if (!IsInRange(static_cast<float>(det.rect.height), params_.bbox_heights_range)) {
            continue;
        }
        kept.emplace_back(det);
    }
    return kept;
}
676 
SolveAssignmentProblem(const std::set<size_t> & track_ids,const TrackedObjects & detections,const std::vector<cv::Mat> & descriptors,std::set<size_t> & unmatched_tracks,std::set<size_t> & unmatched_detections,std::set<std::tuple<size_t,size_t,float>> & matches)677 void TrackerByMatching::SolveAssignmentProblem(
678     const std::set<size_t> &track_ids, const TrackedObjects &detections,
679     const std::vector<cv::Mat> &descriptors,
680     std::set<size_t>& unmatched_tracks, std::set<size_t>& unmatched_detections,
681     std::set<std::tuple<size_t, size_t, float>>& matches) {
682     unmatched_tracks.clear();
683     unmatched_detections.clear();
684 
685     TBM_CHECK(!track_ids.empty());
686     TBM_CHECK(!detections.empty());
687     TBM_CHECK(descriptors.size() == detections.size());
688     matches.clear();
689 
690     cv::Mat dissimilarity;
691     ComputeDissimilarityMatrix(track_ids, detections, descriptors,
692                                dissimilarity);
693 
694     auto res = KuhnMunkres().Solve(dissimilarity);
695 
696     for (size_t i = 0; i < detections.size(); i++) {
697         unmatched_detections.insert(i);
698     }
699 
700     int i = 0;
701     for (auto id : track_ids) {
702         if (res[i] < detections.size()) {
703             matches.emplace(id, res[i], 1 - dissimilarity.at<float>(i, static_cast<int>(res[i])));
704         } else {
705             unmatched_tracks.insert(id);
706         }
707         i++;
708     }
709 }
710 
all_tracks(bool valid_only) const711 const ObjectTracks TrackerByMatching::all_tracks(bool valid_only) const {
712     ObjectTracks all_objects;
713     int counter = 0;
714 
715     std::set<size_t> sorted_ids;
716     for (const auto &pair : tracks()) {
717         sorted_ids.emplace(pair.first);
718     }
719 
720     for (size_t id : sorted_ids) {
721         if (!valid_only || isTrackValid(id)) {
722             TrackedObjects filtered_objects;
723             for (const auto &object : tracks().at(id).objects) {
724                 filtered_objects.emplace_back(object);
725                 filtered_objects.back().object_id = counter;
726             }
727             all_objects.emplace(counter++, filtered_objects);
728         }
729     }
730     return all_objects;
731 }
732 
PredictRect(size_t id,size_t k,size_t s) const733 cv::Rect TrackerByMatching::PredictRect(size_t id, size_t k,
734                                         size_t s) const {
735     const auto &track = tracks_.at(id);
736     TBM_CHECK(!track.empty());
737 
738     if (track.size() == 1) {
739         return track[0].rect;
740     }
741 
742     size_t start_i = track.size() > k ? track.size() - k : 0;
743     float width = 0, height = 0;
744 
745     for (size_t i = start_i; i < track.size(); i++) {
746         width += track[i].rect.width;
747         height += track[i].rect.height;
748     }
749 
750     TBM_CHECK(track.size() - start_i > 0);
751     width /= (track.size() - start_i);
752     height /= (track.size() - start_i);
753 
754     float delim = 0;
755     cv::Point2f d(0, 0);
756 
757     for (size_t i = start_i + 1; i < track.size(); i++) {
758         d += cv::Point2f(Center(track[i].rect) - Center(track[i - 1].rect));
759         delim += (track[i].frame_idx - track[i - 1].frame_idx);
760     }
761 
762     if (delim) {
763         d /= delim;
764     }
765 
766     s += 1;
767 
768     cv::Point c = Center(track.back().rect);
769     return cv::Rect(static_cast<int>(c.x - width / 2 + d.x * s),
770                     static_cast<int>(c.y - height / 2 + d.y * s),
771                     static_cast<int>(width),
772                     static_cast<int>(height));
773 }
774 
775 
EraseTrackIfBBoxIsOutOfFrame(size_t track_id)776 bool TrackerByMatching::EraseTrackIfBBoxIsOutOfFrame(size_t track_id) {
777     if (tracks_.find(track_id) == tracks_.end()) return true;
778     auto c = Center(tracks_.at(track_id).predicted_rect);
779     if (!prev_frame_size_.empty() &&
780         (c.x < 0 || c.y < 0 || c.x > prev_frame_size_.width ||
781          c.y > prev_frame_size_.height)) {
782         tracks_.at(track_id).lost = params_.forget_delay + 1;
783         for (auto id : active_track_ids()) {
784             size_t min_id = std::min(id, track_id);
785             size_t max_id = std::max(id, track_id);
786             tracks_dists_.erase(std::pair<size_t, size_t>(min_id, max_id));
787         }
788         active_track_ids_.erase(track_id);
789         return true;
790     }
791     return false;
792 }
793 
EraseTrackIfItWasLostTooManyFramesAgo(size_t track_id)794 bool TrackerByMatching::EraseTrackIfItWasLostTooManyFramesAgo(
795     size_t track_id) {
796     if (tracks_.find(track_id) == tracks_.end()) return true;
797     if (tracks_.at(track_id).lost > params_.forget_delay) {
798         for (auto id : active_track_ids()) {
799             size_t min_id = std::min(id, track_id);
800             size_t max_id = std::max(id, track_id);
801             tracks_dists_.erase(std::pair<size_t, size_t>(min_id, max_id));
802         }
803         active_track_ids_.erase(track_id);
804 
805         return true;
806     }
807     return false;
808 }
809 
UpdateLostTrackAndEraseIfItsNeeded(size_t track_id)810 bool TrackerByMatching::UpdateLostTrackAndEraseIfItsNeeded(
811     size_t track_id) {
812     tracks_.at(track_id).lost++;
813     tracks_.at(track_id).predicted_rect =
814         PredictRect(track_id, params().predict, tracks_.at(track_id).lost);
815 
816     bool erased = EraseTrackIfBBoxIsOutOfFrame(track_id);
817     if (!erased) erased = EraseTrackIfItWasLostTooManyFramesAgo(track_id);
818     return erased;
819 }
820 
UpdateLostTracks(const std::set<size_t> & track_ids)821 void TrackerByMatching::UpdateLostTracks(
822     const std::set<size_t> &track_ids) {
823     for (auto track_id : track_ids) {
824         UpdateLostTrackAndEraseIfItsNeeded(track_id);
825     }
826 }
827 
// Runs one tracking step on `frame`.
//
// `input_detections` are filtered, stamped with `timestamp`, matched against
// the currently active tracks, and then either appended to a track, turned
// into a new track, or dropped. Tracks that received no detection are updated
// as lost.
//
// Preconditions enforced with TBM checks:
//  - `timestamp` must be strictly greater than the previous one;
//  - `frame` must have the same size as the first processed frame.
void TrackerByMatching::process(const cv::Mat &frame,
                                const TrackedObjects &input_detections,
                                uint64_t timestamp) {
    // The ordering check is skipped while prev_timestamp_ holds the maximum
    // uint64_t value (presumably the "no frame processed yet" sentinel).
    if (prev_timestamp_ != std::numeric_limits<uint64_t>::max())
        TBM_CHECK_LT(static_cast<size_t>(prev_timestamp_), static_cast<size_t>(timestamp));

    // Remember the size of the first frame; later frames must match it.
    if (frame_size_ == cv::Size(0, 0)) {
        frame_size_ = frame.size();
    } else {
        TBM_CHECK_EQ(frame_size_, frame.size());
    }

    TrackedObjects detections = FilterDetections(input_detections);
    for (auto &obj : detections) {
        obj.timestamp = timestamp;
    }

    std::vector<cv::Mat> descriptors_fast;
    ComputeFastDesciptors(frame, detections, descriptors_fast);

    // Work on a copy: active_track_ids_ is modified below while tracks are
    // added and erased.
    auto active_tracks = active_track_ids_;

    if (!active_tracks.empty() && !detections.empty()) {
        std::set<size_t> unmatched_tracks, unmatched_detections;
        // Each match is (track id, detection index, affinity).
        std::set<std::tuple<size_t, size_t, float>> matches;

        SolveAssignmentProblem(active_tracks, detections, descriptors_fast,
                               unmatched_tracks,
                               unmatched_detections, matches);

        // track id -> (strong matcher verdict, strong descriptor of the detection)
        std::map<size_t, std::pair<bool, cv::Mat>> is_matching_to_track;

        // The strong matcher only looks at the pairs selected by
        // GetTrackToDetectionIds.
        if (distance_strong_) {
            std::vector<std::pair<size_t, size_t>> reid_track_and_det_ids =
                GetTrackToDetectionIds(matches);
            is_matching_to_track = StrongMatching(
                frame, detections, reid_track_and_det_ids);
        }

        for (const auto &match : matches) {
            size_t track_id = std::get<0>(match);
            size_t det_id = std::get<1>(match);
            float conf = std::get<2>(match);

            // Copy of the track's last object, moved to the predicted position.
            auto last_det = tracks_.at(track_id).objects.back();
            last_det.rect = tracks_.at(track_id).predicted_rect;

            if (collect_matches_ && last_det.object_id >= 0 &&
                detections[det_id].object_id >= 0) {
                base_classifier_matches_.emplace_back(
                    tracks_.at(track_id).objects.back(), last_det.rect,
                    detections[det_id], conf > params_.aff_thr_fast);
            }

            if (conf > params_.aff_thr_fast) {
                // Confident fast match: extend the track without a strong descriptor.
                AppendToTrack(frame, track_id, detections[det_id],
                              descriptors_fast[det_id], cv::Mat());
                unmatched_detections.erase(det_id);
            } else {
                if (conf > params_.strong_affinity_thr) {
                    // Ambiguous match: accept it only if the strong matcher agrees.
                    // Note that operator[] default-inserts (false, empty Mat) for
                    // tracks the strong matcher did not see.
                    if (distance_strong_ && is_matching_to_track[track_id].first) {
                        AppendToTrack(frame, track_id, detections[det_id],
                                      descriptors_fast[det_id],
                                      is_matching_to_track[track_id].second.clone());
                    } else {
                        // Rejected: the track is updated as lost, and the detection
                        // starts a new track only if that update erased the track.
                        if (UpdateLostTrackAndEraseIfItsNeeded(track_id)) {
                            AddNewTrack(frame, detections[det_id], descriptors_fast[det_id],
                                        distance_strong_
                                        ? is_matching_to_track[track_id].second.clone()
                                        : cv::Mat());
                        }
                    }

                    // The detection is consumed in every ambiguous case, including
                    // when it was neither appended nor turned into a new track.
                    unmatched_detections.erase(det_id);
                } else {
                    // Affinity too low: treat the track as unmatched.
                    unmatched_tracks.insert(track_id);
                }
            }
        }

        AddNewTracks(frame, detections, descriptors_fast, unmatched_detections);
        UpdateLostTracks(unmatched_tracks);

        // Re-check every track that was active at the start of this step.
        for (size_t id : active_tracks) {
            EraseTrackIfBBoxIsOutOfFrame(id);
        }
    } else {
        // Nothing to match: every detection starts a track and every
        // previously active track is updated as lost.
        AddNewTracks(frame, detections, descriptors_fast);
        UpdateLostTracks(active_tracks);
    }

    prev_frame_size_ = frame.size();
    if (params_.drop_forgotten_tracks) dropForgottenTracks();

    tracks_dists_.clear();
    prev_timestamp_ = timestamp;
}
925 
dropForgottenTracks()926 void TrackerByMatching::dropForgottenTracks() {
927     std::unordered_map<size_t, Track> new_tracks;
928     std::set<size_t> new_active_tracks;
929 
930     size_t max_id = 0;
931     if (!active_track_ids_.empty())
932         max_id =
933             *std::max_element(active_track_ids_.begin(), active_track_ids_.end());
934 
935     const size_t kMaxTrackID = 10000;
936     bool reassign_id = max_id > kMaxTrackID;
937 
938     size_t counter = 0;
939     for (const auto &pair : tracks_) {
940         if (!isTrackForgotten(pair.first)) {
941             new_tracks.emplace(reassign_id ? counter : pair.first, pair.second);
942             new_active_tracks.emplace(reassign_id ? counter : pair.first);
943             counter++;
944 
945         } else {
946             if (isTrackValid(pair.first)) {
947                 valid_tracks_counter_++;
948             }
949         }
950     }
951     tracks_.swap(new_tracks);
952     active_track_ids_.swap(new_active_tracks);
953 
954     tracks_counter_ = reassign_id ? counter : tracks_counter_;
955 }
956 
dropForgottenTrack(size_t track_id)957 void TrackerByMatching::dropForgottenTrack(size_t track_id) {
958     TBM_CHECK(isTrackForgotten(track_id));
959     TBM_CHECK(active_track_ids_.count(track_id) == 0);
960     tracks_.erase(track_id);
961 }
962 
ShapeAffinity(float weight,const cv::Rect & trk,const cv::Rect & det)963 float TrackerByMatching::ShapeAffinity(float weight, const cv::Rect &trk,
964                                        const cv::Rect &det) {
965     float w_dist = static_cast<float>(std::fabs(trk.width - det.width) / (trk.width + det.width));
966     float h_dist = static_cast<float>(std::fabs(trk.height - det.height) / (trk.height + det.height));
967     return exp(-weight * (w_dist + h_dist));
968 }
969 
MotionAffinity(float weight,const cv::Rect & trk,const cv::Rect & det)970 float TrackerByMatching::MotionAffinity(float weight, const cv::Rect &trk,
971                                         const cv::Rect &det) {
972     float x_dist = static_cast<float>(trk.x - det.x) * (trk.x - det.x) /
973         (det.width * det.width);
974     float y_dist = static_cast<float>(trk.y - det.y) * (trk.y - det.y) /
975         (det.height * det.height);
976     return exp(-weight * (x_dist + y_dist));
977 }
978 
TimeAffinity(float weight,const float & trk_time,const float & det_time)979 float TrackerByMatching::TimeAffinity(float weight, const float &trk_time,
980                                       const float &det_time) {
981     return exp(-weight * std::fabs(trk_time - det_time));
982 }
983 
ComputeFastDesciptors(const cv::Mat & frame,const TrackedObjects & detections,std::vector<cv::Mat> & desriptors)984 void TrackerByMatching::ComputeFastDesciptors(
985     const cv::Mat &frame, const TrackedObjects &detections,
986     std::vector<cv::Mat>& desriptors) {
987     desriptors = std::vector<cv::Mat>(detections.size(), cv::Mat());
988     for (size_t i = 0; i < detections.size(); i++) {
989         descriptor_fast_->compute(frame(detections[i].rect).clone(),
990                                   desriptors[i]);
991     }
992 }
993 
ComputeDissimilarityMatrix(const std::set<size_t> & active_tracks,const TrackedObjects & detections,const std::vector<cv::Mat> & descriptors_fast,cv::Mat & dissimilarity_matrix)994 void TrackerByMatching::ComputeDissimilarityMatrix(
995     const std::set<size_t> &active_tracks, const TrackedObjects &detections,
996     const std::vector<cv::Mat> &descriptors_fast,
997     cv::Mat& dissimilarity_matrix) {
998     cv::Mat am(static_cast<int>(active_tracks.size()), static_cast<int>(detections.size()), CV_32F, cv::Scalar(0));
999     int i = 0;
1000     for (auto id : active_tracks) {
1001         auto ptr = am.ptr<float>(i);
1002         for (size_t j = 0; j < descriptors_fast.size(); j++) {
1003             auto last_det = tracks_.at(id).objects.back();
1004             last_det.rect = tracks_.at(id).predicted_rect;
1005             ptr[j] = AffinityFast(tracks_.at(id).descriptor_fast, last_det,
1006                                   descriptors_fast[j], detections[j]);
1007         }
1008         i++;
1009     }
1010     dissimilarity_matrix = 1.0 - am;
1011 }
1012 
ComputeDistances(const cv::Mat & frame,const TrackedObjects & detections,const std::vector<std::pair<size_t,size_t>> & track_and_det_ids,std::map<size_t,cv::Mat> & det_id_to_descriptor)1013 std::vector<float> TrackerByMatching::ComputeDistances(
1014     const cv::Mat &frame,
1015     const TrackedObjects& detections,
1016     const std::vector<std::pair<size_t, size_t>> &track_and_det_ids,
1017     std::map<size_t, cv::Mat>& det_id_to_descriptor) {
1018     std::map<size_t, size_t> det_to_batch_ids;
1019     std::map<size_t, size_t> track_to_batch_ids;
1020 
1021     std::vector<cv::Mat> images;
1022     std::vector<cv::Mat> descriptors;
1023     for (size_t i = 0; i < track_and_det_ids.size(); i++) {
1024         size_t track_id = track_and_det_ids[i].first;
1025         size_t det_id = track_and_det_ids[i].second;
1026 
1027         if (tracks_.at(track_id).descriptor_strong.empty()) {
1028             images.push_back(tracks_.at(track_id).last_image);
1029             descriptors.push_back(cv::Mat());
1030             track_to_batch_ids[track_id] = descriptors.size() - 1;
1031         }
1032 
1033         images.push_back(frame(detections[det_id].rect));
1034         descriptors.push_back(cv::Mat());
1035         det_to_batch_ids[det_id] = descriptors.size() - 1;
1036     }
1037 
1038     descriptor_strong_->compute(images, descriptors);
1039 
1040     std::vector<cv::Mat> descriptors1;
1041     std::vector<cv::Mat> descriptors2;
1042     for (size_t i = 0; i < track_and_det_ids.size(); i++) {
1043         size_t track_id = track_and_det_ids[i].first;
1044         size_t det_id = track_and_det_ids[i].second;
1045 
1046         if (tracks_.at(track_id).descriptor_strong.empty()) {
1047             tracks_.at(track_id).descriptor_strong =
1048                 descriptors[track_to_batch_ids[track_id]].clone();
1049         }
1050         det_id_to_descriptor[det_id] = descriptors[det_to_batch_ids[det_id]];
1051 
1052         descriptors1.push_back(descriptors[det_to_batch_ids[det_id]]);
1053         descriptors2.push_back(tracks_.at(track_id).descriptor_strong);
1054     }
1055 
1056     std::vector<float> distances =
1057         distance_strong_->compute(descriptors1, descriptors2);
1058 
1059     return distances;
1060 }
1061 
1062 std::vector<std::pair<size_t, size_t>>
GetTrackToDetectionIds(const std::set<std::tuple<size_t,size_t,float>> & matches)1063 TrackerByMatching::GetTrackToDetectionIds(
1064     const std::set<std::tuple<size_t, size_t, float>> &matches) {
1065     std::vector<std::pair<size_t, size_t>> track_and_det_ids;
1066 
1067     for (const auto &match : matches) {
1068         size_t track_id = std::get<0>(match);
1069         size_t det_id = std::get<1>(match);
1070         float conf = std::get<2>(match);
1071         if (conf < params_.aff_thr_fast && conf > params_.strong_affinity_thr) {
1072             track_and_det_ids.emplace_back(track_id, det_id);
1073         }
1074     }
1075     return track_and_det_ids;
1076 }
1077 
1078 std::map<size_t, std::pair<bool, cv::Mat>>
StrongMatching(const cv::Mat & frame,const TrackedObjects & detections,const std::vector<std::pair<size_t,size_t>> & track_and_det_ids)1079 TrackerByMatching::StrongMatching(
1080     const cv::Mat &frame,
1081     const TrackedObjects& detections,
1082     const std::vector<std::pair<size_t, size_t>> &track_and_det_ids) {
1083     std::map<size_t, std::pair<bool, cv::Mat>> is_matching;
1084 
1085     if (track_and_det_ids.size() == 0) {
1086         return is_matching;
1087     }
1088 
1089     std::map<size_t, cv::Mat> det_ids_to_descriptors;
1090     std::vector<float> distances =
1091         ComputeDistances(frame, detections,
1092                          track_and_det_ids, det_ids_to_descriptors);
1093 
1094     for (size_t i = 0; i < track_and_det_ids.size(); i++) {
1095         auto reid_affinity = 1.0 - distances[i];
1096 
1097         size_t track_id = track_and_det_ids[i].first;
1098         size_t det_id = track_and_det_ids[i].second;
1099 
1100         const auto& track = tracks_.at(track_id);
1101         const auto& detection = detections[det_id];
1102 
1103         auto last_det = track.objects.back();
1104         last_det.rect = track.predicted_rect;
1105 
1106         float affinity = static_cast<float>(reid_affinity * Affinity(last_det, detection));
1107 
1108         if (collect_matches_ && last_det.object_id >= 0 &&
1109             detection.object_id >= 0) {
1110             reid_classifier_matches_.emplace_back(track.objects.back(), last_det.rect,
1111                                                   detection,
1112                                                   reid_affinity > params_.reid_thr);
1113 
1114             reid_based_classifier_matches_.emplace_back(
1115                 track.objects.back(), last_det.rect, detection,
1116                 affinity > params_.aff_thr_strong);
1117         }
1118 
1119         bool is_detection_matching =
1120             reid_affinity > params_.reid_thr && affinity > params_.aff_thr_strong;
1121 
1122         is_matching[track_id] = std::pair<bool, cv::Mat>(
1123             is_detection_matching, det_ids_to_descriptors[det_id]);
1124     }
1125     return is_matching;
1126 }
1127 
AddNewTracks(const cv::Mat & frame,const TrackedObjects & detections,const std::vector<cv::Mat> & descriptors_fast)1128 void TrackerByMatching::AddNewTracks(
1129     const cv::Mat &frame, const TrackedObjects &detections,
1130     const std::vector<cv::Mat> &descriptors_fast) {
1131     TBM_CHECK(detections.size() == descriptors_fast.size());
1132     for (size_t i = 0; i < detections.size(); i++) {
1133         AddNewTrack(frame, detections[i], descriptors_fast[i]);
1134     }
1135 }
1136 
AddNewTracks(const cv::Mat & frame,const TrackedObjects & detections,const std::vector<cv::Mat> & descriptors_fast,const std::set<size_t> & ids)1137 void TrackerByMatching::AddNewTracks(
1138     const cv::Mat &frame, const TrackedObjects &detections,
1139     const std::vector<cv::Mat> &descriptors_fast, const std::set<size_t> &ids) {
1140     TBM_CHECK(detections.size() == descriptors_fast.size());
1141     for (size_t i : ids) {
1142         TBM_CHECK(i < detections.size());
1143         AddNewTrack(frame, detections[i], descriptors_fast[i]);
1144     }
1145 }
1146 
AddNewTrack(const cv::Mat & frame,const TrackedObject & detection,const cv::Mat & descriptor_fast,const cv::Mat & descriptor_strong)1147 void TrackerByMatching::AddNewTrack(const cv::Mat &frame,
1148                                     const TrackedObject &detection,
1149                                     const cv::Mat &descriptor_fast,
1150                                     const cv::Mat &descriptor_strong) {
1151     auto detection_with_id = detection;
1152     detection_with_id.object_id = static_cast<int>(tracks_counter_);
1153     tracks_.emplace(std::pair<size_t, Track>(
1154             tracks_counter_,
1155             Track({detection_with_id}, frame(detection.rect).clone(),
1156                   descriptor_fast.clone(), descriptor_strong.clone())));
1157 
1158     for (size_t id : active_track_ids_) {
1159         tracks_dists_.emplace(std::pair<size_t, size_t>(id, tracks_counter_),
1160                               std::numeric_limits<float>::max());
1161     }
1162 
1163     active_track_ids_.insert(tracks_counter_);
1164     tracks_counter_++;
1165 }
1166 
AppendToTrack(const cv::Mat & frame,size_t track_id,const TrackedObject & detection,const cv::Mat & descriptor_fast,const cv::Mat & descriptor_strong)1167 void TrackerByMatching::AppendToTrack(const cv::Mat &frame,
1168                                       size_t track_id,
1169                                       const TrackedObject &detection,
1170                                       const cv::Mat &descriptor_fast,
1171                                       const cv::Mat &descriptor_strong) {
1172     TBM_CHECK(!isTrackForgotten(track_id));
1173 
1174     auto detection_with_id = detection;
1175     detection_with_id.object_id = static_cast<int>(track_id);
1176 
1177     auto &cur_track = tracks_.at(track_id);
1178     cur_track.objects.emplace_back(detection_with_id);
1179     cur_track.predicted_rect = detection.rect;
1180     cur_track.lost = 0;
1181     cur_track.last_image = frame(detection.rect).clone();
1182     cur_track.descriptor_fast = descriptor_fast.clone();
1183     cur_track.length++;
1184 
1185     if (cur_track.descriptor_strong.empty()) {
1186         cur_track.descriptor_strong = descriptor_strong.clone();
1187     } else if (!descriptor_strong.empty()) {
1188         cur_track.descriptor_strong =
1189             0.5 * (descriptor_strong + cur_track.descriptor_strong);
1190     }
1191 
1192 
1193     if (params_.max_num_objects_in_track > 0) {
1194         while (cur_track.size() >
1195                static_cast<size_t>(params_.max_num_objects_in_track)) {
1196             cur_track.objects.erase(cur_track.objects.begin());
1197         }
1198     }
1199 }
1200 
AffinityFast(const cv::Mat & descriptor1,const TrackedObject & obj1,const cv::Mat & descriptor2,const TrackedObject & obj2)1201 float TrackerByMatching::AffinityFast(const cv::Mat &descriptor1,
1202                                       const TrackedObject &obj1,
1203                                       const cv::Mat &descriptor2,
1204                                       const TrackedObject &obj2) {
1205     const float eps = static_cast<float>(1e-6);
1206     float shp_aff = ShapeAffinity(params_.shape_affinity_w, obj1.rect, obj2.rect);
1207     if (shp_aff < eps) return 0.0;
1208 
1209     float mot_aff =
1210         MotionAffinity(params_.motion_affinity_w, obj1.rect, obj2.rect);
1211     if (mot_aff < eps) return 0.0;
1212     float time_aff =
1213         TimeAffinity(params_.time_affinity_w, static_cast<float>(obj1.frame_idx), static_cast<float>(obj2.frame_idx));
1214 
1215     if (time_aff < eps) return 0.0;
1216 
1217     float app_aff = static_cast<float>(1.0 - distance_fast_->compute(descriptor1, descriptor2));
1218 
1219     return shp_aff * mot_aff * app_aff * time_aff;
1220 }
1221 
Affinity(const TrackedObject & obj1,const TrackedObject & obj2)1222 float TrackerByMatching::Affinity(const TrackedObject &obj1,
1223                                   const TrackedObject &obj2) {
1224     float shp_aff = ShapeAffinity(params_.shape_affinity_w, obj1.rect, obj2.rect);
1225     float mot_aff =
1226         MotionAffinity(params_.motion_affinity_w, obj1.rect, obj2.rect);
1227     float time_aff =
1228         TimeAffinity(params_.time_affinity_w, static_cast<float>(obj1.frame_idx), static_cast<float>(obj2.frame_idx));
1229     return shp_aff * mot_aff * time_aff;
1230 }
1231 
isTrackValid(size_t id) const1232 bool TrackerByMatching::isTrackValid(size_t id) const {
1233     const auto& track = tracks_.at(id);
1234     const auto &objects = track.objects;
1235     if (objects.empty()) {
1236         return false;
1237     }
1238     int64_t duration_ms = objects.back().timestamp - track.first_object.timestamp;
1239     if (duration_ms < static_cast<int64_t>(params_.min_track_duration))
1240         return false;
1241     return true;
1242 }
1243 
isTrackForgotten(size_t id) const1244 bool TrackerByMatching::isTrackForgotten(size_t id) const {
1245     return isTrackForgotten(tracks_.at(id));
1246 }
1247 
isTrackForgotten(const Track & track) const1248 bool TrackerByMatching::isTrackForgotten(const Track &track) const {
1249     return (track.lost > params_.forget_delay);
1250 }
1251 
count() const1252 size_t TrackerByMatching::count() const {
1253     size_t count = valid_tracks_counter_;
1254     for (const auto &pair : tracks_) {
1255         count += (isTrackValid(pair.first) ? 1 : 0);
1256     }
1257     return count;
1258 }
1259 
1260 std::unordered_map<size_t, std::vector<cv::Point>>
getActiveTracks() const1261 TrackerByMatching::getActiveTracks() const {
1262     std::unordered_map<size_t, std::vector<cv::Point>> active_tracks;
1263     for (size_t idx : active_track_ids()) {
1264         auto track = tracks().at(idx);
1265         if (isTrackValid(idx) && !isTrackForgotten(idx)) {
1266             active_tracks.emplace(idx, Centers(track.objects));
1267         }
1268     }
1269     return active_tracks;
1270 }
1271 
trackedDetections() const1272 TrackedObjects TrackerByMatching::trackedDetections() const {
1273     TrackedObjects detections;
1274     for (size_t idx : active_track_ids()) {
1275         auto track = tracks().at(idx);
1276         if (isTrackValid(idx) && !track.lost) {
1277             detections.emplace_back(track.objects.back());
1278         }
1279     }
1280     return detections;
1281 }
1282 
// Returns a copy of `frame` with every active track drawn on it.
//
// Each track is rendered as a polyline through its object centres with its
// id printed at the last centre; the colour is picked from a palette of 100
// random colours (generated on first use) by `id % palette size`. For a
// track that is currently lost, a black line joins the last centre to the
// centre of the predicted rectangle.
cv::Mat TrackerByMatching::drawActiveTracks(const cv::Mat &frame) {
    cv::Mat out_frame = frame.clone();

    if (colors_.empty()) {
        int num_colors = 100;
        colors_ = GenRandomColors(num_colors);
    }

    const auto active_tracks = getActiveTracks();
    // Iterate by reference: each entry carries a full vector of points.
    for (const auto &active_track : active_tracks) {
        const size_t idx = active_track.first;
        const auto &centers = active_track.second;
        const auto &color = colors_[idx % colors_.size()];

        DrawPolyline(centers, color, out_frame);
        cv::putText(out_frame, std::to_string(idx), centers.back(),
                    cv::FONT_HERSHEY_SCRIPT_COMPLEX, 2.0, color, 3);

        const Track &track = tracks_.at(idx);
        if (track.lost) {
            cv::line(out_frame, centers.back(),
                     Center(track.predicted_rect), cv::Scalar(0, 0, 0), 4);
        }
    }

    return out_frame;
}
1309 
1310 const cv::Size kMinFrameSize = cv::Size(320, 240);
1311 const cv::Size kMaxFrameSize = cv::Size(1920, 1080);
1312 
PrintConfusionMatrices() const1313 void TrackerByMatching::PrintConfusionMatrices() const {
1314     std::cout << "Base classifier quality: " << std::endl;
1315     {
1316         auto cm = ConfusionMatrix(base_classifier_matches());
1317         std::cout << cm << std::endl;
1318         std::cout << "or" << std::endl;
1319         cm.row(0) = cm.row(0) / std::max(1.0, cv::sum(cm.row(0))[0]);
1320         cm.row(1) = cm.row(1) / std::max(1.0, cv::sum(cm.row(1))[0]);
1321         std::cout << cm << std::endl << std::endl;
1322     }
1323 
1324     std::cout << "Reid-based classifier quality: " << std::endl;
1325     {
1326         auto cm = ConfusionMatrix(reid_based_classifier_matches());
1327         std::cout << cm << std::endl;
1328         std::cout << "or" << std::endl;
1329         cm.row(0) = cm.row(0) / std::max(1.0, cv::sum(cm.row(0))[0]);
1330         cm.row(1) = cm.row(1) / std::max(1.0, cv::sum(cm.row(1))[0]);
1331         std::cout << cm << std::endl << std::endl;
1332     }
1333 
1334     std::cout << "Reid only classifier quality: " << std::endl;
1335     {
1336         auto cm = ConfusionMatrix(reid_classifier_matches());
1337         std::cout << cm << std::endl;
1338         std::cout << "or" << std::endl;
1339         cm.row(0) = cm.row(0) / std::max(1.0, cv::sum(cm.row(0))[0]);
1340         cm.row(1) = cm.row(1) / std::max(1.0, cv::sum(cm.row(1))[0]);
1341         std::cout << cm << std::endl << std::endl;
1342     }
1343 }
1344 
1345 }}}  // namespace
1346