1pro match, a, b, suba, subb, COUNT = count, SORT = sort, epsilon=epsilon
2  ;+
3  ; NAME:
4  ;       MATCH
5  ; PURPOSE:
6  ;       Routine to match values in two vectors.
7  ;
8  ; CALLING SEQUENCE:
9  ;       match, a, b, suba, subb, [ COUNT =, /SORT, EPSILON =  ]
10  ;
11  ; INPUTS:
12  ;       a,b - two vectors to match elements, numeric or string data types
13  ;
14  ; OUTPUTS:
15  ;       suba - subscripts of elements in vector a with a match
16  ;               in vector b
17  ;       subb - subscripts of the positions of the elements in
18  ;               vector b with matchs in vector a.
19  ;
20  ;       suba and subb are ordered such that a[suba] equals b[subb]
21  ;       suba and subb are set to !NULL if there are no matches (or set to -1
22  ;		if prior to IDL Version 8.0)
23  ;
24  ; OPTIONAL INPUT KEYWORD:
25  ;       /SORT - By default, MATCH uses two different algorithm: (1) the
26  ;               /REVERSE_INDICES keyword to HISTOGRAM is used for integer data,
27  ;               while (2) a sorting algorithm is used for non-integer data.  The
28  ;               histogram algorithm is usually faster, except when the input
29  ;               vectors are sparse and contain very large numbers, possibly
30  ;               causing memory problems.   Use the /SORT keyword to always use
31  ;               the sort algorithm.
32  ;       epsilon - if values are within epsilon, they are considered equal. Used only
33  ;               only for non-integer matching.  Note that input vectors should
34  ;               be unique to within epsilon to provide one-to-one mapping..
35  ;               Default=0.
36  ;
37  ; OPTIONAL KEYWORD OUTPUT:
38  ;       COUNT - set to the number of matches, integer scalar
39  ;
40  ; SIDE EFFECTS:
41  ;       The obsolete system variable !ERR is set to the number of matches;
42  ;       however, the use !ERR is deprecated in favor of the COUNT keyword
43  ;
44  ; RESTRICTIONS:
45  ;       The vectors a and b should not have duplicate values within them.
46  ;       You can use rem_dup function to remove duplicate values
47  ;       in a vector
48  ;
49  ; EXAMPLE:
50  ;       If a = [3,5,7,9,11]   & b = [5,6,7,8,9,10]
51  ;       then
52  ;               IDL> match, a, b, suba, subb, COUNT = count
53  ;
54  ;       will give suba = [1,2,3], subb = [0,2,4],  COUNT = 3
55  ;       and       a[suba] = b[subb] = [5,7,9]
56  ;
57  ;
58  ; METHOD:
59  ;       For non-integer data types, the two input vectors are combined and
60  ;       sorted and the consecutive equal elements are identified.   For integer
61  ;       data types, the /REVERSE_INDICES keyword to HISTOGRAM of each array
62  ;       is used to identify where the two arrays have elements in common.
63  ; HISTORY:
64  ;       D. Lindler  Mar. 1986.
65  ;       Fixed "indgen" call for very large arrays   W. Landsman  Sep 1991
66  ;       Added COUNT keyword    W. Landsman   Sep. 1992
67  ;       Fixed case where single element array supplied   W. Landsman Aug 95
68  ;       Use a HISTOGRAM algorithm for integer vector inputs for improved
69  ;             performance                W. Landsman         March 2000
70  ;       Work again for strings           W. Landsman         April 2000
71  ;       Use size(/type)                  W. Landsman         December 2002
72  ;       Work for scalar integer input    W. Landsman         June 2003
73  ;       Assume since V5.4, use COMPLEMENT to WHERE() W. Landsman Apr 2006
74  ;       Added epsilon keyword            Kim Tolbert         March 14, 2008
75  ;       Fix bug with Histogram method with all negative values W. Landsman/
76  ;       R. Gutermuth, return !NULL for no matches  November 2017
77  ;       Added epsilon test in na=1||nb=1 section (missed that when added
78  ;             epsilon in 2008)           Kim Tolbert         July 10, 2018
79  ;
80  ;-
81  ;-------------------------------------------------------------------------
82  compile_opt idl2
83  Catch, theError
84  IF theError NE 0 then begin
85    Catch,/Cancel
86    void = cgErrorMsg(/quiet)
87    RETURN
88  ENDIF
89
90
91  if N_elements(epsilon) EQ 0 then epsilon = 0
92
93  if N_params() LT 3 then begin
94    print,'Syntax - match, a, b, suba, subb, [ COUNT =, EPSILON=, /SORT]'
95    print,'    a,b -- input vectors for which to match elements'
96    print,'    suba,subb -- output subscript vectors of matched elements'
97    return
98  endif
99
100  da = size(a,/type) & db =size(b,/type)
101  if keyword_set(sort) then hist = 0b else $
102    hist = (( da LE 3 ) || (da GE 12)) &&  ((db LE 3) || (db GE 12 ))
103
104  if ~hist then begin           ;Non-integer calculation
105
106    na = N_elements(a)              ;number of elements in a
107    nb = N_elements(b)             ;number of elements in b
108
109    ; Check for a single element array
110
111    if (na EQ 1) || (nb EQ 1) then begin
112      if (nb GT 1) then begin
113        if epsilon eq 0. then subb = where(b EQ a[0], nw) else $
114          subb = where(abs(b - a[0]) lt epsilon, nw)
115        if (nw GT 0) then suba = replicate(0,nw) else suba = [-1]
116      endif else begin
117        if epsilon eq 0. then suba = where(a EQ b[0], nw) else $
118          suba = where(abs(a - b[0]) lt epsilon, nw)
119        if (nw GT 0) then subb = replicate(0,nw) else subb = [-1]
120      endelse
121      count = nw
122      return
123    endif
124
125    c = [ a, b ]                   ;combined list of a and b
126    ind = [ lindgen(na), lindgen(nb) ]       ;combined list of indices
127    vec = [ bytarr(na), replicate(1b,nb) ]  ;flag of which vector in  combined
128    ;list   0 - a   1 - b
129
130    ; sort combined list
131
132    sub = sort(c)
133    c = c[sub]
134    ind = ind[sub]
135    vec = vec[sub]
136
137    ; find duplicates in sorted combined list
138
139    n = na + nb                            ;total elements in c
140    if epsilon eq 0. then $
141      firstdup = where( (c EQ shift(c,-1)) and (vec NE shift(vec,-1)), Count ) $
142    else $
143      firstdup = where( (abs(c - shift(c,-1)) lt epsilon) and (vec NE shift(vec,-1)), Count )
144
145    if Count EQ 0 then begin               ;any found?
146      suba = lonarr(1)-1
147      subb = lonarr(1)-1
148      return
149    end
150
151    dup = lonarr( Count*2 )                     ;both duplicate values
152    even = lindgen( N_elements(firstdup))*2     ;Changed to LINDGEN 6-Sep-1991
153    dup[even] = firstdup
154    dup[even+1] = firstdup+1
155    ind = ind[dup]                         ;indices of duplicates
156    vec = vec[dup]                         ;vector id of duplicates
157    subb = ind[ where( vec, complement = vzero) ]             ;b subscripts
158    suba = ind[ vzero]
159
160  endif else begin             ;Integer calculation using histogram.
161
162    minab = min(a, MAX=maxa) > min(b, MAX=maxb) ;Only need intersection of ranges
163    maxab = maxa < maxb
164
165    ;If either set is empty, or their ranges don't intersect:
166    ;  result = NULL (which is denoted by integer = -1)
167    !ERR = -1
168    if !VERSION.RELEASE GE '8.0' then begin
169      suba = !NULL
170      subb = !NULL
171    endif else begin
172      suba = -1
173      subb = -1
174    endelse
175    COUNT = 0L
176    if maxab lt minab then return       ;No overlap
177
178    ha = histogram([a], MIN=minab, MAX=maxab, reverse_indices=reva)
179    hb = histogram([b], MIN=minab, MAX=maxab, reverse_indices=revb)
180
181    r = where((ha ne 0) and (hb ne 0), count)
182
183    if count gt 0 then begin
184      suba = reva[reva[r]]
185      subb = revb[revb[r]]
186    endif
187  endelse
188
189  return
190
191end
192