1pro match, a, b, suba, subb, COUNT = count, SORT = sort, epsilon=epsilon 2 ;+ 3 ; NAME: 4 ; MATCH 5 ; PURPOSE: 6 ; Routine to match values in two vectors. 7 ; 8 ; CALLING SEQUENCE: 9 ; match, a, b, suba, subb, [ COUNT =, /SORT, EPSILON = ] 10 ; 11 ; INPUTS: 12 ; a,b - two vectors to match elements, numeric or string data types 13 ; 14 ; OUTPUTS: 15 ; suba - subscripts of elements in vector a with a match 16 ; in vector b 17 ; subb - subscripts of the positions of the elements in 18 ; vector b with matchs in vector a. 19 ; 20 ; suba and subb are ordered such that a[suba] equals b[subb] 21 ; suba and subb are set to !NULL if there are no matches (or set to -1 22 ; if prior to IDL Version 8.0) 23 ; 24 ; OPTIONAL INPUT KEYWORD: 25 ; /SORT - By default, MATCH uses two different algorithm: (1) the 26 ; /REVERSE_INDICES keyword to HISTOGRAM is used for integer data, 27 ; while (2) a sorting algorithm is used for non-integer data. The 28 ; histogram algorithm is usually faster, except when the input 29 ; vectors are sparse and contain very large numbers, possibly 30 ; causing memory problems. Use the /SORT keyword to always use 31 ; the sort algorithm. 32 ; epsilon - if values are within epsilon, they are considered equal. Used only 33 ; only for non-integer matching. Note that input vectors should 34 ; be unique to within epsilon to provide one-to-one mapping.. 35 ; Default=0. 36 ; 37 ; OPTIONAL KEYWORD OUTPUT: 38 ; COUNT - set to the number of matches, integer scalar 39 ; 40 ; SIDE EFFECTS: 41 ; The obsolete system variable !ERR is set to the number of matches; 42 ; however, the use !ERR is deprecated in favor of the COUNT keyword 43 ; 44 ; RESTRICTIONS: 45 ; The vectors a and b should not have duplicate values within them. 46 ; You can use rem_dup function to remove duplicate values 47 ; in a vector 48 ; 49 ; EXAMPLE: 50 ; If a = [3,5,7,9,11] & b = [5,6,7,8,9,10] 51 ; then 52 ; IDL> match, a, b, suba, subb, COUNT = count 53 ; 54 ; will give suba = [1,2,3], subb = [0,2,4], COUNT = 3 55 ; and a[suba] = b[subb] = [5,7,9] 56 ; 57 ; 58 ; METHOD: 59 ; For non-integer data types, the two input vectors are combined and 60 ; sorted and the consecutive equal elements are identified. For integer 61 ; data types, the /REVERSE_INDICES keyword to HISTOGRAM of each array 62 ; is used to identify where the two arrays have elements in common. 63 ; HISTORY: 64 ; D. Lindler Mar. 1986. 65 ; Fixed "indgen" call for very large arrays W. Landsman Sep 1991 66 ; Added COUNT keyword W. Landsman Sep. 1992 67 ; Fixed case where single element array supplied W. Landsman Aug 95 68 ; Use a HISTOGRAM algorithm for integer vector inputs for improved 69 ; performance W. Landsman March 2000 70 ; Work again for strings W. Landsman April 2000 71 ; Use size(/type) W. Landsman December 2002 72 ; Work for scalar integer input W. Landsman June 2003 73 ; Assume since V5.4, use COMPLEMENT to WHERE() W. Landsman Apr 2006 74 ; Added epsilon keyword Kim Tolbert March 14, 2008 75 ; Fix bug with Histogram method with all negative values W. Landsman/ 76 ; R. Gutermuth, return !NULL for no matches November 2017 77 ; Added epsilon test in na=1||nb=1 section (missed that when added 78 ; epsilon in 2008) Kim Tolbert July 10, 2018 79 ; 80 ;- 81 ;------------------------------------------------------------------------- 82 compile_opt idl2 83 Catch, theError 84 IF theError NE 0 then begin 85 Catch,/Cancel 86 void = cgErrorMsg(/quiet) 87 RETURN 88 ENDIF 89 90 91 if N_elements(epsilon) EQ 0 then epsilon = 0 92 93 if N_params() LT 3 then begin 94 print,'Syntax - match, a, b, suba, subb, [ COUNT =, EPSILON=, /SORT]' 95 print,' a,b -- input vectors for which to match elements' 96 print,' suba,subb -- output subscript vectors of matched elements' 97 return 98 endif 99 100 da = size(a,/type) & db =size(b,/type) 101 if keyword_set(sort) then hist = 0b else $ 102 hist = (( da LE 3 ) || (da GE 12)) && ((db LE 3) || (db GE 12 )) 103 104 if ~hist then begin ;Non-integer calculation 105 106 na = N_elements(a) ;number of elements in a 107 nb = N_elements(b) ;number of elements in b 108 109 ; Check for a single element array 110 111 if (na EQ 1) || (nb EQ 1) then begin 112 if (nb GT 1) then begin 113 if epsilon eq 0. then subb = where(b EQ a[0], nw) else $ 114 subb = where(abs(b - a[0]) lt epsilon, nw) 115 if (nw GT 0) then suba = replicate(0,nw) else suba = [-1] 116 endif else begin 117 if epsilon eq 0. then suba = where(a EQ b[0], nw) else $ 118 suba = where(abs(a - b[0]) lt epsilon, nw) 119 if (nw GT 0) then subb = replicate(0,nw) else subb = [-1] 120 endelse 121 count = nw 122 return 123 endif 124 125 c = [ a, b ] ;combined list of a and b 126 ind = [ lindgen(na), lindgen(nb) ] ;combined list of indices 127 vec = [ bytarr(na), replicate(1b,nb) ] ;flag of which vector in combined 128 ;list 0 - a 1 - b 129 130 ; sort combined list 131 132 sub = sort(c) 133 c = c[sub] 134 ind = ind[sub] 135 vec = vec[sub] 136 137 ; find duplicates in sorted combined list 138 139 n = na + nb ;total elements in c 140 if epsilon eq 0. then $ 141 firstdup = where( (c EQ shift(c,-1)) and (vec NE shift(vec,-1)), Count ) $ 142 else $ 143 firstdup = where( (abs(c - shift(c,-1)) lt epsilon) and (vec NE shift(vec,-1)), Count ) 144 145 if Count EQ 0 then begin ;any found? 146 suba = lonarr(1)-1 147 subb = lonarr(1)-1 148 return 149 end 150 151 dup = lonarr( Count*2 ) ;both duplicate values 152 even = lindgen( N_elements(firstdup))*2 ;Changed to LINDGEN 6-Sep-1991 153 dup[even] = firstdup 154 dup[even+1] = firstdup+1 155 ind = ind[dup] ;indices of duplicates 156 vec = vec[dup] ;vector id of duplicates 157 subb = ind[ where( vec, complement = vzero) ] ;b subscripts 158 suba = ind[ vzero] 159 160 endif else begin ;Integer calculation using histogram. 161 162 minab = min(a, MAX=maxa) > min(b, MAX=maxb) ;Only need intersection of ranges 163 maxab = maxa < maxb 164 165 ;If either set is empty, or their ranges don't intersect: 166 ; result = NULL (which is denoted by integer = -1) 167 !ERR = -1 168 if !VERSION.RELEASE GE '8.0' then begin 169 suba = !NULL 170 subb = !NULL 171 endif else begin 172 suba = -1 173 subb = -1 174 endelse 175 COUNT = 0L 176 if maxab lt minab then return ;No overlap 177 178 ha = histogram([a], MIN=minab, MAX=maxab, reverse_indices=reva) 179 hb = histogram([b], MIN=minab, MAX=maxab, reverse_indices=revb) 180 181 r = where((ha ne 0) and (hb ne 0), count) 182 183 if count gt 0 then begin 184 suba = reva[reva[r]] 185 subb = revb[revb[r]] 186 endif 187 endelse 188 189 return 190 191end 192