1 /* -*- Mode: C; tab-width: 3; indent-tabs-mode: nil; c-basic-offset: 3 -*- */
2
3 /*
4 * GImageView
5 * Copyright (C) 2001 Takuro Ashie
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 *
21 * $Id: compare_similar.c,v 1.4 2004/04/08 10:38:50 makeinu Exp $
22 */
23
24 /*
25 * These codes are mostly taken from GQview.
26 * GQview code Copyright (C) 2001 John Ellis
27 */
28
29 #include <stdlib.h>
30
31 #include "gimv_image.h"
32 #include "gimv_thumb.h"
33 #include "gimv_dupl_finder.h"
34 #include "prefs.h"
35
36 /*
37 * These functions are intended to find images with similar color content. For
38 * example when an image was saved at different compression levels or dimensions
39 * (scaled down/up) the contents are similar, but these files do not match by file
40 * size, dimensions, or checksum.
41 *
42 * These functions create a 32 x 32 array for each color channel (red, green, blue).
43 * The array represents the average color of each corresponding part of the
44 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
45 * Each grid is then processed for the average color value, this is what
46 * is stored in the array)
47 *
48 * To compare two images, generate a ImageSimilarityData for each image, then
49 * pass them to the compare function. The return value is the percent match
50 * of the two images. (for this, simple comparisons are used, basically the return
51 * is an average of the corresponding array differences)
52 *
53 * for image_sim_compare(), the return is 0.0 to 1.0:
54 * 1.0 for exact matches (an image is compared to itself)
55 * 0.0 for exact opposite images (compare an all black to an all white image)
56 * generally only a match of > 0.85 are significant at all, and >.95 is useful to
57 * find images that have been re-saved to other formats, dimensions, or compression.
58 */
59
60 typedef struct ImageSimilarityData_Tag ImageSimilarityData;
61
62 struct ImageSimilarityData_Tag
63 {
64 guint8 avg_r[1024];
65 guint8 avg_g[1024];
66 guint8 avg_b[1024];
67
68 gint filled;
69 };
70
71
72 ImageSimilarityData *image_sim_new (void);
73 void image_sim_delete (ImageSimilarityData *sd);
74
75 void image_sim_fill_data (ImageSimilarityData *sd,
76 GimvImage *image);
77 ImageSimilarityData *image_sim_new_from_image (GimvImage *image);
78 ImageSimilarityData *image_sim_new_from_thumb (GimvThumb *thumb);
79
80 gfloat image_sim_compare (ImageSimilarityData *a,
81 ImageSimilarityData *b);
82 gfloat image_sim_compare_fast (ImageSimilarityData *a,
83 ImageSimilarityData *b,
84 gfloat min);
85
86
87 ImageSimilarityData *
image_sim_new(void)88 image_sim_new (void)
89 {
90 ImageSimilarityData *sd = g_new0 (ImageSimilarityData, 1);
91
92 return sd;
93 }
94
95 void
image_sim_delete(ImageSimilarityData * sd)96 image_sim_delete (ImageSimilarityData *sd)
97 {
98 g_free (sd);
99 }
100
101 void
image_sim_fill_data(ImageSimilarityData * sd,GimvImage * image)102 image_sim_fill_data (ImageSimilarityData *sd, GimvImage *image)
103 {
104 gint w, h;
105 gint rs;
106 guchar *pix;
107 gint has_alpha;
108 gint p_step;
109
110 guchar *p;
111 gint i;
112 gint j;
113 gint x_inc, y_inc;
114 gint xs, ys;
115
116 gint x_small = FALSE; /* if less than 32 w or h, set TRUE */
117 gint y_small = FALSE;
118
119 if (!sd || !image) return;
120
121 w = gimv_image_width (image);
122 h = gimv_image_height (image);
123 rs = gimv_image_rowstride (image);
124 pix = gimv_image_get_pixels (image);
125 has_alpha = gimv_image_has_alpha (image);
126
127 p_step = has_alpha ? 4 : 3;
128 x_inc = w / 32;
129 y_inc = h / 32;
130
131 if (x_inc < 1) {
132 x_inc = 1;
133 x_small = TRUE;
134 }
135 if (y_inc < 1) {
136 y_inc = 1;
137 y_small = TRUE;
138 }
139
140 j = 0;
141
142 for (ys = 0; ys < 32; ys++) {
143 if (y_small) j = (float)h / 32 * ys;
144
145 i = 0;
146
147 for (xs = 0; xs < 32; xs++) {
148 gint x, y;
149 gint r, g, b;
150
151 if (x_small)
152 i = (float) w / 32 * xs;
153
154 r = g = b = 0;
155
156 for (y = j; y < j + y_inc; y++) {
157 p = pix + (y * rs) + (i * p_step);
158 for (x = i; x < i + x_inc; x++) {
159 r += *p; p++;
160 g += *p; p++;
161 b += *p; p++;
162 if (has_alpha) p++;
163 }
164 }
165 r /= x_inc * y_inc;
166 g /= x_inc * y_inc;
167 b /= x_inc * y_inc;
168
169 sd->avg_r[ys * 32 + xs] = r;
170 sd->avg_g[ys * 32 + xs] = g;
171 sd->avg_b[ys * 32 + xs] = b;
172
173 i += x_inc;
174 }
175
176 j += y_inc;
177 }
178
179 sd->filled = TRUE;
180 }
181
182 ImageSimilarityData *
image_sim_new_from_image(GimvImage * image)183 image_sim_new_from_image (GimvImage *image)
184 {
185 ImageSimilarityData *sd;
186
187 sd = image_sim_new ();
188 image_sim_fill_data (sd, image);
189
190 return sd;
191 }
192
193
194 ImageSimilarityData *
image_sim_new_from_thumb(GimvThumb * thumb)195 image_sim_new_from_thumb (GimvThumb *thumb)
196 {
197 GimvImage *image;
198 GdkPixmap *pixmap;
199 GdkBitmap *mask;
200 ImageSimilarityData *data;
201
202 g_return_val_if_fail (GIMV_IS_THUMB(thumb), NULL);
203
204 if (!gimv_thumb_has_thumbnail (thumb)) return NULL;
205
206 gimv_thumb_get_thumb (thumb, &pixmap, &mask) ;
207 if (!pixmap) return NULL;;
208 image = gimv_image_create_from_drawable (pixmap, 0, 0,
209 thumb->thumb_width,
210 thumb->thumb_height);
211
212 if (!image) return NULL;
213 data = image_sim_new_from_image (image);
214
215 gimv_image_unref (image);
216
217 return data;
218 }
219
220
221 gfloat
image_sim_compare(ImageSimilarityData * a,ImageSimilarityData * b)222 image_sim_compare (ImageSimilarityData *a, ImageSimilarityData *b)
223 {
224 gfloat sim;
225 gint i;
226
227 if (!a || !b || !a->filled || !b->filled) return 0.0;
228
229 sim = 0.0;
230
231 for (i = 0; i < 1024; i++) {
232 sim += (float) abs (a->avg_r[i] - b->avg_r[i]) / 255.0;
233 sim += (float) abs (a->avg_g[i] - b->avg_g[i]) / 255.0;
234 sim += (float) abs (a->avg_b[i] - b->avg_b[i]) / 255.0;
235 }
236
237 sim /= (1024.0 * 3.0);
238
239 return 1.0 - sim;
240 }
241
242 /* this uses a cutoff point so that it can abort early when it gets to
243 * a point that can simply no longer make the cut-off point.
244 */
245 gfloat
image_sim_compare_fast(ImageSimilarityData * a,ImageSimilarityData * b,gfloat min)246 image_sim_compare_fast (ImageSimilarityData *a, ImageSimilarityData *b, gfloat min)
247 {
248 gfloat sim;
249 gint i;
250 gint j;
251
252 if (!a || !b || !a->filled || !b->filled) return 0.0;
253
254 min = 1.0 - min;
255 sim = 0.0;
256
257 for (j = 0; j < 1024; j+= 32) {
258 for (i = j; i < j + 32; i++) {
259 sim += (float) abs (a->avg_r[i] - b->avg_r[i]) / 255.0;
260 sim += (float) abs (a->avg_g[i] - b->avg_g[i]) / 255.0;
261 sim += (float) abs (a->avg_b[i] - b->avg_b[i]) / 255.0;
262 }
263 /* check for abort, if so return 0.0 */
264 if (sim / (1024.0 * 3.0) > min) return 0.0;
265 }
266
267 sim /= (1024.0 * 3.0);
268
269 return 1.0 - sim;
270 }
271
272
273 static gpointer
duplicates_similar_get_data(GimvThumb * thumb)274 duplicates_similar_get_data (GimvThumb *thumb)
275 {
276 return image_sim_new_from_thumb (thumb);
277 }
278
279
280 static gint
duplicates_similar_compare(gpointer data1,gpointer data2,gfloat * similarity)281 duplicates_similar_compare (gpointer data1, gpointer data2, gfloat *similarity)
282 {
283 gfloat sim = image_sim_compare (data1, data2);
284
285 if (similarity)
286 *similarity = sim;
287
288 if (sim > conf.search_similar_accuracy) {
289 return 0;
290 }
291
292 return -1;
293 }
294
295
296 static void
duplicates_similar_data_delete(gpointer data)297 duplicates_similar_data_delete (gpointer data)
298 {
299 image_sim_delete (data);
300 }
301
302
303 GimvDuplCompFuncTable gimv_dupl_similar_funcs =
304 {
305 label: N_("Similarity"),
306 get_data: duplicates_similar_get_data,
307 compare: duplicates_similar_compare,
308 data_delete: duplicates_similar_data_delete,
309 };
310