1 /* -*- Mode: C; tab-width: 3; indent-tabs-mode: nil; c-basic-offset: 3 -*- */
2 
3 /*
4  * GImageView
5  * Copyright (C) 2001 Takuro Ashie
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  *
21  * $Id: compare_similar.c,v 1.4 2004/04/08 10:38:50 makeinu Exp $
22  */
23 
24 /*
25  * These codes are mostly taken from GQview.
26  * GQview code Copyright (C) 2001 John Ellis
27  */
28 
29 #include <stdlib.h>
30 
31 #include "gimv_image.h"
32 #include "gimv_thumb.h"
33 #include "gimv_dupl_finder.h"
34 #include "prefs.h"
35 
36 /*
37  * These functions are intended to find images with similar color content. For
38  * example when an image was saved at different compression levels or dimensions
39  * (scaled down/up) the contents are similar, but these files do not match by file
40  * size, dimensions, or checksum.
41  *
42  * These functions create a 32 x 32 array for each color channel (red, green, blue).
43  * The array represents the average color of each corresponding part of the
44  * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
45  * Each grid is then processed for the average color value, this is what
46  * is stored in the array)
47  *
48  * To compare two images, generate a ImageSimilarityData for each image, then
49  * pass them to the compare function. The return value is the percent match
50  * of the two images. (for this, simple comparisons are used, basically the return
51  * is an average of the corresponding array differences)
52  *
53  * for image_sim_compare(), the return is 0.0 to 1.0:
54  *  1.0 for exact matches (an image is compared to itself)
55  *  0.0 for exact opposite images (compare an all black to an all white image)
56  * generally only a match of > 0.85 are significant at all, and >.95 is useful to
57  * find images that have been re-saved to other formats, dimensions, or compression.
58  */
59 
60 typedef struct ImageSimilarityData_Tag ImageSimilarityData;
61 
62 struct ImageSimilarityData_Tag
63 {
64    guint8 avg_r[1024];
65    guint8 avg_g[1024];
66    guint8 avg_b[1024];
67 
68    gint filled;
69 };
70 
71 
72 ImageSimilarityData *image_sim_new            (void);
73 void                 image_sim_delete         (ImageSimilarityData *sd);
74 
75 void                 image_sim_fill_data      (ImageSimilarityData *sd,
76                                                GimvImage           *image);
77 ImageSimilarityData *image_sim_new_from_image (GimvImage           *image);
78 ImageSimilarityData *image_sim_new_from_thumb (GimvThumb           *thumb);
79 
80 gfloat               image_sim_compare        (ImageSimilarityData *a,
81                                                ImageSimilarityData *b);
82 gfloat               image_sim_compare_fast   (ImageSimilarityData *a,
83                                                ImageSimilarityData *b,
84                                                gfloat               min);
85 
86 
87 ImageSimilarityData *
image_sim_new(void)88 image_sim_new (void)
89 {
90    ImageSimilarityData *sd = g_new0 (ImageSimilarityData, 1);
91 
92    return sd;
93 }
94 
95 void
image_sim_delete(ImageSimilarityData * sd)96 image_sim_delete (ImageSimilarityData *sd)
97 {
98    g_free (sd);
99 }
100 
101 void
image_sim_fill_data(ImageSimilarityData * sd,GimvImage * image)102 image_sim_fill_data (ImageSimilarityData *sd, GimvImage *image)
103 {
104    gint w, h;
105    gint rs;
106    guchar *pix;
107    gint has_alpha;
108    gint p_step;
109 
110    guchar *p;
111    gint i;
112    gint j;
113    gint x_inc, y_inc;
114    gint xs, ys;
115 
116    gint x_small = FALSE;	/* if less than 32 w or h, set TRUE */
117    gint y_small = FALSE;
118 
119    if (!sd || !image) return;
120 
121    w = gimv_image_width (image);
122    h = gimv_image_height (image);
123    rs = gimv_image_rowstride (image);
124    pix = gimv_image_get_pixels (image);
125    has_alpha = gimv_image_has_alpha (image);
126 
127    p_step = has_alpha ? 4 : 3;
128    x_inc = w / 32;
129    y_inc = h / 32;
130 
131    if (x_inc < 1) {
132       x_inc = 1;
133       x_small = TRUE;
134    }
135    if (y_inc < 1) {
136       y_inc = 1;
137       y_small = TRUE;
138    }
139 
140    j = 0;
141 
142    for (ys = 0; ys < 32; ys++) {
143       if (y_small) j = (float)h / 32 * ys;
144 
145       i = 0;
146 
147       for (xs = 0; xs < 32; xs++) {
148          gint x, y;
149          gint r, g, b;
150 
151          if (x_small)
152             i = (float) w / 32 * xs;
153 
154          r = g = b = 0;
155 
156          for (y = j; y < j + y_inc; y++) {
157             p = pix + (y * rs) + (i * p_step);
158             for (x = i; x < i + x_inc; x++) {
159                r += *p; p++;
160                g += *p; p++;
161                b += *p; p++;
162                if (has_alpha) p++;
163             }
164          }
165          r /= x_inc * y_inc;
166          g /= x_inc * y_inc;
167          b /= x_inc * y_inc;
168 
169          sd->avg_r[ys * 32 + xs] = r;
170          sd->avg_g[ys * 32 + xs] = g;
171          sd->avg_b[ys * 32 + xs] = b;
172 
173          i += x_inc;
174       }
175 
176       j += y_inc;
177    }
178 
179    sd->filled = TRUE;
180 }
181 
182 ImageSimilarityData *
image_sim_new_from_image(GimvImage * image)183 image_sim_new_from_image (GimvImage *image)
184 {
185    ImageSimilarityData *sd;
186 
187    sd = image_sim_new ();
188    image_sim_fill_data (sd, image);
189 
190    return sd;
191 }
192 
193 
194 ImageSimilarityData *
image_sim_new_from_thumb(GimvThumb * thumb)195 image_sim_new_from_thumb (GimvThumb *thumb)
196 {
197    GimvImage *image;
198    GdkPixmap *pixmap;
199    GdkBitmap *mask;
200    ImageSimilarityData *data;
201 
202    g_return_val_if_fail (GIMV_IS_THUMB(thumb), NULL);
203 
204    if (!gimv_thumb_has_thumbnail (thumb)) return NULL;
205 
206    gimv_thumb_get_thumb (thumb, &pixmap, &mask)     ;
207    if (!pixmap) return NULL;;
208    image = gimv_image_create_from_drawable (pixmap, 0, 0,
209                                             thumb->thumb_width,
210                                             thumb->thumb_height);
211 
212    if (!image) return NULL;
213    data = image_sim_new_from_image (image);
214 
215    gimv_image_unref (image);
216 
217    return data;
218 }
219 
220 
221 gfloat
image_sim_compare(ImageSimilarityData * a,ImageSimilarityData * b)222 image_sim_compare (ImageSimilarityData *a, ImageSimilarityData *b)
223 {
224    gfloat sim;
225    gint i;
226 
227    if (!a || !b || !a->filled || !b->filled) return 0.0;
228 
229    sim = 0.0;
230 
231    for (i = 0; i < 1024; i++) {
232       sim += (float) abs (a->avg_r[i] - b->avg_r[i]) / 255.0;
233       sim += (float) abs (a->avg_g[i] - b->avg_g[i]) / 255.0;
234       sim += (float) abs (a->avg_b[i] - b->avg_b[i]) / 255.0;
235    }
236 
237    sim /= (1024.0 * 3.0);
238 
239    return 1.0 - sim;
240 }
241 
242 /* this uses a cutoff point so that it can abort early when it gets to
243  * a point that can simply no longer make the cut-off point.
244  */
245 gfloat
image_sim_compare_fast(ImageSimilarityData * a,ImageSimilarityData * b,gfloat min)246 image_sim_compare_fast (ImageSimilarityData *a, ImageSimilarityData *b, gfloat min)
247 {
248    gfloat sim;
249    gint i;
250    gint j;
251 
252    if (!a || !b || !a->filled || !b->filled) return 0.0;
253 
254    min = 1.0 - min;
255    sim = 0.0;
256 
257    for (j = 0; j < 1024; j+= 32) {
258       for (i = j; i < j + 32; i++) {
259          sim += (float) abs (a->avg_r[i] - b->avg_r[i]) / 255.0;
260          sim += (float) abs (a->avg_g[i] - b->avg_g[i]) / 255.0;
261          sim += (float) abs (a->avg_b[i] - b->avg_b[i]) / 255.0;
262       }
263       /* check for abort, if so return 0.0 */
264       if (sim / (1024.0 * 3.0) > min) return 0.0;
265    }
266 
267    sim /= (1024.0 * 3.0);
268 
269    return 1.0 - sim;
270 }
271 
272 
273 static gpointer
duplicates_similar_get_data(GimvThumb * thumb)274 duplicates_similar_get_data (GimvThumb *thumb)
275 {
276    return image_sim_new_from_thumb (thumb);
277 }
278 
279 
280 static gint
duplicates_similar_compare(gpointer data1,gpointer data2,gfloat * similarity)281 duplicates_similar_compare (gpointer data1, gpointer data2, gfloat *similarity)
282 {
283    gfloat sim = image_sim_compare (data1, data2);
284 
285    if (similarity)
286       *similarity = sim;
287 
288    if (sim > conf.search_similar_accuracy) {
289       return 0;
290    }
291 
292    return -1;
293 }
294 
295 
296 static void
duplicates_similar_data_delete(gpointer data)297 duplicates_similar_data_delete (gpointer data)
298 {
299    image_sim_delete (data);
300 }
301 
302 
303 GimvDuplCompFuncTable gimv_dupl_similar_funcs =
304 {
305    label:       N_("Similarity"),
306    get_data:    duplicates_similar_get_data,
307    compare:     duplicates_similar_compare,
308    data_delete: duplicates_similar_data_delete,
309 };
310