1 /*
2
3 pHash, the open source perceptual hash library
4 Copyright (C) 2009 Aetilius, Inc.
5 All rights reserved.
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 Evan Klinger - eklinger@phash.org
21 D Grant Starkweather - dstarkweather@phash.org
22
23 */
24
25 #include "pHash.h"
26 #include "config.h"
27 #ifdef HAVE_VIDEO_HASH
28 #include "cimgffmpeg.h"
29 #endif
30
31 #ifdef HAVE_PTHREAD
32 #include <pthread.h>
33
ph_num_threads()34 int ph_num_threads()
35 {
36 int numCPU = 1;
37 #ifdef _SC_NPROCESSORS_ONLN
38 numCPU = sysconf( _SC_NPROCESSORS_ONLN );
39 #else
40 int mib[2];
41 size_t len;
42
43 mib[0] = CTL_HW;
44 #ifdef HW_AVAILCPU
45 mib[1] = HW_AVAILCPU;
46 #else
47 mib[1] = HW_NCPU;
48 #endif
49
50 sysctl(mib, 2, &numCPU, &len, NULL, 0);
51
52 if( numCPU < 1 )
53 {
54 mib[1] = HW_NCPU;
55 sysctl( mib, 2, &numCPU, &len, NULL, 0 );
56
57 if( numCPU < 1 )
58 {
59 numCPU = 1;
60 }
61 }
62
63 #endif
64 return numCPU;
65 }
66 #endif
67
68 const char phash_project[] = "%s. Copyright 2008-2010 Aetilius, Inc.";
69 char phash_version[255] = {0};
ph_about()70 const char* ph_about(){
71 if(phash_version[0] != 0)
72 return phash_version;
73
74 snprintf(phash_version, sizeof(phash_version), phash_project, PACKAGE_STRING);
75 return phash_version;
76 }
77 #ifdef HAVE_IMAGE_HASH
ph_radon_projections(const CImg<uint8_t> & img,int N,Projections & projs)78 int ph_radon_projections(const CImg<uint8_t> &img,int N,Projections &projs){
79
80 int width = img.width();
81 int height = img.height();
82 int D = (width > height)?width:height;
83 float x_center = (float)width/2;
84 float y_center = (float)height/2;
85 int x_off = (int)std::floor(x_center + ROUNDING_FACTOR(x_center));
86 int y_off = (int)std::floor(y_center + ROUNDING_FACTOR(y_center));
87
88 projs.R = new CImg<uint8_t>(N,D,1,1,0);
89 projs.nb_pix_perline = (int*)calloc(N,sizeof(int));
90
91 if (!projs.R || !projs.nb_pix_perline)
92 return EXIT_FAILURE;
93
94 projs.size = N;
95
96 CImg<uint8_t> *ptr_radon_map = projs.R;
97 int *nb_per_line = projs.nb_pix_perline;
98
99 for (int k=0;k<N/4+1;k++){
100 double theta = k*cimg::PI/N;
101 double alpha = std::tan(theta);
102 for (int x=0;x < D;x++){
103 double y = alpha*(x-x_off);
104 int yd = (int)std::floor(y + ROUNDING_FACTOR(y));
105 if ((yd + y_off >= 0)&&(yd + y_off < height) && (x < width)){
106 *ptr_radon_map->data(k,x) = img(x,yd + y_off);
107 nb_per_line[k] += 1;
108 }
109 if ((yd + x_off >= 0) && (yd + x_off < width) && (k != N/4) && (x < height)){
110 *ptr_radon_map->data(N/2-k,x) = img(yd + x_off,x);
111 nb_per_line[N/2-k] += 1;
112 }
113 }
114 }
115 int j= 0;
116 for (int k=3*N/4;k<N;k++){
117 double theta = k*cimg::PI/N;
118 double alpha = std::tan(theta);
119 for (int x=0;x < D;x++){
120 double y = alpha*(x-x_off);
121 int yd = (int)std::floor(y + ROUNDING_FACTOR(y));
122 if ((yd + y_off >= 0)&&(yd + y_off < height) && (x < width)){
123 *ptr_radon_map->data(k,x) = img(x,yd + y_off);
124 nb_per_line[k] += 1;
125 }
126 if ((y_off - yd >= 0)&&(y_off - yd<width)&&(2*y_off-x>=0)&&(2*y_off-x<height)&&(k!=3*N/4)){
127 *ptr_radon_map->data(k-j,x) = img(-yd+y_off,-(x-y_off)+y_off);
128 nb_per_line[k-j] += 1;
129 }
130
131 }
132 j += 2;
133 }
134
135 return EXIT_SUCCESS;
136
137 }
ph_feature_vector(const Projections & projs,Features & fv)138 int ph_feature_vector(const Projections &projs, Features &fv)
139 {
140
141 CImg<uint8_t> *ptr_map = projs.R;
142 CImg<uint8_t> projection_map = *ptr_map;
143 int *nb_perline = projs.nb_pix_perline;
144 int N = projs.size;
145 int D = projection_map.height();
146
147 fv.features = (double*)malloc(N*sizeof(double));
148 fv.size = N;
149 if (!fv.features)
150 return EXIT_FAILURE;
151
152 double *feat_v = fv.features;
153 double sum = 0.0;
154 double sum_sqd = 0.0;
155 for (int k=0; k < N; k++){
156 double line_sum = 0.0;
157 double line_sum_sqd = 0.0;
158 int nb_pixels = nb_perline[k];
159 for (int i=0;i<D;i++){
160 line_sum += projection_map(k,i);
161 line_sum_sqd += projection_map(k,i)*projection_map(k,i);
162 }
163 feat_v[k] = (line_sum_sqd/nb_pixels) - (line_sum*line_sum)/(nb_pixels*nb_pixels);
164 sum += feat_v[k];
165 sum_sqd += feat_v[k]*feat_v[k];
166 }
167 double mean = sum/N;
168 double var = sqrt((sum_sqd/N) - (sum*sum)/(N*N));
169
170 for (int i=0;i<N;i++){
171 feat_v[i] = (feat_v[i] - mean)/var;
172 }
173
174 return EXIT_SUCCESS;
175 }
ph_dct(const Features & fv,Digest & digest)176 int ph_dct(const Features &fv,Digest &digest)
177 {
178 int N = fv.size;
179 const int nb_coeffs = 40;
180
181 digest.coeffs = (uint8_t*)malloc(nb_coeffs*sizeof(uint8_t));
182 if (!digest.coeffs)
183 return EXIT_FAILURE;
184
185 digest.size = nb_coeffs;
186
187 double *R = fv.features;
188
189 uint8_t *D = digest.coeffs;
190
191 double D_temp[nb_coeffs];
192 double max = 0.0;
193 double min = 0.0;
194 for (int k = 0;k<nb_coeffs;k++){
195 double sum = 0.0;
196 for (int n=0;n<N;n++){
197 double temp = R[n]*cos((cimg::PI*(2*n+1)*k)/(2*N));
198 sum += temp;
199 }
200 if (k == 0)
201 D_temp[k] = sum/sqrt((double)N);
202 else
203 D_temp[k] = sum*SQRT_TWO/sqrt((double)N);
204 if (D_temp[k] > max)
205 max = D_temp[k];
206 if (D_temp[k] < min)
207 min = D_temp[k];
208 }
209
210 for (int i=0;i<nb_coeffs;i++){
211
212 D[i] = (uint8_t)(UCHAR_MAX*(D_temp[i] - min)/(max - min));
213
214 }
215
216 return EXIT_SUCCESS;
217 }
218
ph_crosscorr(const Digest & x,const Digest & y,double & pcc,double threshold)219 int ph_crosscorr(const Digest &x,const Digest &y,double &pcc,double threshold){
220
221 int N = y.size;
222 int result = 0;
223
224 uint8_t *x_coeffs = x.coeffs;
225 uint8_t *y_coeffs = y.coeffs;
226
227 double *r = new double[N];
228 double sumx = 0.0;
229 double sumy = 0.0;
230 for (int i=0;i < N;i++){
231 sumx += x_coeffs[i];
232 sumy += y_coeffs[i];
233 }
234 double meanx = sumx/N;
235 double meany = sumy/N;
236 double max = 0;
237 for (int d=0;d<N;d++){
238 double num = 0.0;
239 double denx = 0.0;
240 double deny = 0.0;
241 for (int i=0;i<N;i++){
242 num += (x_coeffs[i]-meanx)*(y_coeffs[(N+i-d)%N]-meany);
243 denx += pow((x_coeffs[i]-meanx),2);
244 deny += pow((y_coeffs[(N+i-d)%N]-meany),2);
245 }
246 r[d] = num/sqrt(denx*deny);
247 if (r[d] > max)
248 max = r[d];
249 }
250 delete[] r;
251 pcc = max;
252 if (max > threshold)
253 result = 1;
254
255 return result;
256 }
257
258 #ifdef max
259 #undef max
260 #endif
261
_ph_image_digest(const CImg<uint8_t> & img,double sigma,double gamma,Digest & digest,int N)262 int _ph_image_digest(const CImg<uint8_t> &img,double sigma, double gamma,Digest &digest, int N){
263
264 int result = EXIT_FAILURE;
265 CImg<uint8_t> graysc;
266 if (img.spectrum() >= 3){
267 graysc = img.get_RGBtoYCbCr().channel(0);
268 }
269 else if (img.spectrum() == 1){
270 graysc = img;
271 }
272 else {
273 return result;
274 }
275
276
277 graysc.blur((float)sigma);
278
279 (graysc/graysc.max()).pow(gamma);
280
281 Projections projs;
282 if (ph_radon_projections(graysc,N,projs) < 0)
283 goto cleanup;
284
285 Features features;
286 if (ph_feature_vector(projs,features) < 0)
287 goto cleanup;
288
289 if (ph_dct(features,digest) < 0)
290 goto cleanup;
291
292 result = EXIT_SUCCESS;
293
294 cleanup:
295 free(projs.nb_pix_perline);
296 free(features.features);
297
298 delete projs.R;
299 return result;
300 }
301
302 #define max(a,b) (((a)>(b))?(a):(b))
303
ph_image_digest(const char * file,double sigma,double gamma,Digest & digest,int N)304 int ph_image_digest(const char *file, double sigma, double gamma, Digest &digest, int N){
305
306 CImg<uint8_t> src(file);
307 int res = -1;
308 int result = _ph_image_digest(src,sigma,gamma,digest,N);
309 res = result;
310 return res;
311 }
312
_ph_compare_images(const CImg<uint8_t> & imA,const CImg<uint8_t> & imB,double & pcc,double sigma,double gamma,int N,double threshold)313 int _ph_compare_images(const CImg<uint8_t> &imA,const CImg<uint8_t> &imB,double &pcc, double sigma, double gamma,int N,double threshold){
314
315 int result = 0;
316 Digest digestA;
317 if (_ph_image_digest(imA,sigma,gamma,digestA,N) < 0)
318 goto cleanup;
319
320 Digest digestB;
321 if (_ph_image_digest(imB,sigma,gamma,digestB,N) < 0)
322 goto cleanup;
323
324 if (ph_crosscorr(digestA,digestB,pcc,threshold) < 0)
325 goto cleanup;
326
327 if (pcc > threshold)
328 result = 1;
329
330 cleanup:
331
332 free(digestA.coeffs);
333 free(digestB.coeffs);
334 return result;
335 }
336
ph_compare_images(const char * file1,const char * file2,double & pcc,double sigma,double gamma,int N,double threshold)337 int ph_compare_images(const char *file1, const char *file2,double &pcc, double sigma, double gamma, int N,double threshold){
338
339 CImg<uint8_t> *imA = new CImg<uint8_t>(file1);
340 CImg<uint8_t> *imB = new CImg<uint8_t>(file2);
341
342 int res = _ph_compare_images(*imA,*imB,pcc,sigma,gamma,N,threshold);
343
344 delete imA;
345 delete imB;
346 return res;
347 }
348
ph_dct_matrix(const int N)349 CImg<float>* ph_dct_matrix(const int N){
350 CImg<float> *ptr_matrix = new CImg<float>(N,N,1,1,1/sqrt((float)N));
351 const float c1 = sqrt(2.0/N);
352 for (int x=0;x<N;x++){
353 for (int y=1;y<N;y++){
354 *ptr_matrix->data(x,y) = c1*cos((cimg::PI/2/N)*y*(2*x+1));
355 }
356 }
357 return ptr_matrix;
358 }
359
ph_dct_imagehash(const char * file,ulong64 & hash)360 int ph_dct_imagehash(const char* file,ulong64 &hash){
361
362 if (!file){
363 return -1;
364 }
365 CImg<uint8_t> src;
366 try {
367 src.load(file);
368 } catch (CImgIOException ex){
369 return -1;
370 }
371 CImg<float> meanfilter(7,7,1,1,1);
372 CImg<float> img;
373 if (src.spectrum() == 3){
374 img = src.RGBtoYCbCr().channel(0).get_convolve(meanfilter);
375 } else if (src.spectrum() == 4){
376 int width = img.width();
377 int height = img.height();
378 int depth = img.depth();
379 img = src.crop(0,0,0,0,width-1,height-1,depth-1,2).RGBtoYCbCr().channel(0).get_convolve(meanfilter);
380 } else {
381 img = src.channel(0).get_convolve(meanfilter);
382 }
383
384 img.resize(32,32);
385 CImg<float> *C = ph_dct_matrix(32);
386 CImg<float> Ctransp = C->get_transpose();
387
388 CImg<float> dctImage = (*C)*img*Ctransp;
389
390 CImg<float> subsec = dctImage.crop(1,1,8,8).unroll('x');;
391
392 float median = subsec.median();
393 ulong64 one = 0x0000000000000001;
394 hash = 0x0000000000000000;
395 for (int i=0;i< 64;i++){
396 float current = subsec(i);
397 if (current > median)
398 hash |= one;
399 one = one << 1;
400 }
401
402 delete C;
403
404 return 0;
405 }
406
407 #ifdef HAVE_PTHREAD
ph_image_thread(void * p)408 void *ph_image_thread(void *p)
409 {
410 slice *s = (slice *)p;
411 for(int i = 0; i < s->n; ++i)
412 {
413 DP *dp = (DP *)s->hash_p[i];
414 ulong64 hash;
415 int ret = ph_dct_imagehash(dp->id, hash);
416 dp->hash = (ulong64*)malloc(sizeof(hash));
417 memcpy(dp->hash, &hash, sizeof(hash));
418 dp->hash_length = 1;
419 }
420 }
421
ph_dct_image_hashes(char * files[],int count,int threads)422 DP** ph_dct_image_hashes(char *files[], int count, int threads)
423 {
424 if(!files || count <= 0)
425 return NULL;
426
427 int num_threads;
428 if(threads > count)
429 {
430 num_threads = count;
431 }
432 else if(threads > 0)
433 {
434 num_threads = threads;
435 }
436 else
437 {
438 num_threads = ph_num_threads();
439 }
440
441 DP **hashes = (DP**)malloc(count*sizeof(DP*));
442
443 for(int i = 0; i < count; ++i)
444 {
445 hashes[i] = (DP *)malloc(sizeof(DP));
446 hashes[i]->id = strdup(files[i]);
447 }
448
449 pthread_t thds[num_threads];
450
451 int rem = count % num_threads;
452 int start = 0;
453 int off = 0;
454 slice *s = new slice[num_threads];
455 for(int n = 0; n < num_threads; ++n)
456 {
457 off = (int)floor((count/(float)num_threads) + (rem>0?num_threads-(count % num_threads):0));
458
459 s[n].hash_p = &hashes[start];
460 s[n].n = off;
461 s[n].hash_params = NULL;
462 start += off;
463 --rem;
464 pthread_create(&thds[n], NULL, ph_image_thread, &s[n]);
465 }
466 for(int i = 0; i < num_threads; ++i)
467 {
468 pthread_join(thds[i], NULL);
469 }
470 delete[] s;
471
472 return hashes;
473
474 }
475 #endif
476
477
478 #endif
479
480 #if defined(HAVE_VIDEO_HASH) && defined(HAVE_IMAGE_HASH)
481
482
ph_getKeyFramesFromVideo(const char * filename)483 CImgList<uint8_t>* ph_getKeyFramesFromVideo(const char *filename){
484
485 long N = GetNumberVideoFrames(filename);
486
487 if (N < 0){
488 return NULL;
489 }
490
491 float frames_per_sec = 0.5*fps(filename);
492 if (frames_per_sec < 0){
493 return NULL;
494 }
495
496 int step = (int)(frames_per_sec + ROUNDING_FACTOR(frames_per_sec));
497 long nbframes = (long)(N/step);
498
499 float *dist = (float*)malloc((nbframes)*sizeof(float));
500 if (!dist){
501 return NULL;
502 }
503 CImg<float> prev(64,1,1,1,0);
504
505 VFInfo st_info;
506 st_info.filename = filename;
507 st_info.nb_retrieval = 100;
508 st_info.step = step;
509 st_info.pixelformat = 0;
510 st_info.pFormatCtx = NULL;
511 st_info.width = -1;
512 st_info.height = -1;
513
514 CImgList<uint8_t> *pframelist = new CImgList<uint8_t>();
515 if (!pframelist){
516 return NULL;
517 }
518 int nbread = 0;
519 int k=0;
520 do {
521 nbread = NextFrames(&st_info, pframelist);
522 if (nbread < 0){
523 delete pframelist;
524 free(dist);
525 return NULL;
526 }
527 unsigned int i = 0;
528 while ((i < pframelist->size()) && (k < nbframes)){
529 CImg<uint8_t> current = pframelist->at(i++);
530 CImg<float> hist = current.get_histogram(64,0,255);
531 float d = 0.0;
532 dist[k] = 0.0;
533 cimg_forX(hist,X){
534 d = hist(X) - prev(X);
535 d = (d>=0) ? d : -d;
536 dist[k] += d;
537 prev(X) = hist(X);
538 }
539 k++;
540 }
541 pframelist->clear();
542 } while ((nbread >= st_info.nb_retrieval)&&(k < nbframes));
543 vfinfo_close(&st_info);
544
545 int S = 10;
546 int L = 50;
547 int alpha1 = 3;
548 int alpha2 = 2;
549 int s_begin, s_end;
550 int l_begin, l_end;
551 uint8_t *bnds = (uint8_t*)malloc(nbframes*sizeof(uint8_t));
552 if (!bnds){
553 delete pframelist;
554 free(dist);
555 return NULL;
556 }
557
558 int nbboundaries = 0;
559 k = 1;
560 bnds[0] = 1;
561 do {
562 s_begin = (k-S >= 0) ? k-S : 0;
563 s_end = (k+S < nbframes) ? k+S : nbframes-1;
564 l_begin = (k-L >= 0) ? k-L : 0;
565 l_end = (k+L < nbframes) ? k+L : nbframes-1;
566
567 /* get global average */
568 float ave_global, sum_global = 0.0, dev_global = 0.0;
569 for (int i=l_begin;i<=l_end;i++){
570 sum_global += dist[i];
571 }
572 ave_global = sum_global/((float)(l_end-l_begin+1));
573
574 /*get global deviation */
575 for (int i=l_begin;i<=l_end;i++){
576 float dev = ave_global - dist[i];
577 dev = (dev >= 0) ? dev : -1*dev;
578 dev_global += dev;
579 }
580 dev_global = dev_global/((float)(l_end-l_begin+1));
581
582 /* global threshold */
583 float T_global = ave_global + alpha1*dev_global;
584
585 /* get local maximum */
586 int localmaxpos = s_begin;
587 for (int i=s_begin;i<=s_end;i++){
588 if (dist[i] > dist[localmaxpos])
589 localmaxpos = i;
590 }
591 /* get 2nd local maximum */
592 int localmaxpos2 = s_begin;
593 float localmax2 = 0;
594 for (int i=s_begin;i<=s_end;i++){
595 if (i == localmaxpos)
596 continue;
597 if (dist[i] > localmax2){
598 localmaxpos2 = i;
599 localmax2 = dist[i];
600 }
601 }
602 float T_local = alpha2*dist[localmaxpos2];
603 float Thresh = (T_global >= T_local) ? T_global : T_local;
604
605 if ((dist[k] == dist[localmaxpos])&&(dist[k] > Thresh)){
606 bnds[k] = 1;
607 nbboundaries++;
608 }
609 else {
610 bnds[k] = 0;
611 }
612 k++;
613 } while ( k < nbframes-1);
614 bnds[nbframes-1]=1;
615 nbboundaries += 2;
616
617 int start = 0;
618 int end = 0;
619 int nbselectedframes = 0;
620 do {
621 /* find next boundary */
622 do {end++;} while ((bnds[end]!=1)&&(end < nbframes));
623
624 /* find min disparity within bounds */
625 int minpos = start+1;
626 for (int i=start+1; i < end;i++){
627 if (dist[i] < dist[minpos])
628 minpos = i;
629 }
630 bnds[minpos] = 2;
631 nbselectedframes++;
632 start = end;
633 } while (start < nbframes-1);
634
635 st_info.nb_retrieval = 1;
636 st_info.width = 32;
637 st_info.height = 32;
638 k = 0;
639 do {
640 if (bnds[k]==2){
641 if (ReadFrames(&st_info, pframelist, k*st_info.step,k*st_info.step + 1) < 0){
642 delete pframelist;
643 free(dist);
644 return NULL;
645 }
646 }
647 k++;
648 } while (k < nbframes);
649 vfinfo_close(&st_info);
650
651 free(bnds);
652 bnds = NULL;
653 free(dist);
654 dist = NULL;
655
656 return pframelist;
657 }
658
659
ph_dct_videohash(const char * filename,int & Length)660 ulong64* ph_dct_videohash(const char *filename, int &Length){
661
662 CImgList<uint8_t> *keyframes = ph_getKeyFramesFromVideo(filename);
663 if (keyframes == NULL)
664 return NULL;
665
666 Length = keyframes->size();
667
668 ulong64 *hash = (ulong64*)malloc(sizeof(ulong64)*Length);
669 CImg<float> *C = ph_dct_matrix(32);
670 CImg<float> Ctransp = C->get_transpose();
671 CImg<float> dctImage;
672 CImg<float> subsec;
673 CImg<uint8_t> currentframe;
674
675 for (unsigned int i=0;i < keyframes->size(); i++){
676 currentframe = keyframes->at(i);
677 currentframe.blur(1.0);
678 dctImage = (*C)*(currentframe)*Ctransp;
679 subsec = dctImage.crop(1,1,8,8).unroll('x');
680 float med = subsec.median();
681 hash[i] = 0x0000000000000000;
682 ulong64 one = 0x0000000000000001;
683 for (int j=0;j<64;j++){
684 if (subsec(j) > med)
685 hash[i] |= one;
686 one = one << 1;
687 }
688 }
689
690 keyframes->clear();
691 delete keyframes;
692 keyframes = NULL;
693 delete C;
694 C = NULL;
695 return hash;
696 }
697
698 #ifdef HAVE_PTHREAD
ph_video_thread(void * p)699 void *ph_video_thread(void *p)
700 {
701 slice *s = (slice *)p;
702 for(int i = 0; i < s->n; ++i)
703 {
704 DP *dp = (DP *)s->hash_p[i];
705 int N;
706 ulong64 *hash = ph_dct_videohash(dp->id, N);
707 if(hash)
708 {
709 dp->hash = hash;
710 dp->hash_length = N;
711 }
712 else
713 {
714 dp->hash = NULL;
715 dp->hash_length = 0;
716 }
717 }
718 }
719
ph_dct_video_hashes(char * files[],int count,int threads)720 DP** ph_dct_video_hashes(char *files[], int count, int threads)
721 {
722 if(!files || count <= 0)
723 return NULL;
724
725 int num_threads;
726 if(threads > count)
727 {
728 num_threads = count;
729 }
730 else if(threads > 0)
731 {
732 num_threads = threads;
733 }
734 else
735 {
736 num_threads = ph_num_threads();
737 }
738
739 DP **hashes = (DP**)malloc(count*sizeof(DP*));
740
741 for(int i = 0; i < count; ++i)
742 {
743 hashes[i] = (DP *)malloc(sizeof(DP));
744 hashes[i]->id = strdup(files[i]);
745 }
746
747 pthread_t thds[num_threads];
748
749 int rem = count % num_threads;
750 int start = 0;
751 int off = 0;
752 slice *s = new slice[num_threads];
753 for(int n = 0; n < num_threads; ++n)
754 {
755 off = (int)floor((count/(float)num_threads) + (rem>0?num_threads-(count % num_threads):0));
756
757 s[n].hash_p = &hashes[start];
758 s[n].n = off;
759 s[n].hash_params = NULL;
760 start += off;
761 --rem;
762 pthread_create(&thds[n], NULL, ph_video_thread, &s[n]);
763 }
764 for(int i = 0; i < num_threads; ++i)
765 {
766 pthread_join(thds[i], NULL);
767 }
768 delete[] s;
769
770 return hashes;
771
772 }
773 #endif
774
ph_dct_videohash_dist(ulong64 * hashA,int N1,ulong64 * hashB,int N2,int threshold)775 double ph_dct_videohash_dist(ulong64 *hashA, int N1, ulong64 *hashB, int N2, int threshold){
776
777 int den = (N1 <= N2) ? N1 : N2;
778 int C[N1+1][N2+1];
779
780 for (int i=0;i<N1+1;i++){
781 C[i][0] = 0;
782 }
783 for (int j=0;j<N2+1;j++){
784 C[0][j] = 0;
785 }
786 for (int i=1;i<N1+1;i++){
787 for (int j=1;j<N2+1;j++){
788 int d = ph_hamming_distance(hashA[i-1],hashB[j-1]);
789 if (d <= threshold){
790 C[i][j] = C[i-1][j-1] + 1;
791 } else {
792 C[i][j] = ((C[i-1][j] >= C[i][j-1])) ? C[i-1][j] : C[i][j-1];
793 }
794 }
795 }
796
797 double result = (double)(C[N1][N2])/(double)(den);
798
799 return result;
800 }
801
802 #endif
803
ph_hamming_distance(const ulong64 hash1,const ulong64 hash2)804 int ph_hamming_distance(const ulong64 hash1,const ulong64 hash2){
805 ulong64 x = hash1^hash2;
806 const ulong64 m1 = 0x5555555555555555ULL;
807 const ulong64 m2 = 0x3333333333333333ULL;
808 const ulong64 h01 = 0x0101010101010101ULL;
809 const ulong64 m4 = 0x0f0f0f0f0f0f0f0fULL;
810 x -= (x >> 1) & m1;
811 x = (x & m2) + ((x >> 2) & m2);
812 x = (x + (x >> 4)) & m4;
813 return (x * h01)>>56;
814 }
815
ph_malloc_datapoint(int hashtype)816 DP* ph_malloc_datapoint(int hashtype){
817 DP* dp = (DP*)malloc(sizeof(DP));
818 dp->hash = NULL;
819 dp->id = NULL;
820 dp->hash_type = hashtype;
821 return dp;
822 }
ph_free_datapoint(DP * dp)823 void ph_free_datapoint(DP *dp){
824 if (!dp){
825 return;
826 }
827 free(dp);
828 dp=NULL;
829 return;
830 }
831
832
833 #ifdef HAVE_IMAGE_HASH
834
GetMHKernel(float alpha,float level)835 CImg<float>* GetMHKernel(float alpha, float level){
836 int sigma = (int)4*pow((float)alpha,(float)level);
837 static CImg<float> *pkernel = NULL;
838 float xpos, ypos, A;
839 if (!pkernel){
840 pkernel = new CImg<float>(2*sigma+1,2*sigma+1,1,1,0);
841 cimg_forXY(*pkernel,X,Y){
842 xpos = pow(alpha,-level)*(X-sigma);
843 ypos = pow(alpha,-level)*(Y-sigma);
844 A = xpos*xpos + ypos*ypos;
845 pkernel->atXY(X,Y) = (2-A)*exp(-A/2);
846 }
847 }
848 return pkernel;
849 }
850
ph_mh_imagehash(const char * filename,int & N,float alpha,float lvl)851 uint8_t* ph_mh_imagehash(const char *filename, int &N,float alpha, float lvl){
852 if (filename == NULL){
853 return NULL;
854 }
855 uint8_t *hash = (unsigned char*)malloc(72*sizeof(uint8_t));
856 N = 72;
857
858 CImg<uint8_t> src(filename);
859 CImg<uint8_t> img;
860
861 if (src.spectrum() == 3){
862 img = src.get_RGBtoYCbCr().channel(0).blur(1.0).resize(512,512,1,1,5).get_equalize(256);
863 } else{
864 img = src.channel(0).get_blur(1.0).resize(512,512,1,1,5).get_equalize(256);
865 }
866 src.clear();
867
868 CImg<float> *pkernel = GetMHKernel(alpha,lvl);
869 CImg<float> fresp = img.get_correlate(*pkernel);
870 img.clear();
871 fresp.normalize(0,1.0);
872 CImg<float> blocks(31,31,1,1,0);
873 for (int rindex=0;rindex < 31;rindex++){
874 for (int cindex=0;cindex < 31;cindex++){
875 blocks(rindex,cindex) = fresp.get_crop(rindex*16,cindex*16,rindex*16+16-1,cindex*16+16-1).sum();
876 }
877 }
878 int hash_index;
879 int nb_ones = 0, nb_zeros = 0;
880 int bit_index = 0;
881 unsigned char hashbyte = 0;
882 for (int rindex=0;rindex < 31-2;rindex+=4){
883 CImg<float> subsec;
884 for (int cindex=0;cindex < 31-2;cindex+=4){
885 subsec = blocks.get_crop(cindex,rindex, cindex+2, rindex+2).unroll('x');
886 float ave = subsec.mean();
887 cimg_forX(subsec, I){
888 hashbyte <<= 1;
889 if (subsec(I) > ave){
890 hashbyte |= 0x01;
891 nb_ones++;
892 } else {
893 nb_zeros++;
894 }
895 bit_index++;
896 if ((bit_index%8) == 0){
897 hash_index = (int)(bit_index/8) - 1;
898 hash[hash_index] = hashbyte;
899 hashbyte = 0x00;
900 }
901 }
902 }
903 }
904
905 return hash;
906 }
907 #endif
908
ph_readfilenames(const char * dirname,int & count)909 char** ph_readfilenames(const char *dirname,int &count){
910 count = 0;
911 struct dirent *dir_entry;
912 DIR *dir = opendir(dirname);
913 if (!dir)
914 return NULL;
915
916 /*count files */
917 while ((dir_entry = readdir(dir)) != NULL){
918 if (strcmp(dir_entry->d_name, ".") && strcmp(dir_entry->d_name,".."))
919 count++;
920 }
921
922 /* alloc list of files */
923 char **files = (char**)malloc(count*sizeof(*files));
924 if (!files)
925 return NULL;
926
927 errno = 0;
928 int index = 0;
929 char path[1024];
930 path[0] = '\0';
931 rewinddir(dir);
932 while ((dir_entry = readdir(dir)) != 0){
933 if (strcmp(dir_entry->d_name,".") && strcmp(dir_entry->d_name,"..")){
934 strcat(path, dirname);
935 strcat(path, "/");
936 strcat(path, dir_entry->d_name);
937 files[index++] = strdup(path);
938 }
939 path[0]='\0';
940 }
941 if (errno)
942 return NULL;
943 closedir(dir);
944 return files;
945 }
946
947
ph_bitcount8(uint8_t val)948 int ph_bitcount8(uint8_t val){
949 int num = 0;
950 while (val){
951 ++num;
952 val &= val - 1;
953 }
954 return num;
955 }
956
957
958
ph_hammingdistance2(uint8_t * hashA,int lenA,uint8_t * hashB,int lenB)959 double ph_hammingdistance2(uint8_t *hashA, int lenA, uint8_t *hashB, int lenB){
960 if (lenA != lenB){
961 return -1.0;
962 }
963 if ((hashA == NULL) || (hashB == NULL) || (lenA <= 0)){
964 return -1.0;
965 }
966 double dist = 0;
967 uint8_t D = 0;
968 for (int i=0;i<lenA;i++){
969 D = hashA[i]^hashB[i];
970 dist = dist + (double)ph_bitcount8(D);
971 }
972 double bits = (double)lenA*8;
973 return dist/bits;
974
975 }
976
977
ph_texthash(const char * filename,int * nbpoints)978 TxtHashPoint* ph_texthash(const char *filename,int *nbpoints){
979 int count;
980 TxtHashPoint *TxtHash = NULL;
981 TxtHashPoint WinHash[WindowLength];
982 char kgram[KgramLength];
983
984 FILE *pfile = fopen(filename,"r");
985 if (!pfile){
986 return NULL;
987 }
988 struct stat fileinfo;
989 fstat(fileno(pfile),&fileinfo);
990 count = fileinfo.st_size - WindowLength + 1;
991 count = (int)(0.01*count);
992 int d;
993 ulong64 hashword = 0ULL;
994
995 TxtHash = (TxtHashPoint*)malloc(count*sizeof(struct ph_hash_point));
996 if (!TxtHash){
997 return NULL;
998 }
999 *nbpoints=0;
1000 int i, first=0, last=KgramLength-1;
1001 int text_index = 0;
1002 int win_index = 0;
1003 for (i=0;i < KgramLength;i++){ /* calc first kgram */
1004 d = fgetc(pfile);
1005 if (d == EOF){
1006 free(TxtHash);
1007 return NULL;
1008 }
1009 if (d <= 47) /*skip cntrl chars*/
1010 continue;
1011 if ( ((d >= 58)&&(d <= 64)) || ((d >= 91)&&(d <= 96)) || (d >= 123) ) /*skip punct*/
1012 continue;
1013 if ((d >= 65)&&(d<=90)) /*convert upper to lower case */
1014 d = d + 32;
1015
1016 kgram[i] = (char)d;
1017 hashword = hashword << delta; /* rotate left or shift left ??? */
1018 hashword = hashword^textkeys[d];/* right now, rotate breaks it */
1019 }
1020
1021 WinHash[win_index].hash = hashword;
1022 WinHash[win_index++].index = text_index;
1023 struct ph_hash_point minhash;
1024 minhash.hash = ULLONG_MAX;
1025 minhash.index = 0;
1026 struct ph_hash_point prev_minhash;
1027 prev_minhash.hash = ULLONG_MAX;
1028 prev_minhash.index = 0;
1029
1030 while ((d=fgetc(pfile)) != EOF){ /*remaining kgrams */
1031 text_index++;
1032 if (d == EOF){
1033 free(TxtHash);
1034 return NULL;
1035 }
1036 if (d <= 47) /*skip cntrl chars*/
1037 continue;
1038 if ( ((d >= 58)&&(d <= 64)) || ((d >= 91)&&(d <= 96)) || (d >= 123) ) /*skip punct*/
1039 continue;
1040 if ((d >= 65)&&(d<=90)) /*convert upper to lower case */
1041 d = d + 32;
1042
1043 ulong64 oldsym = textkeys[kgram[first%KgramLength]];
1044
1045 /* rotate or left shift ??? */
1046 /* right now, rotate breaks it */
1047 oldsym = oldsym << delta*KgramLength;
1048 hashword = hashword << delta;
1049 hashword = hashword^textkeys[d];
1050 hashword = hashword^oldsym;
1051 kgram[last%KgramLength] = (char)d;
1052 first++;
1053 last++;
1054
1055 WinHash[win_index%WindowLength].hash = hashword;
1056 WinHash[win_index%WindowLength].index = text_index;
1057 win_index++;
1058
1059 if (win_index >= WindowLength){
1060 minhash.hash = ULLONG_MAX;
1061 for (i=win_index;i<win_index+WindowLength;i++){
1062 if (WinHash[i%WindowLength].hash <= minhash.hash){
1063 minhash.hash = WinHash[i%WindowLength].hash;
1064 minhash.index = WinHash[i%WindowLength].index;
1065 }
1066 }
1067 if (minhash.hash != prev_minhash.hash){
1068 TxtHash[(*nbpoints)].hash = minhash.hash;
1069 TxtHash[(*nbpoints)++].index = minhash.index;
1070 prev_minhash.hash = minhash.hash;
1071 prev_minhash.index = minhash.index;
1072
1073 } else {
1074 TxtHash[*nbpoints].hash = prev_minhash.hash;
1075 TxtHash[(*nbpoints)++].index = prev_minhash.index;
1076 }
1077 win_index = 0;
1078 }
1079 }
1080
1081 fclose(pfile);
1082 return TxtHash;
1083 }
1084
ph_compare_text_hashes(TxtHashPoint * hash1,int N1,TxtHashPoint * hash2,int N2,int * nbmatches)1085 TxtMatch* ph_compare_text_hashes(TxtHashPoint *hash1, int N1, TxtHashPoint *hash2,int N2, int *nbmatches){
1086
1087 int max_matches = (N1 >= N2) ? N1:N2;
1088 TxtMatch *found_matches = (TxtMatch*)malloc(max_matches*sizeof(TxtMatch));
1089 if (!found_matches){
1090 return NULL;
1091 }
1092
1093 *nbmatches = 0;
1094 int i,j;
1095 for (i=0;i<N1;i++){
1096 for (j=0;j<N2;j++){
1097 if (hash1[i].hash == hash2[j].hash){
1098 int m = i + 1;
1099 int n = j + 1;
1100 int cnt = 1;
1101 while((m < N1)&&(n < N2)&&(hash1[m++].hash == hash2[n++].hash)){
1102 cnt++;
1103 }
1104 found_matches[*nbmatches].first_index = i;
1105 found_matches[*nbmatches].second_index = j;
1106 found_matches[*nbmatches].length = cnt;
1107 (*nbmatches)++;
1108 }
1109 }
1110 }
1111 return found_matches;
1112 }
1113
1114