1 /*
2 
3     pHash, the open source perceptual hash library
4     Copyright (C) 2009 Aetilius, Inc.
5     All rights reserved.
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20     Evan Klinger - eklinger@phash.org
21     D Grant Starkweather - dstarkweather@phash.org
22 
23 */
24 
25 #include "pHash.h"
26 #include "config.h"
27 #ifdef HAVE_VIDEO_HASH
28 #include "cimgffmpeg.h"
29 #endif
30 
31 #ifdef HAVE_PTHREAD
32 #include <pthread.h>
33 
ph_num_threads()34 int ph_num_threads()
35 {
36 	int numCPU = 1;
37 #ifdef _SC_NPROCESSORS_ONLN
38 		numCPU = sysconf( _SC_NPROCESSORS_ONLN );
39 #else
40 		int mib[2];
41 		size_t len;
42 
43 		mib[0] = CTL_HW;
44 #ifdef HW_AVAILCPU
45 		mib[1] = HW_AVAILCPU;
46 #else
47 		mib[1] = HW_NCPU;
48 #endif
49 
50 		sysctl(mib, 2, &numCPU, &len, NULL, 0);
51 
52 		if( numCPU < 1 )
53 		{
54      			mib[1] = HW_NCPU;
55      			sysctl( mib, 2, &numCPU, &len, NULL, 0 );
56 
57 		     	if( numCPU < 1 )
58      			{
59           			numCPU = 1;
60      			}
61 		}
62 
63 #endif
64 	return numCPU;
65 }
66 #endif
67 
68 const char phash_project[] = "%s. Copyright 2008-2010 Aetilius, Inc.";
69 char phash_version[255] = {0};
ph_about()70 const char* ph_about(){
71 	if(phash_version[0] != 0)
72 		return phash_version;
73 
74 	snprintf(phash_version, sizeof(phash_version), phash_project, PACKAGE_STRING);
75 	return phash_version;
76 }
77 #ifdef HAVE_IMAGE_HASH
ph_radon_projections(const CImg<uint8_t> & img,int N,Projections & projs)78 int ph_radon_projections(const CImg<uint8_t> &img,int N,Projections &projs){
79 
80     int width = img.width();
81     int height = img.height();
82     int D = (width > height)?width:height;
83     float x_center = (float)width/2;
84     float y_center = (float)height/2;
85     int x_off = (int)std::floor(x_center + ROUNDING_FACTOR(x_center));
86     int y_off = (int)std::floor(y_center + ROUNDING_FACTOR(y_center));
87 
88     projs.R = new CImg<uint8_t>(N,D,1,1,0);
89     projs.nb_pix_perline = (int*)calloc(N,sizeof(int));
90 
91     if (!projs.R || !projs.nb_pix_perline)
92 	return EXIT_FAILURE;
93 
94     projs.size = N;
95 
96     CImg<uint8_t> *ptr_radon_map = projs.R;
97     int *nb_per_line = projs.nb_pix_perline;
98 
99     for (int k=0;k<N/4+1;k++){
100         double theta = k*cimg::PI/N;
101         double alpha = std::tan(theta);
102         for (int x=0;x < D;x++){
103 	    double y = alpha*(x-x_off);
104             int yd = (int)std::floor(y + ROUNDING_FACTOR(y));
105             if ((yd + y_off >= 0)&&(yd + y_off < height) && (x < width)){
106 		*ptr_radon_map->data(k,x) = img(x,yd + y_off);
107                 nb_per_line[k] += 1;
108 	    }
109             if ((yd + x_off >= 0) && (yd + x_off < width) && (k != N/4) && (x < height)){
110 		*ptr_radon_map->data(N/2-k,x) = img(yd + x_off,x);
111                 nb_per_line[N/2-k] += 1;
112 	    }
113 	}
114     }
115     int j= 0;
116     for (int k=3*N/4;k<N;k++){
117 	double theta = k*cimg::PI/N;
118         double alpha = std::tan(theta);
119         for (int x=0;x < D;x++){
120 	    double y = alpha*(x-x_off);
121             int yd = (int)std::floor(y + ROUNDING_FACTOR(y));
122             if ((yd + y_off >= 0)&&(yd + y_off < height) && (x < width)){
123 		*ptr_radon_map->data(k,x) = img(x,yd + y_off);
124                 nb_per_line[k] += 1;
125 	    }
126             if ((y_off - yd >= 0)&&(y_off - yd<width)&&(2*y_off-x>=0)&&(2*y_off-x<height)&&(k!=3*N/4)){
127 		*ptr_radon_map->data(k-j,x) = img(-yd+y_off,-(x-y_off)+y_off);
128                 nb_per_line[k-j] += 1;
129 	    }
130 
131 	}
132         j += 2;
133     }
134 
135     return EXIT_SUCCESS;
136 
137 }
ph_feature_vector(const Projections & projs,Features & fv)138 int ph_feature_vector(const Projections &projs, Features &fv)
139 {
140 
141     CImg<uint8_t> *ptr_map = projs.R;
142     CImg<uint8_t> projection_map = *ptr_map;
143     int *nb_perline = projs.nb_pix_perline;
144     int N = projs.size;
145     int D = projection_map.height();
146 
147     fv.features = (double*)malloc(N*sizeof(double));
148     fv.size = N;
149     if (!fv.features)
150 	return EXIT_FAILURE;
151 
152     double *feat_v = fv.features;
153     double sum = 0.0;
154     double sum_sqd = 0.0;
155     for (int k=0; k < N; k++){
156 	double line_sum = 0.0;
157         double line_sum_sqd = 0.0;
158         int nb_pixels = nb_perline[k];
159 	for (int i=0;i<D;i++){
160 	    line_sum += projection_map(k,i);
161     	    line_sum_sqd += projection_map(k,i)*projection_map(k,i);
162 	}
163 	feat_v[k] = (line_sum_sqd/nb_pixels) - (line_sum*line_sum)/(nb_pixels*nb_pixels);
164         sum += feat_v[k];
165         sum_sqd += feat_v[k]*feat_v[k];
166     }
167     double mean = sum/N;
168     double var  = sqrt((sum_sqd/N) - (sum*sum)/(N*N));
169 
170     for (int i=0;i<N;i++){
171     	feat_v[i] = (feat_v[i] - mean)/var;
172     }
173 
174     return EXIT_SUCCESS;
175 }
ph_dct(const Features & fv,Digest & digest)176 int ph_dct(const Features &fv,Digest &digest)
177 {
178     int N = fv.size;
179     const int nb_coeffs = 40;
180 
181     digest.coeffs = (uint8_t*)malloc(nb_coeffs*sizeof(uint8_t));
182     if (!digest.coeffs)
183 	return EXIT_FAILURE;
184 
185     digest.size = nb_coeffs;
186 
187     double *R = fv.features;
188 
189     uint8_t *D = digest.coeffs;
190 
191     double D_temp[nb_coeffs];
192     double max = 0.0;
193     double min = 0.0;
194     for (int k = 0;k<nb_coeffs;k++){
195 	double sum = 0.0;
196         for (int n=0;n<N;n++){
197 	    double temp = R[n]*cos((cimg::PI*(2*n+1)*k)/(2*N));
198             sum += temp;
199 	}
200         if (k == 0)
201 	    D_temp[k] = sum/sqrt((double)N);
202         else
203             D_temp[k] = sum*SQRT_TWO/sqrt((double)N);
204         if (D_temp[k] > max)
205             max = D_temp[k];
206         if (D_temp[k] < min)
207             min = D_temp[k];
208     }
209 
210     for (int i=0;i<nb_coeffs;i++){
211 
212 	D[i] = (uint8_t)(UCHAR_MAX*(D_temp[i] - min)/(max - min));
213 
214     }
215 
216     return EXIT_SUCCESS;
217 }
218 
ph_crosscorr(const Digest & x,const Digest & y,double & pcc,double threshold)219 int ph_crosscorr(const Digest &x,const Digest &y,double &pcc,double threshold){
220 
221     int N = y.size;
222     int result = 0;
223 
224     uint8_t *x_coeffs = x.coeffs;
225     uint8_t *y_coeffs = y.coeffs;
226 
227     double *r = new double[N];
228     double sumx = 0.0;
229     double sumy = 0.0;
230     for (int i=0;i < N;i++){
231 	sumx += x_coeffs[i];
232         sumy += y_coeffs[i];
233     }
234     double meanx = sumx/N;
235     double meany = sumy/N;
236     double max = 0;
237     for (int d=0;d<N;d++){
238         double num = 0.0;
239         double denx = 0.0;
240         double deny = 0.0;
241 	for (int i=0;i<N;i++){
242 	    num  += (x_coeffs[i]-meanx)*(y_coeffs[(N+i-d)%N]-meany);
243             denx += pow((x_coeffs[i]-meanx),2);
244             deny += pow((y_coeffs[(N+i-d)%N]-meany),2);
245 	}
246         r[d] = num/sqrt(denx*deny);
247         if (r[d] > max)
248 	    max = r[d];
249     }
250     delete[] r;
251     pcc = max;
252     if (max > threshold)
253 	    result = 1;
254 
255     return result;
256 }
257 
258 #ifdef max
259 #undef max
260 #endif
261 
_ph_image_digest(const CImg<uint8_t> & img,double sigma,double gamma,Digest & digest,int N)262 int _ph_image_digest(const CImg<uint8_t> &img,double sigma, double gamma,Digest &digest, int N){
263 
264     int result = EXIT_FAILURE;
265     CImg<uint8_t> graysc;
266     if (img.spectrum() >= 3){
267 	graysc = img.get_RGBtoYCbCr().channel(0);
268     }
269     else if (img.spectrum() == 1){
270 	graysc = img;
271     }
272     else {
273 	return result;
274     }
275 
276 
277     graysc.blur((float)sigma);
278 
279     (graysc/graysc.max()).pow(gamma);
280 
281     Projections projs;
282     if (ph_radon_projections(graysc,N,projs) < 0)
283 	goto cleanup;
284 
285     Features features;
286     if (ph_feature_vector(projs,features) < 0)
287 	goto cleanup;
288 
289     if (ph_dct(features,digest) < 0)
290         goto cleanup;
291 
292     result = EXIT_SUCCESS;
293 
294 cleanup:
295     free(projs.nb_pix_perline);
296     free(features.features);
297 
298     delete projs.R;
299     return result;
300 }
301 
302 #define max(a,b) (((a)>(b))?(a):(b))
303 
ph_image_digest(const char * file,double sigma,double gamma,Digest & digest,int N)304 int ph_image_digest(const char *file, double sigma, double gamma, Digest &digest, int N){
305 
306     CImg<uint8_t> src(file);
307 	int res = -1;
308     		int result = _ph_image_digest(src,sigma,gamma,digest,N);
309     		res = result;
310 	return res;
311 }
312 
_ph_compare_images(const CImg<uint8_t> & imA,const CImg<uint8_t> & imB,double & pcc,double sigma,double gamma,int N,double threshold)313 int _ph_compare_images(const CImg<uint8_t> &imA,const CImg<uint8_t> &imB,double &pcc, double sigma, double gamma,int N,double threshold){
314 
315     int result = 0;
316     Digest digestA;
317     if (_ph_image_digest(imA,sigma,gamma,digestA,N) < 0)
318 	goto cleanup;
319 
320     Digest digestB;
321     if (_ph_image_digest(imB,sigma,gamma,digestB,N) < 0)
322 	goto cleanup;
323 
324     if (ph_crosscorr(digestA,digestB,pcc,threshold) < 0)
325 	goto cleanup;
326 
327     if  (pcc  > threshold)
328         result = 1;
329 
330 cleanup:
331 
332     free(digestA.coeffs);
333     free(digestB.coeffs);
334     return result;
335 }
336 
ph_compare_images(const char * file1,const char * file2,double & pcc,double sigma,double gamma,int N,double threshold)337 int ph_compare_images(const char *file1, const char *file2,double &pcc, double sigma, double gamma, int N,double threshold){
338 
339     CImg<uint8_t> *imA = new CImg<uint8_t>(file1);
340     CImg<uint8_t> *imB = new CImg<uint8_t>(file2);
341 
342     int res = _ph_compare_images(*imA,*imB,pcc,sigma,gamma,N,threshold);
343 
344     delete imA;
345     delete imB;
346     return res;
347 }
348 
ph_dct_matrix(const int N)349 CImg<float>* ph_dct_matrix(const int N){
350     CImg<float> *ptr_matrix = new CImg<float>(N,N,1,1,1/sqrt((float)N));
351     const float c1 = sqrt(2.0/N);
352     for (int x=0;x<N;x++){
353 	for (int y=1;y<N;y++){
354 	    *ptr_matrix->data(x,y) = c1*cos((cimg::PI/2/N)*y*(2*x+1));
355 	}
356     }
357     return ptr_matrix;
358 }
359 
ph_dct_imagehash(const char * file,ulong64 & hash)360 int ph_dct_imagehash(const char* file,ulong64 &hash){
361 
362     if (!file){
363 	return -1;
364     }
365     CImg<uint8_t> src;
366     try {
367 	src.load(file);
368     } catch (CImgIOException ex){
369 	return -1;
370     }
371     CImg<float> meanfilter(7,7,1,1,1);
372     CImg<float> img;
373     if (src.spectrum() == 3){
374         img = src.RGBtoYCbCr().channel(0).get_convolve(meanfilter);
375     } else if (src.spectrum() == 4){
376 	int width = img.width();
377         int height = img.height();
378         int depth = img.depth();
379 	img = src.crop(0,0,0,0,width-1,height-1,depth-1,2).RGBtoYCbCr().channel(0).get_convolve(meanfilter);
380     } else {
381 	img = src.channel(0).get_convolve(meanfilter);
382     }
383 
384     img.resize(32,32);
385     CImg<float> *C  = ph_dct_matrix(32);
386     CImg<float> Ctransp = C->get_transpose();
387 
388     CImg<float> dctImage = (*C)*img*Ctransp;
389 
390     CImg<float> subsec = dctImage.crop(1,1,8,8).unroll('x');;
391 
392     float median = subsec.median();
393     ulong64 one = 0x0000000000000001;
394     hash = 0x0000000000000000;
395     for (int i=0;i< 64;i++){
396 	float current = subsec(i);
397         if (current > median)
398 	    hash |= one;
399 	one = one << 1;
400     }
401 
402     delete C;
403 
404     return 0;
405 }
406 
407 #ifdef HAVE_PTHREAD
ph_image_thread(void * p)408 void *ph_image_thread(void *p)
409 {
410         slice *s = (slice *)p;
411         for(int i = 0; i < s->n; ++i)
412         {
413                 DP *dp = (DP *)s->hash_p[i];
414 		ulong64 hash;
415 		int ret = ph_dct_imagehash(dp->id, hash);
416                 dp->hash = (ulong64*)malloc(sizeof(hash));
417 		memcpy(dp->hash, &hash, sizeof(hash));
418                 dp->hash_length = 1;
419         }
420 }
421 
ph_dct_image_hashes(char * files[],int count,int threads)422 DP** ph_dct_image_hashes(char *files[], int count, int threads)
423 {
424        	if(!files || count <= 0)
425                 return NULL;
426 
427         int num_threads;
428         if(threads > count)
429         {
430                 num_threads = count;
431         }
432 	else if(threads > 0)
433         {
434                 num_threads = threads;
435         }
436 	else
437 	{
438                 num_threads = ph_num_threads();
439         }
440 
441 	DP **hashes = (DP**)malloc(count*sizeof(DP*));
442 
443         for(int i = 0; i < count; ++i)
444         {
445                 hashes[i] = (DP *)malloc(sizeof(DP));
446                 hashes[i]->id = strdup(files[i]);
447 	}
448 
449 	pthread_t thds[num_threads];
450 
451         int rem = count % num_threads;
452         int start = 0;
453         int off = 0;
454         slice *s = new slice[num_threads];
455         for(int n = 0; n < num_threads; ++n)
456         {
457                 off = (int)floor((count/(float)num_threads) + (rem>0?num_threads-(count % num_threads):0));
458 
459                 s[n].hash_p = &hashes[start];
460                 s[n].n = off;
461                 s[n].hash_params = NULL;
462                 start += off;
463                 --rem;
464                 pthread_create(&thds[n], NULL, ph_image_thread, &s[n]);
465         }
466 	for(int i = 0; i < num_threads; ++i)
467         {
468                 pthread_join(thds[i], NULL);
469         }
470 	delete[] s;
471 
472         return hashes;
473 
474 }
475 #endif
476 
477 
478 #endif
479 
480 #if defined(HAVE_VIDEO_HASH) && defined(HAVE_IMAGE_HASH)
481 
482 
ph_getKeyFramesFromVideo(const char * filename)483 CImgList<uint8_t>* ph_getKeyFramesFromVideo(const char *filename){
484 
485     long N =  GetNumberVideoFrames(filename);
486 
487     if (N < 0){
488 	return NULL;
489     }
490 
491     float frames_per_sec = 0.5*fps(filename);
492     if (frames_per_sec < 0){
493 	return NULL;
494     }
495 
496     int step = (int)(frames_per_sec + ROUNDING_FACTOR(frames_per_sec));
497     long nbframes = (long)(N/step);
498 
499     float *dist = (float*)malloc((nbframes)*sizeof(float));
500     if (!dist){
501 	return NULL;
502     }
503     CImg<float> prev(64,1,1,1,0);
504 
505     VFInfo st_info;
506     st_info.filename = filename;
507     st_info.nb_retrieval = 100;
508     st_info.step = step;
509     st_info.pixelformat = 0;
510     st_info.pFormatCtx = NULL;
511     st_info.width = -1;
512     st_info.height = -1;
513 
514     CImgList<uint8_t> *pframelist = new CImgList<uint8_t>();
515     if (!pframelist){
516 	return NULL;
517     }
518     int nbread = 0;
519     int k=0;
520     do {
521 	nbread = NextFrames(&st_info, pframelist);
522 	if (nbread < 0){
523 	    delete pframelist;
524 	    free(dist);
525 	    return NULL;
526 	}
527 	unsigned int i = 0;
528         while ((i < pframelist->size()) && (k < nbframes)){
529 	    CImg<uint8_t> current = pframelist->at(i++);
530 	    CImg<float> hist = current.get_histogram(64,0,255);
531             float d = 0.0;
532 	    dist[k] = 0.0;
533 	    cimg_forX(hist,X){
534 		d =  hist(X) - prev(X);
535 		d = (d>=0) ? d : -d;
536 		dist[k] += d;
537 		prev(X) = hist(X);
538 	    }
539             k++;
540 	}
541         pframelist->clear();
542     } while ((nbread >= st_info.nb_retrieval)&&(k < nbframes));
543     vfinfo_close(&st_info);
544 
545     int S = 10;
546     int L = 50;
547     int alpha1 = 3;
548     int alpha2 = 2;
549     int s_begin, s_end;
550     int l_begin, l_end;
551     uint8_t *bnds = (uint8_t*)malloc(nbframes*sizeof(uint8_t));
552     if (!bnds){
553 	delete pframelist;
554 	free(dist);
555 	return NULL;
556     }
557 
558     int nbboundaries = 0;
559     k = 1;
560     bnds[0] = 1;
561     do {
562 	s_begin = (k-S >= 0) ? k-S : 0;
563 	s_end   = (k+S < nbframes) ? k+S : nbframes-1;
564 	l_begin = (k-L >= 0) ? k-L : 0;
565 	l_end   = (k+L < nbframes) ? k+L : nbframes-1;
566 
567 	/* get global average */
568 	float ave_global, sum_global = 0.0, dev_global = 0.0;
569 	for (int i=l_begin;i<=l_end;i++){
570 	    sum_global += dist[i];
571 	}
572 	ave_global = sum_global/((float)(l_end-l_begin+1));
573 
574 	/*get global deviation */
575 	for (int i=l_begin;i<=l_end;i++){
576 	    float dev = ave_global - dist[i];
577 	    dev = (dev >= 0) ? dev : -1*dev;
578 	    dev_global += dev;
579 	}
580 	dev_global = dev_global/((float)(l_end-l_begin+1));
581 
582 	/* global threshold */
583 	float T_global = ave_global + alpha1*dev_global;
584 
585 	/* get local maximum */
586 	int localmaxpos = s_begin;
587 	for (int i=s_begin;i<=s_end;i++){
588 	    if (dist[i] > dist[localmaxpos])
589 		localmaxpos = i;
590 	}
591         /* get 2nd local maximum */
592 	int localmaxpos2 = s_begin;
593 	float localmax2 = 0;
594 	for (int i=s_begin;i<=s_end;i++){
595 	    if (i == localmaxpos)
596 		continue;
597 	    if (dist[i] > localmax2){
598 		localmaxpos2 = i;
599 		localmax2 = dist[i];
600 	    }
601 	}
602         float T_local = alpha2*dist[localmaxpos2];
603 	float Thresh = (T_global >= T_local) ? T_global : T_local;
604 
605 	if ((dist[k] == dist[localmaxpos])&&(dist[k] > Thresh)){
606 	    bnds[k] = 1;
607 	    nbboundaries++;
608 	}
609 	else {
610 	    bnds[k] = 0;
611 	}
612 	k++;
613     } while ( k < nbframes-1);
614     bnds[nbframes-1]=1;
615     nbboundaries += 2;
616 
617     int start = 0;
618     int end = 0;
619     int nbselectedframes = 0;
620     do {
621 	/* find next boundary */
622 	do {end++;} while ((bnds[end]!=1)&&(end < nbframes));
623 
624 	/* find min disparity within bounds */
625 	int minpos = start+1;
626 	for (int i=start+1; i < end;i++){
627 	    if (dist[i] < dist[minpos])
628 		minpos = i;
629 	}
630 	bnds[minpos] = 2;
631 	nbselectedframes++;
632 	start = end;
633     } while (start < nbframes-1);
634 
635     st_info.nb_retrieval = 1;
636     st_info.width = 32;
637     st_info.height = 32;
638     k = 0;
639     do {
640 	if (bnds[k]==2){
641 	    if (ReadFrames(&st_info, pframelist, k*st_info.step,k*st_info.step + 1) < 0){
642 		delete pframelist;
643 		free(dist);
644 		return NULL;
645 	    }
646 	}
647 	k++;
648     } while (k < nbframes);
649     vfinfo_close(&st_info);
650 
651     free(bnds);
652     bnds = NULL;
653     free(dist);
654     dist = NULL;
655 
656     return pframelist;
657 }
658 
659 
ph_dct_videohash(const char * filename,int & Length)660 ulong64* ph_dct_videohash(const char *filename, int &Length){
661 
662     CImgList<uint8_t> *keyframes = ph_getKeyFramesFromVideo(filename);
663     if (keyframes == NULL)
664 	return NULL;
665 
666     Length = keyframes->size();
667 
668     ulong64 *hash = (ulong64*)malloc(sizeof(ulong64)*Length);
669     CImg<float> *C = ph_dct_matrix(32);
670     CImg<float> Ctransp = C->get_transpose();
671     CImg<float> dctImage;
672     CImg<float> subsec;
673     CImg<uint8_t> currentframe;
674 
675     for (unsigned int i=0;i < keyframes->size(); i++){
676 	currentframe = keyframes->at(i);
677 	currentframe.blur(1.0);
678 	dctImage = (*C)*(currentframe)*Ctransp;
679 	subsec = dctImage.crop(1,1,8,8).unroll('x');
680 	float med = subsec.median();
681 	hash[i] =     0x0000000000000000;
682 	ulong64 one = 0x0000000000000001;
683 	for (int j=0;j<64;j++){
684 	    if (subsec(j) > med)
685 		hash[i] |= one;
686 	    one = one << 1;
687 	}
688     }
689 
690     keyframes->clear();
691     delete keyframes;
692     keyframes = NULL;
693     delete C;
694     C = NULL;
695     return hash;
696 }
697 
698 #ifdef HAVE_PTHREAD
ph_video_thread(void * p)699 void *ph_video_thread(void *p)
700 {
701         slice *s = (slice *)p;
702         for(int i = 0; i < s->n; ++i)
703         {
704                 DP *dp = (DP *)s->hash_p[i];
705 		int N;
706 		ulong64 *hash = ph_dct_videohash(dp->id, N);
707 		if(hash)
708 		{
709                 	dp->hash = hash;
710 	                dp->hash_length = N;
711 		}
712 		else
713 		{
714 			dp->hash = NULL;
715 			dp->hash_length = 0;
716 		}
717         }
718 }
719 
ph_dct_video_hashes(char * files[],int count,int threads)720 DP** ph_dct_video_hashes(char *files[], int count, int threads)
721 {
722        	if(!files || count <= 0)
723                 return NULL;
724 
725         int num_threads;
726         if(threads > count)
727         {
728                 num_threads = count;
729         }
730 	else if(threads > 0)
731         {
732                 num_threads = threads;
733         }
734 	else
735 	{
736                 num_threads = ph_num_threads();
737         }
738 
739 	DP **hashes = (DP**)malloc(count*sizeof(DP*));
740 
741         for(int i = 0; i < count; ++i)
742         {
743                 hashes[i] = (DP *)malloc(sizeof(DP));
744                 hashes[i]->id = strdup(files[i]);
745 	}
746 
747 	pthread_t thds[num_threads];
748 
749         int rem = count % num_threads;
750         int start = 0;
751         int off = 0;
752         slice *s = new slice[num_threads];
753         for(int n = 0; n < num_threads; ++n)
754         {
755                 off = (int)floor((count/(float)num_threads) + (rem>0?num_threads-(count % num_threads):0));
756 
757                 s[n].hash_p = &hashes[start];
758                 s[n].n = off;
759                 s[n].hash_params = NULL;
760                 start += off;
761                 --rem;
762                 pthread_create(&thds[n], NULL, ph_video_thread, &s[n]);
763         }
764 	for(int i = 0; i < num_threads; ++i)
765         {
766                 pthread_join(thds[i], NULL);
767         }
768 	delete[] s;
769 
770         return hashes;
771 
772 }
773 #endif
774 
ph_dct_videohash_dist(ulong64 * hashA,int N1,ulong64 * hashB,int N2,int threshold)775 double ph_dct_videohash_dist(ulong64 *hashA, int N1, ulong64 *hashB, int N2, int threshold){
776 
777     int den = (N1 <= N2) ? N1 : N2;
778     int C[N1+1][N2+1];
779 
780     for (int i=0;i<N1+1;i++){
781 	C[i][0] = 0;
782     }
783     for (int j=0;j<N2+1;j++){
784 	C[0][j] = 0;
785     }
786     for (int i=1;i<N1+1;i++){
787 	for (int j=1;j<N2+1;j++){
788 	    int d = ph_hamming_distance(hashA[i-1],hashB[j-1]);
789 	    if (d <= threshold){
790 		C[i][j] = C[i-1][j-1] + 1;
791 	    } else {
792 		C[i][j] = ((C[i-1][j] >= C[i][j-1])) ? C[i-1][j] : C[i][j-1];
793 	    }
794 	}
795     }
796 
797     double result = (double)(C[N1][N2])/(double)(den);
798 
799     return result;
800 }
801 
802 #endif
803 
ph_hamming_distance(const ulong64 hash1,const ulong64 hash2)804 int ph_hamming_distance(const ulong64 hash1,const ulong64 hash2){
805     ulong64 x = hash1^hash2;
806     const ulong64 m1  = 0x5555555555555555ULL;
807     const ulong64 m2  = 0x3333333333333333ULL;
808     const ulong64 h01 = 0x0101010101010101ULL;
809     const ulong64 m4  = 0x0f0f0f0f0f0f0f0fULL;
810     x -= (x >> 1) & m1;
811     x = (x & m2) + ((x >> 2) & m2);
812     x = (x + (x >> 4)) & m4;
813     return (x * h01)>>56;
814 }
815 
ph_malloc_datapoint(int hashtype)816 DP* ph_malloc_datapoint(int hashtype){
817     DP* dp = (DP*)malloc(sizeof(DP));
818     dp->hash = NULL;
819     dp->id = NULL;
820     dp->hash_type = hashtype;
821     return dp;
822 }
ph_free_datapoint(DP * dp)823 void ph_free_datapoint(DP *dp){
824     if (!dp){
825 	return;
826     }
827     free(dp);
828     dp=NULL;
829     return;
830 }
831 
832 
833 #ifdef HAVE_IMAGE_HASH
834 
GetMHKernel(float alpha,float level)835 CImg<float>* GetMHKernel(float alpha, float level){
836     int sigma = (int)4*pow((float)alpha,(float)level);
837     static CImg<float> *pkernel = NULL;
838     float xpos, ypos, A;
839     if (!pkernel){
840 	pkernel = new CImg<float>(2*sigma+1,2*sigma+1,1,1,0);
841         cimg_forXY(*pkernel,X,Y){
842 	    xpos = pow(alpha,-level)*(X-sigma);
843             ypos = pow(alpha,-level)*(Y-sigma);
844             A = xpos*xpos + ypos*ypos;
845             pkernel->atXY(X,Y) = (2-A)*exp(-A/2);
846 	}
847     }
848     return pkernel;
849 }
850 
ph_mh_imagehash(const char * filename,int & N,float alpha,float lvl)851 uint8_t* ph_mh_imagehash(const char *filename, int &N,float alpha, float lvl){
852     if (filename == NULL){
853 	return NULL;
854     }
855     uint8_t *hash = (unsigned char*)malloc(72*sizeof(uint8_t));
856     N = 72;
857 
858     CImg<uint8_t> src(filename);
859     CImg<uint8_t> img;
860 
861     if (src.spectrum() == 3){
862 	img = src.get_RGBtoYCbCr().channel(0).blur(1.0).resize(512,512,1,1,5).get_equalize(256);
863     } else{
864 	img = src.channel(0).get_blur(1.0).resize(512,512,1,1,5).get_equalize(256);
865     }
866     src.clear();
867 
868     CImg<float> *pkernel = GetMHKernel(alpha,lvl);
869     CImg<float> fresp =  img.get_correlate(*pkernel);
870     img.clear();
871     fresp.normalize(0,1.0);
872     CImg<float> blocks(31,31,1,1,0);
873     for (int rindex=0;rindex < 31;rindex++){
874 		for (int cindex=0;cindex < 31;cindex++){
875 			blocks(rindex,cindex) = fresp.get_crop(rindex*16,cindex*16,rindex*16+16-1,cindex*16+16-1).sum();
876 		}
877     }
878     int hash_index;
879     int nb_ones = 0, nb_zeros = 0;
880     int bit_index = 0;
881     unsigned char hashbyte = 0;
882     for (int rindex=0;rindex < 31-2;rindex+=4){
883 		CImg<float> subsec;
884 		for (int cindex=0;cindex < 31-2;cindex+=4){
885 			subsec = blocks.get_crop(cindex,rindex, cindex+2, rindex+2).unroll('x');
886 			float ave = subsec.mean();
887 			cimg_forX(subsec, I){
888 				hashbyte <<= 1;
889 				if (subsec(I) > ave){
890 					hashbyte |= 0x01;
891 					nb_ones++;
892 				} else {
893 					nb_zeros++;
894 				}
895 				bit_index++;
896 				if ((bit_index%8) == 0){
897 					hash_index = (int)(bit_index/8) - 1;
898 					hash[hash_index] = hashbyte;
899 					hashbyte = 0x00;
900 				}
901 			}
902 		}
903 	}
904 
905     return hash;
906 }
907 #endif
908 
ph_readfilenames(const char * dirname,int & count)909 char** ph_readfilenames(const char *dirname,int &count){
910     count = 0;
911     struct dirent *dir_entry;
912     DIR *dir = opendir(dirname);
913     if (!dir)
914         return NULL;
915 
916     /*count files */
917     while ((dir_entry = readdir(dir)) != NULL){
918 	if (strcmp(dir_entry->d_name, ".") && strcmp(dir_entry->d_name,".."))
919 	    count++;
920     }
921 
922     /* alloc list of files */
923     char **files = (char**)malloc(count*sizeof(*files));
924     if (!files)
925 	return NULL;
926 
927     errno = 0;
928     int index = 0;
929     char path[1024];
930     path[0] = '\0';
931     rewinddir(dir);
932     while ((dir_entry = readdir(dir)) != 0){
933 	if (strcmp(dir_entry->d_name,".") && strcmp(dir_entry->d_name,"..")){
934 	    strcat(path, dirname);
935 	    strcat(path, "/");
936 	    strcat(path, dir_entry->d_name);
937 	    files[index++] = strdup(path);
938 	}
939         path[0]='\0';
940     }
941     if (errno)
942 	return NULL;
943     closedir(dir);
944     return files;
945 }
946 
947 
ph_bitcount8(uint8_t val)948 int ph_bitcount8(uint8_t val){
949     int num = 0;
950     while (val){
951 	++num;
952 	val &= val - 1;
953     }
954     return num;
955 }
956 
957 
958 
ph_hammingdistance2(uint8_t * hashA,int lenA,uint8_t * hashB,int lenB)959 double ph_hammingdistance2(uint8_t *hashA, int lenA, uint8_t *hashB, int lenB){
960     if (lenA != lenB){
961 	return -1.0;
962     }
963     if ((hashA == NULL) || (hashB == NULL) || (lenA <= 0)){
964 	return -1.0;
965     }
966     double dist = 0;
967     uint8_t D = 0;
968     for (int i=0;i<lenA;i++){
969 	D = hashA[i]^hashB[i];
970 	dist = dist + (double)ph_bitcount8(D);
971     }
972     double bits = (double)lenA*8;
973     return dist/bits;
974 
975 }
976 
977 
ph_texthash(const char * filename,int * nbpoints)978 TxtHashPoint* ph_texthash(const char *filename,int *nbpoints){
979     int count;
980     TxtHashPoint *TxtHash = NULL;
981     TxtHashPoint WinHash[WindowLength];
982     char kgram[KgramLength];
983 
984     FILE *pfile = fopen(filename,"r");
985     if (!pfile){
986 	return NULL;
987     }
988     struct stat fileinfo;
989     fstat(fileno(pfile),&fileinfo);
990     count = fileinfo.st_size - WindowLength + 1;
991     count = (int)(0.01*count);
992     int d;
993     ulong64 hashword = 0ULL;
994 
995     TxtHash = (TxtHashPoint*)malloc(count*sizeof(struct ph_hash_point));
996     if (!TxtHash){
997 	return NULL;
998     }
999     *nbpoints=0;
1000     int i, first=0, last=KgramLength-1;
1001     int text_index = 0;
1002     int win_index = 0;
1003     for (i=0;i < KgramLength;i++){    /* calc first kgram */
1004 	d = fgetc(pfile);
1005 	if (d == EOF){
1006 	    free(TxtHash);
1007 	    return NULL;
1008 	}
1009 	if (d <= 47)         /*skip cntrl chars*/
1010 	    continue;
1011 	if ( ((d >= 58)&&(d <= 64)) || ((d >= 91)&&(d <= 96)) || (d >= 123) ) /*skip punct*/
1012 	    continue;
1013 	if ((d >= 65)&&(d<=90))       /*convert upper to lower case */
1014 	    d = d + 32;
1015 
1016 	kgram[i] = (char)d;
1017         hashword = hashword << delta;   /* rotate left or shift left ??? */
1018         hashword = hashword^textkeys[d];/* right now, rotate breaks it */
1019     }
1020 
1021     WinHash[win_index].hash = hashword;
1022     WinHash[win_index++].index = text_index;
1023     struct ph_hash_point minhash;
1024     minhash.hash = ULLONG_MAX;
1025     minhash.index = 0;
1026     struct ph_hash_point prev_minhash;
1027     prev_minhash.hash = ULLONG_MAX;
1028     prev_minhash.index = 0;
1029 
1030     while ((d=fgetc(pfile)) != EOF){    /*remaining kgrams */
1031         text_index++;
1032 	if (d == EOF){
1033 	    free(TxtHash);
1034 	    return NULL;
1035 	}
1036 	if (d <= 47)         /*skip cntrl chars*/
1037 	    continue;
1038 	if ( ((d >= 58)&&(d <= 64)) || ((d >= 91)&&(d <= 96)) || (d >= 123) ) /*skip punct*/
1039 	    continue;
1040 	if ((d >= 65)&&(d<=90))       /*convert upper to lower case */
1041 	    d = d + 32;
1042 
1043 	ulong64 oldsym = textkeys[kgram[first%KgramLength]];
1044 
1045 	/* rotate or left shift ??? */
1046 	/* right now, rotate breaks it */
1047 	oldsym = oldsym << delta*KgramLength;
1048 	hashword = hashword << delta;
1049 	hashword = hashword^textkeys[d];
1050 	hashword = hashword^oldsym;
1051 	kgram[last%KgramLength] = (char)d;
1052 	first++;
1053 	last++;
1054 
1055         WinHash[win_index%WindowLength].hash = hashword;
1056 	WinHash[win_index%WindowLength].index = text_index;
1057 	win_index++;
1058 
1059         if (win_index >= WindowLength){
1060 	    minhash.hash = ULLONG_MAX;
1061 	    for (i=win_index;i<win_index+WindowLength;i++){
1062 		if (WinHash[i%WindowLength].hash <= minhash.hash){
1063 		    minhash.hash = WinHash[i%WindowLength].hash;
1064 		    minhash.index = WinHash[i%WindowLength].index;
1065 		}
1066 	    }
1067             if (minhash.hash != prev_minhash.hash){
1068 		TxtHash[(*nbpoints)].hash = minhash.hash;
1069 		TxtHash[(*nbpoints)++].index = minhash.index;
1070 		prev_minhash.hash = minhash.hash;
1071 		prev_minhash.index = minhash.index;
1072 
1073 	    } else {
1074 		TxtHash[*nbpoints].hash = prev_minhash.hash;
1075 		TxtHash[(*nbpoints)++].index = prev_minhash.index;
1076 	    }
1077 	    win_index = 0;
1078 	}
1079     }
1080 
1081     fclose(pfile);
1082     return TxtHash;
1083 }
1084 
ph_compare_text_hashes(TxtHashPoint * hash1,int N1,TxtHashPoint * hash2,int N2,int * nbmatches)1085 TxtMatch* ph_compare_text_hashes(TxtHashPoint *hash1, int N1, TxtHashPoint *hash2,int N2, int *nbmatches){
1086 
1087     int max_matches = (N1 >= N2) ? N1:N2;
1088     TxtMatch *found_matches = (TxtMatch*)malloc(max_matches*sizeof(TxtMatch));
1089     if (!found_matches){
1090 	return NULL;
1091     }
1092 
1093     *nbmatches = 0;
1094     int i,j;
1095     for (i=0;i<N1;i++){
1096 	for (j=0;j<N2;j++){
1097 	    if (hash1[i].hash == hash2[j].hash){
1098 		int m = i + 1;
1099 		int n = j + 1;
1100                 int cnt = 1;
1101 		while((m < N1)&&(n < N2)&&(hash1[m++].hash == hash2[n++].hash)){
1102 		    cnt++;
1103 		}
1104                 found_matches[*nbmatches].first_index = i;
1105 		found_matches[*nbmatches].second_index = j;
1106 		found_matches[*nbmatches].length = cnt;
1107 		(*nbmatches)++;
1108 	    }
1109 	}
1110     }
1111     return found_matches;
1112 }
1113 
1114