1# Before `make install' is performed this script should be runnable with
2# `make test'. After `make install' it should work as `perl t/normalize.t'
3# Note that because of the file paths used this must be run from the
4# directory in which /t resides
5#
6# Last modified by : $Id: normalize.t,v 1.1.1.1 2013/06/26 02:38:12 tpederse Exp $
7#########################
8
9# change 'tests => 1' to 'tests => last_test_to_print';
10
11use Test::More tests => 29;
12
# Load both modules at compile time; each use_ok call counts as one test.
BEGIN { use_ok ('Text::Similarity') }
BEGIN { use_ok ('Text::Similarity::Overlaps') }
15
# these results should be normalized

# this constant passing method not supported in 5.6
# my %opt_hash = (Text::Similarity::NORMALIZE => 1);

# Options are passed to the constructor as a hash reference; the string
# key is used instead of the constant so the test also runs on perl 5.6.
my %opt_hash = ('normalize' => 1);

my $overlapmod = Text::Similarity::Overlaps->new (\%opt_hash);
ok ($overlapmod, "created Text::Similarity::Overlaps object with normalize => 1");
25
# create test files in such a way that their absolute location doesn't
# need to be known, and is hopefully portable across various os platforms
#
# The process id ($$) is part of every name, so the names differ from one
# test process to the next.

my $tempfile0 = "tempfile$$.temp0";
my $tempfile1 = "tempfile$$.temp1";
my $tempfile2 = "tempfile$$.temp2";
my $tempfile3 = "tempfile$$.temp3";
my $tempfile4 = "tempfile$$.temp4";

# Path and content of each temporary file, in creation order:
#   tempfile0 - white space only
#   tempfile1 - eight three-letter tokens
#   tempfile2 - every other token of tempfile1
#   tempfile3 - same tokens as tempfile2, separated by different white space
#   tempfile4 - ordinary words followed by "aaa bbb"
my @tempfile_contents = (
    [$tempfile0, "   \n"],
    [$tempfile1, "aaa bbb ccc ddd eee fff ggg hhh\n"],
    [$tempfile2, "aaa ccc eee ggg\n"],
    [$tempfile3, "aaa               ccc                 eee     \n ggg\n"],
    [$tempfile4, "this file has actual words, unlike the files with aaa bbb\n"],
);

# One test per file: it passes only if open, print and close all succeed.
foreach my $entry (@tempfile_contents) {
    my ($path, $text) = @$entry;
    my $written = 0;
    my $error   = '';

    # a lexical handle is private to this iteration, unlike a bareword
    # handle which is a package global
    if (open (my $fh, '>', $path)) {
        my $printed = print {$fh} $text;
        my $closed  = close ($fh);   # buffered write errors surface here
        $written = ($printed && $closed) ? 1 : 0;
        $error   = $! unless $written;
    }
    else {
        $error = $!;
    }

    ok ($written, "created $path") or diag ("could not write $path: $error");
}
55
# exact matching between two identical files: comparing a file with
# itself is expected to give the normalized score 1
{
    # lexicals scoped to this block replace the undeclared global $score
    my $score1 = $overlapmod->getSimilarity ($tempfile1, $tempfile1);
    is ($score1, 1, "self similarity of tempfile1");

    my $score2 = $overlapmod->getSimilarity ($tempfile2, $tempfile2);
    is ($score2, 1, "self similarity of tempfile2");
}
62
# self similarity of an empty file? call it 0 since nothing matches
# (tempfile0 is written with white space only)
{
    # lexical scoped to this block replaces the undeclared global $score
    my $score = $overlapmod->getSimilarity ($tempfile0, $tempfile0);
    is ($score, 0, "self similarity of tempfile0");
}
67
# exact matching between two files that only differ with white space
{
    # lexical scoped to this block replaces the undeclared global $score
    my $score = $overlapmod->getSimilarity ($tempfile2, $tempfile3);
    is ($score, 1, "similarity of tempfile2 and tempfile3");
}
72
# no match to an empty file (tempfile0), in either argument position
# caused divide by zero error in 0.02
{
    # lexicals scoped to this block replace the undeclared global $score
    my $score_empty_second = $overlapmod->getSimilarity ($tempfile2, $tempfile0);
    is ($score_empty_second, 0, "similarity of tempfile2 and tempfile0");

    my $score_empty_first = $overlapmod->getSimilarity ($tempfile0, $tempfile1);
    is ($score_empty_first, 0, "similarity of tempfile0 and tempfile1");
}
81
# partial match, above .5 score: the score must lie strictly between
# .5 and 1
{
    # lexical scoped to this block replaces the undeclared global $score
    my $score = $overlapmod->getSimilarity ($tempfile1, $tempfile2);
    cmp_ok ($score, '<', 1, "similarity of tempfile1 and tempfile2 is below 1");
    cmp_ok ($score, '>', .5, "similarity of tempfile1 and tempfile2 is above .5");
}
87
# incidental match, small nonzero score: the score must lie strictly
# between 0 and .5
{
    # lexical scoped to this block replaces the undeclared global $score
    my $score = $overlapmod->getSimilarity ($tempfile1, $tempfile4);
    cmp_ok ($score, '<', .5, "similarity of tempfile1 and tempfile4 is below .5");
    cmp_ok ($score, '>', 0, "similarity of tempfile1 and tempfile4 is above 0");
}
93
# Remove the temporary files when the script exits.  unlink returns the
# number of files it deleted, so the count is compared with the number of
# files; a plain truth test would pass even if only one file were removed.
END {
    my @tempfiles = ($tempfile0, $tempfile1, $tempfile2, $tempfile3, $tempfile4);
    is (unlink (@tempfiles), scalar (@tempfiles), "removed all temporary test files");
}
95
#
# now test with existing files
#

# set up file access in an OS neutral way: File::Spec->catfile joins the
# directory 't' and the file name, so the paths are relative to the
# current directory
use File::Spec;

my $file1_txt = File::Spec->catfile ('t','file1.txt');
ok (-e $file1_txt, "$file1_txt exists");

my $file11_txt = File::Spec->catfile ('t','file11.txt');
ok (-e $file11_txt, "$file11_txt exists");

my $file2_txt = File::Spec->catfile ('t','file2.txt');
ok (-e $file2_txt, "$file2_txt exists");

my $file22_txt = File::Spec->catfile ('t','file22.txt');
ok (-e $file22_txt, "$file22_txt exists");
114
# self similarity: each existing file compared with itself is expected
# to give the normalized score 1

foreach my $case (
    [$file1_txt,  "self similarity file 1"],
    [$file2_txt,  "self similarity file 2"],
    [$file11_txt, "self similarity file 11"],
    [$file22_txt, "self similarity file 22"],
) {
    my ($path, $description) = @$case;

    # lexical scoped to the loop body replaces the undeclared global $score
    my $score = $overlapmod->getSimilarity ($path, $path);
    is ($score, 1, $description);
}
128
# file11 is single line version of file1
# file22 is single line version of file2
# so each pair is expected to give the normalized score 1
{
    # lexicals scoped to this block replace the undeclared global $score
    my $score_1_11 = $overlapmod->getSimilarity ($file1_txt, $file11_txt);
    is ($score_1_11, 1, "similarity of file 1 and file 11");

    my $score_2_22 = $overlapmod->getSimilarity ($file2_txt, $file22_txt);
    is ($score_2_22, 1, "similarity of file 2 and file 22");
}
137
138