# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/normalize.t'
# Note that because of the file paths used this must be run from the
# directory in which /t resides
#
# Last modified by : $Id: normalize.t,v 1.1.1.1 2013/06/26 02:38:12 tpederse Exp $
#########################

# change 'tests => 1' to 'tests => last_test_to_print';

use strict;
use warnings;

use Test::More tests => 29;

# set up file access in an OS neutral way
use File::Spec;

# Module names are quoted so that they are plain strings under 'strict subs'.
BEGIN {use_ok ('Text::Similarity')}
BEGIN {use_ok ('Text::Similarity::Overlaps')}

# write_test_file ($path, $content)
#
# Creates (or truncates) $path and writes $content to it.  Emits exactly one
# test -- whether the file could be opened -- so the test plan above stays in
# step with the number of temporary files.  Write and close failures are
# reported through diag rather than as extra tests.  Returns nothing.
sub write_test_file {
    my ($path, $content) = @_;

    my $opened = open (my $fh, '>', $path);
    ok ($opened, "open $path for writing") or return;

    (print {$fh} $content) or diag ("could not write to $path: $!");
    close ($fh) or diag ("could not close $path: $!");

    return;
}

# these results should be normalized

# this constant passing method not supported in 5.6
# my %opt_hash = (Text::Similarity::NORMALIZE => 1);

my %opt_hash = ('normalize' => 1);

my $overlapmod = Text::Similarity::Overlaps->new (\%opt_hash);
ok ($overlapmod, "created Text::Similarity::Overlaps object");

# create test files in such a way that their absolute location doesn't
# need to be known, and is hopefully portable across various os platforms

my @tempfiles = map { "tempfile$$.temp$_" } 0 .. 4;
my ($tempfile0, $tempfile1, $tempfile2, $tempfile3, $tempfile4) = @tempfiles;

# tempfile0 holds only white space, i.e. no words at all
write_test_file ($tempfile0, " \n");

write_test_file ($tempfile1, "aaa bbb ccc ddd eee fff ggg hhh\n");

write_test_file ($tempfile2, "aaa ccc eee ggg\n");

# same words as tempfile2, but with different white space
write_test_file ($tempfile3, "aaa ccc eee \n ggg\n");

# the trailing "\n" was previously written as a literal "n" ("bbbn")
write_test_file ($tempfile4,
    "this file has actual words, unlike the files with aaa bbb\n");

my $score;

# exact matching between two identical files
$score = $overlapmod->getSimilarity ($tempfile1, $tempfile1);
is ($score, 1, "self similarity of tempfile1");

$score = $overlapmod->getSimilarity ($tempfile2, $tempfile2);
is ($score, 1, "self similarity of tempfile2");

# self similarity of an empty file? call it 0 since nothing matches

$score = $overlapmod->getSimilarity ($tempfile0, $tempfile0);
is ($score, 0, "self similarity of tempfile0");

# exact matching between two files that only differ with white space

$score = $overlapmod->getSimilarity ($tempfile2, $tempfile3);
is ($score, 1, "similarity of tempfile2 and tempfile3");

# no match to an empty file (text0.txt)
# caused divide by zero error in 0.02

$score = $overlapmod->getSimilarity ($tempfile2, $tempfile0);
is ($score, 0, "similarity of tempfile2 and tempfile0");

$score = $overlapmod->getSimilarity ($tempfile0, $tempfile1);
is ($score, 0, "similarity of tempfile0 and tempfile1");

# partial match, above .5 score

$score = $overlapmod->getSimilarity ($tempfile1, $tempfile2);
cmp_ok ($score, '<', 1, "partial match of tempfile1 and tempfile2 is below 1");
cmp_ok ($score, '>', .5, "partial match of tempfile1 and tempfile2 is above .5");

# incidental match, small nonzero score

$score = $overlapmod->getSimilarity ($tempfile1, $tempfile4);
cmp_ok ($score, '<', .5, "incidental match of tempfile1 and tempfile4 is below .5");
cmp_ok ($score, '>', 0, "incidental match of tempfile1 and tempfile4 is above 0");

# Clean up at exit.  This is the final test of the plan; unlink returns the
# number of files it removed, so insist that every temporary file is gone
# rather than accepting a partial clean up.
END {
    is (unlink (@tempfiles), scalar @tempfiles,
        "removed all temporary files");
}

#
# now test with existing files
#

my $file1_txt = File::Spec->catfile ('t','file1.txt');
ok (-e $file1_txt, "$file1_txt exists");

my $file11_txt = File::Spec->catfile ('t','file11.txt');
ok (-e $file11_txt, "$file11_txt exists");

my $file2_txt = File::Spec->catfile ('t','file2.txt');
ok (-e $file2_txt, "$file2_txt exists");

my $file22_txt = File::Spec->catfile ('t','file22.txt');
ok (-e $file22_txt, "$file22_txt exists");

# self similarity

$score = $overlapmod->getSimilarity ($file1_txt, $file1_txt);
is ($score, 1, "self similarity file 1");

$score = $overlapmod->getSimilarity ($file2_txt, $file2_txt);
is ($score, 1, "self similarity file 2");

$score = $overlapmod->getSimilarity ($file11_txt, $file11_txt);
is ($score, 1, "self similarity file 11");

$score = $overlapmod->getSimilarity ($file22_txt, $file22_txt);
is ($score, 1, "self similarity file 22");

# file11 is single line version of file1
# file22 is single line version of file2

$score = $overlapmod->getSimilarity ($file1_txt, $file11_txt);
is ($score, 1, "self similarity file 1 and 11");

$score = $overlapmod->getSimilarity ($file2_txt, $file22_txt);
is ($score, 1, "self similarity file 2 and 22");