# WordNet::Similarity::PathFinder version 2.04
# (Last updated $Id: PathFinder.pm,v 1.39 2008/03/27 06:21:17 sidz1979 Exp $)
#
# Module containing path-finding code for the various measures of semantic
# relatedness.
#
# Copyright (c) 2005,
#
# Ted Pedersen, University of Minnesota Duluth
# tpederse at d.umn.edu
#
# Jason Michelizzi, University of Minnesota Duluth
# mich0212 at d.umn.edu
#
# Siddharth Patwardhan, University of Utah, Salt Lake City
# sidd at cs.utah.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to
#
# The Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
34# 35# ------------------------------------------------------------------ 36 37package WordNet::Similarity::PathFinder; 38 39=head1 NAME 40 41WordNet::Similarity::PathFinder - module to implement path finding methods 42(by node counting) for WordNet::Similarity measures of semantic relatedness 43 44=head1 SYNOPSIS 45 46 use WordNet::QueryData; 47 my $wn = WordNet::QueryData->new; 48 49 use WordNet::Similarity::PathFinder; 50 my $obj = WordNet::Similarity::PathFinder->new ($wn); 51 52 my $wps1 = 'winston_churchill#n#1'; 53 my $wps2 = 'england#n#1'; 54 55 # parseWps returns reference to an array that contains 56 # word1 pos1 sense1 offset1 word2 pos2 sense2 offset2 57 58 my $result = $obj->parseWps($wps1, $wps2); 59 print "@$result\n"; 60 61 # path is a reference to an array that contains the path between 62 # wps1 and wps2 expressed as a series of wps values 63 64 my @paths = $obj->getShortestPath($wps1, $wps2, 'n', 'wps'); 65 my ($length, $path) = @{shift @paths}; 66 defined $path or die "No path between synsets"; 67 print "shortest path between $wps1 and $wps2 is $length edges long\n"; 68 print "@$path\n"; 69 70 my $offset1 = $wn -> offset($wps1); 71 my $offset2 = $wn -> offset($wps2); 72 73 # path is a reference to an array that contains the path between 74 # offset1 and offset2 expressed as a series of offset values 75 76 my @paths = $obj->getShortestPath($offset1, $offset2, 'n', 'offset'); 77 my ($length, $path) = @{shift @paths}; 78 defined $path or die "No path between synsets"; 79 print "shortest path between $offset1 and $offset2 is $length edges long\n"; 80 print "@$path\n"; 81 82=head1 DESCRIPTION 83 84=head2 Introduction 85 86This class is derived from (i.e., is a sub-class of) WordNet::Similarity. 87 88The methods in this module are useful for finding paths between concepts 89in WordNet's 'is-a' taxonomies. Concept A is-a concept B if, and only if, 90B is a hypernym of A or A is in the hypernym tree of B. N.B., only nouns 91and verbs have hypernyms. 
92 93The methods that find path lengths (such as C<getShortestPath()> and 94C<getAllPaths()> compute the lengths using node-counting not edge-counting. 95In general, the length of a path using node-counting will always be one 96more than the length using edge-counting. For example, if concept A 97is a hyponym of concept B, then the path length between A and B using 98node-counting is 2, but the length using edge-counting is 1. Likewise, the 99path between A and A is 1 using node-counting and 0 using edge-counting. 100 101=head2 Methods 102 103This module inherits all the methods of WordNet::Similarity. Additionally, 104the following methods are also defined. 105 106=head3 Public methods 107 108=over 109 110=cut 111 112use strict; 113use warnings; 114use WordNet::Similarity; 115use File::Spec; 116 117our @ISA = qw/WordNet::Similarity/; 118 119our $VERSION = '2.04'; 120 121WordNet::Similarity::addConfigOption ('rootNode', 0, 'i', 1); 122 123=item $measure->setPosList(Z<>) 124 125Specifies the parts of speech that measures derived from this module 126support (namely, nouns and verbs). 127 128parameters: none 129 130returns: true 131 132=cut 133 134sub setPosList 135{ 136 my $self = shift; 137 $self->{n} = 1; 138 $self->{v} = 1; 139 return 1; 140} 141 142 143=item $self->traceOptions(Z<>) 144 145Overrides method of same name in WordNet::Similarity. Prints module-specific 146configuration options to the trace string (if tracing is on). PathFinder 147supports one module specific option: rootNode. 
148 149Parameters: none 150 151returns: nothing 152 153=cut 154 155sub traceOptions 156{ 157 my $self = shift; 158 $self->{traceString} .= "root node :: $self->{rootNode}\n"; 159 $self->SUPER::traceOptions(); 160} 161 162 163=item $measure->parseWps($synset1, $synset2) 164 165parameters: synset1, synset2 -- two synsets in wps format 166 167returns: a reference to an array, WordNet::Similarity::UNRELATED, or undef 168 169Overrides the parseWps() method in WordNet::Similarity in order to run 170additional checks, but calls WordNet::Similarity::parseWps() to get 171those checks accomplished as well. Thus, this method does everything 172that WordNet::Similarity::parseWps does. 173 174=over 175 176=item quote from WordNet::Similarity::parseWps: 177 178This method checks the format of the two input synsets by calling 179validateSynset() for each synset. 180 181If the synsets are in wps format, a reference to an array will be returned. 182This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2, 183$sense2, $offset2] where $word1 is the word part of $wps1, $pos1, is the 184part of speech of $wps1, $sense1 is the sense from $wps. $offset1 is the 185offset for $wps1. 186 187If an error occurs (such as a synset being poorly-formed), then undef 188is returned, the error level is set to non-zero, and an error message is 189appended to the error string. 190 191=back 192 193In addition, if the two synsets are from different parts of speech, then 194WordNet::Similarity::UNRELATED is returned, the error level is set to 1, and 195a message is appended to the error string. 196 197If either synset is not a noun or a verb, then the error level 198is set to 1, a message is appended to the error string, and undef 199is returned. 200 201If the synsets are in wps format, a reference to an array will be returned. 202This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2, 203$sense2, $offset2]. 
204 205=cut 206 207sub parseWps 208{ 209 my $self = shift; 210 my $ret = $self->SUPER::parseWps (@_); 211 my $class = ref $self || $self; 212 213 ref $ret or return $ret; 214 my ($w1, $pos1, $s1, $off1, $w2, $pos2, $s2, $off2) = @{$ret}; 215 216 # check to make sure both input words are of the same part of speech 217 if ($pos1 ne $pos2) { 218 $self->{error} = $self->{error} < 1 ? 1 : $self->{error}; 219 $self->{errorString} .= "\nWarning (${class}::parseWps()) - "; 220 $self->{errorString} .= 221 "$w1#$pos1 and $w2#$pos2 belong to different parts of speech."; 222 if ($self->{trace}) { 223 $self->{traceString} .= "\n"; 224 $self->printSet ($pos1, 'wps', "$w1#$pos1#$s1"); 225 $self->{traceString} .= " and "; 226 $self->printSet ($pos2, 'wps', "$w2#$pos2#$s2"); 227 $self->{traceString} .= " belong to different parts of speech."; 228 } 229 return $self->UNRELATED; 230 } 231 232 # check to make sure that the pos is a noun or verb 233 if (index ("nv", $pos1) < $[) { 234 if ($self->{trace}) { 235 $self->{traceString} .= 236 "Only verbs and nouns have hypernym trees ($w1#$pos1, $w2#$pos2).\n"; 237 } 238 $self->{error} = $self->{error} < 1 ? 1 : $self->{error}; 239 $self->{errorString} .= "\nWarning (${class}::parseWps()) - "; 240 $self->{errorString} .= 241 "Only verbs and nouns have hypernym trees ($w1#$pos1, $w2#$pos2)."; 242 return undef; 243 } 244 245 return $ret; 246} 247 248 249=item $measure->getShortestPath($synset1, $synset2, $pos, $mode) 250 251Given two input synsets, returns the shortest path between the two synsets. 252 253Parameters: two synsets, a part-of-speech, and a mode indicator 254(i.e., the string 'offset' or 'wps'). If the mode is 'offset', then the 255synsets should be WordNet offsets. If the mode is 'wps', then the synsets 256should be in word#pos#sense format. 257 258Returns: a list of references to arrays. 
Each array has the form 259C<($path_length, $path_ref)> where $path_ref is 260a reference to an array whose elements are the synsets along the shortest 261path between the two input synsets. There will be as many array references 262returned as there are shortest paths between the synsets. That is, there 263will be no arrays returned if there is no path between the synsets, and there 264will be at least one array returned if there is a path between the synsets. 265If there are multiple paths tied for being shortest in length, then all 266those paths are returned (hence, this is why multiple array references 267can be returned). 268 269Upon error, returns undef, sets the error level to non-zero, and appends 270a message to the error string. 271 272=cut 273 274sub getShortestPath 275{ 276 my $self = shift; 277 my $synset1 = shift; 278 my $synset2 = shift; 279 my $pos = shift; 280 my $mode = shift; 281 282 my $class = ref $self || $self; 283 my $wn = $self->{wn}; 284 285 # JM 2/9/04 - we do this in validateSynset() now 286 #if ($mode eq 'wps') { 287 # # this prevents problems when the two input words are different word 288 # # senses from the same synset (e.g., car#n#1 and auto#n#1) 289 # ($synset1) = $wn->querySense ($synset1, "syns"); 290 # ($synset2) = $wn->querySense ($synset2, "syns"); 291 #} 292 293 my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode); 294 295 # check to see if any paths were found; if none were found, then 296 # $paths[0] will be undefined 297 unless (defined $paths[0]) { 298 $self->{error} = $self->{error} < 1 ? 
1 : $self->{error}; 299 $self->{errorString} .= "\nWarning (${class}::getShortestPath()) - "; 300 301 my ($wps1, $wps2) = ($synset1, $synset2); 302 if ($mode eq 'offset') { 303 $wps1 = $wn->getSense ($synset1, $pos); 304 $wps2 = $wn->getSense ($synset2, $pos); 305 } 306 $self->{errorString} .= "No path between synsets $wps1 and $wps2 found."; 307 308 if ($self->{trace}) { 309 $self->{traceString} .= "\nNo path between synsets "; 310 $self->printSet ($pos, 'wps', $wps1); 311 $self->{traceString} .= " and "; 312 $self->printSet ($pos, 'wps', $wps2); 313 $self->{traceString} .= " found."; 314 } 315 return undef; 316 } 317 318 my $best_length = $paths[0]->[1]; 319 320 my @return = ([$paths[0]->[1], $paths[0]->[2]]); 321 322 foreach (1..$#paths) { 323 last if $paths[$_]->[1] > $best_length; 324 push @return, [$paths[$_]->[1], $paths[$_]->[2]]; 325 } 326 327 #my $length = $paths[0]->[1]; 328 #my $path = $paths[0]->[2]; 329 330 if ($self->{trace}) { 331 for (@return) { 332 $self->{traceString} .= "\nShortest path: "; 333 $self->printSet ($pos, $mode, @{$_->[1]}); 334 $self->{traceString} .= "\nPath length = " . $_->[0]; 335 } 336 } 337 return @return; 338} 339 340 341=item $measure->getAllPaths($synset1, $synset2, $pos, $mode) 342 343Given two input synsets, returns all the paths between the two synsets. 344 345Parameters: a reference to the object, two synsets, a part-of-speech, and 346a mode indicator (the string 'offset' or 'wps'). 347 348If the mode is 'offset', then the synsets should be WordNet offsets. If the 349mode is 'wps', then they should be strings in word#pos#sense format. 350 351Returns: A list of all paths, sorted by path length in ascending order. The 352format for each item in the list is a reference to an array that has the 353format: [$top, $length, [@synsets_list]] where @synset_list is a list 354of synsets along the path (including the two input synsets) 355 356Returns undef on error. 
357 358=cut 359 360sub getAllPaths 361{ 362 my $self = shift; 363 my $class = ref $self || $self; 364 my $synset1 = shift; 365 my $synset2 = shift; 366 my $pos = shift; 367 my $mode = shift; 368 369 if (($mode ne 'offset') && ($mode ne 'wps')) { 370 $self->{error} = $self->{error} < 1 ? 1 : $self->{error}; 371 $self->{errorString} .= "\nWarning (${class}::getAllPaths()) - "; 372 $self->{errorString} .= "Mode must be either 'offset' or 'wps'"; 373 return undef; 374 } 375 376 my @lTrees = $self->_getHypernymTrees ($synset1, $pos, $mode); 377 my @rTrees = $self->_getHypernymTrees ($synset2, $pos, $mode); 378 379 # [trace] 380 if($self->{trace}) { 381 foreach my $lTree (@lTrees) { 382 $self->{traceString} .= "HyperTree: "; 383 $self->printSet ($pos, $mode, @$lTree); 384 $self->{traceString} .= "\n"; 385 } 386 foreach my $rTree (@rTrees) { 387 $self->{traceString} .= "HyperTree: "; 388 $self->printSet ($pos, $mode, @$rTree); 389 $self->{traceString} .= "\n"; 390 } 391 } 392 # [/trace] 393 394 # Find the length of each path in these trees. 395 my @return; 396# my $root = $mode eq 'offset' 397# ? 0 398# : ($pos eq 'n') ? 
$self->ROOT_N : $self->ROOT_V; 399 400 LTREE: 401 foreach my $lTree (@lTrees) { 402 RTREE: 403 foreach my $rTree (@rTrees) { 404 my $subsumer; 405 $subsumer = $self->_getSubsumerFromTrees ($lTree, $rTree, $mode); 406 407 next RTREE unless defined $subsumer; 408 #next RTREE if ($subsumer eq $root) and !$self->{rootNode}; 409 410 my $lCount = 0; 411 my @lpath; 412 foreach my $offset (reverse @{$lTree}) { 413 $lCount++; 414 last if($offset eq $subsumer); 415 push @lpath, $offset; 416 } 417 my $rCount = 0; 418 my @rpath; 419 foreach my $offset (reverse @{$rTree}) { 420 $rCount++; 421 last if($offset eq $subsumer); 422 unshift @rpath, $offset; 423 } 424 425 my $path = [@lpath, $subsumer, @rpath]; 426 427 push @return, [$subsumer, $rCount + $lCount - 1, $path]; 428 } 429 } 430 431 return sort {$a->[1] <=> $b->[1]} @return; 432} 433 434 435=item $measure->validateSynset($synset) 436 437parameters: synset -- a string in word#pos#sense format 438 439returns: a list or undef on error 440 441This method overrides the method of the same name in WordNet::Similarity 442to provide additional behavior but calls WordNet::Similarity::validateSynset 443to accomplish that method's behavior. Thus, this method does everything 444that WordNet::Similarity::validateSynset does. 445 446=over 447 448=item quote from WordNet::Similarity::validateSynset: 449 450This method does the following: 451 452=over 453 454=item 1. 455 456Verifies that the synset is well-formed (i.e., that it consists of three 457parts separated by #s, the pos is one of {n, v, a, r} and that sense 458is a natural number). A synset that matches the pattern '[^\#]+\#[nvar]\#\d+' 459is considered well-formed. 460 461=item 2. 462 463Checks if the synset exists by trying to find the offset for the synset 464 465=back 466 467=back 468 469This method, however, has a slightly different return value. Instead of 470merely breaking the synset into three parts, it returns the "safe" form 471of the synset. 
That is, if a synset has multiple word senses, this 472method returns the first word sense in that synset (this is so that 473other path-finding methods work properly). For example, if the input 474to this method is auto#n#1, the return value is ('car', 'n', 1, 2853224) 475since the sense 'car#n#1' is the first member of the synset to which 476'auto#n#1' belongs. 477 478If any of these tests fails, then the error level is set to non-zero, a 479message is appended to the error string, and undef is returned. 480 481=cut 482 483sub validateSynset 484{ 485 my $self = shift; 486 my $synset = shift; 487 my ($word, $pos, $sense, $offset) = $self->SUPER::validateSynset ($synset); 488 my $class = ref $self || $self; 489 490 # check to see if previous call encountered an error: 491 return undef if $self->{error}; 492 493 my @synset = $self->{wn}->querySense ($synset, "syns"); 494 my $safewps = shift @synset; 495 496 unless (defined $safewps) { 497 # safety check--we shouldn't ever get here. querySense shouldn't 498 # return undef unless the input synset is bad, but we've already 499 # checked that synset 500 $self->{error} = $self->{error} < 1 ? 1 : $self->{error}; 501 $self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; 502 $self->{errorString} .= "No synset appears to exist for $synset."; 503 return undef; 504 } 505 506 unless ($safewps =~ /^([^\s\#]+)\#([nvar])\#(\d+)$/) { 507 # we should never get here -- if QueryData doesn't return word senses 508 # in the right format, then we're in a lot of trouble... nevertheless, 509 # we check just to be sure 510 $self->{error} = $self->{error} < 1 ? 1 : $self->{error}; 511 $self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; 512 $self->{errorString} .= "Internal error: $safewps is not well-formed. 
Has WordNet or WordNet::QueryData changed format?"; 513 return undef; 514 } 515 516 return ($1, $2, $3, $offset); 517} 518 519 520=back 521 522=head3 Private methods 523 524=over 525 526=item $measure->_getHypernymTrees($synset, $pos, $mode) 527 528This method takes as input a synset and returns a list of references 529to arrays where these arrays are paths from the input synset to the 530top of the taxonomy (*Root*#[nv]#1 if the root node is on). 531 532Parameters: a synset, a part-of-speech, and a mode. 533The mode must be either the string 'wps' or 'offset'. If 534the mode is 'wps', then the synset must be in wps format; otherwise, it 535must be an offset. 536 537Returns: a list of references to arrays. These arrays are paths (hypernym 538trees). 539 540=cut 541 542# Suroutine that returns an array of hypernym trees, given the offset of 543# the synset. Each hypernym tree is an array of offsets. 544# INPUT PARAMS : $offset .. Offset of the synset. 545# : $pos .. Part of speech. 546# RETURN VALUES : (@tree1, @tree2, ...) .. an array of Hypernym trees (offsets) 547sub _getHypernymTrees 548{ 549 my $self = shift; 550 my $wn = $self->{wn}; 551 my $synset = shift; 552 my $pos = shift; 553 my $mode = shift; 554 my $curPath = shift; 555 $curPath = {} if(!defined($curPath)); 556 $curPath->{$synset} = 1; 557 558 559 my $wordForm = $synset; 560 if ($mode eq 'offset') { 561 # check if the input synset is one of the imaginary root nodes 562 if ($synset == 0) { 563 return ([0]); 564 } 565 $wordForm = $wn->getSense($synset, $pos); 566 } 567 else { 568 # check for root node 569 if ($synset =~ /\*ROOT\*/i) { 570 return ([$synset]); 571 } 572 } 573 574 my @hypernyms = $wn->querySense($wordForm, "hypes"); 575 my @returnArray = (); 576 if($#hypernyms < 0) { 577 my @tmpArray = $synset; 578 if ($self->{rootNode}) { 579 if ($mode eq 'offset') { 580 unshift @tmpArray, 0; 581 } 582 else { 583 unshift @tmpArray, ($pos eq 'n') ? 
$self->ROOT_N : $self->ROOT_V; 584 } 585 } 586 push @returnArray, [@tmpArray]; 587 } 588 else { 589 foreach my $hypernym (@hypernyms) { 590 my $hypesynset = $mode eq 'offset' ? $wn->offset ($hypernym) : $hypernym; 591 if(!defined($curPath->{$hypesynset})) 592 { 593 my %localCopy = %{$curPath}; 594 my @tmpArray = $self->_getHypernymTrees ($hypesynset, $pos, $mode, \%localCopy); 595 596 foreach my $element (@tmpArray) { 597 push @$element, $synset; 598 push @returnArray, [@$element]; 599 } 600 } 601 if(scalar(@returnArray) <= 0) { 602 my @tmpArray = $synset; 603 if ($self->{rootNode}) { 604 if ($mode eq 'offset') { 605 unshift @tmpArray, 0; 606 } 607 else { 608 unshift @tmpArray, ($pos eq 'n') ? $self->ROOT_N : $self->ROOT_V; 609 } 610 } 611 push @returnArray, [@tmpArray]; 612 } 613 } 614 } 615 return @returnArray; 616} 617 618=item getLCSbyPath($synset1, $synset2, $pos, $mode) 619 620Given two input synsets, finds the least common subsumer (LCS) of them. 621If there are multiple candidates for the LCS (due to multiple inheritance), 622the LCS that results in the shortest path between in input concepts is 623chosen. 624 625Parameters: two synsets, a part of speech, and a mode. 626 627Returns: a list of references to arrays where each array has the from 628C<($lcs, $pathlength)>. $pathlength is the length 629of the path between the two input concepts. There can be multiple LCSs 630returned if there are ties for the shortest path between the two synsets. 631Returns undef on error. 632 633=cut 634 635sub getLCSbyPath 636{ 637 my $self = shift; 638 my $synset1 = shift; 639 my $synset2 = shift; 640 my $pos = shift; 641 my $mode = shift; 642 my $class = ref $self || $self; 643 644 my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode); 645 646 # if no paths were found, $paths[0] should be undefined 647 unless (defined $paths[0]) { 648 $self->{error} = $self->{error} < 1 ? 
1 : $self->{error}; 649 $self->{errorString} .= "\nWarning (${class}::getLCSbyPath()) - "; 650 $self->{errorString} .= "No LCS found."; 651 return undef; 652 } 653 654 if ($self->{trace}) { 655 $self->{traceString} .= "Lowest Common Subsumer(s): "; 656 } 657 658 my @return; 659 660 # put the best LCS(s) into @return; do some tracing at the same time. 661 foreach my $pathref (@paths) { 662 if ($self->{trace}) { 663 # print path to trace string 664 $self->printSet ($pos, $mode, $pathref->[0]); 665 $self->{traceString} .= " (Length=".$pathref->[1].")\n"; 666 } 667 668 # push onto return array if this path length is tied for best 669 if ($pathref->[1] <= $paths[0]->[1]) { 670 push @return, [$pathref->[0], $pathref->[1]]; 671 } 672 } 673 674 if ($self->{trace}) { 675 $self->{traceString} .= "\n\n"; 676 } 677 678 return @return; 679} 680 681 682=item $measure->_getSubsumerFromTrees($treeref1, $treeref2, $mode) 683 684This subroutine returns takes two trees as produced by getHypernymTrees 685and returns the most specific subsumer from them. 686 687Parameters: two references to arrays, and 688a string indicating mode ('wps' or 'offset'). 689 690Returns: the subsumer or undef 691 692=cut 693 694sub _getSubsumerFromTrees 695{ 696 my $self = shift; 697 my $array1 = shift; 698 my $array2 = shift; 699 my $mode = shift; 700 my @tree1 = reverse @{$array1}; 701 my @tree2 = reverse @{$array2}; 702 my $class = ref $self || $self; 703 704 my $tmpString = " " . join (" ", @tree1) . " "; 705 706 foreach my $element (@tree2) { 707 my $pattern = ($mode eq 'offset') ? qr/ 0*$element / : qr/ \Q$element\E /; 708 if ($tmpString =~ /$pattern/) { 709 return $element; 710 } 711 } 712 713 # no common subsumer found, check to see if we are using a root node 714 return undef unless $self->{rootNode}; 715 716 $self->{error} = $self->{error} < 1 ? 
1 : $self->{error}; 717 $self->{error} .= "\nWarning (${class}::getSubsumerFromTrees()) - "; 718 $self->{errorString} .= "root node 'on' but no subsumer found."; 719 return undef; 720} 721 722=item getDepth() 723 724This method is non-functional and likely to be moved to a different module 725soon. 726 727=cut 728 729sub getDepth 730{ 731 use Carp; 732 croak "This method is non-functional"; 733 my $self = shift; 734 my $synset = shift; 735 my $pos = shift; 736 my $mode = shift; 737 my $class = ref $self || $self; 738 my $offset; 739 740 if ($mode eq 'offset') { 741 $offset = $synset; 742 return 1 if $offset == 0; 743 } 744 elsif ($mode eq 'wps') { 745 $offset = $self->{wn}->offset ($synset); 746 return 1 if $synset =~ /^\*Root\*/i; 747 } 748 else { 749 $self->{error} = $self->{error} < 1 ? 1 : $self->{error}; 750 $self->{errorString} .= "\nWarning (${class}::getAllPaths()) - "; 751 $self->{errorString} .= "Mode must be either 'offset' or 'wps'"; 752 return undef; 753 } 754 755 my $depth = $self->{depths}->{$pos}->{$offset}; 756 defined $depth and return $depth; 757 758 $self->{error} = $self->{error} < 1 ? 1 : $self->{error}; 759 $self->{errorString} .= "\nWarning (${class}::getDepth) - "; 760 $self->{errorString} .= "$synset appears to have undefined depth."; 761 return undef; 762} 763 764 7651; 766 767__END__ 768 769=back 770 771=head2 Discussion 772 773Many of the methods in this module can work with either offsets or 774wps strings internally. There are several interesting consequences 775of each mode. 776 777=over 778 779=item 1. 780 781An offset is not a unique identifier for a synset, but neither is 782a wps string. An offset only indicates a byte offset in one of the 783WordNet data files (data.noun, data.verb, etc. on Unix-like systems). 784An offset along with a part of speech, however, does uniquely identify 785a synset. 786 787A word#pos#sense string, on the other hand, is the opposite extreme. 
788A word#pos#sense string is an identifier for a unique word sense. A 789synset can have several word senses in it (i.e., a synset is a set 790of word senses that are synonymous). The synset {beer_mug#n#1, stein#n#1} 791has two word senses. The wps strings 'beer_mug#n#1' and 'stein#n#1' can 792both be used to refer to the synset. For simplicity, we usually just 793use the first wps string when referring to the synset. N.B., the 794wps representation was developed by WordNet::QueryData. 795 796=item 2. 797 798Early versions of WordNet::Similarity::* used offsets internally for 799finding paths, hypernym trees, subsumers, etc. The module WordNet::QueryData 800that is used by Similarity, however, accepts only wps strings as input 801to its querySense method, which is used to find hypernyms. We have found 802that it is more efficient (faster) to use wps strings internally. 803 804=back 805 806=head1 AUTHORS 807 808 Ted Pedersen, University of Minnesota Duluth 809 tpederse at d.umn.edu 810 811 Jason Michelizzi, University of Minnesota Duluth 812 mich0212 at d.umn.edu 813 814 Siddharth Patwardhan, University of Utah, Salt Lake City 815 sidd at cs.utah.edu 816 817=head1 BUGS 818 819None. 820 821=head1 SEE ALSO 822 823WordNet::Similarity(3) 824WordNet::Similarity::path(3) 825WordNet::Similarity::lch(3) 826WordNet::Similarity::wup(3) 827 828=head1 COPYRIGHT 829 830Copyright (c) 2005, Ted Pedersen, Siddharth Patwardhan and Jason Michelizzi 831 832This program is free software; you can redistribute it and/or modify it 833under the terms of the GNU General Public License as published by the Free 834Software Foundation; either version 2 of the License, or (at your option) 835any later version. 836 837This program is distributed in the hope that it will be useful, but 838WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 839or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 840for more details. 
841 842You should have received a copy of the GNU General Public License along 843with this program; if not, write to 844 845 The Free Software Foundation, Inc., 846 59 Temple Place - Suite 330, 847 Boston, MA 02111-1307, USA. 848 849Note: a copy of the GNU General Public License is available on the web 850at L<http://www.gnu.org/licenses/gpl.txt> and is included in this 851distribution as GPL.txt. 852 853=cut 854