1# $Id: Negotiate.pm,v 1.1.1.1 2003/08/02 23:39:40 takezoe Exp $ 2# 3 4package HTTP::Negotiate; 5 6$VERSION = sprintf("%d.%02d", q$Revision: 1.1.1.1 $ =~ /(\d+)\.(\d+)/); 7sub Version { $VERSION; } 8 9require 5.002; 10require Exporter; 11@ISA = qw(Exporter); 12@EXPORT = qw(choose); 13 14require HTTP::Headers; 15 16$DEBUG = 0; 17 18sub choose ($;$) 19{ 20 my($variants, $request) = @_; 21 my(%accept); 22 23 unless (defined $request) { 24 # Create a request object from the CGI envirionment variables 25 $request = new HTTP::Headers; 26 $request->header('Accept', $ENV{HTTP_ACCEPT}) 27 if $ENV{HTTP_ACCEPT}; 28 $request->header('Accept-Charset', $ENV{HTTP_ACCEPT_CHARSET}) 29 if $ENV{HTTP_ACCEPT_CHARSET}; 30 $request->header('Accept-Encoding', $ENV{HTTP_ACCEPT_ENCODING}) 31 if $ENV{HTTP_ACCEPT_ENCODING}; 32 $request->header('Accept-Language', $ENV{HTTP_ACCEPT_LANGUAGE}) 33 if $ENV{HTTP_ACCEPT_LANGUAGE}; 34 } 35 36 # Get all Accept values from the request. Build a hash initialized 37 # like this: 38 # 39 # %accept = ( type => { 'audio/*' => { q => 0.2, mbx => 20000 }, 40 # 'audio/basic' => { q => 1 }, 41 # }, 42 # language => { 'no' => { q => 1 }, 43 # } 44 # ); 45 46 $request->scan(sub { 47 my($key, $val) = @_; 48 49 my $type; 50 if ($key =~ s/^Accept-//) { 51 $type = lc($key); 52 } 53 elsif ($key eq "Accept") { 54 $type = "type"; 55 } 56 else { 57 return; 58 } 59 60 $val =~ s/\s+//g; 61 my $default_q = 1; 62 for my $name (split(/,/, $val)) { 63 my(%param, $param); 64 if ($name =~ s/;(.*)//) { 65 for $param (split(/;/, $1)) { 66 my ($pk, $pv) = split(/=/, $param, 2); 67 $param{lc $pk} = $pv; 68 } 69 } 70 $name = lc $name; 71 if (defined $param{'q'}) { 72 $param{'q'} = 1 if $param{'q'} > 1; 73 $param{'q'} = 0 if $param{'q'} < 0; 74 } else { 75 $param{'q'} = $default_q; 76 77 # This makes sure that the first ones are slightly better off 78 # and therefore more likely to be chosen. 79 $default_q -= 0.0001; 80 } 81 $accept{$type}{$name} = \%param; 82 } 83 }); 84 85 # Check if any of the variants specify a language. We do this 86 # because it influences how we treat those without (they default to 87 # 0.5 instead of 1). 88 my $any_lang = 0; 89 for $var (@$variants) { 90 if ($var->[5]) { 91 $any_lang = 1; 92 last; 93 } 94 } 95 96 if ($DEBUG) { 97 print "Negotiation parameters in the request\n"; 98 for $type (keys %accept) { 99 print " $type:\n"; 100 for $name (keys %{$accept{$type}}) { 101 print " $name\n"; 102 for $pv (keys %{$accept{$type}{$name}}) { 103 print " $pv = $accept{$type}{$name}{$pv}\n"; 104 } 105 } 106 } 107 } 108 109 my @Q = (); # This is where we collect the results of the 110 # quality calcualtions 111 112 # Calculate quality for all the variants that are available. 113 for (@$variants) { 114 my($id, $qs, $ct, $enc, $cs, $lang, $bs) = @$_; 115 $qs = 1 unless defined $qs; 116 $ct = '' unless defined $ct; 117 $bs = 0 unless defined $bs; 118 $lang = lc($lang) if $lang; # lg tags are always case-insensitive 119 if ($DEBUG) { 120 print "\nEvaluating $id (ct='$ct')\n"; 121 printf " qs = %.3f\n", $qs; 122 print " enc = $enc\n" if $enc && !ref($enc); 123 print " enc = @$enc\n" if $enc && ref($enc); 124 print " cs = $cs\n" if $cs; 125 print " lang = $lang\n" if $lang; 126 print " bs = $bs\n" if $bs; 127 } 128 129 # Calculate encoding quality 130 my $qe = 1; 131 # If the variant has no assignes Content-Encoding, or if no 132 # Accept-Encoding field is present, then the value assigned 133 # is "qe=1". If *all* of the variant's content encoddings 134 # are listed in the Accept-Encoding field, then the value 135 # assigned is "qw=1". If *any* of the variant's content 136 # encodings are not listed in the provided Accept-Encoding 137 # field, then the value assigned is "qe=0" 138 if (exists $accept{'encoding'} && $enc) { 139 my @enc = ref($enc) ? @$enc : ($enc); 140 for (@enc) { 141 print "Is encoding $_ accepted? " if $DEBUG; 142 unless(exists $accept{'encoding'}{$_}) { 143 print "no\n" if $DEBUG; 144 $qe = 0; 145 last; 146 } else { 147 print "yes\n" if $DEBUG; 148 } 149 } 150 } 151 152 # Calculate charset quality 153 my $qc = 1; 154 # If the variant's media-type has not charset parameter, 155 # or the variant's charset is US-ASCII, or if no Accept-Charset 156 # field is present, then the value assigned is "qc=1". If the 157 # variant's charset is listed in the Accept-Charset field, 158 # then the value assigned is "qc=1. Otherwise, if the variant's 159 # charset is not listed in the provided Accept-Encoding field, 160 # then the value assigned is "qc=0". 161 if (exists $accept{'charset'} && $cs && $cs ne 'us-ascii' ) { 162 $qc = 0 unless $accept{'charset'}{$cs}; 163 } 164 165 # Calculate language quality 166 my $ql = 1; 167 if ($lang && exists $accept{'language'}) { 168 my @lang = ref($lang) ? @$lang : ($lang); 169 # If any of the variant's content languages are listed 170 # in the Accept-Language field, the the value assigned is 171 # the maximus of the "q" paramet values for thos language 172 # tags. 173 my $q = undef; 174 for (@lang) { 175 next unless exists $accept{'language'}{$_}; 176 my $this_q = $accept{'language'}{$_}{'q'}; 177 $q = $this_q unless defined $q; 178 $q = $this_q if $this_q > $q; 179 } 180 if(defined $q) { 181 $DEBUG and print " -- Exact language match at q=$q\n"; 182 } else { 183 # If there was no exact match and at least one of 184 # the Accept-Language field values is a complete 185 # subtag prefix of the content language tag(s), then 186 # the "q" parameter value of the largest matching 187 # prefix is used. 188 $DEBUG and print " -- No exact language match\n"; 189 my $selected = undef; 190 for $al (keys %{ $accept{'language'} }) { 191 if (substr($lang, 0, 1 + length($al)) eq "$al-") { 192 # $lang starting with $al isn't enough, or else 193 # Accept-Language: hu (Hungarian) would seem 194 # to accept a document in hup (Hupa) 195 $DEBUG and print " -- $lang ISA $al\n"; 196 $selected = $al unless defined $selected; 197 $selected = $al if length($al) > length($selected); 198 } else { 199 $DEBUG and print " -- $lang isn't a $al\n"; 200 } 201 } 202 $q = $accept{'language'}{$selected}{'q'} if $selected; 203 204 # If none of the variant's content language tags or 205 # tag prefixes are listed in the provided 206 # Accept-Language field, then the value assigned 207 # is "ql=0.001" 208 $q = 0.001 unless defined $q; 209 } 210 $ql = $q; 211 } else { 212 $ql = 0.5 if $any_lang && exists $accept{'language'}; 213 } 214 215 my $q = 1; 216 my $mbx = undef; 217 # If no Accept field is given, then the value assigned is "q=1". 218 # If at least one listed media range matches the variant's media 219 # type, then the "q" parameter value assigned to the most specific 220 # of those matched is used (e.g. "text/html;version=3.0" is more 221 # specific than "text/html", which is more specific than "text/*", 222 # which in turn is more specific than "*/*"). If not media range 223 # in the provided Accept field matches the variant's media type, 224 # then the value assigned is "q=0". 225 if (exists $accept{'type'} && $ct) { 226 # First we clean up our content-type 227 $ct =~ s/\s+//g; 228 my $params = ""; 229 $params = $1 if $ct =~ s/;(.*)//; 230 my($type, $subtype) = split("/", $ct, 2); 231 my %param = (); 232 for $param (split(/;/, $params)) { 233 my($pk,$pv) = split(/=/, $param, 2); 234 $param{$pk} = $pv; 235 } 236 237 my $sel_q = undef; 238 my $sel_mbx = undef; 239 my $sel_specificness = 0; 240 241 ACCEPT_TYPE: 242 for $at (keys %{ $accept{'type'} }) { 243 print "Consider $at...\n" if $DEBUG; 244 my($at_type, $at_subtype) = split("/", $at, 2); 245 # Is it a match on the type 246 next if $at_type ne '*' && $at_type ne $type; 247 next if $at_subtype ne '*' && $at_subtype ne $subtype; 248 my $specificness = 0; 249 $specificness++ if $at_type ne '*'; 250 $specificness++ if $at_subtype ne '*'; 251 # Let's see if content-type parameters also match 252 while (($pk, $pv) = each %param) { 253 print "Check if $pk = $pv is true\n" if $DEBUG; 254 next unless exists $accept{'type'}{$at}{$pk}; 255 next ACCEPT_TYPE 256 unless $accept{'type'}{$at}{$pk} eq $pv; 257 print "yes it is!!\n" if $DEBUG; 258 $specificness++; 259 } 260 print "Hurray, type match with specificness = $specificness\n" 261 if $DEBUG; 262 263 if (!defined($sel_q) || $sel_specificness < $specificness) { 264 $sel_q = $accept{'type'}{$at}{'q'}; 265 $sel_mbx = $accept{'type'}{$at}{'mbx'}; 266 $sel_specificness = $specificness; 267 } 268 } 269 $q = $sel_q || 0; 270 $mbx = $sel_mbx; 271 } 272 273 my $Q; 274 if (!defined($mbx) || $mbx >= $bs) { 275 $Q = $qs * $qe * $qc * $ql * $q; 276 } else { 277 $Q = 0; 278 print "Variant's size is too large ==> Q=0\n" if $DEBUG; 279 } 280 281 if ($DEBUG) { 282 $mbx = "undef" unless defined $mbx; 283 printf "Q=%.4f", $Q; 284 print " (q=$q, mbx=$mbx, qe=$qe, qc=$qc, ql=$ql, qs=$qs)\n"; 285 } 286 287 push(@Q, [$id, $Q, $bs]); 288 } 289 290 291 @Q = sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @Q; 292 293 return @Q if wantarray; 294 return undef unless @Q; 295 return undef if $Q[0][1] == 0; 296 $Q[0][0]; 297} 298 2991; 300 301__END__ 302 303 304=head1 NAME 305 306choose - choose a variant of a document to serve (HTTP content negotiation) 307 308=head1 SYNOPSIS 309 310 use HTTP::Negotiate; 311 312 # ID QS Content-Type Encoding Char-Set Lang Size 313 $variants = 314 [['var1', 1.000, 'text/html', undef, 'iso-8859-1', 'en', 3000], 315 ['var2', 0.950, 'text/plain', 'gzip', 'us-ascii', 'no', 400], 316 ['var3', 0.3, 'image/gif', undef, undef, undef, 43555], 317 ]; 318 319 @prefered = choose($variants, $request_headers); 320 $the_one = choose($variants); 321 322=head1 DESCRIPTION 323 324This module provides a complete implementation of the HTTP content 325negotiation algorithm specified in F<draft-ietf-http-v11-spec-00.ps> 326chapter 12. Content negotiation allows for the selection of a 327preferred content representation based upon attributes of the 328negotiable variants and the value of the various Accept* header fields 329in the request. 330 331The variants are ordered by preference by calling the function 332choose(). 333 334The first parameter is reference to an array of the variants to 335choose among. 336Each element in this array is an array with the values [$id, $qs, 337$content_type, $content_encoding, $charset, $content_language, 338$content_length] whose meanings are described 339below. The $content_encoding and $content_language can be either a 340single scalar value or an array reference if there are several values. 341 342The second optional parameter is either a HTTP::Headers or a HTTP::Request 343object which is searched for "Accept*" headers. If this 344parameter is missing, then the accept specification is initialized 345from the CGI environment variables HTTP_ACCEPT, HTTP_ACCEPT_CHARSET, 346HTTP_ACCEPT_ENCODING and HTTP_ACCEPT_LANGUAGE. 347 348In an array context, choose() returns a list of [variant 349identifier, calculated quality, size] tuples. The values are sorted by 350quality, highest quality first. If the calculated quality is the same 351for two variants, then they are sorted by size (smallest first). I<E.g.>: 352 353 (['var1', 1, 2000], ['var2', 0.3, 512], ['var3', 0.3, 1024]); 354 355Note that also zero quality variants are included in the return list 356even if these should never be served to the client. 357 358In a scalar context, it returns the identifier of the variant with the 359highest score or C<undef> if none have non-zero quality. 360 361If the $HTTP::Negotiate::DEBUG variable is set to TRUE, then a lot of 362noise is generated on STDOUT during evaluation of choose(). 363 364=head1 VARIANTS 365 366A variant is described by a list of the following values. If the 367attribute does not make sense or is unknown for a variant, then use 368C<undef> instead. 369 370=over 3 371 372=item identifier 373 374This is a string that you use as the name for the variant. This 375identifier for the preferred variants returned by choose(). 376 377=item qs 378 379This is a number between 0.000 and 1.000 that describes the "source 380quality". This is what F<draft-ietf-http-v11-spec-00.ps> says about this 381value: 382 383Source quality is measured by the content provider as representing the 384amount of degradation from the original source. For example, a 385picture in JPEG form would have a lower qs when translated to the XBM 386format, and much lower qs when translated to an ASCII-art 387representation. Note, however, that this is a function of the source 388- an original piece of ASCII-art may degrade in quality if it is 389captured in JPEG form. The qs values should be assigned to each 390variant by the content provider; if no qs value has been assigned, the 391default is generally "qs=1". 392 393=item content-type 394 395This is the media type of the variant. The media type does not 396include a charset attribute, but might contain other parameters. 397Examples are: 398 399 text/html 400 text/html;version=2.0 401 text/plain 402 image/gif 403 image/jpg 404 405=item content-encoding 406 407This is one or more content encodings that has been applied to the 408variant. The content encoding is generally used as a modifier to the 409content media type. The most common content encodings are: 410 411 gzip 412 compress 413 414=item content-charset 415 416This is the character set used when the variant contains text. 417The charset value should generally be C<undef> or one of these: 418 419 us-ascii 420 iso-8859-1 ... iso-8859-9 421 iso-2022-jp 422 iso-2022-jp-2 423 iso-2022-kr 424 unicode-1-1 425 unicode-1-1-utf-7 426 unicode-1-1-utf-8 427 428=item content-language 429 430This describes one or more languages that are used in the variant. 431Language is described like this in F<draft-ietf-http-v11-spec-00.ps>: A 432language is in this context a natural language spoken, written, or 433otherwise conveyed by human beings for communication of information to 434other human beings. Computer languages are explicitly excluded. 435 436The language tags are defined by RFC 3066. Examples 437are: 438 439 no Norwegian 440 en International English 441 en-US US English 442 en-cockney 443 444=item content-length 445 446This is the number of bytes used to represent the content. 447 448=back 449 450=head1 ACCEPT HEADERS 451 452The following Accept* headers can be used for describing content 453preferences in a request (This description is an edited extract from 454F<draft-ietf-http-v11-spec-00.ps>): 455 456=over 3 457 458=item Accept 459 460This header can be used to indicate a list of media ranges which are 461acceptable as a reponse to the request. The "*" character is used to 462group media types into ranges, with "*/*" indicating all media types 463and "type/*" indicating all subtypes of that type. 464 465The parameter q is used to indicate the quality factor, which 466represents the user's preference for that range of media types. The 467parameter mbx gives the maximum acceptable size of the response 468content. The default values are: q=1 and mbx=infinity. If no Accept 469header is present, then the client accepts all media types with q=1. 470 471For example: 472 473 Accept: audio/*;q=0.2;mbx=200000, audio/basic 474 475would mean: "I prefer audio/basic (of any size), but send me any audio 476type if it is the best available after an 80% mark-down in quality and 477its size is less than 200000 bytes" 478 479 480=item Accept-Charset 481 482Used to indicate what character sets are acceptable for the response. 483The "us-ascii" character set is assumed to be acceptable for all user 484agents. If no Accept-Charset field is given, the default is that any 485charset is acceptable. Example: 486 487 Accept-Charset: iso-8859-1, unicode-1-1 488 489 490=item Accept-Encoding 491 492Restricts the Content-Encoding values which are acceptable in the 493response. If no Accept-Encoding field is present, the server may 494assume that the client will accept any content encoding. An empty 495Accept-Encoding means that no content encoding is acceptable. Example: 496 497 Accept-Encoding: compress, gzip 498 499 500=item Accept-Language 501 502This field is similar to Accept, but restricts the set of natural 503languages that are preferred in a response. Each language may be 504given an associated quality value which represents an estimate of the 505user's comprehension of that language. For example: 506 507 Accept-Language: no, en-gb;q=0.8, de;q=0.55 508 509would mean: "I prefer Norwegian, but will accept British English (with 51080% comprehension) or German (with 55% comprehension). 511 512=back 513 514 515=head1 COPYRIGHT 516 517Copyright 1996,2001 Gisle Aas. 518 519This library is free software; you can redistribute it and/or 520modify it under the same terms as Perl itself. 521 522=head1 AUTHOR 523 524Gisle Aas <gisle@aas.no> 525 526=cut 527