1############################################################################
2# Copyright (c) 1998 Enno Derksen
3# All rights reserved.
4# This program is free software; you can redistribute it and/or modify it
5# under the same terms as Perl itself.
6############################################################################
7#
8# Extra functionality that is not part of the XQL spec
9#
10
11package XML::XQL;
12use strict;
13
14BEGIN
15{
16    die "don't use/require XML::XQL::Plus, either use/require XML::XQL or XML::XQL::Strict" unless $XML::XQL::Included;
17};
18
19defineComparisonOperators
20(
21 "=~"		=> \&XML::XQL::match_oper,
22 "!~"		=> \&XML::XQL::no_match_oper,
23 "match"	=> \&XML::XQL::match_oper,
24 "no_match"	=> \&XML::XQL::no_match_oper,
25 "isa"		=> \&XML::XQL::isa_oper,
26 "can"		=> \&XML::XQL::can_oper,
27);
28
29sub match_oper
30{
31    my ($node, $expr) = @_;
32
33    return [] if isEmptyList ($node);
34#?? can this happen?
35
36    my $str = $node->xql_toString;
37
38    $expr = prepareRvalue ($expr->solve ([$node]));
39    return [] if isEmptyList ($expr);
40#?? can this happen?
41
42    $expr = $expr->xql_toString;
43    croak "bad search pattern '$expr' for =~" unless $expr =~ m!^\s*[m/]!o;
44
45    my $res = eval "\$str =~ $expr";
46    croak "bad search pattern '$expr' for =~ operator: $@"  if ($@);
47    $res;
48}
49
50sub no_match_oper
51{
52    my ($node, $expr) = @_;
53
54    return [] if isEmptyList ($node);
55#?? can this happen?
56
57    my $str = $node->xql_toString;
58
59    $expr = prepareRvalue ($expr->solve ([$node]));
60    return [] if isEmptyList ($expr);
61#?? can this happen?
62
63    $expr = $expr->xql_toString;
64    croak "bad search pattern '$expr' for !~" unless $expr =~ m!^\s*[m/]!o;
65
66    my $res = eval "\$str !~ $expr";
67    croak "bad search pattern '$expr' for !~ operator: $@"  if ($@);
68    $res;
69}
70
71sub isa_oper
72{
73    my ($node, $expr) = @_;
74
75    return [] if isEmptyList ($node);
76#?? can this happen?
77
78    $expr = prepareRvalue ($expr->solve ([$node]));
79    return [] if isEmptyList ($expr);
80#?? can this happen?
81
82    $expr = $expr->xql_toString;
83
84    # Expand "number" to "XML::XQL::Number" etc.
85    $expr = expandType ($expr);
86
87#?? I don't think empty lists are possible here. If so, add "[]" as expr
88
89    ref($node) and $node->isa ($expr);
90}
91
92#
93# Not sure how useful this is, unless it supports XQL functions/methods...
94#
95sub can_oper
96{
97    my ($node, $expr) = @_;
98
99    return [] if isEmptyList ($node);
100#?? can this happen?
101
102    $expr = prepareRvalue ($expr->solve ([$node]));
103    return [] if isEmptyList ($expr);
104#?? can this happen?
105
106    $expr = $expr->xql_toString;
107
108    ref ($node) and $node->can ($expr);
109}
110
111sub once
112{
113    my ($context, $list, $expr) = @_;
114    $expr->solve ($context, $list);
115}
116
117sub xql_eval
118{
119    my ($context, $list, $query, $type) = @_;
120
121#   return [] if @$list == 0;
122
123    $query = toList ($query->solve ($context, $list));
124    return [] unless @$query;
125
126    if (defined $type)
127    {
128	$type = prepareRvalue ($type->solve ($context, $list));
129	$type = isEmptyList ($type) ? "Text" : $type->xql_toString;
130
131	# Expand "number" to "XML::XQL::Number" etc.
132	$type = expandType ($type);
133    }
134    else
135    {
136	$type = "XML::XQL::Text";
137    }
138
139    my @result = ();
140    for my $val (@$query)
141    {
142	$val = $val->xql_toString;
143	$val = eval $val;
144
145#print "eval result=$val\n";
146#?? check result?
147	push @result, eval "new $type (\$val)" if defined $val;
148    }
149    \@result;
150}
151
152sub subst
153{
154    my ($context, $list, $query, $expr, $repl, $mod, $mode) = @_;
155
156#?? not sure?
157    return [] if @$list == 0;
158
159    $expr = prepareRvalue ($expr->solve ($context, $list));
160    return [] if isEmptyList ($expr);
161    $expr = $expr->xql_toString;
162
163    $repl = prepareRvalue ($repl->solve ($context, $list));
164    return [] if isEmptyList ($repl);
165    $repl = $repl->xql_toString;
166
167    if (defined $mod)
168    {
169	$mod = prepareRvalue ($mod->solve ($context, $list));
170	$mod = isEmptyList ($mod) ? "" : $mod->xql_toString;
171    }
172
173    if (defined $mode)
174    {
175	$mode = prepareRvalue ($mode->solve ($context, $list));
176	$mode = isEmptyList ($mode) ? 0 : $mode->xql_toString;
177    }
178    else
179    {
180	$mode = 0;	# default mode: use textBlocks for Elements
181    }
182
183    my @result = ();
184    my $nodes = toList ($query->solve ($context, $list));
185
186    for my $node (@$nodes)
187    {
188	if ($mode == 0 && $node->xql_nodeType == 1)	# 1: Element node
189	{
190	    # For Element nodes, replace text in consecutive text blocks
191	    # Note that xql_rawtextBlocks, returns the blocks in reverse order,
192	    # so that the indices of nodes within previous blocks don't need
193	    # to be adjusted when a replacement occurs.
194	    my $block_matched = 0;
195	    BLOCK: for my $block ($node->xql_rawTextBlocks)
196	    {
197		my $str = $block->[2];
198		my $result = eval "\$str =~ s/\$expr/\$repl/$mod";
199		croak "bad subst expression s/$expr/$repl/$mod: $@" if ($@);
200		next BLOCK unless $result;
201
202		$block_matched++;
203		$node->xql_replaceBlockWithText ($block->[0], $block->[1], $str);
204	    }
205	    # Return the input parameter only if a substitution occurred
206	    push @result, $node if $block_matched;
207	}
208	else
209	{
210	    my $str = $node->xql_toString;
211	    next unless defined $str;
212
213	    my $result = eval "\$str =~ s/\$expr/\$repl/$mod";
214	    croak "bad subst expression s/$expr/$repl/$mod: $@" if ($@);
215	    next unless $result;
216#print "result=$result for str[$str] =~ s/$expr/$repl/$mod\n";
217
218	    # Return the input parameter only if a substitution occurred
219	    $node->xql_setValue ($str);
220	    push @result, $node;
221	}
222	# xql_setValue will actually change the value of the node for an Attr,
223	# Text, CDataSection, EntityRef or Element
224    }
225    \@result;
226}
227
228#?? redo match - what should it return?
229sub match
230{
231    my ($context, $list, $query, $repl, $mod) = @_;
232
233    return [] if @$list == 0;
234
235    $query = prepareRvalue ($query->solve ($context, $list));
236    return [] if isEmptyList ($query);
237    $query = $query->xql_toString;
238
239    if (defined $mod)
240    {
241	$mod = prepareRvalue ($mod->solve ($context, $list));
242	$mod = isEmptyList ($mod) ? "" : $mod->xql_toString;
243    }
244
245    my $str = $list->[0]->xql_toString;
246    return [] unless defined $str;
247
248    my (@matches) = ();
249    eval "\@matches = (\$str =~ /\$query/$mod)";
250    croak "bad match expression m/$query/$mod" if ($@);
251
252#?? or should I map undef to XML::XQL::Text("") ?
253    @matches = map { defined($_) ? new XML::XQL::Text ($_) : [] } @matches;
254    \@matches;
255}
256
257sub xql_map
258{
259    my ($context, $list, $query, $code) = @_;
260
261#?? not sure?
262    return [] if @$list == 0;
263
264    $code = prepareRvalue ($code->solve ($context, $list));
265    return [] if isEmptyList ($code);
266    $code = $code->xql_toString;
267
268    my @result = ();
269    my $nodes = toList ($query->solve ($context, $list));
270
271    for my $node (@$nodes)
272    {
273	my $str = $node->xql_toString;
274	next unless defined $str;
275
276	my (@mapresult) = ($str);
277
278#?? NOTE: the $code should
279	eval "\@mapresult = map { $code } (\$str)";
280	croak "bad map expression '$code' ($@)" if ($@);
281
282	# Return the input parameter only if a change occurred
283	next unless $mapresult[0] eq $str;
284
285	# xql_setValue will actually change the value of the node for an Attr,
286	# Text, CDataSection, EntityRef or Element
287	$node->xql_setValue ($str);
288	push @result, $node;
289    }
290    \@result;
291}
292
293sub xql_new
294{
295    my ($type, @arg) = @_;
296
297    # Expand "number" to "XML::XQL::Number" etc.
298    $type = expandType ($type);
299
300    my $obj = eval "new $type (\@arg)";
301    $@ ? [] : $obj;	# return empty list on exception
302}
303
304my $DOM_PARSER;	# used by xql_document (below)
305sub setDocParser
306{
307    $DOM_PARSER = shift;
308}
309
310sub xql_document
311{
312    my ($docname) = @_;
313    my $parser = $DOM_PARSER ||= new XML::DOM::Parser;
314    my $doc;
315    eval
316    {
317	$doc = $parser->parsefile ($docname);
318    };
319    if ($@)
320    {
321	warn "xql_document: could not read XML file [$docname]: $@";
322    }
323    return defined $doc ? $doc : [];
324}
325
326#----------- XQL+ methods --------------------------------------------
327
328
329sub DOM_nodeType
330{
331    my ($context, $list) = @_;
332
333    return [] if @$list == 0;
334
335    new XML::XQL::Number ($list->[0]->xql_DOM_nodeType, $list->[0]);
336}
337
338#----------- Perl Builtin Functions ----------------------------------
339
340# Note that certain functions (like mkdir) are not considered "constant"
341# because we don't want their invocation values cached. (We want the
342# function to be called every time the Invocation is solved/evaluated.)
343my %PerlFunc =
344(
345 # Format:
346 #  "funcName", => [ARGCOUNT, RETURN_TYPE [, CONSTANT = 0, [QUERY_ARG = 0]]]
347
348 #-------- Arithmetic Functions
349
350 "abs" => [1, "Number", 1],
351 "atan2" => [2, "Number", 1, -1],
352 "cos" => [1, "Number", 1],
353 "exp" => [1, "Number", 1],
354 "int" => [1, "Number", 1],
355 "log" => [1, "Number", 1],
356 "rand" => [[0, 1], "Number", 0, -1],
357 "sin" => [1, "Number", 1],
358 "sqrt" => [1, "Number", 1],
359 "srand" => [[0, 1], "Number", 0, -1],
360 "time" => [0, "Number", 0, -1],
361
362 #-------- Conversion Functions
363
364 "chr" => [1, "Text", 1],
365# "gmtime" => [1, "List of Number", 1],
366 "hex" => [1, "Number", 1],
367# "localtime" => [1, "List of Number", 1],
368 "oct" => [1, "Number", 1],
369 "ord" => [1, "Text", 1],
370 "vec" => [3, "Number", 1],
371 "pack" => [[1, -1], "Text", 1, -1], #?? how should this work??
372# "unpack" => [2, "List of ?", 1],
373
374 #-------- String Functions
375
376 "chomp" => [1, "Text", 1],
377 "chop" => [1, "Text", 1],
378 "crypt" => [2, "Text", 1],
379 "lindex" => [[2, 3], "Number", 1],	# "index" is already taken by XQL
380 "length" => [1, "Number", 1],
381 "lc" => [1, "Text", 1],
382 "lcfirst" => [1, "Text", 1],
383 "quotemeta" => [1, "Text", 1],
384 "rindex" => [[2, 3], "Number", 1],
385 "substr" => [[2, 3], "Text", 1],
386 "uc" => [1, "Text", 1],
387 "ucfirst" => [1, "Text", 1],
388 "reverse" => [1, "Text", 1],
389 "sprintf" => [[1, -1], "Text", 1, -1],
390
391 #-------- Array Functions
392
393 "join" => [[1, -1], "Text", 1],
394# "split" => [[2, 3], "List of Text", 1],
395
396 #-------- File Functions
397
398 "chmod" => [2, "Boolean", 0, 1],
399 "chown" => [3, "Boolean", 0, 2],
400 "link" => [2, "Number", 0, -1],		#?? no return value
401# "lstat" => [1, "List of Number"],
402 "mkdir" => [2, "Boolean"],		#?? or is 1 arg also allowed?
403 "readlink" => [1, "Text"],
404 "rename" => [2, "Boolean", 0, -1],
405 "rmdir" => [1, "Boolean"],
406# "stat" => [1, "List of Number"],
407 "symlink" => [2, "Boolean", 0, -1],
408 "unlink" => [1, "Boolean"],
409 "utime" => [3, "Boolean", 0, 2],
410 "truncate" => [2, "Number"],		#?? no return value
411
412 #-------- System Interaction
413
414 "exit" => [[0, 1], "Number"],
415# "glob" => [1, "List of Text"],
416 "system" => [[1, -1], "Number", 0, -1],
417# "times" => [0, "List of Number"],
418
419 #-------- Miscellaneous
420
421 "defined" => [1, "Boolean"],	# is this useful??
422 "dump" => [[0, 1], "Number", 0, -1],
423 "ref" => [1, "Text"],
424);
425#?? die, warn, croak (etc.),
426#?? file test (-X), tr// (same as y//)
427#?? array functions, sort
428
429# Generate wrapper for Perl builtin function on the fly
430sub generatePerlWrapper
431{
432    my ($name) = @_;
433    my $args = $PerlFunc{$name};
434    return undef unless defined $args;	# not found
435
436    my ($argCount, $returnType, $const, $queryArg) = @$args;
437    my $funcName = $name;
438    if ($name eq "lindex")	# "index" is already taken
439    {
440	$funcName = "index";
441    }
442    generateFunction ($name, $funcName, $returnType, $argCount, 0, $const,
443		      $queryArg);
444    $Func{$name};
445}
446
447#?? Inline functions, do they make sense? E.g. 'elem!sub("code", "arg1")'
448#?? Normally, user should use defineFunction, but if most of them have
449#?? a lot of common code, I could provide the pre- and post-code.
450#?? After processing the user-supplied code block, how should I convert the
451#?? user's result back to an Invocation result. E.g. do I get a single value
452#?? or a list back?
453
454defineFunction ("eval",  \&XML::XQL::xql_eval,		[1, 2]);
455defineFunction ("subst", \&XML::XQL::subst,		[3, 5], 1);
456defineFunction ("s",	 \&XML::XQL::subst,		[3, 5], 1);
457defineFunction ("match", \&XML::XQL::match,		[1, 2]);
458defineFunction ("m",     \&XML::XQL::match,		[1, 2]);
459defineFunction ("map",   \&XML::XQL::xql_map,		2,      1);
460defineFunction ("once",  \&XML::XQL::once,		1,      1);
461
462defineMethod ("DOM_nodeType", \&XML::XQL::DOM_nodeType, 0, 0);
463
464generateFunction ("new", "XML::XQL::xql_new", "*", [1, -1], 1, 0, 1);
465generateFunction ("document", "XML::XQL::xql_document", "*", 1, 1, 0, 0);
466
467# doc() is an alias for document()
468defineFunction ("doc", \&XML::XQL::xql_wrap_document, 1, 1);
469
470#------------------------------------------------------------------------------
471# The following functions were found in the XPath spec.
472
473# Found in XPath but not (yet) implemented in XML::XQL:
474# - type casting (string, number, boolean) - Not sure if needed...
475#   Note that string() converts booleans to 'true' and 'false', but our
476#   internal type casting converts it to perl values '0' and '1'...
477# - math (+,-,*,mod,div) - Use eval() for now
478# - last(), position() - Similar to end() and index() except they're 1-based
479# - local-name(node-set?), namespace-uri(node-set?)
480# - name(node-set?) - Can we pass a node-set in XQL?
481# - lang(string)
482
483sub xpath_concat	{ join ("", @_) }
484sub xpath_starts_with	{ $_[0] =~ /^\Q$_[1]\E/ }
485# ends-with is not part of XPath
486sub xpath_ends_with	{ $_[0] =~ /\Q$_[1]\E$/ }
487sub xpath_contains	{ $_[0] =~ /\Q$_[1]\E/ }
488
489# The following methods don't know about NaN, +/-Infinity or -0.
490sub xpath_floor		{ use POSIX; POSIX::floor ($_[0]) }
491sub xpath_ceiling	{ use POSIX; POSIX::ceil ($_[0]) }
492sub xpath_round  	{ use POSIX; POSIX::floor ($_[0] + 0.5) }
493
494# Note that the start-index is 1-based in XPath
495sub xpath_substring
496{
497    defined $_[2] ? substr ($_[0], $_[1] - 1, $_[2])
498		  : substr ($_[0], $_[1] - 1)
499}
500
501sub xpath_substring_before
502{
503    my $i = index ($_[0], $_[1]);
504    $i == -1 ? undef : substr ($_[0], 0, $i)
505}
506
507sub xpath_substring_after
508{
509    my $i = index ($_[0], $_[1]);
510    $i == -1 ? undef : substr ($_[0], $i + length($_[1]))
511}
512
513# Note that d,c,s are tr/// modifiers. Also can't use open delimiters i.e. {[(<
514my @TR_DELIMITERS = split //, "/!%^&*)-_=+|~]}'\";:,.>/?abefghijklmnopqrtuvwxyz";
515
516sub xpath_translate
517{
518    my ($str, $from, $to) = @_;
519
520    my $delim;
521    for my $d (@TR_DELIMITERS)
522    {
523	if (index ($from, $d) == -1 && index ($to, $d) == -1)
524	{
525	    $delim = $d;
526	    last;
527	}
528    }
529    die "(xpath_)translate: can't find suitable 'tr' delimiter"
530	unless defined $delim;
531
532    # XPath defines that if length($from) > length($to), characters in $from
533    # for which there is no match in $to, should be deleted.
534    # (So we must use the 's' modifier.)
535    eval "\$str =~ tr$delim$from$delim$to${delim}d";
536    $str;
537}
538
539sub xpath_string_length
540{
541    my ($context, $list, $text) = @_;
542
543    if (defined $text)
544    {
545	$text = XML::XQL::prepareRvalue ($text->solve ($context, $list));
546	return [] unless defined $text;
547
548	return new XML::XQL::Number (length $text->xql_toString,
549				     $text->xql_sourceNode);
550    }
551    else
552    {
553	return [] if @$list == 0;
554
555	my @result;
556	for my $node (@$list)
557	{
558	    push @result, new XML::XQL::Number (length $node->xql_toString,
559						$node);
560	}
561	return \@result;
562    }
563}
564
565sub _normalize
566{
567    $_[0] =~ s/\s+/ /g;
568    $_[0] =~ s/^\s+//;
569    $_[0] =~ s/\s+$//;
570    $_[0];
571}
572
573sub xpath_normalize_space
574{
575    my ($context, $list, $text) = @_;
576
577    return [] if @$list == 0;
578
579    if (defined $text)
580    {
581	$text = XML::XQL::prepareRvalue ($text->solve ($context, $list));
582	return [] unless defined $text;
583
584	return new XML::XQL::Text (_normalize ($text->xql_toString),
585				   $text->xql_sourceNode);
586    }
587    else
588    {
589	my @result;
590	for my $node (@$list)
591	{
592	    push @result, new XML::XQL::Text (_normalize ($node->xql_toString),
593					      $node);
594	}
595	return \@result;
596    }
597}
598
599sub xpath_sum
600{
601    my ($context, $list, $expr) = @_;
602
603    return [] if @$list == 0;
604#?? or return Number(0) ?
605
606    my $sum = 0;
607    $expr = XML::XQL::toList ($expr->solve ($context, $list));
608    for my $r (@{ $expr })
609    {
610	$sum += $r->xql_toString;
611    }
612    return new XML::XQL::Number ($sum, undef);
613}
614
615generateFunction ("round", "XML::XQL::xpath_round", "Number", 1, 1);
616generateFunction ("floor", "XML::XQL::xpath_floor", "Number", 1, 1);
617generateFunction ("ceiling", "XML::XQL::xpath_ceiling", "Number", 1, 1);
618
619generateFunction ("concat", "XML::XQL::xpath_concat", "Text", [2, -1], 1);
620generateFunction ("starts-with", "XML::XQL::xpath_starts_with", "Boolean", 2, 1);
621generateFunction ("ends-with", "XML::XQL::xpath_ends_with", "Boolean", 2, 1);
622generateFunction ("contains", "XML::XQL::xpath_contains", "Boolean", 2, 1);
623generateFunction ("substring-before", "XML::XQL::xpath_substring_before", "Text", 2, 1);
624generateFunction ("substring-after", "XML::XQL::xpath_substring_after", "Text", 2, 1);
625# Same as Perl substr() except index is 1-based
626generateFunction ("substring", "XML::XQL::xpath_substring", "Text", [2, 3], 1);
627generateFunction ("translate", "XML::XQL::xpath_translate", "Text", 3, 1);
628
629defineMethod ("string-length", \&XML::XQL::xpath_string_length, [0, 1], 1);
630defineMethod ("normalize-space", \&XML::XQL::xpath_normalize_space, [0, 1], 1);
631
632defineFunction ("sum", \&XML::XQL::xpath_sum, 1, 1);
633
6341;	# module return code
635