1 
2 /*
3  * Copyright 2006-2008 The FLWOR Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 #include "stdafx.h"
18 
19 #include "runtime/core/fncall_iterator.h"
20 #include "runtime/core/var_iterators.h"
21 
22 #include "compiler/codegen/plan_visitor.h"
23 #include "compiler/expression/expr.h"
24 #include "compiler/api/compiler_api.h"
25 #include "compiler/api/compilercb.h"
26 #include "compiler/rewriter/framework/rewriter_context.h"
27 #include "compiler/rewriter/framework/rewriter.h"
28 #include "compiler/rewriter/tools/dataflow_annotations.h"
29 
30 #include "functions/udf.h"
31 #include "functions/function_impl.h"
32 
33 #include "annotations/annotations.h"
34 
35 #include "diagnostics/xquery_warning.h"
36 #include "diagnostics/assert.h"
37 
38 #include "types/typeops.h"
39 
40 #include "zorbaserialization/serialize_template_types.h"
41 #include "zorbaserialization/serialize_zorba_types.h"
42 
43 #include "util/hashmap32.h"
44 
45 #include "store/api/index.h" // needed for destruction of the cache
46 
47 
48 namespace zorba
49 {
50 
SERIALIZABLE_CLASS_VERSIONS(user_function)51 SERIALIZABLE_CLASS_VERSIONS(user_function)
52 
53 
54 /*******************************************************************************
55 
56 ********************************************************************************/
57 user_function::user_function(
58     const QueryLoc& loc,
59     const signature& sig,
60     expr* expr_body,
61     unsigned short scriptingKind,
62     CompilerCB* ccb)
63   :
64   function(sig, FunctionConsts::FN_UNKNOWN),
65   theCCB(ccb),
66   theLoc(loc),
67   theScriptingKind(scriptingKind),
68   theBodyExpr(expr_body),
69   theIsExiting(false),
70   theIsLeaf(true),
71   theIsOptimized(false),
72   thePlanStateSize(0),
73   theCache(0),
74   theCacheResults(false),
75   theCacheComputed(false)
76 {
77   setFlag(FunctionConsts::isUDF);
78   resetFlag(FunctionConsts::isBuiltin);
79   setDeterministic(true);
80   setPrivate(false);
81 }
82 
83 
84 /*******************************************************************************
85 
86 ********************************************************************************/
user_function(::zorba::serialization::Archiver & ar)87 user_function::user_function(::zorba::serialization::Archiver& ar)
88   :
89   function(ar)
90 {
91   setFlag(FunctionConsts::isUDF);
92   resetFlag(FunctionConsts::isBuiltin);
93 
94   theIsOptimized = true;
95 }
96 
97 
98 /*******************************************************************************
99 
100 ********************************************************************************/
~user_function()101 user_function::~user_function()
102 {
103 }
104 
105 
106 /*******************************************************************************
107 
108 ********************************************************************************/
serialize(::zorba::serialization::Archiver & ar)109 void user_function::serialize(::zorba::serialization::Archiver& ar)
110 {
111   if (ar.is_serializing_out())
112   {
113     uint32_t planStateSize;
114     getPlan(planStateSize);
115     ZORBA_ASSERT(thePlan != NULL);
116 
117     computeResultCaching(theCCB->theXQueryDiagnostics);
118 
119     if (theCCB->theHasEval)
120     {
121       SourceFinder sourceFinder;
122       std::vector<expr*> sources;
123       sourceFinder.findLocalNodeSources(theBodyExpr, sources);
124 
125       if (!sources.empty())
126       {
127         std::vector<expr*>::const_iterator ite = sources.begin();
128         std::vector<expr*>::const_iterator end = sources.end();
129         for (; ite != end; ++ite)
130         {
131           expr* source = (*ite);
132 
133           if (source->get_expr_kind() == doc_expr_kind)
134           {
135             doc_expr* e = static_cast<doc_expr*>(source);
136             if (!e->copyInputNodes())
137               e->setCopyInputNodes();
138           }
139           else if (source->get_expr_kind() == elem_expr_kind)
140           {
141             elem_expr* e = static_cast<elem_expr*>(source);
142             if (!e->copyInputNodes())
143               e->setCopyInputNodes();
144           }
145           else
146           {
147             ZORBA_ASSERT(false);
148           }
149         }
150 
151         invalidatePlan();
152         getPlan(planStateSize);
153         ZORBA_ASSERT(thePlan != NULL);
154       }
155     }
156   }
157   else
158   {
159     thePlan = NULL;
160     theBodyExpr = NULL;
161   }
162 
163   serialize_baseclass(ar, (function*)this);
164   ar & theCCB;
165   //ar & theLoc;
166   ar & theScriptingKind;
167   //ar & theBodyExpr;
168   //ar & theArgVars;
169   ar & theIgnoresSortedNodes;
170   ar & theIgnoresDuplicateNodes;
171 
172   ar & theMustCopyInputNodes;
173   ar & thePropagatesInputNodes;
174 
175   //ar & theIsExiting;
176   //ar & theIsLeaf;
177   //ar & theMutuallyRecursiveUDFs;
178 
179   // ar & theIsOptimized;
180 
181   ar & thePlan;
182   ar & thePlanStateSize;
183   ar & theArgVarsRefs;
184 
185   ar & theCacheResults;
186   ar & theCacheComputed;
187 }
188 
189 
190 #if 0
191 /*******************************************************************************
192 
193 ********************************************************************************/
194 xqtref_t user_function::getUDFReturnType(static_context* sctx) const
195 {
196   xqtref_t bodyType = theBodyExpr->return_type(sctx);
197   xqtref_t declaredType = get_signature().return_type();
198 
199   if (TypeOps::is_subtype(*bodyType, *declaredType))
200     return bodyType;
201 
202   return declaredType;
203 
204 }
205 #endif
206 
207 
208 /*******************************************************************************
209 
210 ********************************************************************************/
getScriptingKind() const211 unsigned short user_function::getScriptingKind() const
212 {
213   // Return the declared scripting kind. If the declared kind is updating/sequential,
214   // but the function body is not really updating/sequential, an error/warning is
215   // raised by the translator.
216   return theScriptingKind;
217 }
218 
219 
220 /*******************************************************************************
221 
222 ********************************************************************************/
setBody(expr * body)223 void user_function::setBody(expr* body)
224 {
225   theBodyExpr = body;
226 }
227 
228 
229 /*******************************************************************************
230 
231 ********************************************************************************/
getBody() const232 expr* user_function::getBody() const
233 {
234   return theBodyExpr;
235 }
236 
237 
238 /*******************************************************************************
239 
240 ********************************************************************************/
setArgVars(std::vector<var_expr * > & args)241 void user_function::setArgVars(std::vector<var_expr*>& args)
242 {
243   theArgVars = args;
244 }
245 
246 
247 /*******************************************************************************
248 
249 ********************************************************************************/
getArgVars() const250 const std::vector<var_expr*>& user_function::getArgVars() const
251 {
252   return theArgVars;
253 }
254 
255 
256 /*******************************************************************************
257 
258 ********************************************************************************/
addMutuallyRecursiveUDFs(const std::vector<user_function * > & udfs,const std::vector<user_function * >::const_iterator & cycle)259 void user_function::addMutuallyRecursiveUDFs(
260     const std::vector<user_function*>& udfs,
261     const std::vector<user_function*>::const_iterator& cycle)
262 {
263   assert(theBodyExpr != NULL);
264 
265   theMutuallyRecursiveUDFs.insert(theMutuallyRecursiveUDFs.end(),
266                                   cycle,
267                                   udfs.end());
268 }
269 
270 
271 /*******************************************************************************
272 
273 ********************************************************************************/
addRecursiveCall(expr * call)274 void user_function::addRecursiveCall(expr* call)
275 {
276   assert(theBodyExpr != NULL);
277 
278   if (std::find(theRecursiveCalls.begin(), theRecursiveCalls.end(), call) ==
279       theRecursiveCalls.end())
280   {
281     theRecursiveCalls.push_back(call);
282   }
283 }
284 
285 
286 /*******************************************************************************
287 
288 ********************************************************************************/
isRecursive() const289 bool user_function::isRecursive() const
290 {
291   assert(isOptimized());
292   assert(theBodyExpr != NULL);
293   return !theMutuallyRecursiveUDFs.empty();
294 }
295 
296 
297 /*******************************************************************************
298 
299 ********************************************************************************/
isMutuallyRecursiveWith(const user_function * udf)300 bool user_function::isMutuallyRecursiveWith(const user_function* udf)
301 {
302   assert(isOptimized());
303   assert(theBodyExpr != NULL);
304 
305   if (std::find(theMutuallyRecursiveUDFs.begin(),
306                 theMutuallyRecursiveUDFs.end(),
307                 udf) != theMutuallyRecursiveUDFs.end())
308     return true;
309 
310   return false;
311 }
312 
313 
314 /*******************************************************************************
315 
316 ********************************************************************************/
accessesDynCtx() const317 bool user_function::accessesDynCtx() const
318 {
319   if (!isOptimized())
320   {
321     std::cerr << "accessesDynCtx invoked on non-optimized UDF"
322               << getName()->getStringValue() << std::endl;
323     assert(isOptimized());
324   }
325 
326   return testFlag(FunctionConsts::AccessesDynCtx);
327 }
328 
329 
330 /*******************************************************************************
331   This method may be called after deserializing a query plan, and the query
332   contains an eval expr.
333 ********************************************************************************/
mustCopyInputNodes(expr * fo,csize input) const334 bool user_function::mustCopyInputNodes(expr* fo, csize input) const
335 {
336   assert(theBodyExpr == NULL);
337 
338   return (theMustCopyInputNodes[input] == 0 ? false : true);
339 }
340 
341 
342 /*******************************************************************************
343   This method may be called after deserializing a query plan, and the query
344   contains an eval expr.
345 ********************************************************************************/
propagatesInputNodes(expr * fo,csize input) const346 bool user_function::propagatesInputNodes(expr* fo, csize input) const
347 {
348   assert(theBodyExpr == NULL);
349 
350   return (thePropagatesInputNodes[input] == 0 ? false : true);
351 }
352 
353 
354 /*******************************************************************************
355 
356 ********************************************************************************/
ignoresSortedNodes(expr * fo,csize input) const357 BoolAnnotationValue user_function::ignoresSortedNodes(expr* fo, csize input) const
358 {
359   assert(isOptimized());
360 
361   if (theBodyExpr == NULL)
362   {
363     assert(input < theIgnoresSortedNodes.size());
364     return static_cast<BoolAnnotationValue>(theIgnoresSortedNodes[input]);
365   }
366 
367   assert(input < theArgVars.size());
368   return theArgVars[input]->getIgnoresSortedNodes();
369 }
370 
371 
372 /*******************************************************************************
373 
374 ********************************************************************************/
ignoresDuplicateNodes(expr * fo,csize input) const375 BoolAnnotationValue user_function::ignoresDuplicateNodes(
376     expr* fo,
377     csize input) const
378 {
379   assert(isOptimized());
380 
381   if (theBodyExpr == NULL)
382   {
383     assert(input < theIgnoresDuplicateNodes.size());
384     return static_cast<BoolAnnotationValue>(theIgnoresDuplicateNodes[input]);
385   }
386 
387   assert(input < theArgVars.size());
388   return theArgVars[input]->getIgnoresDuplicateNodes();
389 }
390 
391 
392 /*******************************************************************************
393 
394 ********************************************************************************/
optimize()395 void user_function::optimize()
396 {
397   ZORBA_ASSERT(theBodyExpr);
398 
399   if (!theIsOptimized &&
400       theCCB->theConfig.opt_level > CompilerCB::config::O0)
401   {
402     // Set the Optimized flag in advance to prevent an infinte loop (for
403     // recursive functions, an optimization could be attempted again)
404     theIsOptimized = true;
405 
406     csize numParams = theArgVars.size();
407 
408     expr* body = getBody();
409 
410     RewriterContext rctx(theCCB,
411                          body,
412                          this,
413                          zstring(),
414                          body->get_sctx()->is_in_ordered_mode());
415 
416     GENV_COMPILERSUBSYS.getDefaultOptimizingRewriter()->rewrite(rctx);
417     body = rctx.getRoot();
418     setBody(body);
419 
420     if (theBodyExpr->isUnfoldable())
421       setFlag(FunctionConsts::AccessesDynCtx);
422 
423     numParams = theArgVars.size();
424 
425     theIgnoresSortedNodes.resize(numParams);
426     theIgnoresDuplicateNodes.resize(numParams);
427     theMustCopyInputNodes.resize(numParams);
428     thePropagatesInputNodes.resize(numParams);
429 
430     for (csize i = 0; i < numParams; ++i)
431     {
432       theIgnoresSortedNodes[i] = theArgVars[i]->getIgnoresSortedNodes();
433       theIgnoresDuplicateNodes[i] = theArgVars[i]->getIgnoresDuplicateNodes();
434       theMustCopyInputNodes[i] = 1;
435       thePropagatesInputNodes[i] = 1;
436     }
437 
438     if (theCCB->theConfig.optimize_cb != NULL)
439     {
440       if (getName())
441       {
442         theCCB->theConfig.optimize_cb(body, getName()->getStringValue().c_str());
443       }
444       else
445       {
446         theCCB->theConfig.optimize_cb(body, "inline function");
447       }
448     }
449   }
450 }
451 
452 
453 /*******************************************************************************
454 
455 ********************************************************************************/
invalidatePlan()456 void user_function::invalidatePlan()
457 {
458   thePlan = NULL;
459   theArgVarsRefs.clear();
460 }
461 
462 
463 /*******************************************************************************
464 
465 ********************************************************************************/
getPlan(uint32_t & planStateSize)466 PlanIter_t user_function::getPlan(uint32_t& planStateSize)
467 {
468   if (thePlan == NULL)
469   {
470     optimize();
471 
472     csize numArgs = theArgVars.size();
473 
474     hash64map<std::vector<LetVarIter_t> *> argVarToRefsMap;
475 
476     theArgVarsRefs.resize(numArgs);
477 
478     for (csize i = 0; i < numArgs; ++i)
479     {
480       argVarToRefsMap.put((uint64_t)&*theArgVars[i], &theArgVarsRefs[i]);
481     }
482 
483     ulong nextVarId = 1;
484     const store::Item* lName = getName();
485     //lName may be null of inlined functions
486     thePlan = zorba::codegen((lName == 0 ?
487                               "inline function" :
488                               lName->getStringValue().c_str()),
489                              &*theBodyExpr,
490                              theCCB,
491                              nextVarId,
492                              &argVarToRefsMap);
493   }
494 
495   planStateSize = thePlanStateSize;
496 
497   return thePlan;
498 }
499 
500 
501 /*******************************************************************************
502 
503 ********************************************************************************/
codegen(CompilerCB * cb,static_context * sctx,const QueryLoc & loc,std::vector<PlanIter_t> & argv,expr & ann) const504 PlanIter_t user_function::codegen(
505       CompilerCB* cb,
506       static_context* sctx,
507       const QueryLoc& loc,
508       std::vector<PlanIter_t>& argv,
509       expr& ann) const
510 {
511   return new UDFunctionCallIterator(sctx, loc, argv, this);
512 }
513 
514 
515 /*******************************************************************************
516 
517 ********************************************************************************/
getArgVarsRefs() const518 const std::vector<user_function::ArgVarRefs>& user_function::getArgVarsRefs() const
519 {
520   return theArgVarsRefs;
521 }
522 
523 
524 /*******************************************************************************
525 
526 ********************************************************************************/
getCache() const527 store::Index* user_function::getCache() const
528 {
529   return theCache.getp();
530 }
531 
532 
533 /*******************************************************************************
534 
535 ********************************************************************************/
setCache(store::Index * aCache)536 void user_function::setCache(store::Index* aCache)
537 {
538   theCache = aCache;
539 }
540 
541 
542 /*******************************************************************************
543 
544 ********************************************************************************/
cacheResults() const545 bool user_function::cacheResults() const
546 {
547   return theCacheResults;
548 }
549 
550 
551 /*******************************************************************************
552  only cache recursive (non-sequential, non-updating, deterministic)
553  functions with singleton atomic input and output
554 ********************************************************************************/
computeResultCaching(XQueryDiagnostics * diag)555 void user_function::computeResultCaching(XQueryDiagnostics* diag)
556 {
557   if (theCacheComputed)
558   {
559     return;
560   }
561 
562   struct OnExit
563   {
564   private:
565     bool& theResult;
566     bool& theCacheComputed;
567 
568   public:
569     OnExit(bool& aResult, bool& aCacheComputed)
570       :
571       theResult(aResult),
572       theCacheComputed(aCacheComputed) {}
573 
574     void cache() { theResult = true; }
575 
576     ~OnExit()
577     {
578       theCacheComputed = true;
579     }
580   };
581 
582   // will be destroyed when the function is exited
583   // set caching to true if cache() is called
584   OnExit lExit(theCacheResults, theCacheComputed);
585 
586   // check necessary conditions
587   // %ann:cache or not %ann:no-cache
588   if (theAnnotationList &&
589       theAnnotationList->contains(AnnotationInternal::zann_nocache))
590   {
591     return;
592   }
593 
594   // was the %ann:cache annotation given explicitly by the user
595   bool lExplicitCacheRequest =
596     (theAnnotationList ?
597      theAnnotationList->contains(AnnotationInternal::zann_cache) :
598      false);
599 
600   if (isVariadic())
601   {
602     if (lExplicitCacheRequest)
603     {
604       diag->add_warning(
605       NEW_XQUERY_WARNING(zwarn::ZWST0005_CACHING_NOT_POSSIBLE,
606       WARN_PARAMS(getName()->getStringValue(), ZED(ZWST0005_VARIADIC)),
607       WARN_LOC(theLoc)));
608     }
609     return;
610   }
611 
612   // parameter and return types are subtype of xs:anyAtomicType?
613   const xqtref_t& lRes = theSignature.returnType();
614   TypeManager* tm = theBodyExpr->get_sctx()->get_typemanager();
615 
616   if (!TypeOps::is_subtype(tm,
617                            *lRes,
618                            *GENV_TYPESYSTEM.ANY_ATOMIC_TYPE_ONE,
619                            theLoc))
620   {
621     if (lExplicitCacheRequest)
622     {
623       diag->add_warning(
624       NEW_XQUERY_WARNING(zwarn::ZWST0005_CACHING_NOT_POSSIBLE,
625       WARN_PARAMS(getName()->getStringValue(),
626                   ZED(ZWST0005_RETURN_TYPE),
627                   lRes->toString()),
628       WARN_LOC(theLoc)));
629     }
630     return;
631   }
632 
633   csize lArity = theSignature.paramCount();
634   for (csize i = 0; i < lArity; ++i)
635   {
636     const xqtref_t& lArg = theSignature[i];
637     if (!TypeOps::is_subtype(tm,
638                              *lArg,
639                              *GENV_TYPESYSTEM.ANY_ATOMIC_TYPE_ONE,
640                              theLoc))
641     {
642       if (lExplicitCacheRequest)
643       {
644         diag->add_warning(
645         NEW_XQUERY_WARNING(zwarn::ZWST0005_CACHING_NOT_POSSIBLE,
646         WARN_PARAMS(getName()->getStringValue(),
647                     ZED(ZWST0005_PARAM_TYPE),
648                     i+1,
649                     lArg->toString()),
650         WARN_LOC(theLoc)));
651       }
652       return;
653     }
654   }
655 
656   // function updating?
657   if (isUpdating())
658   {
659     if (lExplicitCacheRequest)
660     {
661       diag->add_warning(
662       NEW_XQUERY_WARNING(zwarn::ZWST0005_CACHING_NOT_POSSIBLE,
663       WARN_PARAMS(getName()->getStringValue(), ZED(ZWST0005_UPDATING)),
664       WARN_LOC(theLoc)));
665     }
666     return;
667   }
668 
669   if (isSequential() || !isDeterministic())
670   {
671     if (lExplicitCacheRequest)
672     {
673       diag->add_warning(
674       NEW_XQUERY_WARNING(zwarn::ZWST0006_CACHING_MIGHT_NOT_BE_INTENDED,
675       WARN_PARAMS(getName()->getStringValue(),
676                   (isSequential()?"sequential":"non-deterministic")),
677       WARN_LOC(theLoc)));
678 
679       lExit.cache();
680     }
681     return;
682   }
683 
684 
685   // optimization is prerequisite before invoking isRecursive
686   if (!lExplicitCacheRequest && isOptimized() && !isRecursive())
687   {
688     return;
689   }
690 
691   lExit.cache();
692 }
693 
694 
695 }
696 
697 /* vim:set et sw=2 ts=2: */
698