1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include <QHash>
41 
42 #include "qpatternistlocale_p.h"
43 
44 #include "qpatternplatform_p.h"
45 
46 QT_BEGIN_NAMESPACE
47 
48 using namespace QPatternist;
49 
50 namespace QPatternist
51 {
52     /**
53      * @short Used internally by PatternPlatform and describes
54      * a flag that affects how a pattern is treated.
55      *
56      * The member variables aren't declared @c const, in order
57      * to make the synthesized assignment operator and copy constructor work.
58      *
59      * @ingroup Patternist_utils
60      * @author Frans Englich <frans.englich@nokia.com>
61      */
62     class PatternFlag
63     {
64     public:
65         typedef QMap<QChar, PatternFlag> Hash;
66 
PatternFlag()67         inline PatternFlag() : flag(PatternPlatform::NoFlags)
68         {
69         }
70 
PatternFlag(const PatternPlatform::Flag opt,const QString & descr)71         inline PatternFlag(const PatternPlatform::Flag opt,
72                            const QString &descr) : flag(opt),
73                                                    description(descr)
74         {
75         }
76 
77         PatternPlatform::Flag   flag;
78         QString                 description;
79 
80         static inline Hash flagDescriptions();
81     };
82 }
83 
flagDescriptions()84 static inline PatternFlag::Hash flagDescriptions()
85 {
86     PatternFlag::Hash retval;
87 
88     retval.insert(QChar(QLatin1Char('s')),
89                   PatternFlag(PatternPlatform::DotAllMode,
90                               QtXmlPatterns::tr("%1 matches newline characters").arg(formatKeyword(QLatin1Char('.')))));
91 
92     retval.insert(QChar(QLatin1Char('m')),
93                   PatternFlag(PatternPlatform::MultiLineMode,
94                               QtXmlPatterns::tr("%1 and %2 match the start and end of a line.")
95                                    .arg(formatKeyword(QLatin1Char('^')))
96                                    .arg(formatKeyword(QLatin1Char('$')))));
97 
98     retval.insert(QChar(QLatin1Char('i')),
99                   PatternFlag(PatternPlatform::CaseInsensitive,
100                               QtXmlPatterns::tr("Matches are case insensitive")));
101 
102     retval.insert(QChar(QLatin1Char('x')),
103                   PatternFlag(PatternPlatform::SimplifyWhitespace,
104                               QtXmlPatterns::tr("Whitespace characters are removed, except when they appear "
105                                  "in character classes")));
106 
107     return retval;
108 }
109 
PatternPlatform(const qint8 flagsPosition)110 PatternPlatform::PatternPlatform(const qint8 flagsPosition) : m_compiledParts(NoPart),
111                                                               m_flags(NoFlags),
112                                                               m_flagsPosition(flagsPosition)
113 {
114 }
115 
pattern(const DynamicContext::Ptr & context) const116 QRegExp PatternPlatform::pattern(const DynamicContext::Ptr &context) const
117 {
118     if(m_compiledParts == FlagsAndPattern) /* This is the most common case. */
119     {
120         Q_ASSERT(m_pattern.isValid());
121         return m_pattern;
122     }
123 
124     QRegExp retvalPattern;
125     Flags flags;
126 
127     /* Compile the flags, if necessary. */
128     if(m_compiledParts.testFlag(FlagsPrecompiled))
129         flags = m_flags;
130     else
131     {
132         const Expression::Ptr flagsOp(m_operands.value(m_flagsPosition));
133 
134         if(flagsOp)
135             flags = parseFlags(flagsOp->evaluateSingleton(context).stringValue(), context);
136         else
137             flags = NoFlags;
138     }
139 
140     /* Compile the pattern, if necessary. */
141     if(m_compiledParts.testFlag(PatternPrecompiled))
142         retvalPattern = m_pattern;
143     else
144     {
145         retvalPattern = parsePattern(m_operands.at(1)->evaluateSingleton(context).stringValue(),
146                                      context);
147 
148     }
149 
150     applyFlags(flags, retvalPattern);
151 
152     Q_ASSERT(m_pattern.isValid());
153     return retvalPattern;
154 }
155 
applyFlags(const Flags flags,QRegExp & patternP)156 void PatternPlatform::applyFlags(const Flags flags, QRegExp &patternP)
157 {
158     Q_ASSERT(patternP.isValid());
159     if(flags == NoFlags)
160         return;
161 
162     if(flags & CaseInsensitive)
163     {
164         patternP.setCaseSensitivity(Qt::CaseInsensitive);
165     }
166     // TODO Apply the other flags, like 'x'.
167 }
168 
parsePattern(const QString & pattern,const ReportContext::Ptr & context) const169 QRegExp PatternPlatform::parsePattern(const QString &pattern,
170                                       const ReportContext::Ptr &context) const
171 {
172     return parsePattern(pattern, context, this);
173 }
174 
parsePattern(const QString & patternP,const ReportContext::Ptr & context,const SourceLocationReflection * const location)175 QRegExp PatternPlatform::parsePattern(const QString &patternP,
176                                       const ReportContext::Ptr &context,
177                                       const SourceLocationReflection *const location)
178 {
179     if(patternP == QLatin1String("(.)\\3") ||
180        patternP == QLatin1String("\\3")    ||
181        patternP == QLatin1String("(.)\\2"))
182     {
183         context->error(QLatin1String("We don't want to hang infinitely on K2-MatchesFunc-9, "
184                                      "10 and 11."),
185                        ReportContext::FOER0000, location);
186         return QRegExp();
187     }
188 
189     QString rewrittenPattern(patternP);
190 
191     /* We rewrite some well known patterns to QRegExp style here. Note that
192      * these character classes only works in the ASCII range, and fail for
193      * others. This support needs to be in QRegExp, since it's about checking
194      * QChar::category(). */
195     rewrittenPattern.replace(QLatin1String("[\\i-[:]]"), QLatin1String("[a-zA-Z_]"));
196     rewrittenPattern.replace(QLatin1String("[\\c-[:]]"), QLatin1String("[a-zA-Z0-9_\\-\\.]"));
197 
198     QRegExp retval(rewrittenPattern, Qt::CaseSensitive, QRegExp::W3CXmlSchema11);
199 
200     if(retval.isValid())
201         return retval;
202     else
203     {
204         context->error(QtXmlPatterns::tr("%1 is an invalid regular expression pattern: %2")
205                                         .arg(formatExpression(patternP), retval.errorString()),
206                                    ReportContext::FORX0002, location);
207         return QRegExp();
208     }
209 }
210 
parseFlags(const QString & flags,const DynamicContext::Ptr & context) const211 PatternPlatform::Flags PatternPlatform::parseFlags(const QString &flags,
212                                                    const DynamicContext::Ptr &context) const
213 {
214 
215     if(flags.isEmpty())
216         return NoFlags;
217 
218     const PatternFlag::Hash flagDescrs(flagDescriptions());
219     const int len = flags.length();
220     Flags retval = NoFlags;
221 
222     for(int i = 0; i < len; ++i)
223     {
224         const QChar flag(flags.at(i));
225         const Flag specified = flagDescrs.value(flag).flag;
226 
227         if(specified != NoFlags)
228         {
229             retval |= specified;
230             continue;
231         }
232 
233         /* Generate a nice error message. */
234         QString message(QtXmlPatterns::tr("%1 is an invalid flag for regular expressions. Valid flags are:")
235                              .arg(formatKeyword(flag)));
236 
237         /* This is formatting, so don't bother translators with it. */
238         message.append(QLatin1Char('\n'));
239 
240         const PatternFlag::Hash::const_iterator end(flagDescrs.constEnd());
241         PatternFlag::Hash::const_iterator it(flagDescrs.constBegin());
242 
243         for(; it != end;)
244         {
245             // TODO handle bidi correctly
246             // TODO format this with rich text(list/table)
247             message.append(formatKeyword(it.key()));
248             message.append(QLatin1String(" - "));
249             message.append(it.value().description);
250 
251             ++it;
252             if(it != end)
253                 message.append(QLatin1Char('\n'));
254         }
255 
256         context->error(message, ReportContext::FORX0001, this);
257         return NoFlags;
258     }
259 
260     return retval;
261 }
262 
compress(const StaticContext::Ptr & context)263 Expression::Ptr PatternPlatform::compress(const StaticContext::Ptr &context)
264 {
265     const Expression::Ptr me(FunctionCall::compress(context));
266     if(me != this)
267         return me;
268 
269     if(m_operands.at(1)->is(IDStringValue))
270     {
271         const DynamicContext::Ptr dynContext(context->dynamicContext());
272 
273         m_pattern = parsePattern(m_operands.at(1)->evaluateSingleton(dynContext).stringValue(),
274                                  dynContext);
275         m_compiledParts |= PatternPrecompiled;
276     }
277 
278     const Expression::Ptr flagOperand(m_operands.value(m_flagsPosition));
279 
280     if(!flagOperand)
281     {
282         m_flags = NoFlags;
283         m_compiledParts |= FlagsPrecompiled;
284     }
285     else if(flagOperand->is(IDStringValue))
286     {
287         const DynamicContext::Ptr dynContext(context->dynamicContext());
288         m_flags = parseFlags(flagOperand->evaluateSingleton(dynContext).stringValue(),
289                              dynContext);
290         m_compiledParts |= FlagsPrecompiled;
291     }
292 
293     if(m_compiledParts == FlagsAndPattern)
294         applyFlags(m_flags, m_pattern);
295 
296     return me;
297 }
298 
299 QT_END_NAMESPACE
300