1// Copyright 2021 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14import { Diagnostic } from '@codemirror/lint';
15import { SyntaxNode, Tree } from '@lezer/common';
16import {
17  AggregateExpr,
18  And,
19  BinaryExpr,
20  BinModifiers,
21  Bool,
22  Bottomk,
23  CountValues,
24  Eql,
25  EqlSingle,
26  Expr,
27  FunctionCall,
28  FunctionCallArgs,
29  FunctionCallBody,
30  Gte,
31  Gtr,
32  Identifier,
33  LabelMatcher,
34  LabelMatchers,
35  LabelMatchList,
36  Lss,
37  Lte,
38  MatrixSelector,
39  MetricIdentifier,
40  Neq,
41  Or,
42  ParenExpr,
43  Quantile,
44  StepInvariantExpr,
45  SubqueryExpr,
46  Topk,
47  UnaryExpr,
48  Unless,
49  VectorSelector,
50} from '../grammar/parser.terms';
51import { containsAtLeastOneChild, retrieveAllRecursiveNodes, walkThrough } from './path-finder';
52import { getType } from './type';
53import { buildLabelMatchers } from './matcher';
54import { EditorState } from '@codemirror/state';
55import { syntaxTree } from '@codemirror/language';
56import { getFunction, Matcher, VectorMatchCardinality, ValueType } from '../types';
57import { buildVectorMatching } from './vector';
58
59export class Parser {
60  private readonly tree: Tree;
61  private readonly state: EditorState;
62  private readonly diagnostics: Diagnostic[];
63
64  constructor(state: EditorState) {
65    this.tree = syntaxTree(state);
66    this.state = state;
67    this.diagnostics = [];
68  }
69
70  getDiagnostics(): Diagnostic[] {
71    return this.diagnostics.sort((a, b) => {
72      return a.from - b.from;
73    });
74  }
75
76  analyze(): void {
77    // when you are at the root of the tree, the first node is not `Expr` but a node with no name.
78    // So to be able to iterate other the node relative to the promql node, we have to get the first child at the beginning
79    this.checkAST(this.tree.topNode.firstChild);
80    this.diagnoseAllErrorNodes();
81  }
82
83  private diagnoseAllErrorNodes() {
84    const cursor = this.tree.cursor();
85    while (cursor.next()) {
86      // usually there is an error node at the end of the expression when user is typing
87      // so it's not really a useful information to say the expression is wrong.
88      // Hopefully if there is an error node at the end of the tree, checkAST should yell more precisely
89      if (cursor.type.id === 0 && cursor.to !== this.tree.topNode.to) {
90        const node = cursor.node.parent;
91        this.diagnostics.push({
92          severity: 'error',
93          message: 'unexpected expression',
94          from: node ? node.from : cursor.from,
95          to: node ? node.to : cursor.to,
96        });
97      }
98    }
99  }
100
101  // checkAST is inspired of the same named method from prometheus/prometheus:
102  // https://github.com/prometheus/prometheus/blob/3470ee1fbf9d424784eb2613bab5ab0f14b4d222/promql/parser/parse.go#L433
103  checkAST(node: SyntaxNode | null): ValueType {
104    if (!node) {
105      return ValueType.none;
106    }
107    switch (node.type.id) {
108      case Expr:
109        return this.checkAST(node.firstChild);
110      case AggregateExpr:
111        this.checkAggregationExpr(node);
112        break;
113      case BinaryExpr:
114        this.checkBinaryExpr(node);
115        break;
116      case FunctionCall:
117        this.checkCallFunction(node);
118        break;
119      case ParenExpr:
120        this.checkAST(walkThrough(node, Expr));
121        break;
122      case UnaryExpr:
123        const unaryExprType = this.checkAST(walkThrough(node, Expr));
124        if (unaryExprType !== ValueType.scalar && unaryExprType !== ValueType.vector) {
125          this.addDiagnostic(node, `unary expression only allowed on expressions of type scalar or instant vector, got ${unaryExprType}`);
126        }
127        break;
128      case SubqueryExpr:
129        const subQueryExprType = this.checkAST(walkThrough(node, Expr));
130        if (subQueryExprType !== ValueType.vector) {
131          this.addDiagnostic(node, `subquery is only allowed on instant vector, got ${subQueryExprType} in ${node.name} instead`);
132        }
133        break;
134      case MatrixSelector:
135        this.checkAST(walkThrough(node, Expr));
136        break;
137      case VectorSelector:
138        this.checkVectorSelector(node);
139        break;
140      case StepInvariantExpr:
141        const exprValue = this.checkAST(walkThrough(node, Expr));
142        if (exprValue !== ValueType.vector && exprValue !== ValueType.matrix) {
143          this.addDiagnostic(node, `@ modifier must be preceded by an instant selector vector or range vector selector or a subquery`);
144        }
145        // if you are looking at the Prometheus code, you will likely find that some checks are missing here.
146        // Specially the one checking if the timestamp after the `@` is ok: https://github.com/prometheus/prometheus/blob/ad5ed416ba635834370bfa06139258b31f8c33f9/promql/parser/parse.go#L722-L725
147        // Since Javascript is managing the number as a float64 and so on 53 bits, we cannot validate that the maxInt64 number is a valid value.
148        // So, to manage properly this issue, we would need to use the BigInt which is possible or by using ES2020.BigInt, or by using the lib: https://github.com/GoogleChromeLabs/jsbi.
149        //   * Introducing a lib just for theses checks is quite overkilled
150        //   * Using ES2020 would be the way to go. Unfortunately moving to ES2020 is breaking the build of the lib.
151        //     So far I didn't find the way to fix it. I think it's likely due to the fact we are building an ESM package which is now something stable in nodeJS/javascript but still experimental in typescript.
152        // For the above reason, we decided to drop these checks.
153        break;
154    }
155
156    return getType(node);
157  }
158
159  private checkAggregationExpr(node: SyntaxNode): void {
160    // according to https://github.com/promlabs/lezer-promql/blob/master/src/promql.grammar#L26
161    // the name of the aggregator function is stored in the first child
162    const aggregateOp = node.firstChild?.firstChild;
163    if (!aggregateOp) {
164      this.addDiagnostic(node, 'aggregation operator expected in aggregation expression but got nothing');
165      return;
166    }
167    const expr = walkThrough(node, FunctionCallBody, FunctionCallArgs, Expr);
168    if (!expr) {
169      this.addDiagnostic(node, 'unable to find the parameter for the expression');
170      return;
171    }
172    this.expectType(expr, ValueType.vector, 'aggregation expression');
173    // get the parameter of the aggregation operator
174    const params = walkThrough(node, FunctionCallBody, FunctionCallArgs, FunctionCallArgs, Expr);
175    if (aggregateOp.type.id === Topk || aggregateOp.type.id === Bottomk || aggregateOp.type.id === Quantile) {
176      if (!params) {
177        this.addDiagnostic(node, 'no parameter found');
178        return;
179      }
180      this.expectType(params, ValueType.scalar, 'aggregation parameter');
181    }
182    if (aggregateOp.type.id === CountValues) {
183      if (!params) {
184        this.addDiagnostic(node, 'no parameter found');
185        return;
186      }
187      this.expectType(params, ValueType.string, 'aggregation parameter');
188    }
189  }
190
191  private checkBinaryExpr(node: SyntaxNode): void {
192    // Following the definition of the BinaryExpr, the left and the right
193    // expression are respectively the first and last child
194    // https://github.com/promlabs/lezer-promql/blob/master/src/promql.grammar#L52
195    const lExpr = node.firstChild;
196    const rExpr = node.lastChild;
197    if (!lExpr || !rExpr) {
198      this.addDiagnostic(node, 'left or right expression is missing in binary expression');
199      return;
200    }
201    const lt = this.checkAST(lExpr);
202    const rt = this.checkAST(rExpr);
203    const boolModifierUsed = walkThrough(node, BinModifiers, Bool);
204    const isComparisonOperator = containsAtLeastOneChild(node, Eql, Neq, Lte, Lss, Gte, Gtr);
205    const isSetOperator = containsAtLeastOneChild(node, And, Or, Unless);
206
207    // BOOL modifier check
208    if (boolModifierUsed) {
209      if (!isComparisonOperator) {
210        this.addDiagnostic(node, 'bool modifier can only be used on comparison operators');
211      }
212    } else {
213      if (isComparisonOperator && lt === ValueType.scalar && rt === ValueType.scalar) {
214        this.addDiagnostic(node, 'comparisons between scalars must use BOOL modifier');
215      }
216    }
217
218    const vectorMatching = buildVectorMatching(this.state, node);
219    if (vectorMatching !== null && vectorMatching.on) {
220      for (const l1 of vectorMatching.matchingLabels) {
221        for (const l2 of vectorMatching.include) {
222          if (l1 === l2) {
223            this.addDiagnostic(node, `label "${l1}" must not occur in ON and GROUP clause at once`);
224          }
225        }
226      }
227    }
228
229    if (lt !== ValueType.scalar && lt !== ValueType.vector) {
230      this.addDiagnostic(lExpr, 'binary expression must contain only scalar and instant vector types');
231    }
232    if (rt !== ValueType.scalar && rt !== ValueType.vector) {
233      this.addDiagnostic(rExpr, 'binary expression must contain only scalar and instant vector types');
234    }
235
236    if ((lt !== ValueType.vector || rt !== ValueType.vector) && vectorMatching !== null) {
237      if (vectorMatching.matchingLabels.length > 0) {
238        this.addDiagnostic(node, 'vector matching only allowed between instant vectors');
239      }
240    } else {
241      if (isSetOperator) {
242        if (vectorMatching?.card === VectorMatchCardinality.CardOneToMany || vectorMatching?.card === VectorMatchCardinality.CardManyToOne) {
243          this.addDiagnostic(node, 'no grouping allowed for set operations');
244        }
245        if (vectorMatching?.card !== VectorMatchCardinality.CardManyToMany) {
246          this.addDiagnostic(node, 'set operations must always be many-to-many');
247        }
248      }
249    }
250    if ((lt === ValueType.scalar || rt === ValueType.scalar) && isSetOperator) {
251      this.addDiagnostic(node, 'set operator not allowed in binary scalar expression');
252    }
253  }
254
255  private checkCallFunction(node: SyntaxNode): void {
256    const funcID = node.firstChild?.firstChild;
257    if (!funcID) {
258      this.addDiagnostic(node, 'function not defined');
259      return;
260    }
261
262    const args = retrieveAllRecursiveNodes(walkThrough(node, FunctionCallBody), FunctionCallArgs, Expr);
263    const funcSignature = getFunction(funcID.type.id);
264    const nargs = funcSignature.argTypes.length;
265
266    if (funcSignature.variadic === 0) {
267      if (args.length !== nargs) {
268        this.addDiagnostic(node, `expected ${nargs} argument(s) in call to "${funcSignature.name}", got ${args.length}`);
269      }
270    } else {
271      const na = nargs - 1;
272      if (na > args.length) {
273        this.addDiagnostic(node, `expected at least ${na} argument(s) in call to "${funcSignature.name}", got ${args.length}`);
274      } else {
275        const nargsmax = na + funcSignature.variadic;
276        if (funcSignature.variadic > 0 && nargsmax < args.length) {
277          this.addDiagnostic(node, `expected at most ${nargsmax} argument(s) in call to "${funcSignature.name}", got ${args.length}`);
278        }
279      }
280    }
281
282    let j = 0;
283    for (let i = 0; i < args.length; i++) {
284      j = i;
285      if (j >= funcSignature.argTypes.length) {
286        if (funcSignature.variadic === 0) {
287          // This is not a vararg function so we should not check the
288          // type of the extra arguments.
289          break;
290        }
291        j = funcSignature.argTypes.length - 1;
292      }
293      this.expectType(args[i], funcSignature.argTypes[j], `call to function "${funcSignature.name}"`);
294    }
295  }
296
297  private checkVectorSelector(node: SyntaxNode): void {
298    const labelMatchers = buildLabelMatchers(
299      retrieveAllRecursiveNodes(walkThrough(node, LabelMatchers, LabelMatchList), LabelMatchList, LabelMatcher),
300      this.state
301    );
302    let vectorSelectorName = '';
303    // VectorSelector ( MetricIdentifier ( Identifier ) )
304    // https://github.com/promlabs/lezer-promql/blob/71e2f9fa5ae6f5c5547d5738966cd2512e6b99a8/src/promql.grammar#L200
305    const vectorSelectorNodeName = walkThrough(node, MetricIdentifier, Identifier);
306    if (vectorSelectorNodeName) {
307      vectorSelectorName = this.state.sliceDoc(vectorSelectorNodeName.from, vectorSelectorNodeName.to);
308    }
309    if (vectorSelectorName !== '') {
310      // In this case the last LabelMatcher is checking for the metric name
311      // set outside the braces. This checks if the name has already been set
312      // previously
313      const labelMatcherMetricName = labelMatchers.find((lm) => lm.name === '__name__');
314      if (labelMatcherMetricName) {
315        this.addDiagnostic(node, `metric name must not be set twice: ${vectorSelectorName} or ${labelMatcherMetricName.value}`);
316      }
317      // adding the metric name as a Matcher to avoid a false positive for this kind of expression:
318      // foo{bare=''}
319      labelMatchers.push(new Matcher(EqlSingle, '__name__', vectorSelectorName));
320    }
321
322    // A Vector selector must contain at least one non-empty matcher to prevent
323    // implicit selection of all metrics (e.g. by a typo).
324    const empty = labelMatchers.every((lm) => lm.matchesEmpty());
325    if (empty) {
326      this.addDiagnostic(node, 'vector selector must contain at least one non-empty matcher');
327    }
328  }
329
330  private expectType(node: SyntaxNode, want: ValueType, context: string): void {
331    const t = this.checkAST(node);
332    if (t !== want) {
333      this.addDiagnostic(node, `expected type ${want} in ${context}, got ${t}`);
334    }
335  }
336
337  private addDiagnostic(node: SyntaxNode, msg: string): void {
338    this.diagnostics.push({
339      severity: 'error',
340      message: msg,
341      from: node.from,
342      to: node.to,
343    });
344  }
345}
346