1# Analyze an R file for possible extra or missing commas. Returns FALSE if any
2# problems detected, TRUE otherwise.
3diagnoseCode <- function(path = NULL, text = NULL) {
4  if (!xor(is.null(path), is.null(text))) {
5    stop("Must specify `path` or `text`, but not both.")
6  }
7
8  if (!is.null(path)) {
9    tokens <- sourcetools::tokenize_file(path)
10  } else {
11    tokens <- sourcetools::tokenize_string(text)
12  }
13
14  find_scopes <- function(tokens) {
15    # Strip whitespace and comments
16    tokens <- tokens[!(tokens$type %in% c("whitespace", "comment")),]
17
18    # Replace various types of things with "value"
19    tokens$type[tokens$type %in% c("string", "number", "symbol", "keyword")] <- "value"
20
21    # Record types for close and open brace/bracket/parens, and commas
22    brace_idx <- tokens$value %in% c("(", ")", "{", "}", "[", "]", ",")
23    tokens$type[brace_idx] <- tokens$value[brace_idx]
24
25    # Stack-related function for recording scope. Starting scope is "{"
26    stack <- "{"
27    push <- function(x) {
28      stack <<- c(stack, x)
29    }
30    pop <- function() {
31      if (length(stack) == 1) {
32        # Stack underflow, but we need to keep going
33        return(NA_character_)
34      }
35      res <- stack[length(stack)]
36      stack <<- stack[-length(stack)]
37      res
38    }
39    peek <- function() {
40      stack[length(stack)]
41    }
42
43    # First, establish a scope for each token. For opening and closing
44    # braces/brackets/parens, the scope at that location is the *surrounding*
45    # scope, not the new scope created by the brace/bracket/paren.
46    for (i in seq_len(nrow(tokens))) {
47      value <- tokens$value[i]
48
49      tokens$scope[i] <- peek()
50      if (value %in% c("{", "(", "[")) {
51        push(value)
52
53      } else if (value == "}") {
54        if (!identical(pop(), "{"))
55          tokens$err[i] <- "unmatched_brace"
56        # For closing brace/paren/bracket, get the scope after popping
57        tokens$scope[i] <- peek()
58
59      } else if (value == ")") {
60        if (!identical(pop(), "("))
61          tokens$err[i] <- "unmatched_paren"
62        tokens$scope[i] <- peek()
63
64      } else if (value == "]") {
65        if (!identical(pop(), "["))
66          tokens$err[i] <- "unmatched_bracket"
67        tokens$scope[i] <- peek()
68      }
69    }
70
71    tokens
72  }
73
74  check_commas <- function(tokens) {
75    # Find extra and missing commas
76    tokens$err <- mapply(
77      tokens$type,
78      c("", tokens$type[-length(tokens$type)]),
79      c(tokens$type[-1],  ""),
80      tokens$scope,
81      tokens$err,
82      SIMPLIFY = FALSE,
83      FUN = function(type, prevType, nextType, scope, err) {
84        # If an error was already found, just return it. This could have
85        # happened in the brace/paren/bracket matching phase.
86        if (!is.na(err)) {
87          return(err)
88        }
89        if (scope == "(") {
90          if (type == "," &&
91              (prevType == "(" || prevType == "," || nextType == ")"))
92          {
93            return("extra_comma")
94          }
95
96          if ((prevType == ")" && type == "value") ||
97              (prevType == "value" && type == "value")) {
98            return("missing_comma")
99          }
100        }
101
102        NA_character_
103      }
104    )
105
106    tokens
107  }
108
109
110  tokens$err <- NA_character_
111  tokens <- find_scopes(tokens)
112  tokens <- check_commas(tokens)
113
114  # No errors found
115  if (all(is.na(tokens$err))) {
116    return(TRUE)
117  }
118
119  # If we got here, errors were found; print messages.
120  if (!is.null(path)) {
121    lines <- readLines(path)
122  } else {
123    lines <- strsplit(text, "\n")[[1]]
124  }
125
126  # Print out the line of code with the error, and point to the column with
127  # the error.
128  show_code_error <- function(msg, lines, row, col) {
129    message(paste0(
130      msg, "\n",
131      row, ":", lines[row], "\n",
132      paste0(rep.int(" ", nchar(as.character(row)) + 1), collapse = ""),
133      gsub(perl = TRUE, "[^\\s]", " ", substr(lines[row], 1, col-1)), "^"
134    ))
135  }
136
137  err_idx <- which(!is.na(tokens$err))
138  msg <- ""
139  for (i in err_idx) {
140    row <- tokens$row[i]
141    col <- tokens$column[i]
142    err <- tokens$err[i]
143
144    if (err == "missing_comma") {
145      show_code_error("Possible missing comma at:", lines, row, col)
146    } else if (err == "extra_comma") {
147      show_code_error("Possible extra comma at:", lines, row, col)
148    } else if (err == "unmatched_brace") {
149      show_code_error("Possible unmatched '}' at:", lines, row, col)
150    } else if (err == "unmatched_paren") {
151      show_code_error("Possible unmatched ')' at:", lines, row, col)
152    } else if (err == "unmatched_bracket") {
153      show_code_error("Possible unmatched ']' at:", lines, row, col)
154    }
155  }
156  return(FALSE)
157}
158