1#' Generate DOT code using a graph object
2#'
3#' Generates Graphviz DOT code as an R character object using DiagrammeR graph
4#' object.
5#'
6#' @inheritParams render_graph
7#'
8#' @return A character vector of length 1 containing Graphviz DOT code.
9#'
10#' @export
11generate_dot <- function(graph) {
12
13  # Get the name of the function
14  fcn_name <- get_calling_fcn()
15
16  # Validation: Graph object is valid
17  if (graph_object_valid(graph) == FALSE) {
18
19    emit_error(
20      fcn_name = fcn_name,
21      reasons = "The graph object is not valid")
22  }
23
24  # Extract objects from the graph objecct
25  nodes_df <- graph$nodes_df
26  edges_df <- graph$edges_df
27  directed <- graph$directed
28  global_attrs <- graph$global_attrs
29
30  if ("graph" %in% global_attrs$attr_type) {
31    graph_attrs <-
32      global_attrs %>%
33      dplyr::filter(attr_type == "graph") %>%
34      dplyr::mutate(string = paste0(attr, " = '", value, "'"))
35
36    graph_attrs <-
37      graph_attrs %>%
38      dplyr::pull(string)
39
40  } else {
41    graph_attrs <- NA
42  }
43
44  if ("node" %in% global_attrs$attr_type) {
45    node_attrs <-
46      global_attrs %>%
47      dplyr::filter(attr_type == "node") %>%
48      dplyr::mutate(string = paste0(attr, " = '", value, "'"))
49
50    node_attrs <-
51      node_attrs %>%
52      dplyr::pull(string)
53
54    # Fill in NA attribute values with global preset values
55    for (i in 1:nrow(global_attrs %>% dplyr::filter(attr_type == "node"))) {
56
57      node_attr_to_set <- (global_attrs %>% dplyr::filter(attr_type == "node"))[i, 1]
58
59      if (node_attr_to_set %in% colnames(nodes_df)) {
60
61        col_num <- which(colnames(nodes_df) == node_attr_to_set)
62
63        nodes_df[which(is.na(nodes_df[, col_num])), col_num] <-
64          (global_attrs %>% dplyr::filter(attr_type == "node"))[i, 2]
65      }
66    }
67
68  } else {
69    node_attrs <- NA
70  }
71
72  if ("edge" %in% global_attrs$attr_type) {
73    edge_attrs <-
74      global_attrs %>%
75      dplyr::filter(attr_type == "edge") %>%
76      dplyr::mutate(string = paste0(attr, " = '", value, "'"))
77
78    edge_attrs <-
79      edge_attrs %>%
80      dplyr::pull(string)
81
82    # Fill in NA attribute values with global preset values
83    for (i in 1:nrow(global_attrs %>% dplyr::filter(attr_type == "edge"))) {
84
85      edge_attr_to_set <- (global_attrs %>% dplyr::filter(attr_type == "edge"))[i, 1]
86
87      if (edge_attr_to_set %in% colnames(edges_df)) {
88
89        col_num <- which(colnames(edges_df) == edge_attr_to_set)
90
91        edges_df[which(is.na(edges_df[, col_num])), col_num] <-
92          (global_attrs %>% dplyr::filter(attr_type == "edge"))[i, 2]
93      }
94    }
95
96  } else {
97    edge_attrs <- NA
98  }
99
100
101  # Replace NA values with empty strings in `nodes_df`
102  if (!is.null(nodes_df)) {
103
104    if (ncol(nodes_df) >= 4) {
105
106      nodes_df <-
107        nodes_df %>%
108        dplyr::mutate_at(
109          .vars = base::setdiff(colnames(nodes_df), c("id", "type", "label")),
110          .funs =  ~ tidyr::replace_na(., "")
111        )
112    }
113  }
114
115
116  # Replace NA values with empty strings in `edges_df`
117  if (!is.null(edges_df)) {
118
119    if (ncol(edges_df) >= 5) {
120
121      edges_df <-
122        edges_df %>%
123        dplyr::mutate_at(
124          .vars = base::setdiff(colnames(edges_df), c("id", "from", "to", "rel")),
125          .funs =  ~ tidyr::replace_na(., "")
126        )
127    }
128  }
129
130  # If `equation` column in `nodes_df`, ensure the
131  # right amount of escaping is present
132  if ("equation" %in% colnames(nodes_df)) {
133    equation_col <- which(colnames(nodes_df) == "equation")
134
135    for (i in 1:nrow(nodes_df)) {
136      if (grepl("^\\$.*\\$$", nodes_df[i, equation_col])) {
137        nodes_df[i, equation_col] <-
138          stringr::str_replace_all(
139            nodes_df[i, equation_col], "\\\\", "\\\\\\\\")
140      } else {
141        nodes_df[i, equation_col] <- ""
142      }
143    }
144  }
145
146  # If `display` column in `nodes_df`, modify label
147  # column for this render
148  if ("display" %in% colnames(nodes_df)) {
149
150    display_col <- which(colnames(nodes_df) == "display")
151    label_col <- which(colnames(nodes_df) == "label")
152
153    for (i in 1:nrow(nodes_df)) {
154
155      if (nodes_df[i, display_col] != "") {
156
157        nodes_df[i, label_col] <-
158          nodes_df[
159            i, which(colnames(nodes_df) == nodes_df[i, display_col])]
160
161      } else {
162        nodes_df[i, label_col] <- ""
163      }
164    }
165  }
166
167  # If `display` column in `edges_df`, modify label
168  # column for this render
169  if ("display" %in% colnames(edges_df)) {
170
171    display_col <- which(colnames(edges_df) == "display")
172
173    if (!("label" %in% colnames(edges_df))) {
174
175      edges_df <-
176        edges_df %>%
177        dplyr::mutate(label = as.character(NA))
178    }
179
180    label_col <- which(colnames(edges_df) == "label")
181
182    for (i in 1:nrow(edges_df)) {
183      if (!is.na(edges_df[i, display_col]) ) {
184        if (edges_df[i, display_col] != "") {
185
186          edges_df[i, label_col] <-
187            edges_df[
188              i, which(colnames(edges_df) == edges_df[i, display_col])]
189        }
190      } else {
191        edges_df[i, label_col] <- ""
192      }
193    }
194  }
195
196  # Create vector of graph attributes
197  graph_attributes <- gv_graph_attributes()
198
199  # Create vector of node attributes
200  node_attributes <- gv_node_attributes()
201
202  # Create vector of edge attributes
203  edge_attributes <- gv_edge_attributes()
204
205  if (nrow(nodes_df) == 0 &
206      nrow(edges_df) == 0) {
207
208    # Create DOT code with nothing in graph
209    dot_code <-
210      paste0(ifelse(directed,
211                    "digraph", "graph"),
212             " {\n", "\n}")
213
214  } else {
215
216    #
217    # Create the DOT attributes block
218    #
219
220    # Create the default attributes statement
221    # for graph attributes
222    if (!(any(is.na(graph_attrs)))) {
223      graph_attr_stmt <-
224        paste0("graph [",
225               paste(graph_attrs,
226                     collapse = ",\n       "),
227               "]\n")
228    } else {
229      graph_attr_stmt <- ""
230    }
231
232    # Create the default attributes statement
233    # for node attributes
234    if (!(any(is.na(node_attrs)))) {
235      node_attr_stmt <-
236        paste0("node [", paste(node_attrs,
237                               collapse = ",\n      "),
238               "]\n")
239    } else {
240      node_attr_stmt <- ""
241    }
242
243    # Create the default attributes statement
244    # for edge attributes
245    if (!(any(is.na(edge_attrs)))) {
246      edge_attr_stmt <-
247        paste0("edge [", paste(edge_attrs,
248                               collapse = ",\n     "),
249               "]\n")
250    } else {
251      edge_attr_stmt <- ""
252    }
253
254    # Combine default attributes into a single block
255    combined_attr_stmts <-
256      paste(
257        graph_attr_stmt,
258        node_attr_stmt,
259        edge_attr_stmt, sep = "\n")
260
261    #
262    # Create the DOT node block
263    #
264
265    if (nrow(nodes_df) > 0) {
266
267      # Determine whether positional (x,y) data is included
268      column_with_x <- which(colnames(nodes_df) %in% "x")[1]
269
270      column_with_y <- which(colnames(nodes_df) %in% "y")[1]
271
272      if (!is.na(column_with_x) & !is.na(column_with_y)) {
273        pos <-
274          paste0(
275            nodes_df %>% dplyr::pull(column_with_x), ",",
276            nodes_df %>% dplyr::pull(column_with_y), "!"
277          )
278
279        nodes_df <-
280          nodes_df %>%
281          dplyr::mutate(pos = !!pos)
282      }
283
284      # Determine whether column 'alpha' exists
285      if (any(grepl("$alpha^", colnames(nodes_df)))) {
286        column_with_alpha_assigned <-
287          grep("$alpha^", colnames(nodes_df))
288      } else {
289        column_with_alpha_assigned <- NA
290      }
291
292      if (!is.na(column_with_alpha_assigned)) {
293
294        # Determine the number of color attributes in
295        # the node data frame
296        number_of_col_attr <-
297          length(which(colnames(nodes_df) %in%
298                         c("color", "fillcolor",
299                           "fontcolor")))
300
301        # If the number of color attrs in df is 1,
302        # rename referencing alpha column
303        if (number_of_col_attr == 1) {
304
305          name_of_col_attr <-
306            colnames(nodes_df)[
307              which(colnames(nodes_df) %in%
308                      c("color", "fillcolor",
309                        "fontcolor"))]
310
311          colnames(nodes_df)[column_with_alpha_assigned] <-
312            paste0("alpha:", name_of_col_attr)
313        }
314      }
315
316      # Determine whether column 'alpha' with
317      # color attr exists
318      if (any(grepl("alpha:.*", colnames(nodes_df)))) {
319
320        alpha_column_no <- grep("alpha:.*", colnames(nodes_df))
321
322        color_attr_column_name <-
323          unlist(strsplit(colnames(nodes_df)[
324            (which(grepl("alpha:.*", colnames(nodes_df))))
325          ], ":"))[-1]
326
327        color_attr_column_no <-
328          which(colnames(nodes_df) %in% color_attr_column_name)
329
330        # Append alpha value only if referenced
331        # column is for color
332        if (any(c("color", "fillcolor", "fontcolor") %in%
333                colnames(nodes_df)[color_attr_column_no])) {
334
335          # Append alpha for color values that are
336          # X11 color names
337          if (all(grepl("[a-z]*",
338                        as.character(nodes_df[, color_attr_column_no]))) &
339              all(as.character(nodes_df[, color_attr_column_no]) %in%
340                  x11_hex()[, 1])) {
341
342            for (i in 1:nrow(nodes_df)) {
343              nodes_df[i, color_attr_column_no] <-
344                paste0(x11_hex()[
345                  which(x11_hex()[, 1] %in%
346                          as.character(nodes_df[i, color_attr_column_no])), 2],
347                  formatC(round(as.numeric(nodes_df[i, alpha_column_no]), 0),
348                          flag = "0", width = 2))
349            }
350          }
351
352          # Append alpha for color values that
353          # are hex color values
354          if (all(grepl("#[0-9a-fA-F]{6}$",
355                        as.character(nodes_df[, color_attr_column_no])))) {
356
357            for (i in 1:nrow(nodes_df)) {
358              nodes_df[, color_attr_column_no] <-
359                as.character(nodes_df[, color_attr_column_no])
360
361              nodes_df[i, color_attr_column_no] <-
362                paste0(nodes_df[i, color_attr_column_no],
363                       round(as.numeric(nodes_df[i, alpha_column_no]), 0))
364            }
365          }
366        }
367      }
368
369      # Determine which other columns correspond
370      # to node attribute values
371      other_columns_with_node_attributes <-
372        which(colnames(nodes_df) %in% node_attributes)
373
374      # Construct the 'node_block' character object
375      for (i in 1:nrow(nodes_df)) {
376        if (i == 1) {
377          node_block <- vector(mode = "character", length = 0)
378        }
379
380        if (length(other_columns_with_node_attributes) > 0) {
381
382          for (j in other_columns_with_node_attributes) {
383
384            if (j == other_columns_with_node_attributes[1]) {
385              attr_string <- vector(mode = "character", length = 0)
386            }
387
388            # Create the node attributes for labels
389            # and tooltips when provided
390            if (all(colnames(nodes_df)[j] %in%
391                    c("label", "tooltip"),
392                    is.na(nodes_df[i, j]))) {
393              attribute <- NULL
394            } else if (all(colnames(nodes_df)[j] %in%
395                           c("label", "tooltip"),
396                           !is.na(nodes_df[i, j]))) {
397              attribute <-
398                paste0(colnames(nodes_df)[j],
399                       " = ", "'", nodes_df[i, j], "'")
400            } else if (all(!(colnames(nodes_df)[j] %in%
401                             c("label", "tooltip")),
402                           is.na(nodes_df[i, j]))) {
403              attribute <- NULL
404            } else if (all(!(colnames(nodes_df)[j] %in%
405                             c("label", "tooltip")),
406                           !is.na(nodes_df[i, j]))) {
407              attribute <-
408                paste0(colnames(nodes_df)[j],
409                       " = ", "'", nodes_df[i, j], "'")
410            }
411            attr_string <- c(attr_string, attribute)
412          }
413
414          if (j == other_columns_with_node_attributes[
415            length(other_columns_with_node_attributes)]) {
416            attr_string <- paste(attr_string, collapse = ", ")
417          }
418        }
419
420        # Generate a line of node objects when an
421        # attribute string exists
422        if (exists("attr_string")) {
423          line <- paste0("  '", nodes_df[i, 1], "'",
424                         " [", attr_string, "] ")
425        }
426
427        # Generate a line of node objects when an
428        # attribute string doesn't exist
429        if (!exists("attr_string")) {
430          line <-
431            paste0("  '",
432                   nodes_df[i, 1],
433                   "'")
434        }
435        node_block <- c(node_block, line)
436      }
437
438      if ("rank" %in% colnames(nodes_df)) {
439        node_block <-
440          c(node_block,
441            tapply(node_block,
442                   nodes_df$rank, FUN = function(x) {
443                     if(length(x) > 1) {
444                       x <- paste0('subgraph{rank = same\n',
445                                   paste0(x, collapse = '\n'),
446                                   '}\n')
447                     }
448                     return(x)
449                   })
450          )
451
452      } else if ('cluster' %in% colnames(nodes_df)) {
453
454        cluster_vals <- nodes_df$cluster
455        cluster_vals[cluster_vals == ""] <- NA_character_
456
457        clustered_node_block <- character(0)
458        clusters <- split(node_block, cluster_vals)
459
460        for (i in seq_along(clusters)) {
461          if (names(clusters)[[i]] == "") {
462            # nodes not in clusters
463            cluster_block <- clusters[[i]]
464          } else {
465            cluster_block <-
466              paste0(
467                "subgraph cluster", i, "{\nlabel='",
468                names(clusters)[[i]], "'\n",
469                paste0(clusters[[i]], collapse = "\n"), "}\n"
470              )
471          }
472
473          clustered_node_block <- c(clustered_node_block, cluster_block)
474        }
475
476        node_block <- clustered_node_block
477
478        # cleanup variables
479        rm(clustered_node_block, clusters, cluster_block)
480      }
481
482      # Construct the `node_block` character object
483      node_block <- paste(node_block, collapse = "\n")
484
485      # Remove the `attr_string` object if it exists
486      if (exists("attr_string")) {
487        rm(attr_string)
488      }
489
490      # Remove the `attribute` object if it exists
491      if (exists("attribute")) {
492        rm(attribute)
493      }
494    }
495
496    #
497    # Create the DOT edge block
498    #
499
500    if (nrow(edges_df) > 0) {
501
502      # Determine whether `from` or `to` columns are
503      # in `edges_df`
504      from_to_columns <-
505        ifelse(any(c("from", "to") %in%
506                     colnames(edges_df)), TRUE, FALSE)
507
508      # Determine which columns in `edges_df`
509      # contain edge attributes
510      other_columns_with_edge_attributes <-
511        which(colnames(edges_df) %in% edge_attributes)
512
513      # Determine whether the complementary set of
514      # columns is present
515      if (from_to_columns) {
516        both_from_to_columns <-
517          all(c(any(c("from") %in%
518                      colnames(edges_df))),
519              any(c("to") %in%
520                    colnames(edges_df)))
521      }
522
523      # If the complementary set of columns is present,
524      # determine the positions
525      if (exists("both_from_to_columns")) {
526        if (both_from_to_columns) {
527          from_column <-
528            which(colnames(edges_df) %in% c("from"))[1]
529          to_column <-
530            which(colnames(edges_df) %in% c("to"))[1]
531        }
532      }
533
534      # Construct the `edge_block` character object
535      if (exists("from_column") &
536          exists("to_column")) {
537
538        if (length(from_column) == 1 &
539            length(from_column) == 1) {
540
541          for (i in 1:nrow(edges_df)) {
542
543            if (i == 1) {
544              edge_block <-
545                vector(mode = "character", length = 0)
546            }
547
548            if (length(other_columns_with_edge_attributes) > 0) {
549
550              for (j in other_columns_with_edge_attributes) {
551
552                if (j == other_columns_with_edge_attributes[1]) {
553                  attr_string <- vector(mode = "character", length = 0)
554                }
555
556                # Create the edge attributes for labels
557                # and tooltips when provided
558                if (all(colnames(edges_df)[j] %in%
559                        c("edgetooltip", "headtooltip",
560                          "label", "labeltooltip",
561                          "taillabel", "tailtooltip",
562                          "tooltip"),
563                        is.na(edges_df[i, j]))) {
564                  attribute <- NULL
565                } else if (all(colnames(edges_df)[j] %in%
566                               c("edgetooltip", "headtooltip",
567                                 "label", "labeltooltip",
568                                 "taillabel", "tailtooltip",
569                                 "tooltip"),
570                               edges_df[i, j] != '')) {
571                  attribute <-
572                    paste0(colnames(edges_df)[j],
573                           " = ", "'", edges_df[i, j],
574                           "'")
575                } else if (all(!(colnames(edges_df)[j] %in%
576                                 c("edgetooltip", "headtooltip",
577                                   "label", "labeltooltip",
578                                   "taillabel", "tailtooltip",
579                                   "tooltip")),
580                               is.na(edges_df[i, j]))) {
581
582                  attribute <- NULL
583                } else if (all(!(colnames(edges_df)[j] %in%
584                                 c("edgetooltip", "headtooltip",
585                                   "label", "labeltooltip",
586                                   "taillabel", "tailtooltip",
587                                   "tooltip")),
588                               edges_df[i, j] != '')) {
589                  attribute <-
590                    paste0(colnames(edges_df)[j],
591                           " = ", "'", edges_df[i, j], "'")
592                }
593                attr_string <- c(attr_string, attribute)
594              }
595
596              if (j == other_columns_with_edge_attributes[
597                length(other_columns_with_edge_attributes)]) {
598                attr_string <- paste(attr_string, collapse = ", ")
599              }
600            }
601
602            # Generate a line of edge objects when an
603            # attribute string exists
604            if (exists("attr_string")) {
605              line <-
606                paste0("'", edges_df[i, from_column], "'",
607                       ifelse(directed, "->", "--"),
608                       "'", edges_df[i, to_column], "'",
609                       paste0(" [", attr_string, "] "))
610            }
611
612            # Generate a line of edge objects when an
613            # attribute string doesn't exist
614            if (!exists("attr_string")) {
615              line <-
616                paste0("  ",
617                       "'", edges_df[i, from_column], "'",
618                       ifelse(directed, "->", "--"),
619                       "'", edges_df[i, to_column], "'",
620                       " ")
621            }
622            edge_block <- c(edge_block, line)
623          }
624        }
625      }
626
627      # Construct the `edge_block` character object
628      if (exists("edge_block")) {
629        edge_block <- paste(edge_block, collapse = "\n")
630      }
631    }
632
633    # Create the graph code from the chosen attributes,
634    # and the nodes and edges blocks
635    if (exists("combined_attr_stmts")) {
636      if (exists("edge_block") & exists("node_block")) {
637        combined_block <-
638          paste(combined_attr_stmts,
639                node_block, edge_block,
640                sep = "\n")
641      }
642      if (!exists("edge_block") & exists("node_block")) {
643        combined_block <-
644          paste(combined_attr_stmts,
645                node_block,
646                sep = "\n")
647      }
648    }
649    if (!exists("combined_attr_stmts")) {
650      if (exists("edge_block")) {
651        combined_block <- paste(node_block, edge_block,
652                                sep = "\n")
653      }
654      if (!exists("edge_block")) {
655        combined_block <- node_block
656      }
657    }
658
659    # Create DOT code
660    dot_code <-
661      paste0(ifelse(directed, "digraph", "graph"),
662             " {\n", "\n", combined_block, "\n}")
663
664    # Remove empty node or edge attribute statements
665    dot_code <- gsub(" \\[\\] ", "", dot_code)
666  }
667
668  dot_code
669}
670