graph.R

              #' @include traversal.R
NULL

We use R6 instead of the built-in reference classes for several reasons.

Their definition is much more compact.
It is possible to extend R6 definitions cross-packages.
They suppor the notion of public and private membership.

A graph is better represented as a reference object, rather than an S3 or S4 class, to allow for mutability. That being said, it is certainly possible to create an S3 or S4 equivalent.

A graph node is primarily defined by its value and its edges. A (connected) graph is identified by any node, as it is possible to recover the rest of the graph through traversal.

              #' An R6 representation of a node for a graph data structure.
#'
#' @name graphNode
#' @format NULL
#' @docType class
graphNode_ <- R6::R6Class("graphNode",
  public = list(
    .edges   = list(),                
    .backwards_edges = list(),
    .value   = NULL,
    .address = NULL,

    initialize = function(value) {
      self$.value   <- value
      self$.address <- pryr::address(self)
    },

    value = function() {
      self$.value
    },

    add_edge = function(node) {
      stopifnot(is(node, "graphNode"))
      # Avoid copy creation.
      self$.edges[[node$address()]] <- node
      node$add_backwards_edge(self)
    },

    add_backwards_edge = function(edge_node) {
      stopifnot(is(edge_node, "graphNode"))
      self$.backwards_edges[[edge_node$address()]] <- edge_node
    },

    edges = function() {
      self$.edges
    },

    backwards_edges = function() {
      self$.backwards_edges
    },

    num_edges = function() {
      length(self$.edges)
    },

    address = function() {
      self$.address
    }
  )
)

A little trick to ensure that a graphNode can be constructed both as graphNode(...) and graphNode$new(...).

              #' @rdname graphNode
#' @param ... Arguments to pass to graphNode initialization.
#' @export
graphNode <- structure(
  function(...) { graphNode_$new(...) },
  class = "graphNode_"
)

To make the above trick work, we need to prevent access to everything except new.

              #' @export
`$.graphNode_` <- function(...) {
  stopifnot(identical(..2, "new"))
  ..1
}

#' Check whether an R object is a graphNode object
#'
#' @export
#' @param obj any object.
#' @return \code{TRUE} if the object is of class
#'    \code{graphNode}, \code{FALSE} otherwise.
is.graphNode <- function(obj) {
  inherits(obj, "graphNode")
}

# # # # Graph # # # #

#' An R6 representation of a graph data structure.
#'
#' All graphs are assumed to be connected (if they are not, use a list
#' of graphs instead). Any distinguished canonical node on the graph,
#' called the boot node, will be used to represent the entire graph
#' and will be the first node for purposes of traversal.
#'
#' @name graph
#' @format NULL
#' @docType class
graph_ <- R6::R6Class("graph",
  public = list(
    .bootnode = NULL,

    initialize = function(bootnode) {
      stopifnot(is(bootnode, "graphNode"))
      self$.bootnode <- bootnode
    },

    bootnode = function() {
      self$.bootnode
    },

    bootnode_value = function() {
      self$.bootnode$value()
    },

    size = function() {
      counter  <- list2env(list(n = 0), parent = emptyenv())
      strategy <- graphBFSTraversalStrategy$new(function(node) { counter$n <- counter$n + 1 })
      strategy$traverse(self)
      counter$n
    }
  )
)

A little trick to ensure that a graph can be constructed both as graph(...) and graph$new(...).

              #' @rdname graph
#' @param ... Arguments to pass to graph initialization.
#' @export
graph <- structure(
  function(...) { graph_$new(...) },
  class = "graph_"
)

To make the above trick work, we need to prevent access to everything except new.

              #' @export
`$.graph_` <- function(...) {
  stopifnot(identical(..2, "new"))
  ..1
}

#' Check whether an R object is a graph object
#'
#' @export
#' @param obj any object.
#' @return \code{TRUE} if the object is of class
#'    \code{graph}, \code{FALSE} otherwise.
is.graph <- function(obj) {
  is(obj, "graph") || inherits(obj, "graph")
}

package.R

              #' Graphrunner is a directed graph task execution engine
#'
#' Most collections of tasks can be represented as cyclic or
#' acyclic directed graphs. This package makes the management
#' and execution of such graphs of tasks straightforward.
#' 
#' @docType package
#' @name graphrunner
#' @import crayon pryr R6
#' @author Robert Krzyzanowski <\url{http://syberia.io}>
#' @seealso The core function in this package: \code{\link{stagerunner}}. It
#'   defines the constructor creating graphrunner objects that allow you to
#'   wrap complicated task execution structures in a single object. 
#' @references Full documentation and demos: \url{http://robertzk.github.io/graphrunner/};
#'   FAQ's: \url{http://robertzk.github.io/graphrunner/faq/}
NULL

traversal.R

              #' An R6 representation of a graph traversal strategy.
#'
#' A graph traversal strategy specifies how to execute tasks on
#' a given graph.
#'
#' @name graphTraversalStrategy
#' @format NULL
#' @docType class
graphTraversalStrategy <- R6::R6Class('graphTraversalStrategy',
  public = list(
    .visited = logical(0),
    .strategy = NULL,

    initialize = function(strategy) {
      stopifnot(is.function(strategy))
      self$.strategy <- strategy
    },

    traverse = function(node) {
      .NotYetImplemented()
    }
  )
)

#' A breadth-first traversal strategy.
#'
#' This graph traversal strategy begins from the boot node and
#' traverses "breadth-first" outward.
#'
#' @name graphTraversalStrategy
#' @format NULL
#' @docType class
graphBFSTraversalStrategy <- R6::R6Class('graphBFSTraversalStrategy',
  inherit = graphTraversalStrategy,
  public = list(
    traverse = function(node, directed = FALSE) {
      if (is.graph(node)) {
        self$.visited <- logical(0)
        on.exit(self$.visited <- logical(0))
        self$traverse(node$bootnode(), directed = directed)
      } else if (is.graphNode(node)) {
        if (isTRUE(self$.visited[node$address()])) {
          return()
        }
        self$.strategy(node)
        self$.visited[node$address()] <- TRUE
        edges <- node$edges()
        if (!isTRUE(directed)) {
          # Go through edges *and* backwards edges in case some nodes
          # are directionally isolated (e.g., they flow out to the bootnode,
          # but the bootnode does not flow back to the through any path).
          edges <- c(edges, node$backwards_edges())
        }
        for (edge in edges) {
          if (!isTRUE(self$.visited[edge$address()])) {
            self$traverse(edge, directed = directed)
          }
        }
      } else {
        stop("Can only traverse a graph or graphNode.")
      }
    }
  )
)