2. Blocks registry • blockr

Introduction

The registry is an environment that provides access to multiple blocks as well as some metadata:

The block description.
Allowed input (defaults to data.frame, except for new_dataset_block blocks).
Returned output.
…

In other words, the registry is a “supermarket” for data analysis. As shown below, if you develop your own blocks package and register its blocks on load, these blocks become available to the end user. This makes the registry a powerful tool for collaboration between data science teams.

Previewing available blocks

Upon loading, blockr registers its internal blocks with register_blockr_blocks(). You do not need to call this function yourself; it is not exported. Once it has run, the registry environment is ready to be queried with available_blocks(). A truncated example of the output is shown below:

$dataset_block
function(...) {
  ...
}
<environment: namespace:blockr>
attr(,"name")
[1] "data block"
attr(,"description")
[1] "Choose a dataset from a package"
attr(,"classes")
[1] "dataset_block" "data_block"   
attr(,"input")
[1] NA
attr(,"output")
[1] "data.frame"
attr(,"package")
[1] "blockr"
attr(,"class")
[1] "block_descr"

$select_block
function(data, ...) {
  ...
}
<environment: namespace:blockr>
attr(,"name")
[1] "select block"
attr(,"description")
[1] "select columns in a table"
attr(,"classes")
[1] "select_block"    "transform_block"
attr(,"input")
[1] "data.frame"
attr(,"output")
[1] "data.frame"
attr(,"package")
[1] "blockr"
attr(,"class")
[1] "block_descr"

names(available_blocks())
#>  [1] "arrange_block"      "csv_block"          "dataset_block"     
#>  [4] "filesbrowser_block" "filter_block"       "group_by_block"    
#>  [7] "head_block"         "join_block"         "json_block"        
#> [10] "mutate_block"       "rds_block"          "result_block"      
#> [13] "select_block"       "summarize_block"    "upload_block"      
#> [16] "xpt_block"

Register a block

To register your own blocks, the user-facing functions are:

register_block() to register a block in the registry. If the block is already registered, it overwrites the existing one.
register_blocks() to register multiple blocks.

Let’s say you want to create a new block, new_tail_block, which returns the last n rows of the selected dataset:

#' Tail block constructor
#'
#' Builds a transform block whose expression is `tail(n = <n_rows>)`, i.e. it
#' keeps the last `n_rows` rows of its input.
#'
#' @param data Input data. Not used directly in this constructor body.
#' @param n_rows Initial value of the `n_rows` numeric field.
#' @param ... Forwarded to `new_block()`.
#'
#' @return The object returned by `new_block()`, with classes
#'   `"tail_block"` and `"transform_block"` requested.
new_tail_block <- function(data, n_rows = numeric(), ...) {
  # Upper bound of the field: the number of rows of the data it is given.
  # NOTE(review): presumably blockr calls this with the block's input data
  # (matched on the `data` argument name) -- confirm against blockr internals.
  max_rows <- function(data) nrow(data)

  # Single numeric field: initial value `n_rows`, minimum 1, dynamic maximum.
  tail_fields <- list(
    n_rows = new_numeric_field(n_rows, 1L, max_rows)
  )

  new_block(
    fields = tail_fields,
    # `.(n_rows)` is a placeholder substituted with the field value.
    expr = quote(tail(n = .(n_rows))),
    class = c("tail_block", "transform_block"),
    ...
  )
}

# Add the tail block constructor to the registry together with its metadata:
# display name, description, category, class vector and input/output types.
register_block(
  constructor = new_tail_block,
  name = "tail block",
  description = "return last n rows",
  category = "transform",
  classes = c("tail_block", "transform_block"),
  input = "data.frame",
  output = "data.frame"
)

If we now query the registry, the new block is available:

names(available_blocks())
#>  [1] "arrange_block"      "csv_block"          "dataset_block"     
#>  [4] "filesbrowser_block" "filter_block"       "group_by_block"    
#>  [7] "head_block"         "join_block"         "json_block"        
#> [10] "mutate_block"       "rds_block"          "result_block"      
#> [13] "select_block"       "summarize_block"    "tail_block"        
#> [16] "upload_block"       "xpt_block"

We can now run the demo app to add blocks. The new tail_block should be visible in the new choices:

If you need to register multiple blocks at once, use register_blocks():

# Register four blocks in one call. The arguments are aligned by position:
# the i-th entry of `name`, `description`, `category`, `classes`, `input`
# and `output` describes the i-th constructor.
register_blocks(
  constructor = c(
    new_dataset_block, new_filter_block, new_select_block, new_summarize_block
  ),
  name = c(
    "data block", "filter block", "select block", "summarize block"
  ),
  description = c(
    "choose a dataset in a package",
    "filter rows in a table",
    "select columns in a table",
    "summarize data groups"
  ),
  # One "data" block followed by three "transform" blocks.
  category = c("data", rep("transform", 3)),
  # One class vector per block, hence a list rather than a flat vector.
  classes = list(
    c("dataset_block", "data_block"),
    c("filter_block", "transform_block", "submit_block"),
    c("select_block", "transform_block"),
    c("summarize_block", "transform_block", "submit_block")
  ),
  # The dataset block has no declared input type, hence NA.
  input = c(NA_character_, "data.frame", "data.frame", "data.frame"),
  output = c("data.frame", "data.frame", "data.frame", "data.frame"),
  # Placeholder: presumably to be replaced with the name of your own package.
  package = "<PKG_NAME>"
)

Unregister a block

The counterpart of register_block() is unregister_blocks(). We can remove our new tail_block from the registry:

# Remove the block registered under the id "tail_block"
unregister_blocks(ids = "tail_block")

# List the registered block ids again to confirm that tail_block is gone
names(available_blocks())
#>  [1] "arrange_block"      "csv_block"          "dataset_block"     
#>  [4] "filesbrowser_block" "filter_block"       "group_by_block"    
#>  [7] "head_block"         "join_block"         "json_block"        
#> [10] "mutate_block"       "rds_block"          "result_block"      
#> [13] "select_block"       "summarize_block"    "upload_block"      
#> [16] "xpt_block"

Here, ids is the first entry of the class attribute that was set when the block was created, that is, tail_block. To remove multiple blocks at once, pass a vector to ids.