Skip to contents

This function renames existing results by re-hashing each set of parameters, using values the user may have edited in a hash table (see ?create_hash_table). It loads RDS files based on their existing hashes, compares each one to the corresponding entry in the hash table, generates new hashes where needed, and saves the files under the new hashes. Old files are deleted if their hashes differ from the new ones.

Usage

update_from_hash_table(
  hash_table,
  rds_folder,
  hash_includes_timestamp = FALSE,
  ignore_na = TRUE,
  alphabetical_order = TRUE,
  algo = "xxhash64"
)

Arguments

hash_table

A file path to a hash table that was generated by create_hash_table() and then modified by the user.

rds_folder

A string specifying the directory containing the RDS files associated with the hash table.

hash_includes_timestamp

Logical; if TRUE, timestamps are included in the hash generation.

ignore_na

Logical; if TRUE, NA values are ignored during hash generation.

alphabetical_order

Logical; if TRUE, parameters are sorted alphabetically before hash generation.

algo

Character string specifying the hashing algorithm to use. Default is "xxhash64". See ?digest for the available algorithms.

Value

The function does not return a value but saves updated RDS files and deletes old files as needed.

Examples

## Example: save three objects with their parameters, edit the generated hash
## table, then re-hash the saved files so their names match the edited table.

## Setup
## Work inside a throwaway sub-directory of the session's temp dir.
tmp_dir <- file.path(tempdir(), "example")
dir.create(tmp_dir)

## Save objects
## Three objects of different types (numeric vector, data frame, list).
obj1 <- rnorm(1000)
obj2 <- data.frame(
  x = runif(100),
  y = "something",
  z = rep(c(TRUE, FALSE), 50)
)
obj3 <- list(obj1, obj2)

## One parameter list per object. The nested `other_params` lists differ in
## shape: params1 has an NA entry and only params2 has `param4`.
params1 <- list(
  distribution = "normal",
  other_params = list(param1 = TRUE, param2 = 1, param3 = NA)
)
params2 <- list(
  distribution = "uniform",
  other_params = list(param1 = FALSE, param2 = 2, param3 = "1", param4 = 4)
)
params3 <- list(
  distribution = "composite",
  other_params = list(param1 = TRUE, param2 = 3, param3 = 1)
)

## Presumably each call writes one `<hash>.rds` file (plus the indexr.yaml
## bookkeeping files seen in the listing further down) -- confirm against
## ?save_objects.
save_objects(tmp_dir, obj1, params1)
save_objects(tmp_dir, obj2, params2)
save_objects(tmp_dir, obj3, params3)

## Create hash table
## The resulting table is shown below; `save_path` is the CSV that is read
## back in the next step.
create_hash_table(tmp_dir, save_path = file.path(tmp_dir, "hash_table.csv"))
#>   distribution other_params[[param1]] other_params[[param2]]
#> 1       normal                   TRUE                      1
#> 2      uniform                  FALSE                      2
#> 3    composite                   TRUE                      3
#>   other_params[[param3]]                          script_name
#> 1                   <NA> 4918b77f-ae69-4ae2-bea4-07e6f6a89e41
#> 2                      1 4918b77f-ae69-4ae2-bea4-07e6f6a89e41
#> 3                      1 4918b77f-ae69-4ae2-bea4-07e6f6a89e41
#>             timestamp             hash other_params[[param4]]
#> 1 2025-07-23 13:23:38 b4d0ab79d10e4e7b                   <NA>
#> 2 2025-07-23 13:23:38 40650b573a1cf710                      4
#> 3 2025-07-23 13:23:38 3ad8d8534a75f850                   <NA>

## Read in hash table, make a change, and save
hash_table <- read.csv(file.path(tmp_dir, "hash_table.csv"))
## Assigning a single string overwrites `distribution` in every row, so all
## three entries change.
hash_table$distribution <- "something different"
## NOTE(review): write.csv() writes row names by default; that unnamed first
## column is presumably what appears as `...1` in the messages below.
## Consider `row.names = FALSE` (the recorded output would then need to be
## regenerated).
write.csv(hash_table, file.path(tmp_dir, "hash_table.csv"))

## See file names before change
list.files(tmp_dir)
#> [1] "3ad8d8534a75f850.rds" "40650b573a1cf710.rds" "b4d0ab79d10e4e7b.rds"
#> [4] "hash_table.csv"       "indexr.yaml"          "indexr.yaml.lock"    

## Only `hash_table` and `rds_folder` are supplied; the remaining arguments
## keep the defaults shown in the Usage section.
update_from_hash_table(
  hash_table = file.path(tmp_dir, "hash_table.csv"),
  rds_folder = tmp_dir
)
#> New names:
#>  `` -> `...1`
#> Updating '...1' for YAML hash 'b4d0ab79d10e4e7b'
#> Updating 'distribution' for YAML hash 'b4d0ab79d10e4e7b'
#> Updating 'other_params..param1..' for YAML hash 'b4d0ab79d10e4e7b'
#> Updating 'other_params..param2..' for YAML hash 'b4d0ab79d10e4e7b'
#> Rehashed YAML 'b4d0ab79d10e4e7b' -> 'fcf7d84c35c9da29'
#> Updating '...1' for YAML hash '40650b573a1cf710'
#> Updating 'distribution' for YAML hash '40650b573a1cf710'
#> Updating 'other_params..param1..' for YAML hash '40650b573a1cf710'
#> Updating 'other_params..param2..' for YAML hash '40650b573a1cf710'
#> Updating 'other_params..param3..' for YAML hash '40650b573a1cf710'
#> Updating 'other_params..param4..' for YAML hash '40650b573a1cf710'
#> Rehashed YAML '40650b573a1cf710' -> 'b4189d72c8963f0c'
#> Updating '...1' for YAML hash '3ad8d8534a75f850'
#> Updating 'distribution' for YAML hash '3ad8d8534a75f850'
#> Updating 'other_params..param1..' for YAML hash '3ad8d8534a75f850'
#> Updating 'other_params..param2..' for YAML hash '3ad8d8534a75f850'
#> Updating 'other_params..param3..' for YAML hash '3ad8d8534a75f850'
#> Rehashed YAML '3ad8d8534a75f850' -> 'c59f542f23b9e439'

## See difference to before running update_from_hash_table()
## All three .rds files now carry the new hashes reported above; the old
## file names are gone.
list.files(tmp_dir)
#> [1] "b4189d72c8963f0c.rds" "c59f542f23b9e439.rds" "fcf7d84c35c9da29.rds"
#> [4] "hash_table.csv"       "indexr.yaml"          "indexr.yaml.lock"    

## Cleanup
## Remove the example directory and everything inside it.
unlink(tmp_dir, recursive = TRUE)