3 File Layout
3.1 Introduction
Implementing Sykdomspulsen Core requires a number of functions to be called in the correct order. To make this as simple as possible, we have provided a skeleton implementation at https://github.com/sykdomspulsen-org/scskeleton
We suggest that you clone this GitHub repo to your server, and then do a global find/replace on scskeleton
with the name you want for your R package.
Descriptions of the required files/functions are detailed below.
3.2 00_env_and_namespace.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/00_env_and_namespace.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/00_env_and_namespace.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 00_env_and_namespace.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Use roxygen2 to import ggplot2, data.table, %>%, and %<>% into the namespace,
## 8 | # because these are the most commonly used packages/functions.
## 9 | #
## 10 | # PURPOSE 2:
## 11 | # Declaring our own "tm_run_task" inside this package, as a wrapper around
## 12 | # sc::tm_run_task.
## 13 | #
## 14 | # We cannot run sc::tm_run_task directly, because we need to load all of the
## 15 | # database connections, db schemas, tasks, etc. *before* we run the task.
## 16 | # Hence, this wrapper ensures that all of this package's configs files are
## 17 | # loaded via OURPACKAGE::.onLoad() first, and then sc::tm_run_task can run.
## 18 | #
## 19 | # PURPOSE 3:
## 20 | # Declaration of environments that can be used globally.
## 21 | #
## 22 | # PURPOSE 4:
## 23 | # Fix issues/integration with other packages.
## 24 | #
## 25 | # Most notably is the issue with rmarkdown, where an error is thrown when
## 26 | # rendering multiple rmarkdown documents in parallel.
## 27 | #
## 28 | # ******************************************************************************
## 29 | # ******************************************************************************
## 30 |
## 31 | #' @import ggplot2
## 32 | #' @import data.table
## 33 | #' @importFrom magrittr %>% %<>%
## 34 | 1
## 35 |
## 36 | #' Shortcut to run task
## 37 | #'
## 38 | #' This task is needed to ensure that all the definitions/db schemas/tasks/etc
## 39 | #' are loaded from the package scskeleton. We cannot run sc::tm_run_task directly,
## 40 | #' because we need to load all of the database connections, db schemas, tasks,
## 41 | #' etc. *before* we run the task. Hence, this wrapper ensures that all of this
## 42 | #' package's configs files are loaded via OURPACKAGE::.onLoad() first, and then
## 43 | #' sc::tm_run_task can run.
## 44 | #'
## 45 | #' @param task_name Name of the task
## 46 | #' @param index_plan Not used
## 47 | #' @param index_analysis Not used
## 48 | #' @export
## 49 | tm_run_task <- function(task_name, index_plan = NULL, index_analysis = NULL) {
## 50 | sc::tm_run_task(
## 51 | task_name = task_name,
## 52 | index_plan = index_plan,
## 53 | index_analysis = index_analysis
## 54 | )
## 55 | }
## 56 |
## 57 | #' Declaration of environments that can be used globally
## 58 | #' @export config
## 59 | config <- new.env()
## 60 |
## 61 | # https://github.com/rstudio/rmarkdown/issues/1632
## 62 | # An error is thrown when rendering multiple rmarkdown documents in parallel.
## 63 | clean_tmpfiles_mod <- function() {
## 64 | # message("Calling clean_tmpfiles_mod()")
## 65 | }
3.3 01_definitions.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/01_definitions.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/01_definitions.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 01_definitions.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Set global definitions that are used throughout the package, and further
## 8 | # (e.g. in shiny/plumber creations).
## 9 | #
## 10 | # Examples of global definitions are:
## 11 | # - Border years
## 12 | # - Age definitions
## 13 | # - Diagnosis mappings (e.g. "R80" = "Influenza")
## 14 | #
## 15 | # ******************************************************************************
## 16 | # ******************************************************************************
## 17 |
## 18 | #' Set global definitions
## 19 | set_definitions <- function() {
## 20 |
## 21 | # Norway's last redistricting occurred 2020-01-01
## 22 | config$border <- 2020
## 23 |
## 24 | # fhidata needs to know which border is in use
## 25 | # fhidata should also replace the population of 1900 with the current year,
## 26 | # because year = 1900 is shorthand for granularity_geo = "total".
## 27 | # This means that it is more appropriate to use the current year's population
## 28 | # for year = 1900.
## 29 | fhidata::set_config(
## 30 | border = config$border,
## 31 | use_current_year_as_1900_pop = TRUE
## 32 | )
## 33 | }
3.4 02_permissions.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/02_permissions.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/02_permissions.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 02_permissions.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Set permissions that can be used in this package.
## 8 | #
## 9 | # PURPOSE 2:
## 10 | # Permissions are a way of ensuring that a task only runs once per hour/day/week.
## 11 | # This can be useful when you want to be 100% sure that you don't want to spam
## 12 | # emails to your recipients.
## 13 | #
## 14 | # PURPOSE 3:
## 15 | # Permissions can also be used to differentiate between "production days" and
## 16 | # "preliminary days". This can be useful when you have different email lists
## 17 | # for production days (everyone) and preliminary days (a smaller group).
## 18 | #
## 19 | # ******************************************************************************
## 20 | # ******************************************************************************
## 21 |
## 22 | set_permissions <- function() {
## 23 | # sc::add_permission(
## 24 | # name = "khtemails_send_emails",
## 25 | # permission = sc::Permission$new(
## 26 | # key = "khtemails_send_emails",
## 27 | # value = as.character(lubridate::today()), # one time per day
## 28 | # production_days = c(3) # wed, send to everyone, otherwise prelim
## 29 | # )
## 30 | # )
## 31 | }
3.5 03_db_schemas.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/03_db_schemas.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/03_db_schemas.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 03_db_schemas.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Set db schemas that are used throughout the package.
## 8 | #
## 9 | # These are basically all of the database tables that you will be writing to,
## 10 | # and reading from.
## 11 | #
## 12 | # ******************************************************************************
## 13 | # ******************************************************************************
## 14 |
## 15 | set_db_schemas <- function() {
## 16 | # __________ ----
## 17 | # Weather ----
## 18 | ## > anon_example_weather_rawdata ----
## 19 | sc::add_schema_v8(
## 20 | name_access = c("anon"),
## 21 | name_grouping = "example_weather",
## 22 | name_variant = "rawdata",
## 23 | db_configs = sc::config$db_configs,
## 24 | field_types = c(
## 25 | "granularity_time" = "TEXT",
## 26 | "granularity_geo" = "TEXT",
## 27 | "country_iso3" = "TEXT",
## 28 | "location_code" = "TEXT",
## 29 | "border" = "INTEGER",
## 30 | "age" = "TEXT",
## 31 | "sex" = "TEXT",
## 32 |
## 33 | "date" = "DATE",
## 34 |
## 35 | "isoyear" = "INTEGER",
## 36 | "isoweek" = "INTEGER",
## 37 | "isoyearweek" = "TEXT",
## 38 | "season" = "TEXT",
## 39 | "seasonweek" = "DOUBLE",
## 40 |
## 41 | "calyear" = "INTEGER",
## 42 | "calmonth" = "INTEGER",
## 43 | "calyearmonth" = "TEXT",
## 44 |
## 45 | "temp_max" = "DOUBLE",
## 46 | "temp_min" = "DOUBLE",
## 47 | "precip" = "DOUBLE"
## 48 | ),
## 49 | keys = c(
## 50 | "granularity_time",
## 51 | "location_code",
## 52 | "date",
## 53 | "age",
## 54 | "sex"
## 55 | ),
## 56 | censors = list(
## 57 | anon = list(
## 58 |
## 59 | )
## 60 | ),
## 61 | validator_field_types = sc::validator_field_types_sykdomspulsen,
## 62 | validator_field_contents = sc::validator_field_contents_sykdomspulsen,
## 63 | info = "This db table is used for..."
## 64 | )
## 65 |
## 66 | ## > anon_example_weather_data ----
## 67 | sc::add_schema_v8(
## 68 | name_access = c("anon"),
## 69 | name_grouping = "example_weather",
## 70 | name_variant = "data",
## 71 | db_configs = sc::config$db_configs,
## 72 | field_types = c(
## 73 | "granularity_time" = "TEXT",
## 74 | "granularity_geo" = "TEXT",
## 75 | "country_iso3" = "TEXT",
## 76 | "location_code" = "TEXT",
## 77 | "border" = "INTEGER",
## 78 | "age" = "TEXT",
## 79 | "sex" = "TEXT",
## 80 |
## 81 | "date" = "DATE",
## 82 |
## 83 | "isoyear" = "INTEGER",
## 84 | "isoweek" = "INTEGER",
## 85 | "isoyearweek" = "TEXT",
## 86 | "season" = "TEXT",
## 87 | "seasonweek" = "DOUBLE",
## 88 |
## 89 | "calyear" = "INTEGER",
## 90 | "calmonth" = "INTEGER",
## 91 | "calyearmonth" = "TEXT",
## 92 |
## 93 | "temp_max" = "DOUBLE",
## 94 | "temp_min" = "DOUBLE",
## 95 | "precip" = "DOUBLE"
## 96 | ),
## 97 | keys = c(
## 98 | "granularity_time",
## 99 | "location_code",
## 100 | "date",
## 101 | "age",
## 102 | "sex"
## 103 | ),
## 104 | censors = list(
## 105 | anon = list(
## 106 |
## 107 | )
## 108 | ),
## 109 | validator_field_types = sc::validator_field_types_sykdomspulsen,
## 110 | validator_field_contents = sc::validator_field_contents_sykdomspulsen,
## 111 | info = "This db table is used for..."
## 112 | )
## 113 | }
3.6 04_tasks.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/04_tasks.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/04_tasks.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 04_tasks.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Set all the tasks that are run by the package.
## 8 | #
## 9 | # These are basically all of the "things" that you want to do.
## 10 | # E.g. Downloading data, cleaning data, importing data, analyzing data,
## 11 | # making Excel files, making docx/pdf reports, sending emails, etc.
## 12 | #
## 13 | # ******************************************************************************
## 14 | # ******************************************************************************
## 15 |
## 16 | set_tasks <- function() {
## 17 | # __________ ----
## 18 | # Weather ----
## 19 | ## > weather_download_and_import_rawdata ----
## 20 | # tm_run_task("weather_download_and_import_rawdata")
## 21 | sc::add_task_from_config_v8(
## 22 | name_grouping = "weather",
## 23 | name_action = "download_and_import_rawdata",
## 24 | name_variant = NULL,
## 25 | cores = 1,
## 26 | plan_analysis_fn_name = NULL,
## 27 | for_each_plan = plnr::expand_list(
## 28 | location_code = fhidata::norway_locations_names()[granularity_geo %in% c("municip")]$location_code
## 29 | ),
## 30 | for_each_analysis = NULL,
## 31 | universal_argset = NULL,
## 32 | upsert_at_end_of_each_plan = FALSE,
## 33 | insert_at_end_of_each_plan = FALSE,
## 34 | action_fn_name = "scskeleton::weather_download_and_import_rawdata_action",
## 35 | data_selector_fn_name = "scskeleton::weather_download_and_import_rawdata_data_selector",
## 36 | schema = list(
## 37 | # input
## 38 |
## 39 | # output
## 40 | "anon_example_weather_rawdata" = sc::config$schemas$anon_example_weather_rawdata
## 41 | ),
## 42 | info = "This task downloads and imports the raw weather data from MET's API at the municipal level"
## 43 | )
## 44 |
## 45 | ## > weather_clean_data ----
## 46 | # tm_run_task("weather_clean_data")
## 47 | sc::add_task_from_config_v8(
## 48 | name_grouping = "weather",
## 49 | name_action = "clean_data",
## 50 | name_variant = NULL,
## 51 | cores = 1,
## 52 | plan_analysis_fn_name = NULL,
## 53 | for_each_plan = plnr::expand_list(
## 54 | x = 1
## 55 | ),
## 56 | for_each_analysis = NULL,
## 57 | universal_argset = NULL,
## 58 | upsert_at_end_of_each_plan = FALSE,
## 59 | insert_at_end_of_each_plan = FALSE,
## 60 | action_fn_name = "scskeleton::weather_clean_data_action",
## 61 | data_selector_fn_name = "scskeleton::weather_clean_data_data_selector",
## 62 | schema = list(
## 63 | # input
## 64 | "anon_example_weather_rawdata" = sc::config$schemas$anon_example_weather_rawdata,
## 65 |
## 66 | # output
## 67 | "anon_example_weather_data" = sc::config$schemas$anon_example_weather_data
## 68 | ),
## 69 | info = "This task cleans the raw data and aggregates it to county and national level"
## 70 | )
## 71 |
## 72 | ## > weather_clean_data ----
## 73 | # tm_run_task("weather_export_plots")
## 74 | sc::add_task_from_config_v8(
## 75 | name_grouping = "weather",
## 76 | name_action = "export_plots",
## 77 | name_variant = NULL,
## 78 | cores = 1,
## 79 | plan_analysis_fn_name = NULL,
## 80 | for_each_plan = plnr::expand_list(
## 81 | location_code = fhidata::norway_locations_names()[granularity_geo %in% c("county")]$location_code
## 82 | ),
## 83 | for_each_analysis = NULL,
## 84 | universal_argset = list(
## 85 | output_dir = tempdir(),
## 86 | output_filename = "weather_{argset$location_code}.png",
## 87 | output_absolute_path = fs::path("{argset$output_dir}", "{argset$output_filename}")
## 88 | ),
## 89 | upsert_at_end_of_each_plan = FALSE,
## 90 | insert_at_end_of_each_plan = FALSE,
## 91 | action_fn_name = "scskeleton::weather_export_plots_action",
## 92 | data_selector_fn_name = "scskeleton::weather_export_plots_data_selector",
## 93 | schema = list(
## 94 | # input
## 95 | "anon_example_weather_data" = sc::config$schemas$anon_example_weather_data
## 96 |
## 97 | # output
## 98 | ),
## 99 | info = "This task ploduces plots"
## 100 | )
## 101 | }
3.7 05_deliverables.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/05_deliverables.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/05_deliverables.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 05_deliverables.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Set all the deliverables that team members are supposed to manually do/check
## 8 | # every day/week/month.
## 9 | #
## 10 | # ******************************************************************************
## 11 | # ******************************************************************************
## 12 |
## 13 | set_deliverables <- function() {
## 14 |
## 15 | }
3.8 06_config.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/06_config.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/06_config.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 06_config.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Call all the functions defined in 01, 02, 03, 04, and 05 in the correct order.
## 8 | #
## 9 | # PURPOSE 2:
## 10 | # Set all necessary configs that do not belong anywhere else.
## 11 | #
## 12 | # E.g. Formatting for progress bars.
## 13 | #
## 14 | # ******************************************************************************
## 15 | # ******************************************************************************
## 16 |
## 17 | set_config <- function() {
## 18 | # 01_definitions.r
## 19 | set_definitions()
## 20 |
## 21 | # 02_permissions.r
## 22 | set_permissions()
## 23 |
## 24 | # 03_db_schemas.r
## 25 | set_db_schemas()
## 26 |
## 27 | # 04_tasks.r
## 28 | set_tasks()
## 29 |
## 30 | # 05_deliverables.r
## 31 | set_deliverables()
## 32 |
## 33 | # 06_config.r
## 34 | set_progressr()
## 35 | }
## 36 |
## 37 | set_progressr <- function() {
## 38 | progressr::handlers(progressr::handler_progress(
## 39 | format = "[:bar] :current/:total (:percent) in :elapsedfull, eta: :eta",
## 40 | clear = FALSE
## 41 | ))
## 42 | }
3.9 07_onLoad.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/07_onLoad.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/07_onLoad.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 07_onLoad.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Initializing everything that happens when the package is loaded.
## 8 | #
## 9 | # E.g. Calling bash scripts that authenticate against Kerebros, setting the
## 10 | # configs as defined in 06_config.r.
## 11 | #
## 12 | # ******************************************************************************
## 13 | # ******************************************************************************
## 14 |
## 15 | .onLoad <- function(libname, pkgname) {
## 16 | # Mechanism to authenticate as necessary (e.g. Kerebros)
## 17 | try(system2("/bin/authenticate.sh", stdout = NULL), TRUE)
## 18 |
## 19 | # 5_config.r
## 20 | set_config()
## 21 |
## 22 | # https://github.com/rstudio/rmarkdown/issues/1632
## 23 | assignInNamespace("clean_tmpfiles", clean_tmpfiles_mod, ns = "rmarkdown")
## 24 |
## 25 | invisible()
## 26 | }
3.10 08_onAttach.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/08_onAttach.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/08_onAttach.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 08_onAttach.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # What you want to happen when someone types library(yourpackage)
## 8 | #
## 9 | # ******************************************************************************
## 10 | # ******************************************************************************
## 11 |
## 12 | .onAttach <- function(libname, pkgname) {
## 13 |
## 14 | }
3.11 99_util_*.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/99_util_no_data_plot.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/99_util_no_data_plot.r
##
## 1 | # ******************************************************************************
## 2 | # ******************************************************************************
## 3 | #
## 4 | # 99_util_*.r
## 5 | #
## 6 | # PURPOSE 1:
## 7 | # Utility functions that are used across multiple tasks
## 8 | #
## 9 | # ******************************************************************************
## 10 | # ******************************************************************************
## 11 |
## 12 | no_data_plot <- function(){
## 13 | data=data.frame(x=0,y=0)
## 14 | q <- ggplot(data=data)
## 15 | q <- q + theme_void()
## 16 | q <- q + annotate("text", label=glue::glue("Ikke noe data {fhi::nb$aa} vise"), x=0, y=0, size=10)
## 17 | q
## 18 | }
3.12 Task files
Task files are placed in .r files under their own names.
3.12.1 weather_download_and_import_rawdata.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/weather_download_and_import_rawdata.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/weather_download_and_import_rawdata.r
##
## 1 | # **** action **** ----
## 2 | #' weather_download_and_import_rawdata (action)
## 3 | #' @param data Data
## 4 | #' @param argset Argset
## 5 | #' @param schema DB Schema
## 6 | #' @export
## 7 | weather_download_and_import_rawdata_action <- function(data, argset, schema) {
## 8 | # tm_run_task("weather_download_and_import_rawdata")
## 9 |
## 10 | if (plnr::is_run_directly()) {
## 11 | # sc::tm_get_plans_argsets_as_dt("weather_download_and_import_rawdata")
## 12 |
## 13 | index_plan <- 1
## 14 | index_analysis <- 1
## 15 |
## 16 | data <- sc::tm_get_data("weather_download_and_import_rawdata", index_plan = index_plan)
## 17 | argset <- sc::tm_get_argset("weather_download_and_import_rawdata", index_plan = index_plan, index_analysis = index_analysis)
## 18 | schema <- sc::tm_get_schema("weather_download_and_import_rawdata")
## 19 | }
## 20 |
## 21 | # special case that runs before everything
## 22 | if (argset$first_analysis == TRUE) {
## 23 |
## 24 | }
## 25 |
## 26 | a <- data$data
## 27 |
## 28 | baz <- xml2::xml_find_all(a, ".//maxTemperature")
## 29 | res <- vector("list", length = length(baz))
## 30 | for (i in seq_along(baz)) {
## 31 | parent <- xml2::xml_parent(baz[[i]])
## 32 | grandparent <- xml2::xml_parent(parent)
## 33 | time_from <- xml2::xml_attr(grandparent, "from")
## 34 | time_to <- xml2::xml_attr(grandparent, "to")
## 35 | x <- xml2::xml_find_all(parent, ".//minTemperature")
## 36 | temp_min <- xml2::xml_attr(x, "value")
## 37 | x <- xml2::xml_find_all(parent, ".//maxTemperature")
## 38 | temp_max <- xml2::xml_attr(x, "value")
## 39 | x <- xml2::xml_find_all(parent, ".//precipitation")
## 40 | precip <- xml2::xml_attr(x, "value")
## 41 | res[[i]] <- data.frame(
## 42 | time_from = as.character(time_from),
## 43 | time_to = as.character(time_to),
## 44 | temp_max = as.numeric(temp_max),
## 45 | temp_min = as.numeric(temp_min),
## 46 | precip = as.numeric(precip)
## 47 | )
## 48 | }
## 49 | res <- rbindlist(res)
## 50 | res <- res[stringr::str_sub(time_from, 12, 13) %in% c("00", "06", "12", "18")]
## 51 | res[, date := as.Date(stringr::str_sub(time_from, 1, 10))]
## 52 | res[, N := .N, by = date]
## 53 | res <- res[N == 4]
## 54 | res <- res[
## 55 | ,
## 56 | .(
## 57 | temp_max = max(temp_max),
## 58 | temp_min = min(temp_min),
## 59 | precip = sum(precip)
## 60 | ),
## 61 | keyby = .(date)
## 62 | ]
## 63 |
## 64 | # we look at the downloaded data
## 65 | # res
## 66 |
## 67 | # we now need to format it
## 68 | res[, granularity_time := "day"]
## 69 | res[, sex := "total"]
## 70 | res[, age := "total"]
## 71 | res[, location_code := argset$location_code]
## 72 |
## 73 | # fill in missing structural variables
## 74 | sc::fill_in_missing_v8(res, border = 2020)
## 75 |
## 76 | # we look at the downloaded data
## 77 | # res
## 78 |
## 79 | # put data in db table
## 80 | schema$anon_example_weather_rawdata$insert_data(res)
## 81 |
## 82 | # special case that runs after everything
## 83 | if (argset$last_analysis == TRUE) {
## 84 |
## 85 | }
## 86 | }
## 87 |
## 88 | # **** data_selector **** ----
## 89 | #' weather_download_and_import_rawdata (data selector)
## 90 | #' @param argset Argset
## 91 | #' @param schema DB Schema
## 92 | #' @export
## 93 | weather_download_and_import_rawdata_data_selector <- function(argset, schema) {
## 94 | if (plnr::is_run_directly()) {
## 95 | # sc::tm_get_plans_argsets_as_dt("weather_download_and_import_rawdata")
## 96 |
## 97 | index_plan <- 1
## 98 |
## 99 | argset <- sc::tm_get_argset("weather_download_and_import_rawdata", index_plan = index_plan)
## 100 | schema <- sc::tm_get_schema("weather_download_and_import_rawdata")
## 101 | }
## 102 |
## 103 | # find the mid lat/long for the specified location_code
## 104 | gps <- fhimaps::norway_lau2_map_b2020_default_dt[location_code == argset$location_code,.(
## 105 | lat = mean(lat),
## 106 | long = mean(long)
## 107 | )]
## 108 |
## 109 | # download the forecast for the specified location_code
## 110 | d <- httr::GET(glue::glue("https://api.met.no/weatherapi/locationforecast/2.0/classic?lat={gps$lat}&lon={gps$long}"), httr::content_type_xml())
## 111 | d <- xml2::read_xml(d$content)
## 112 |
## 113 | # The variable returned must be a named list
## 114 | retval <- list(
## 115 | "data" = d
## 116 | )
## 117 |
## 118 | retval
## 119 | }
## 120 |
## 121 | # **** functions **** ----
3.12.2 weather_clean_data.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/weather_clean_data.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/weather_clean_data.r
##
## 1 | # **** action **** ----
## 2 | #' weather_clean_data (action)
## 3 | #' @param data Data
## 4 | #' @param argset Argset
## 5 | #' @param schema DB Schema
## 6 | #' @export
## 7 | weather_clean_data_action <- function(data, argset, schema) {
## 8 | # tm_run_task("weather_clean_data")
## 9 |
## 10 | if (plnr::is_run_directly()) {
## 11 | # sc::tm_get_plans_argsets_as_dt("weather_clean_data")
## 12 |
## 13 | index_plan <- 1
## 14 | index_analysis <- 1
## 15 |
## 16 | data <- sc::tm_get_data("weather_clean_data", index_plan = index_plan)
## 17 | argset <- sc::tm_get_argset("weather_clean_data", index_plan = index_plan, index_analysis = index_analysis)
## 18 | schema <- sc::tm_get_schema("weather_clean_data")
## 19 | }
## 20 |
## 21 | # special case that runs before everything
## 22 | if (argset$first_analysis == TRUE) {
## 23 |
## 24 | }
## 25 |
## 26 | # make sure there's no missing data via the creation of a skeleton
## 27 | # https://folkehelseinstituttet.github.io/fhidata/articles/Skeletons.html
## 28 |
## 29 | # Create a variable (possibly a list) to hold the data
## 30 | d_agg <- list()
## 31 | d_agg$day_municip <- copy(data$day_municip)
## 32 |
## 33 | # Pull out important dates
## 34 | date_min <- min(d_agg$day_municip$date, na.rm = T)
## 35 | date_max <- max(d_agg$day_municip$date, na.rm = T)
## 36 |
## 37 | # Create `multiskeleton`
## 38 | # granularity_geo should have the following groups:
## 39 | # - nodata (when no data is available, and there is no "finer" data available to aggregate up)
## 40 | # - all levels of granularity_geo where you have data available
## 41 | # If you do not have data for a specific granularity_geo, but there is "finer" data available
## 42 | # then you should not include this granularity_geo in the multiskeleton, because you will create
## 43 | # it later when you aggregate up your data (baregion)
## 44 | multiskeleton_day <- fhidata::make_skeleton(
## 45 | date_min = date_min,
## 46 | date_max = date_max,
## 47 | granularity_geo = list(
## 48 | "nodata" = c(
## 49 | "wardoslo",
## 50 | "extrawardoslo",
## 51 | "missingwardoslo",
## 52 | "wardbergen",
## 53 | "missingwardbergen",
## 54 | "wardstavanger",
## 55 | "missingwardstavanger",
## 56 | "notmainlandmunicip",
## 57 | "missingmunicip",
## 58 | "notmainlandcounty",
## 59 | "missingcounty"
## 60 | ),
## 61 | "municip" = c(
## 62 | "municip"
## 63 | )
## 64 | )
## 65 | )
## 66 |
## 67 | # Merge in the information you have at different geographical granularities
## 68 | # one level at a time
## 69 | # municip
## 70 | multiskeleton_day$municip[
## 71 | d_agg$day_municip,
## 72 | on = c("location_code", "date"),
## 73 | c(
## 74 | "temp_max",
## 75 | "temp_min",
## 76 | "precip"
## 77 | ) := .(
## 78 | temp_max,
## 79 | temp_min,
## 80 | precip
## 81 | )
## 82 | ]
## 83 |
## 84 | multiskeleton_day$municip[]
## 85 |
## 86 | # Aggregate up to higher geographical granularities (county)
## 87 | multiskeleton_day$county <- multiskeleton_day$municip[
## 88 | fhidata::norway_locations_hierarchy(
## 89 | from = "municip",
## 90 | to = "county"
## 91 | ),
## 92 | on = c(
## 93 | "location_code==from_code"
## 94 | )
## 95 | ][,
## 96 | .(
## 97 | temp_max = mean(temp_max, na.rm = T),
## 98 | temp_min = mean(temp_min, na.rm = T),
## 99 | precip = mean(precip, na.rm = T),
## 100 | granularity_geo = "county"
## 101 | ),
## 102 | by = .(
## 103 | granularity_time,
## 104 | date,
## 105 | location_code = to_code
## 106 | )
## 107 | ]
## 108 |
## 109 | multiskeleton_day$county[]
## 110 |
## 111 | # Aggregate up to higher geographical granularities (nation)
## 112 | multiskeleton_day$nation <- multiskeleton_day$municip[
## 113 | ,
## 114 | .(
## 115 | temp_max = mean(temp_max, na.rm = T),
## 116 | temp_min = mean(temp_min, na.rm = T),
## 117 | precip = mean(precip, na.rm = T),
## 118 | granularity_geo = "nation",
## 119 | location_code = "norge"
## 120 | ),
## 121 | by = .(
## 122 | granularity_time,
## 123 | date
## 124 | )
## 125 | ]
## 126 |
## 127 | multiskeleton_day$nation[]
## 128 |
## 129 | # combine all the different granularity_geos
## 130 | skeleton_day <- rbindlist(multiskeleton_day, fill = TRUE, use.names = TRUE)
## 131 |
## 132 | skeleton_day[]
## 133 |
## 134 | # 10. (If desirable) aggregate up to higher time granularities
## 135 | # if necessary, it is now easy to aggregate up to weekly data from here
## 136 | skeleton_isoweek <- copy(skeleton_day)
## 137 | skeleton_isoweek[, isoyearweek := fhiplot::isoyearweek_c(date)]
## 138 | skeleton_isoweek <- skeleton_isoweek[
## 139 | ,
## 140 | .(
## 141 | temp_max = mean(temp_max, na.rm = T),
## 142 | temp_min = mean(temp_min, na.rm = T),
## 143 | precip = mean(precip, na.rm = T),
## 144 | granularity_time = "isoweek"
## 145 | ),
## 146 | keyby = .(
## 147 | isoyearweek,
## 148 | granularity_geo,
## 149 | location_code
## 150 | )
## 151 | ]
## 152 |
## 153 | skeleton_isoweek[]
## 154 |
## 155 | # we now need to format it and fill in missing structural variables
## 156 | # day
## 157 | skeleton_day[, sex := "total"]
## 158 | skeleton_day[, age := "total"]
## 159 | sc::fill_in_missing_v8(skeleton_day, border = config$border)
## 160 |
## 161 | # isoweek
## 162 | skeleton_isoweek[, sex := "total"]
## 163 | skeleton_isoweek[, age := "total"]
## 164 | sc::fill_in_missing_v8(skeleton_isoweek, border = config$border)
## 165 | skeleton_isoweek[, date := as.Date(date)]
## 166 |
## 167 | skeleton <- rbindlist(
## 168 | list(
## 169 | skeleton_day,
## 170 | skeleton_isoweek
## 171 | ),
## 172 | use.names = T
## 173 | )
## 174 |
## 175 | # put data in db table
## 176 | schema$anon_example_weather_data$drop_all_rows_and_then_insert_data(skeleton)
## 177 |
## 178 | # special case that runs after everything
## 179 | if (argset$last_analysis == TRUE) {
## 180 |
## 181 | }
## 182 | }
## 183 |
## 184 | # **** data_selector **** ----
## 185 | #' weather_clean_data (data selector)
## 186 | #' @param argset Argset
## 187 | #' @param schema DB Schema
## 188 | #' @export
## 189 | weather_clean_data_data_selector <- function(argset, schema) {
## 190 | if (plnr::is_run_directly()) {
## 191 | # sc::tm_get_plans_argsets_as_dt("weather_clean_data")
## 192 |
## 193 | index_plan <- 1
## 194 |
## 195 | argset <- sc::tm_get_argset("weather_clean_data", index_plan = index_plan)
## 196 | schema <- sc::tm_get_schema("weather_clean_data")
## 197 | }
## 198 |
## 199 | # The database schemas can be accessed here
## 200 | d <- schema$anon_example_weather_rawdata$tbl() %>%
## 201 | sc::mandatory_db_filter(
## 202 | granularity_time = "day",
## 203 | granularity_time_not = NULL,
## 204 | granularity_geo = "municip",
## 205 | granularity_geo_not = NULL,
## 206 | country_iso3 = NULL,
## 207 | location_code = NULL,
## 208 | age = "total",
## 209 | age_not = NULL,
## 210 | sex = "total",
## 211 | sex_not = NULL
## 212 | ) %>%
## 213 | dplyr::select(
## 214 | granularity_time,
## 215 | # granularity_geo,
## 216 | # country_iso3,
## 217 | location_code,
## 218 | # border,
## 219 | # age,
## 220 | # sex,
## 221 |
## 222 | date,
## 223 |
## 224 | # isoyear,
## 225 | # isoweek,
## 226 | # isoyearweek,
## 227 | # season,
## 228 | # seasonweek,
## 229 |
## 230 | # calyear,
## 231 | # calmonth,
## 232 | # calyearmonth,
## 233 |
## 234 | temp_max,
## 235 | temp_min,
## 236 | precip
## 237 | ) %>%
## 238 | dplyr::collect() %>%
## 239 | as.data.table() %>%
## 240 | setorder(
## 241 | location_code,
## 242 | date
## 243 | )
## 244 |
## 245 | # The variable returned must be a named list
## 246 | retval <- list(
## 247 | "day_municip" = d
## 248 | )
## 249 |
## 250 | retval
## 251 | }
## 252 |
## 253 | # **** functions **** ----
3.12.3 weather_export_weather_plots.r
https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/weather_export_plots.r
## https://github.com/sykdomspulsen-org/scskeleton/blob/main/R/weather_export_plots.r
##
## 1 | # **** action **** ----
## 2 | #' weather_export_plots (action)
## 3 | #' @param data Data
## 4 | #' @param argset Argset
## 5 | #' @param schema DB Schema
## 6 | #' @export
## 7 | weather_export_plots_action <- function(data, argset, schema) {
## 8 | # tm_run_task("weather_export_plots")
## 9 |
## 10 | if(plnr::is_run_directly()){
## 11 | # sc::tm_get_plans_argsets_as_dt("weather_export_plots")
## 12 |
## 13 | index_plan <- 1
## 14 | index_analysis <- 1
## 15 |
## 16 | data <- sc::tm_get_data("weather_export_plots", index_plan = index_plan)
## 17 | argset <- sc::tm_get_argset("weather_export_plots", index_plan = index_plan, index_analysis = index_analysis)
## 18 | schema <- sc::tm_get_schema("weather_export_plots")
## 19 | }
## 20 |
## 21 | # code goes here
## 22 | # special case that runs before everything
## 23 | if(argset$first_analysis == TRUE){
## 24 |
## 25 | }
## 26 |
## 27 | # create the output_dir (if it doesn't exist)
## 28 | fs::dir_create(glue::glue(argset$output_dir))
## 29 |
## 30 | q <- ggplot(data$data, aes(x = date, ymin = temp_min, ymax = temp_max))
## 31 | q <- q + geom_ribbon(alpha = 0.5)
## 32 |
## 33 | ggsave(
## 34 | filename = glue::glue(argset$output_absolute_path),
## 35 | plot = q
## 36 | )
## 37 |
## 38 | # special case that runs after everything
## 39 | # copy to anon_web?
## 40 | if(argset$last_analysis == TRUE){
## 41 |
## 42 | }
## 43 | }
## 44 |
## 45 | # **** data_selector **** ----
## 46 | #' weather_export_plots (data selector)
## 47 | #' @param argset Argset
## 48 | #' @param schema DB Schema
## 49 | #' @export
## 50 | weather_export_plots_data_selector = function(argset, schema){
## 51 | if(plnr::is_run_directly()){
## 52 | # sc::tm_get_plans_argsets_as_dt("weather_export_plots")
## 53 |
## 54 | index_plan <- 1
## 55 |
## 56 | argset <- sc::tm_get_argset("weather_export_plots", index_plan = index_plan)
## 57 | schema <- sc::tm_get_schema("weather_export_plots")
## 58 | }
## 59 |
## 60 | # The database schemas can be accessed here
## 61 | d <- schema$anon_example_weather_data$tbl() %>%
## 62 | sc::mandatory_db_filter(
## 63 | granularity_time = NULL,
## 64 | granularity_time_not = NULL,
## 65 | granularity_geo = NULL,
## 66 | granularity_geo_not = NULL,
## 67 | country_iso3 = NULL,
## 68 | location_code = argset$location_code,
## 69 | age = NULL,
## 70 | age_not = NULL,
## 71 | sex = NULL,
## 72 | sex_not = NULL
## 73 | ) %>%
## 74 | dplyr::select(
## 75 | # granularity_time,
## 76 | # granularity_geo,
## 77 | # country_iso3,
## 78 | # location_code,
## 79 | # border,
## 80 | # age,
## 81 | # sex,
## 82 |
## 83 | date,
## 84 |
## 85 | # isoyear,
## 86 | # isoweek,
## 87 | # isoyearweek,
## 88 | # season,
## 89 | # seasonweek,
## 90 | #
## 91 | # calyear,
## 92 | # calmonth,
## 93 | # calyearmonth,
## 94 |
## 95 | temp_max,
## 96 | temp_min
## 97 | ) %>%
## 98 | dplyr::collect() %>%
## 99 | as.data.table() %>%
## 100 | setorder(
## 101 | # location_code,
## 102 | date
## 103 | )
## 104 |
## 105 | # The variable returned must be a named list
## 106 | retval <- list(
## 107 | "data" = d
## 108 | )
## 109 | retval
## 110 | }
## 111 |
## 112 | # **** functions **** ----
## 113 |
## 114 |
## 115 |
## 116 |