--- title: "Getting Started" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{getting-started} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(countmaskr) library(knitr) ```   # Code logic plot ```{r flowchart, echo=FALSE, results='asis'} library(htmltools) diagram <- ' %%{init: { "theme": "base", "themeVariables": { "fontSize": "20px", "primaryColor": "#bbdefb", "primaryTextColor": "#0d47a1", "primaryBorderColor": "#1976d2", "lineColor": "#5a5957" }, "flowchart": { "nodeSpacing": 50, "rankSpacing": 60, "curve": "basis" } }}%% flowchart TD A([frequency table\nfor a variable]):::gray --> B{is a primary\ncell present?}:::decision B -- No --> T([terminate]):::terminal B -- Yes --> C[mask values of primary cells\nas < threshold]:::pc C --> D{are there 2 or more primary\ncells in the same column?}:::decision D -- Yes --> E[mask a column-wise secondary\ncell in the same column\nin a different row]:::csc D -- No --> F{one-way table?}:::decision E --> F F -- Yes --> T F -- No --> G{can any masked cells be calculated\nfrom the same row across\ndifferent columns?}:::decision G -- No --> T G -- Yes --> H[mask a row-wise secondary cell\nin a different column\nin the same row]:::rsc H --> I{can this cell be calculated in\nthe same column from\na different row?}:::decision I -- No --> T I -- Yes --> J[mask a column-wise secondary cell\nin the same column\nin a different row]:::csc J --> G classDef gray fill:#e8e7e3,stroke:#aaa,color:#333,font-weight:500 classDef terminal fill:#c8e6c9,stroke:#4caf50,color:#1b5e20,font-weight:600 classDef decision fill:#bbdefb,stroke:#1976d2,color:#0d47a1,font-weight:600 classDef pc fill:#fff9c4,stroke:#f9a825,color:#7a3000,font-weight:600 classDef csc fill:#ffe0b2,stroke:#fb8c00,color:#4e342e,font-weight:600 classDef rsc fill:#dcedc8,stroke:#8bc34a,color:#33691e,font-weight:600 ' browsable(tagList( tags$script( src = "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js" ), tags$div(class = "mermaid", diagram), tags$script(HTML( "mermaid.initialize({ startOnLoad: true, securityLevel: 'loose' });" )) )) ```   # One dimensional frequency table ```{r message=FALSE,error=FALSE,warning=FALSE} data("countmaskr_data") aggregate_table <- countmaskr_data %>% select(-c(id, age)) %>% gather(block, Characteristics) %>% group_by(block, Characteristics) %>% summarise(N = n()) %>% ungroup() ```   ## Algorithm 1 ```{r message=FALSE,error=FALSE,warning=FALSE} aggregate_table %>% group_by(block) %>% mutate(N_masked = mask_counts(N)) %>% kable() ```   ## Algorithm 2 ```{r message=FALSE,error=FALSE,warning=FALSE} aggregate_table %>% group_by(block) %>% mutate(N_masked = mask_counts_2(N)) %>% kable() ```   ## Algorithm 3 ```{r message=FALSE,error=FALSE,warning=FALSE} aggregate_table %>% group_by(block) %>% mutate(N_masked = perturb_counts(N)) %>% kable() ```   # Using `mask_table()` mask_table() is a multi-tasking function which allows for masking, obtaining original and masked percentages on an aggregated table.   ## One-way masking on the original column. ```{r message=FALSE,error=FALSE,warning=FALSE} mask_table(aggregate_table, group_by = "block", col_groups = list("N")) %>% kable() ```   ## One-way masking while preserving original column and creating new masked columns Naming convention for the masked columns follow {col}_N_masked pattern. ```{r message=FALSE,error=FALSE,warning=FALSE} mask_table( aggregate_table, group_by = "block", col_groups = list("N"), overwrite_columns = FALSE ) %>% kable() ```   ## Owo-way masking with computing original and masked percentages Naming convention for the original and masked percentages follow {col}_perc and {col}_perc_masked pattern. ```{r message=FALSE,error=FALSE,warning=FALSE} mask_table( aggregate_table, group_by = "block", col_groups = list("N"), overwrite_columns = TRUE, percentages = TRUE ) %>% kable() ```   # Two-way frequency table ```{r message=FALSE,error=FALSE,warning=FALSE} two_way_freq_table <- countmaskr_data %>% count(race, gender) %>% pivot_wider(names_from = gender, values_from = n) %>% mutate( across(all_of(c("Female", "Male", "Other")), ~ ifelse(is.na(.), 0, .)), Overall = Female + Male + Other, .after = 1 ) mask_table( two_way_freq_table, col_groups = list(c("Overall", "Female", "Male", "Other")), overwrite_columns = TRUE, percentages = FALSE ) %>% kable() ```