# Mean `hp` for each `type_1` value in the Pokemon CSV, largest mean first.
library(tidyverse)

df <- read_csv(file = "pokemon_df.csv")

df |>
  group_by(type_1) |>
  summarize(hp = mean(hp)) |>
  # Sort descending so the type with the highest mean HP is printed first.
  arrange(desc(hp))
#> # A tibble: 18 × 2
#> type_1 hp
#> <chr> <dbl>
#> 1 dragon 84.5
#> 2 normal 77.3
#> 3 fairy 72.9
#> 4 psychic 72.6
#> 5 fighting 71.6
#> 6 ground 71.6
#> 7 water 71.1
#> 8 flying 70.8
#> 9 ice 70.5
#> 10 fire 69.7
#> 11 dark 69.7
#> 12 steel 67.3
#> 13 poison 67.2
#> 14 grass 66.7
#> 15 rock 65.3
#> 16 ghost 64.2
#> 17 bug 57.7
#> 18 electric 55.8
# Mean arrival delay per month, read from a local Parquet file via duckplyr.
library(tidyverse)
library(duckplyr)

df <- read_parquet_duckdb(path = "flights.parquet")

df |>
  summarize(
    # Ignore missing arrival delays when averaging.
    arr_delay = mean(arr_delay, na.rm = TRUE),
    .by = month
  ) |>
  arrange(month)
#> # A duckplyr data frame: 2 variables
#> month arr_delay
#> <int> <dbl>
#> 1 1 6.13
#> 2 2 5.61
#> 3 3 5.81
#> 4 4 11.2
#> 5 5 3.52
#> 6 6 16.5
#> 7 7 16.7
#> 8 8 6.04
#> 9 9 -4.02
#> 10 10 -0.167
#> 11 11 0.461
#> 12 12 14.9
# Same monthly arrival-delay summary as a duckplyr pipeline, but finished with
# collect() so the printed result is a regular tibble.
library(tidyverse)
library(duckplyr)

df <- read_parquet_duckdb(path = "flights.parquet")

df |>
  summarize(
    # Ignore missing arrival delays when averaging.
    arr_delay = mean(arr_delay, na.rm = TRUE),
    .by = month
  ) |>
  arrange(month) |>
  collect()
#> # A tibble: 12 × 2
#> month arr_delay
#> <int> <dbl>
#> 1 1 6.13
#> 2 2 5.61
#> 3 3 5.81
#> 4 4 11.2
#> 5 5 3.52
#> 6 6 16.5
#> 7 7 16.7
#> 8 8 6.04
#> 9 9 -4.02
#> 10 10 -0.167
#> 11 11 0.461
#> 12 12 14.9
# Mean arrival delay per month across several remote Parquet files.
library(tidyverse)
library(duckplyr)

# `{2022:2024}` is expanded by glue into one URL per year.
urls <- glue::glue(
  "https://blobs.duckdb.org/flight-data-partitioned/Year={2022:2024}/data_0.parquet"
)

df <- read_parquet_duckdb(path = urls)

df |>
  summarize(
    # Ignore missing arrival delays when averaging.
    ArrDelay = mean(ArrDelay, na.rm = TRUE),
    .by = Month
  ) |>
  arrange(Month)
#> # A duckplyr data frame: 2 variables
#> Month ArrDelay
#> <dbl> <dbl>
#> 1 1 7.42
#> 2 2 3.08
#> 3 3 7.55
#> 4 4 7.64
#> 5 5 8.51
#> 6 6 12.3
#> 7 7 13.3
#> 8 8 8.58
#> 9 9 3.92
#> 10 10 1.62
#> 11 11 1.66
#> 12 12 6.89
# Tips
# Mean arrival delay per month from the `flights` table of a SQLite database.
# The pipeline is printed without collect(), so the result stays a database
# query rather than a local tibble.
library(tidyverse)

con <- connections::connection_open(
  RSQLite::SQLite(),
  "nycflights13.sqlite"
)
df <- tbl(con, "flights")

df |>
  group_by(month) |>
  summarise(
    # Ignore missing arrival delays when averaging.
    arr_delay = mean(arr_delay, na.rm = TRUE)
  )
#> # Source: SQL [?? x 2]
#> # Database: sqlite 3.50.1 [/Users/juliasilge/Work/talks/useR-2025/nycflights13.sqlite]
#> month arr_delay
#> <int> <dbl>
#> 1 1 6.13
#> 2 2 5.61
#> 3 3 5.81
#> 4 4 11.2
#> 5 5 3.52
#> 6 6 16.5
#> 7 7 16.7
#> 8 8 6.04
#> 9 9 -4.02
#> 10 10 -0.167
#> 11 11 0.461
#> 12 12 14.9
# Mean arrival delay per month from the `flights` table of a SQLite database,
# pulled into R as a tibble with collect().
library(tidyverse)

con <- connections::connection_open(
  RSQLite::SQLite(),
  "nycflights13.sqlite"
)
df <- tbl(con, "flights")

df |>
  group_by(month) |>
  summarise(
    # Ignore missing arrival delays when averaging.
    arr_delay = mean(arr_delay, na.rm = TRUE)
  ) |>
  collect()
#> # A tibble: 12 × 2
#> month arr_delay
#> <int> <dbl>
#> 1 1 6.13
#> 2 2 5.61
#> 3 3 5.81
#> 4 4 11.2
#> 5 5 3.52
#> 6 6 16.5
#> 7 7 16.7
#> 8 8 6.04
#> 9 9 -4.02
#> 10 10 -0.167
#> 11 11 0.461
#> 12 12 14.9
# Tips
# Tips
# kernelSupervisor.shutdownTimeout setting