16 Insights from North Dakota School Enrollment Data
Source: vignettes/enrollment_hooks.Rmd
enrollment_hooks.Rmd
library(ndschooldata)
library(dplyr)
library(tidyr)
library(ggplot2)
theme_set(theme_minimal(base_size = 14))

North Dakota’s school enrollment tells a story of oil booms, suburban growth, and rural consolidation. This vignette visualizes the key insights from the ndschooldata package.
1. The Oil Boom Reshaped North Dakota Schools
Enrollment grew 23% from 2008 to 2024 as the Bakken brought families to the state.
# Load every enrollment year used in this vignette (2008-2024). If the fetch
# fails, emit a warning carrying the underlying message and then re-raise the
# original error so rendering stops.
enr <- tryCatch(
  fetch_enr_multi(2008:2024, use_cache = TRUE),
  error = function(err) {
    warning("Failed to fetch enrollment data: ", err$message)
    stop(err)
  }
)

# One row per year: the state-level total across all grades.
statewide <- enr %>%
  filter(is_state) %>%
  filter(subgroup == "total_enrollment") %>%
  filter(grade_level == "TOTAL") %>%
  select(end_year, n_students)

# Abort early if the filters matched nothing.
stopifnot(nrow(statewide) > 0)
statewide
#> end_year n_students
#> 1 2008 94052
#> 2 2009 93406
#> 3 2010 93715
#> 4 2011 94729
#> 5 2012 95778
#> 6 2013 99192
#> 7 2014 101656
#> 8 2015 104278
#> 9 2016 106070
#> 10 2017 106863
#> 11 2018 108945
#> 12 2019 110842
#> 13 2020 112858
#> 14 2021 112045
#> 15 2022 113858
#> 16 2023 115385
#> 17 2024 115767
# Statewide enrollment by year, with dashed reference lines at 2015
# ("Oil boom peak") and 2020.5 ("COVID").
ggplot(data = statewide, mapping = aes(x = end_year, y = n_students)) +
  geom_line(color = "#2E86AB", linewidth = 1.2) +
  geom_point(color = "#2E86AB", size = 3) +
  geom_vline(
    xintercept = 2015, linetype = "dashed", color = "gray50", alpha = 0.7
  ) +
  annotate(
    "text",
    x = 2015.5, y = max(statewide$n_students) * 0.95,
    label = "Oil boom peak", hjust = 0, color = "gray40"
  ) +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.3
  ) +
  annotate(
    "text",
    x = 2020.7, y = min(statewide$n_students) * 1.02,
    label = "COVID", hjust = 0, color = "red", alpha = 0.5, size = 3
  ) +
  # Lower axis limit fixed at 90,000; upper limit left to ggplot.
  scale_y_continuous(labels = scales::comma, limits = c(90000, NA)) +
  ggtitle(
    "North Dakota K-12 Enrollment: 2008-2024",
    subtitle = "From 94,000 to 116,000 students in 16 years"
  ) +
  xlab("School Year (ending)") +
  ylab("Total Students") +
  labs(caption = "Source: North Dakota Department of Public Instruction")
2. Bismarck Leads the State in Enrollment
The capital city edges out West Fargo and Fargo as the state’s largest district.
# Load the 2024 enrollment data; warn with the underlying message and
# re-raise the original error if the fetch fails.
enr_2024 <- tryCatch(
  fetch_enr(2024, use_cache = TRUE),
  error = function(err) {
    warning("Failed to fetch 2024 enrollment data: ", err$message)
    stop(err)
  }
)

# Ten largest districts by total enrollment. The " Public Schools" and
# " School District" suffixes are removed from the display name.
top_districts <- enr_2024 %>%
  filter(
    is_district & subgroup == "total_enrollment" & grade_level == "TOTAL"
  ) %>%
  arrange(desc(n_students)) %>%
  head(10) %>%
  select(district_name, n_students) %>%
  mutate(
    district_name = gsub(" Public Schools| School District", "", district_name)
  )

stopifnot(nrow(top_districts) > 0)
top_districts
#> district_name n_students
#> 1 Bismarck 1 13732
#> 2 West Fargo 6 12676
#> 3 Fargo 1 11319
#> 4 Minot 1 7510
#> 5 Grand Forks 1 7428
#> 6 Williston Basin 7 5198
#> 7 Mandan 1 4368
#> 8 Dickinson 1 3977
#> 9 McKenzie Co 1 2105
#> 10 Jamestown 1 2080
# Horizontal bar chart of the ten largest districts, ordered by enrollment,
# with the student count printed at the end of each bar.
ggplot(
  data = top_districts,
  mapping = aes(x = reorder(district_name, n_students), y = n_students)
) +
  geom_col(fill = "#A23B72") +
  geom_text(aes(label = scales::comma(n_students)), hjust = -0.1, size = 3.5) +
  coord_flip() +
  # Extra 15% of headroom past the longest bar leaves room for the labels.
  scale_y_continuous(
    labels = scales::comma,
    expand = expansion(mult = c(0, 0.15))
  ) +
  ggtitle(
    "Top 10 North Dakota School Districts by Enrollment (2024)",
    subtitle = "Bismarck, West Fargo, and Fargo lead with 11,000-14,000 students each"
  ) +
  xlab(NULL) +
  ylab("Total Students") +
  labs(caption = "Source: North Dakota Department of Public Instruction")
3. West Fargo Doubled in Size Since 2008
The Fargo suburb is one of the fastest-growing districts in the country.
# District totals for the five comparison districts. Names are normalised by
# removing the " Public Schools" / " School District" / " Basin" text and any
# trailing district number, then matched exactly against the five names.
growth_districts <- enr %>%
  filter(
    is_district,
    subgroup == "total_enrollment",
    grade_level == "TOTAL",
    grepl("West Fargo|Fargo|Bismarck|Williston|Minot", district_name)
  ) %>%
  mutate(
    district_name = trimws(
      gsub(" Public Schools| School District| Basin| [0-9]+$", "", district_name)
    )
  ) %>%
  filter(
    district_name %in% c("Fargo", "West Fargo", "Bismarck", "Williston", "Minot")
  )

stopifnot(nrow(growth_districts) > 0)

# Normalize to 2008 baseline: each district's earliest-year enrollment is
# taken as the denominator, so the index is 100 in that year.
growth_indexed <- growth_districts %>%
  group_by(district_name) %>%
  mutate(baseline = n_students[end_year == min(end_year)]) %>%
  mutate(index = n_students / baseline * 100) %>%
  ungroup()

# Show only the first and last years.
growth_indexed %>%
  filter(end_year %in% c(2008, 2024)) %>%
  select(district_name, end_year, n_students, index)
#> # A tibble: 10 × 4
#> district_name end_year n_students index
#> <chr> <int> <dbl> <dbl>
#> 1 Bismarck 2008 10638 100
#> 2 Fargo 2008 10493 100
#> 3 West Fargo 2008 6179 100
#> 4 Minot 2008 6243 100
#> 5 Williston 2008 2110 100
#> 6 Bismarck 2024 13732 129.
#> 7 Fargo 2024 11319 108.
#> 8 West Fargo 2024 12676 205.
#> 9 Minot 2024 7510 120.
#> 10 Williston 2024 5198 246.
# Indexed enrollment per district; the dashed horizontal line at 100 is the
# baseline level.
ggplot(
  data = growth_indexed,
  mapping = aes(x = end_year, y = index, color = district_name)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  geom_hline(yintercept = 100, linetype = "dashed", color = "gray50") +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.3
  ) +
  annotate(
    "text",
    x = 2020.7, y = 105,
    label = "COVID", hjust = 0, color = "red", alpha = 0.5, size = 3
  ) +
  scale_color_brewer(palette = "Set1") +
  ggtitle(
    "District Growth Compared (Indexed to 2008 = 100)",
    subtitle = "West Fargo and Williston saw explosive growth; others held steady"
  ) +
  xlab("School Year (ending)") +
  ylab("Enrollment Index (2008 = 100)") +
  labs(
    color = "District",
    caption = "Source: North Dakota Department of Public Instruction"
  ) +
  theme(legend.position = "bottom")
4. Kindergarten Enrollment Dropped 10% from Its Peak
The enrollment wave from the oil boom is aging out. Kindergarten peaked at 9,620 in 2020 and fell to 8,636 by 2024.
# Collapse state-level grade rows into three school levels and total them per
# year. Grades outside K-12 (including the "TOTAL" row) map to NA and are
# dropped before summing.
grade_levels <- enr %>%
  filter(is_state & subgroup == "total_enrollment") %>%
  mutate(
    level = case_when(
      grade_level %in% c("K", "01", "02", "03", "04", "05") ~ "Elementary (K-5)",
      grade_level %in% c("06", "07", "08") ~ "Middle (6-8)",
      grade_level %in% c("09", "10", "11", "12") ~ "High School (9-12)",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(level)) %>%
  group_by(end_year, level) %>%
  summarize(total = sum(n_students, na.rm = TRUE), .groups = "drop")

stopifnot(nrow(grade_levels) > 0)

grade_levels %>%
  filter(end_year %in% c(2008, 2019, 2024)) %>%
  arrange(end_year, level)
#> # A tibble: 9 × 3
#> end_year level total
#> <int> <chr> <dbl>
#> 1 2008 Elementary (K-5) 40768
#> 2 2008 High School (9-12) 31492
#> 3 2008 Middle (6-8) 21792
#> 4 2019 Elementary (K-5) 53721
#> 5 2019 High School (9-12) 31430
#> 6 2019 Middle (6-8) 25691
#> 7 2024 Elementary (K-5) 54642
#> 8 2024 High School (9-12) 34556
#> 9 2024 Middle (6-8) 26569
# Stacked area chart of enrollment by school level.
ggplot(
  data = grade_levels,
  mapping = aes(x = end_year, y = total, fill = level)
) +
  geom_area(alpha = 0.8) +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.3
  ) +
  annotate(
    "text",
    x = 2020.7, y = max(grade_levels$total) * 0.5,
    label = "COVID", hjust = 0, color = "red", alpha = 0.5, size = 3
  ) +
  scale_fill_manual(
    values = c(
      "Elementary (K-5)" = "#F18F01",
      "Middle (6-8)" = "#C73E1D",
      "High School (9-12)" = "#3C1642"
    )
  ) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle(
    "Enrollment by Grade Level: 2008-2024",
    subtitle = "Elementary grew from 2008-2023 but dipped in 2024; high school still growing"
  ) +
  xlab("School Year (ending)") +
  ylab("Total Students") +
  labs(
    fill = "Grade Level",
    caption = "Source: North Dakota Department of Public Instruction"
  ) +
  theme(legend.position = "bottom")
5. 35 Districts Have Under 100 Students
Tiny rural schools define the North Dakota landscape.
# Count 2024 districts in each enrollment-size bucket.
#
# Rows with a missing n_students are dropped first, and the top bucket has an
# explicit condition. With a `TRUE ~ "5,000+"` catch-all, a district whose
# enrollment is NA fails every `<` comparison and would be counted as one of
# the largest districts.
size_dist <- enr_2024 %>%
  filter(is_district, subgroup == "total_enrollment", grade_level == "TOTAL") %>%
  filter(!is.na(n_students)) %>%
  mutate(
    size_category = case_when(
      n_students < 100 ~ "Under 100",
      n_students < 500 ~ "100-499",
      n_students < 1000 ~ "500-999",
      n_students < 5000 ~ "1,000-4,999",
      n_students >= 5000 ~ "5,000+"
    )
  ) %>%
  # Ordered factor levels keep the buckets in size order in tables and plots.
  mutate(
    size_category = factor(
      size_category,
      levels = c("Under 100", "100-499", "500-999", "1,000-4,999", "5,000+")
    )
  ) %>%
  count(size_category)

stopifnot(nrow(size_dist) > 0)
size_dist
#> size_category n
#> 1 Under 100 35
#> 2 100-499 98
#> 3 500-999 20
#> 4 1,000-4,999 8
#> 5 5,000+ 6
# Bar chart of district counts per size bucket, with the count above each bar.
ggplot(data = size_dist, mapping = aes(x = size_category, y = n)) +
  geom_col(fill = "#048A81") +
  geom_text(aes(label = n), vjust = -0.5, size = 4, fontface = "bold") +
  # 10% headroom above the tallest bar leaves room for the labels.
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  ggtitle(
    "North Dakota Districts by Size (2024)",
    subtitle = "35 districts (21%) have fewer than 100 students"
  ) +
  xlab("District Size (students)") +
  ylab("Number of Districts") +
  labs(caption = "Source: North Dakota Department of Public Instruction")
6. COVID Barely Dented North Dakota Enrollment (-0.7%)
Unlike other states, North Dakota saw only a small pandemic enrollment drop.
# Statewide totals for 2018-2024 with the change from the previous year, in
# students and in percent (rounded to one decimal).
#
# lag() compares each row with the row physically above it, so the rows are
# sorted by year first instead of relying on the order fetch_enr_multi()
# happens to return.
covid_years <- enr %>%
  filter(is_state, subgroup == "total_enrollment", grade_level == "TOTAL",
         end_year %in% 2018:2024) %>%
  select(end_year, n_students) %>%
  arrange(end_year) %>%
  mutate(
    change = n_students - lag(n_students),
    pct_change = round(change / lag(n_students) * 100, 1)
  )

stopifnot(nrow(covid_years) > 0)
covid_years
#> end_year n_students change pct_change
#> 1 2018 108945 NA NA
#> 2 2019 110842 1897 1.7
#> 3 2020 112858 2016 1.8
#> 4 2021 112045 -813 -0.7
#> 5 2022 113858 1813 1.6
#> 6 2023 115385 1527 1.3
#> 7 2024 115767 382 0.3
# Statewide enrollment 2018-2024. The 2021 point is drawn in a contrasting
# colour via a logical colour aesthetic; the legend for it is suppressed.
ggplot(data = covid_years, mapping = aes(x = end_year, y = n_students)) +
  geom_line(color = "#2E86AB", linewidth = 1.2) +
  geom_point(aes(color = end_year == 2021), size = 4) +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5
  ) +
  annotate(
    "text",
    x = 2020.7, y = max(covid_years$n_students),
    label = "COVID-19", hjust = 0, color = "red", alpha = 0.7
  ) +
  scale_color_manual(
    values = c("FALSE" = "#2E86AB", "TRUE" = "#C73E1D"),
    guide = "none"
  ) +
  scale_y_continuous(labels = scales::comma, limits = c(108000, NA)) +
  ggtitle(
    "COVID Impact on North Dakota Enrollment",
    subtitle = "Only -0.7% in 2021 vs. 3-5% drops in other states"
  ) +
  xlab("School Year (ending)") +
  ylab("Total Students") +
  labs(caption = "Source: North Dakota Department of Public Instruction")
7. Oil Counties vs. Traditional Farming Areas
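The Bakken oil formation transformed Williams and McKenzie counties: combined enrollment in the oil-county districts grew about 150% from 2008 to 2024, compared with about 48% in the Dickinson and Mandan comparison districts.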
# Combined enrollment per year for two groups of districts selected by name:
# names matching Williston/Watford/Tioga/Alexander are labelled
# "Oil Counties"; the remaining matches (Dickinson, Mandan) are "Traditional".
oil_districts <- enr %>%
  filter(
    is_district,
    subgroup == "total_enrollment",
    grade_level == "TOTAL",
    grepl("Williston|Watford|Tioga|Alexander|Dickinson|Mandan", district_name)
  ) %>%
  mutate(
    region = case_when(
      grepl("Williston|Watford|Tioga|Alexander", district_name) ~ "Oil Counties",
      TRUE ~ "Traditional"
    )
  ) %>%
  group_by(end_year, region) %>%
  summarize(total = sum(n_students, na.rm = TRUE), .groups = "drop")

stopifnot(nrow(oil_districts) > 0)

oil_districts %>%
  filter(end_year %in% c(2008, 2015, 2024))
#> # A tibble: 6 × 3
#> end_year region total
#> <int> <chr> <dbl>
#> 1 2008 Oil Counties 2412
#> 2 2008 Traditional 5629
#> 3 2015 Oil Counties 4035
#> 4 2015 Traditional 6879
#> 5 2024 Oil Counties 6052
#> 6 2024 Traditional 8345
# Combined enrollment over time for the two district groups.
ggplot(
  data = oil_districts,
  mapping = aes(x = end_year, y = total, color = region)
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.3
  ) +
  annotate(
    "text",
    x = 2020.7, y = min(oil_districts$total) * 1.05,
    label = "COVID", hjust = 0, color = "red", alpha = 0.5, size = 3
  ) +
  scale_color_manual(
    values = c("Oil Counties" = "#E63946", "Traditional" = "#457B9D")
  ) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle(
    "Oil Boom Counties vs. Traditional Areas",
    subtitle = "Williams/McKenzie oil counties surged while Dickinson/Mandan held steady"
  ) +
  xlab("School Year (ending)") +
  ylab("Combined Enrollment") +
  labs(
    color = "Region",
    caption = "Source: North Dakota Department of Public Instruction"
  ) +
  theme(legend.position = "bottom")
8. Bismarck: Steady Growth as State Capital
While West Fargo doubled in size, Bismarck’s enrollment has grown steadily without the volatility.
# Year-over-year percent change in total enrollment for Bismarck, Fargo, and
# West Fargo.
#
# The previous-year value is taken with lag(order_by = end_year), so the
# result does not depend on the order in which rows arrive from
# fetch_enr_multi(). Each district's first year has no predecessor and is
# dropped by the final filter.
bismarck_growth <- enr %>%
  filter(is_district, subgroup == "total_enrollment", grade_level == "TOTAL",
         grepl("Bismarck|Fargo|West Fargo", district_name)) %>%
  mutate(
    district_name = trimws(
      gsub(" Public Schools| School District| [0-9]+$", "", district_name)
    )
  ) %>%
  filter(district_name %in% c("Bismarck", "Fargo", "West Fargo")) %>%
  group_by(district_name) %>%
  mutate(
    yoy_change = (n_students - lag(n_students, order_by = end_year)) /
      lag(n_students, order_by = end_year) * 100
  ) %>%
  ungroup() %>%
  filter(!is.na(yoy_change))

stopifnot(nrow(bismarck_growth) > 0)

# Average, largest, and smallest yearly change per district.
bismarck_growth %>%
  group_by(district_name) %>%
  summarize(avg_yoy = round(mean(yoy_change, na.rm = TRUE), 2),
            max_yoy = round(max(yoy_change, na.rm = TRUE), 2),
            min_yoy = round(min(yoy_change, na.rm = TRUE), 2))
#> # A tibble: 3 × 4
#> district_name avg_yoy max_yoy min_yoy
#> <chr> <dbl> <dbl> <dbl>
#> 1 Bismarck 1.62 3.72 -1.16
#> 2 Fargo 0.48 2.39 -1.87
#> 3 West Fargo 4.61 7.78 1.33
# Year-over-year percent change per district; the dashed horizontal line at 0
# separates growth from decline.
ggplot(
  data = bismarck_growth,
  mapping = aes(x = end_year, y = yoy_change, color = district_name)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.3
  ) +
  annotate(
    "text",
    x = 2020.7, y = max(bismarck_growth$yoy_change) * 0.9,
    label = "COVID", hjust = 0, color = "red", alpha = 0.5, size = 3
  ) +
  scale_color_manual(
    values = c(
      "Bismarck" = "#1D3557",
      "Fargo" = "#E63946",
      "West Fargo" = "#A8DADC"
    )
  ) +
  ggtitle(
    "Year-over-Year Enrollment Change: Big Three Districts",
    subtitle = "Bismarck grows steadily; West Fargo shows boom-era volatility"
  ) +
  xlab("School Year (ending)") +
  ylab("Percent Change from Previous Year") +
  labs(
    color = "District",
    caption = "Source: North Dakota Department of Public Instruction"
  ) +
  theme(legend.position = "bottom")
9. Kindergarten as a Leading Indicator
Kindergarten enrollment predicts total enrollment 12 years later. The recent K decline signals future challenges.
# Put statewide kindergarten and total enrollment side by side (one row per
# year) and compute kindergarten as a percentage of the total.
k_vs_total <- enr %>%
  filter(
    is_state,
    subgroup == "total_enrollment",
    grade_level %in% c("K", "TOTAL")
  ) %>%
  select(end_year, grade_level, n_students) %>%
  pivot_wider(names_from = grade_level, values_from = n_students) %>%
  rename(kindergarten = K, total = TOTAL) %>%
  mutate(k_pct = kindergarten / total * 100)

stopifnot(nrow(k_vs_total) > 0)

k_vs_total %>%
  select(end_year, kindergarten, k_pct)
#> # A tibble: 17 × 3
#> end_year kindergarten k_pct
#> <int> <dbl> <dbl>
#> 1 2008 6729 7.15
#> 2 2009 7214 7.72
#> 3 2010 7470 7.97
#> 4 2011 7446 7.86
#> 5 2012 8236 8.60
#> 6 2013 8575 8.64
#> 7 2014 8822 8.68
#> 8 2015 9033 8.66
#> 9 2016 8925 8.41
#> 10 2017 8841 8.27
#> 11 2018 9271 8.51
#> 12 2019 9324 8.41
#> 13 2020 9620 8.52
#> 14 2021 8992 8.03
#> 15 2022 9524 8.36
#> 16 2023 9235 8.00
#> 17 2024 8636 7.46
# Kindergarten enrollment by year, with a dashed marker at 2020.
ggplot(data = k_vs_total, mapping = aes(x = end_year)) +
  geom_line(aes(y = kindergarten), color = "#F18F01", linewidth = 1.2) +
  geom_point(aes(y = kindergarten), color = "#F18F01", size = 2) +
  geom_vline(
    xintercept = 2020, linetype = "dashed", color = "gray50", alpha = 0.7
  ) +
  annotate(
    "text",
    x = 2020.3, y = max(k_vs_total$kindergarten, na.rm = TRUE) * 0.95,
    label = "Peak K enrollment", hjust = 0, color = "gray40", size = 3
  ) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle(
    "Kindergarten Enrollment: The Pipeline Indicator",
    subtitle = "K enrollment peaked in 2020 and has declined since"
  ) +
  xlab("School Year (ending)") +
  ylab("Kindergarten Students") +
  labs(caption = "Source: North Dakota Department of Public Instruction")
10. Grand Forks: Holding Steady While Others Surge
Grand Forks has stabilized while Fargo grew modestly and Minot plateaued after its oil-boom surge.
# Enrollment for Grand Forks, Fargo, and Minot, indexed so that each
# district's earliest year equals 100.
#
# The baseline is selected with first(order_by = end_year) so it is always the
# earliest year in the data, whatever order the rows arrive in. A bare
# first(n_students) would index to whichever row happens to come first.
gf_trend <- enr %>%
  filter(is_district, subgroup == "total_enrollment", grade_level == "TOTAL",
         grepl("Grand Forks|Fargo|Minot", district_name)) %>%
  mutate(
    district_name = trimws(
      gsub(" Public Schools| School District| [0-9]+$", "", district_name)
    )
  ) %>%
  filter(district_name %in% c("Grand Forks", "Fargo", "Minot")) %>%
  group_by(district_name) %>%
  mutate(
    indexed = n_students / first(n_students, order_by = end_year) * 100
  ) %>%
  ungroup()

stopifnot(nrow(gf_trend) > 0)

gf_trend %>%
  filter(end_year %in% c(2008, 2016, 2024)) %>%
  select(district_name, end_year, n_students, indexed)
#> # A tibble: 9 × 4
#> district_name end_year n_students indexed
#> <chr> <int> <dbl> <dbl>
#> 1 Fargo 2008 10493 100
#> 2 Grand Forks 2008 7192 100
#> 3 Minot 2008 6243 100
#> 4 Fargo 2016 11167 106.
#> 5 Grand Forks 2016 7264 101.
#> 6 Minot 2016 7529 121.
#> 7 Fargo 2024 11319 108.
#> 8 Grand Forks 2024 7428 103.
#> 9 Minot 2024 7510 120.
# Indexed enrollment for the three districts; the dashed horizontal line at
# 100 is the baseline level.
ggplot(
  data = gf_trend,
  mapping = aes(x = end_year, y = indexed, color = district_name)
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_hline(yintercept = 100, linetype = "dashed", color = "gray50") +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.3
  ) +
  annotate(
    "text",
    x = 2020.7, y = 98,
    label = "COVID", hjust = 0, color = "red", alpha = 0.5, size = 3
  ) +
  scale_color_manual(
    values = c(
      "Grand Forks" = "#2A9D8F",
      "Fargo" = "#E76F51",
      "Minot" = "#264653"
    )
  ) +
  ggtitle(
    "Grand Forks, Fargo, and Minot: Growth Trajectories",
    subtitle = "Grand Forks has stabilized while Fargo grew modestly and Minot plateaued"
  ) +
  xlab("School Year (ending)") +
  ylab("Enrollment Index (2008 = 100)") +
  labs(
    color = "District",
    caption = "Source: North Dakota Department of Public Instruction"
  ) +
  theme(legend.position = "bottom")
11. The Smallest Districts Are Getting Smaller
Rural consolidation continues as tiny districts shrink further.
# Track smallest districts over time: for each year, count the districts
# whose total enrollment is below 50, 100, and 200 students. The thresholds
# are cumulative, so a district under 50 is also counted under 100 and 200.
small_district_trend <- enr %>%
  filter(
    is_district & subgroup == "total_enrollment" & grade_level == "TOTAL"
  ) %>%
  group_by(end_year) %>%
  summarize(
    under_50 = sum(n_students < 50, na.rm = TRUE),
    under_100 = sum(n_students < 100, na.rm = TRUE),
    under_200 = sum(n_students < 200, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Long format: one row per year and threshold.
  pivot_longer(
    cols = starts_with("under"),
    names_to = "category",
    values_to = "count"
  ) %>%
  # Replace the column-name codes with display labels.
  mutate(
    category = case_when(
      category == "under_50" ~ "Under 50 students",
      category == "under_100" ~ "Under 100 students",
      category == "under_200" ~ "Under 200 students"
    )
  )

stopifnot(nrow(small_district_trend) > 0)

small_district_trend %>%
  filter(end_year %in% c(2008, 2016, 2024)) %>%
  arrange(end_year, category)
#> # A tibble: 9 × 3
#> end_year category count
#> <int> <chr> <int>
#> 1 2008 Under 100 students 57
#> 2 2008 Under 200 students 100
#> 3 2008 Under 50 students 33
#> 4 2016 Under 100 students 39
#> 5 2016 Under 200 students 78
#> 6 2016 Under 50 students 20
#> 7 2024 Under 100 students 35
#> 8 2024 Under 200 students 73
#> 9 2024 Under 50 students 13
# Overlaid (not stacked) areas: position = "identity" draws each threshold's
# count from zero, which suits the cumulative categories.
ggplot(
  data = small_district_trend,
  mapping = aes(x = end_year, y = count, fill = category)
) +
  geom_area(alpha = 0.7, position = "identity") +
  geom_vline(
    xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.3
  ) +
  annotate(
    "text",
    x = 2020.7, y = max(small_district_trend$count) * 0.95,
    label = "COVID", hjust = 0, color = "red", alpha = 0.5, size = 3
  ) +
  scale_fill_manual(
    values = c(
      "Under 50 students" = "#C73E1D",
      "Under 100 students" = "#F18F01",
      "Under 200 students" = "#048A81"
    )
  ) +
  ggtitle(
    "Small Districts in North Dakota Over Time",
    subtitle = "The number of tiny districts remains high despite consolidation pressure"
  ) +
  xlab("School Year (ending)") +
  ylab("Number of Districts") +
  labs(
    fill = "Size Category",
    caption = "Source: North Dakota Department of Public Instruction"
  ) +
  theme(legend.position = "bottom")
12. Native American Graduation Rates Lag State Average
Native American students face a 19-point graduation gap compared to the state average.
# Load the 2024 graduation data; warn with the underlying message and
# re-raise the original error if the fetch fails.
grad_2024 <- tryCatch(
  fetch_graduation(2024, use_cache = TRUE),
  error = function(err) {
    warning("Failed to fetch 2024 graduation data: ", err$message)
    stop(err)
  }
)

# Compare subgroups at state level, highest graduation rate first. Only the
# requested subgroups that are present in the data appear in the result.
grad_subgroups <- grad_2024 %>%
  filter(is_state) %>%
  filter(subgroup %in% c("all", "native_american", "white", "low_income")) %>%
  select(subgroup, grad_rate, cohort_count, graduate_count) %>%
  arrange(desc(grad_rate))

stopifnot(nrow(grad_subgroups) > 0)
grad_subgroups
#> # A tibble: 3 × 4
#> subgroup grad_rate cohort_count graduate_count
#> <chr> <dbl> <int> <int>
#> 1 white 0.875 6420 5620
#> 2 all 0.824 8681 7154
#> 3 native_american 0.634 939 595
# Horizontal bars of graduation rate by subgroup, labelled with the rate as a
# percentage. The axis runs to 1.1 so labels past the bar ends are not cut.
ggplot(
  data = grad_subgroups,
  mapping = aes(x = reorder(subgroup, grad_rate), y = grad_rate)
) +
  geom_col(fill = "#E63946") +
  geom_text(
    aes(label = paste0(round(grad_rate * 100, 1), "%")),
    hjust = -0.1, size = 4, fontface = "bold"
  ) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1.1)) +
  ggtitle(
    "2024 Graduation Rates by Subgroup (State Level)",
    subtitle = "Native American students graduate at 63% vs. 82% overall"
  ) +
  xlab(NULL) +
  ylab("4-Year Graduation Rate") +
  labs(caption = "Source: ND Insights (insights.nd.gov)")
13. Graduation Rates Dropped 7 Points from Their Peak
The statewide graduation rate peaked at 89% in 2020 and has declined to 82% in four years.
# Load graduation data for 2013-2024; warn with the underlying message and
# re-raise the original error if the fetch fails.
grad_multi <- tryCatch(
  fetch_graduation_multi(2013:2024, use_cache = TRUE),
  error = function(err) {
    warning("Failed to fetch graduation data: ", err$message)
    stop(err)
  }
)

# State-level, all-students rows only: one row per year.
grad_trend <- grad_multi %>%
  filter(is_state & subgroup == "all") %>%
  select(end_year, grad_rate, cohort_count, graduate_count)

stopifnot(nrow(grad_trend) > 0)
grad_trend
#> # A tibble: 12 × 4
#> end_year grad_rate cohort_count graduate_count
#> <int> <dbl> <int> <int>
#> 1 2013 0.872 7567 6598
#> 2 2014 0.869 7603 6609
#> 3 2015 0.863 7635 6589
#> 4 2016 0.873 7661 6687
#> 5 2017 0.87 7572 6588
#> 6 2018 0.88 7399 6512
#> 7 2019 0.883 7626 6730
#> 8 2020 0.89 7486 6660
#> 9 2021 0.87 7843 6825
#> 10 2022 0.843 8092 6823
#> 11 2023 0.827 8294 6863
#> 12 2024 0.824 8681 7154
# Statewide graduation rate by year, with the 2020 peak marked.
ggplot(data = grad_trend, mapping = aes(x = end_year, y = grad_rate)) +
  geom_line(color = "#2E86AB", linewidth = 1.2) +
  geom_point(color = "#2E86AB", size = 3) +
  geom_vline(
    xintercept = 2020, linetype = "dashed", color = "gray50", alpha = 0.7
  ) +
  annotate(
    "text",
    x = 2020.3, y = 0.89,
    label = "COVID-era peak (89%)", hjust = 0, color = "gray40"
  ) +
  # Axis restricted to 75%-95%.
  scale_y_continuous(labels = scales::percent, limits = c(0.75, 0.95)) +
  ggtitle(
    "North Dakota 4-Year Graduation Rate: 2013-2024",
    subtitle = "Peaked at 89% in 2020, dropped to 82% by 2024"
  ) +
  xlab("Cohort Year") +
  ylab("4-Year Graduation Rate") +
  labs(caption = "Source: ND Insights (insights.nd.gov)")
14. McKenzie County Leads in Graduation Rates
Among districts with 100+ student cohorts, the oil-country districts outperform the metro areas.
# Ten highest 2024 graduation rates among districts whose cohort has at least
# 100 students. Everything from " Public School" or " School District" onward
# is stripped from the display name.
top_grad_districts <- grad_2024 %>%
  filter(is_district & subgroup == "all" & cohort_count >= 100) %>%
  arrange(desc(grad_rate)) %>%
  head(10) %>%
  select(district_name, grad_rate, cohort_count, graduate_count) %>%
  mutate(
    district_name = gsub(" Public School.*| School District.*", "", district_name)
  )

stopifnot(nrow(top_grad_districts) > 0)
top_grad_districts
#> # A tibble: 10 × 4
#> district_name grad_rate cohort_count graduate_count
#> <chr> <dbl> <int> <int>
#> 1 McKenzie Co 1 0.858 106 91
#> 2 Mandan 1 0.853 334 285
#> 3 Bismarck 1 0.845 1057 893
#> 4 Grand Forks 1 0.828 599 496
#> 5 Devils Lake 1 0.823 130 107
#> 6 Jamestown 1 0.821 207 170
#> 7 Fargo 1 0.8 949 759
#> 8 West Fargo 6 0.799 884 706
#> 9 Wahpeton 37 0.79 119 94
#> 10 Dickinson 1 0.78 296 231
# Horizontal bars of graduation rate for the selected districts, ordered by
# rate and labelled with the percentage.
ggplot(
  data = top_grad_districts,
  mapping = aes(x = reorder(district_name, grad_rate), y = grad_rate)
) +
  geom_col(fill = "#048A81") +
  geom_text(
    aes(label = paste0(round(grad_rate * 100, 1), "%")),
    hjust = -0.1, size = 3.5
  ) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1.1)) +
  ggtitle(
    "Top 10 Districts by Graduation Rate (2024)",
    subtitle = "Minimum 100 students in cohort"
  ) +
  xlab(NULL) +
  ylab("4-Year Graduation Rate") +
  labs(caption = "Source: ND Insights (insights.nd.gov)")
15. Cohort Size Has Grown 15% Since 2013
More students are reaching senior year as the oil boom generation ages through.
# Statewide cohort size per year, the number who did not graduate, and the
# percent change in cohort size relative to the earliest year (rounded to one
# decimal).
#
# The reference cohort is selected with first(order_by = end_year) so it is
# always the earliest year in the data, not whichever row comes first.
cohort_trend <- grad_multi %>%
  filter(is_state, subgroup == "all") %>%
  select(end_year, cohort_count, graduate_count) %>%
  mutate(
    non_grad = cohort_count - graduate_count,
    pct_change = round(
      (cohort_count / first(cohort_count, order_by = end_year) - 1) * 100,
      1
    )
  )

stopifnot(nrow(cohort_trend) > 0)
cohort_trend
#> # A tibble: 12 × 5
#> end_year cohort_count graduate_count non_grad pct_change
#> <int> <int> <int> <int> <dbl>
#> 1 2013 7567 6598 969 0
#> 2 2014 7603 6609 994 0.5
#> 3 2015 7635 6589 1046 0.9
#> 4 2016 7661 6687 974 1.2
#> 5 2017 7572 6588 984 0.1
#> 6 2018 7399 6512 887 -2.2
#> 7 2019 7626 6730 896 0.8
#> 8 2020 7486 6660 826 -1.1
#> 9 2021 7843 6825 1018 3.6
#> 10 2022 8092 6823 1269 6.9
#> 11 2023 8294 6863 1431 9.6
#> 12 2024 8681 7154 1527 14.7
# Long format for plotting: one row per year and outcome, with the column
# names replaced by display labels.
cohort_long <- cohort_trend %>%
  select(end_year, graduate_count, non_grad) %>%
  pivot_longer(
    cols = c(graduate_count, non_grad),
    names_to = "status",
    values_to = "count"
  ) %>%
  mutate(
    status = ifelse(status == "graduate_count", "Graduated", "Did Not Graduate")
  )

# Stacked areas: graduates plus non-graduates add up to the cohort size.
ggplot(
  data = cohort_long,
  mapping = aes(x = end_year, y = count, fill = status)
) +
  geom_area(alpha = 0.8) +
  geom_vline(
    xintercept = 2020, linetype = "dashed", color = "gray50", alpha = 0.7
  ) +
  annotate(
    "text",
    x = 2020.3, y = max(cohort_long$count) * 0.9,
    label = "COVID", hjust = 0, color = "gray40", size = 3
  ) +
  scale_fill_manual(
    values = c("Graduated" = "#2A9D8F", "Did Not Graduate" = "#E76F51")
  ) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle(
    "4-Year Cohort Outcomes Over Time",
    subtitle = "Cohort grew 15% but non-graduates increased faster"
  ) +
  xlab("Cohort Year") +
  ylab("Number of Students") +
  labs(
    fill = "Outcome",
    caption = "Source: ND Insights (insights.nd.gov)"
  ) +
  theme(legend.position = "bottom")
16. Medium-Sized Districts Lead in Graduation Rates
Mid-size districts (200-999 students) outperform both small rural and large urban districts.
# Join enrollment to graduation data
# Note: enrollment uses CC-DDD format, graduation uses CCDDD format, so the
# hyphen is removed from the enrollment ID to build a common join key.
district_size <- enr_2024 %>%
  filter(is_district, subgroup == "total_enrollment", grade_level == "TOTAL") %>%
  mutate(district_id_clean = gsub("-", "", district_id)) %>%
  select(district_id_clean, enrollment = n_students)

# Attach each district's enrollment to its graduation row (cohorts of at
# least 10 students) and bucket the district by size.
#
# A district with no enrollment match has enrollment = NA after the left
# join. NA fails every `<` comparison, so without the explicit is.na() branch
# it would reach the catch-all and be labelled "Very Large (5,000+)", and the
# filter below could never remove it.
grad_with_size <- grad_2024 %>%
  filter(is_district, subgroup == "all", cohort_count >= 10) %>%
  mutate(district_id_clean = district_id) %>%
  left_join(district_size, by = "district_id_clean") %>%
  mutate(
    size_category = case_when(
      is.na(enrollment) ~ NA_character_,
      enrollment < 200 ~ "Small (<200)",
      enrollment < 1000 ~ "Medium (200-999)",
      enrollment < 5000 ~ "Large (1,000-4,999)",
      TRUE ~ "Very Large (5,000+)"
    )
  ) %>%
  # Drop districts whose enrollment could not be matched.
  filter(!is.na(size_category))

# Cohort-weighted average graduation rate per size bucket.
size_summary <- grad_with_size %>%
  group_by(size_category) %>%
  summarize(
    n_districts = n(),
    avg_grad_rate = weighted.mean(grad_rate, cohort_count, na.rm = TRUE),
    total_cohort = sum(cohort_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Ordered factor levels so the plot shows buckets from small to very large.
  mutate(
    size_category = factor(
      size_category,
      levels = c("Small (<200)", "Medium (200-999)",
                 "Large (1,000-4,999)", "Very Large (5,000+)")
    )
  )

stopifnot(nrow(size_summary) > 0)
size_summary
#> # A tibble: 4 × 4
#> size_category n_districts avg_grad_rate total_cohort
#> <fct> <int> <dbl> <int>
#> 1 Large (1,000-4,999) 8 0.814 1408
#> 2 Medium (200-999) 79 0.883 2302
#> 3 Small (<200) 29 0.857 418
#> 4 Very Large (5,000+) 6 0.792 4414
# Weighted average graduation rate per district-size bucket, labelled with
# the rate as a percentage.
ggplot(
  data = size_summary,
  mapping = aes(x = size_category, y = avg_grad_rate)
) +
  geom_col(fill = "#457B9D") +
  geom_text(
    aes(label = paste0(round(avg_grad_rate * 100, 1), "%")),
    vjust = -0.5, size = 4, fontface = "bold"
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1.1)) +
  ggtitle(
    "Graduation Rate by District Size",
    subtitle = "Medium-sized districts outperform both small and large districts"
  ) +
  xlab("District Size (by enrollment)") +
  ylab("Weighted Average Graduation Rate") +
  labs(caption = "Source: ND Insights & NDDPI")
Learn More
These insights just scratch the surface. Use ndschooldata to explore:
- Individual district trends over time
- Grade-level patterns within districts
- Regional comparisons across the state
# Get started
library(ndschooldata)
# dplyr supplies %>% and filter(), which the exploration step below uses.
library(dplyr)
# Fetch all available years (look the year range up once and reuse it)
available_years <- get_available_years()
enr_all <- fetch_enr_multi(
  available_years$min_year:available_years$max_year,
  use_cache = TRUE
)
# Explore your district
enr_all %>%
  filter(grepl("Your District", district_name))

Session Info
# Print the R version, platform, locale, and attached/loaded packages for
# the session that rendered this vignette.
sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_4.0.2 tidyr_1.3.2 dplyr_1.2.0 ndschooldata_0.1.0
#>
#> loaded via a namespace (and not attached):
#> [1] rappdirs_0.3.4 sass_0.4.10 utf8_1.2.6 generics_0.1.4
#> [5] stringi_1.8.7 hms_1.1.4 digest_0.6.39 magrittr_2.0.4
#> [9] evaluate_1.0.5 grid_4.5.2 RColorBrewer_1.1-3 fastmap_1.2.0
#> [13] cellranger_1.1.0 jsonlite_2.0.0 httr_1.4.8 purrr_1.2.1
#> [17] scales_1.4.0 codetools_0.2-20 textshaping_1.0.5 jquerylib_0.1.4
#> [21] cli_3.6.5 crayon_1.5.3 rlang_1.1.7 bit64_4.6.0-1
#> [25] withr_3.0.2 cachem_1.1.0 yaml_2.3.12 parallel_4.5.2
#> [29] tools_4.5.2 tzdb_0.5.0 curl_7.0.0 vctrs_0.7.1
#> [33] R6_2.6.1 lifecycle_1.0.5 stringr_1.6.0 bit_4.6.0
#> [37] fs_1.6.7 vroom_1.7.0 ragg_1.5.1 pkgconfig_2.0.3
#> [41] desc_1.4.3 pkgdown_2.2.0 pillar_1.11.1 bslib_0.10.0
#> [45] gtable_0.3.6 glue_1.8.0 systemfonts_1.3.2 xfun_0.56
#> [49] tibble_3.3.1 tidyselect_1.2.1 knitr_1.51 farver_2.1.2
#> [53] htmltools_0.5.9 rmarkdown_2.30 labeling_0.4.3 readr_2.2.0
#> [57] compiler_4.5.2 S7_0.2.1 readxl_1.4.5