15 Insights from North Carolina School Enrollment Data
Source: vignettes/enrollment_hooks.Rmd
enrollment_hooks.Rmd
library(ncschooldata)
library(dplyr)
library(tidyr)
library(ggplot2)
theme_set(theme_minimal(base_size = 14))

1. North Carolina’s 1.5 million students peaked in 2019 – COVID erased a decade of growth
NC grew steadily from 1.39M students in 2006 to a peak of 1.54M in 2019, then lost 66,000 students by 2021. Recovery is underway, but at 1.51M in 2024 the state is still 27K below its 2019 peak.
# Statewide headcount at five snapshot years (TOTAL grade level,
# total_enrollment subgroup).
# NOTE(review): 2019 and 2021 are not fetched here, so the 2019 peak quoted in
# the subtitle is not one of the plotted points -- confirm this is intended.
enr <- fetch_enr_multi(c(2006, 2010, 2015, 2020, 2024), use_cache = TRUE)

statewide <- enr %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_state) %>%
  select(end_year, n_students)

# Fail fast if the filter matched no rows.
stopifnot(nrow(statewide) > 0)
statewide

# Line chart in millions of students, with the exact count above each point.
ggplot(statewide, aes(x = end_year, y = n_students / 1e6)) +
  geom_line(linewidth = 1.2, color = "#2171B5") +
  geom_point(size = 3, color = "#2171B5") +
  geom_text(aes(label = scales::comma(n_students)), size = 3.5, vjust = -1) +
  scale_y_continuous(
    limits = c(1.2, 1.7),
    labels = scales::label_number(suffix = "M")
  ) +
  labs(
    title = "North Carolina Public School Enrollment",
    subtitle = "1.39M (2006) to 1.51M (2024) -- peaked at 1.54M in 2019",
    x = "Year",
    y = "Total Students"
  )

2. Wake County: 160,000 students and bigger than many states
Wake County Schools is the largest district in NC, with nearly 160,000 students. Charlotte-Mecklenburg follows at 140,000.
# 2024 data; `enr_2024` is reused by later chunks, so the name is kept.
enr_2024 <- fetch_enr(2024, use_cache = TRUE)

# Rank district-level totals from largest to smallest and keep the first ten.
top_districts <- enr_2024 %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_district) %>%
  arrange(desc(n_students)) %>%
  select(district_name, n_students) %>%
  head(10)

# Exactly ten rows are expected for the chart below.
stopifnot(nrow(top_districts) == 10)
top_districts

# Horizontal bars ordered by size; reorder() makes the largest district
# appear at the top of the y axis.
top_districts %>%
  mutate(district_name = reorder(district_name, n_students)) %>%
  ggplot(aes(x = n_students / 1000, y = district_name)) +
  geom_col(fill = "#2171B5") +
  geom_text(aes(label = scales::comma(n_students)), size = 3.5, hjust = -0.1) +
  scale_x_continuous(
    # Extra room on the right so the count labels are not clipped.
    expand = expansion(mult = c(0, 0.15)),
    labels = scales::label_number(suffix = "K")
  ) +
  labs(
    title = "Top 10 North Carolina School Districts by Enrollment (2024)",
    subtitle = "Wake County: 159,675 -- Charlotte-Mecklenburg: 140,415",
    x = "Students (thousands)",
    y = NULL
  )

3. White students dropped from 47% to 43% while Hispanic enrollment surged
Between 2018 and 2024, white enrollment fell by 106,000 students while Hispanic enrollment grew by 57,000. With white students now 43% of enrollment, North Carolina is already a majority-minority school system, and the shift is continuing.
enr_demo <- fetch_enr_multi(c(2018, 2019, 2020, 2021, 2024), use_cache = TRUE)

# Statewide counts for the four plotted groups, relabelled for display.
demographics <- enr_demo %>%
  filter(
    subgroup %in% c("white", "black", "hispanic", "asian"),
    grade_level == "TOTAL",
    is_state
  ) %>%
  select(end_year, subgroup, n_students) %>%
  mutate(
    subgroup = factor(
      subgroup,
      levels = c("white", "black", "hispanic", "asian"),
      labels = c("White", "Black", "Hispanic", "Asian")
    )
  )

# Fail fast if the filter matched no rows.
stopifnot(nrow(demographics) > 0)
demographics

# One line per group, in thousands of students.
ggplot(demographics, aes(x = end_year, y = n_students / 1000, color = subgroup)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  scale_color_manual(
    values = c(
      "White" = "#4292C6",
      "Black" = "#807DBA",
      "Hispanic" = "#41AB5D",
      "Asian" = "#EF6548"
    )
  ) +
  scale_y_continuous(labels = scales::label_number(suffix = "K")) +
  labs(
    title = "Demographic Shifts in NC Public Schools (2018-2024)",
    subtitle = "White: -106K | Hispanic: +57K | Black: -24K | Asian: +13K",
    x = "Year",
    y = "Students (thousands)",
    color = "Race/Ethnicity"
  ) +
  theme(legend.position = "bottom")

4. Charlotte-Mecklenburg lost 10,000 students during COVID then partially recovered
CMS dropped from 147,600 in 2019 to 137,600 in 2021 – a staggering 10,000-student loss. By 2024 it has clawed back to 140,400 but remains 7,200 below its peak.
enr_cms <- fetch_enr_multi(c(2018, 2019, 2020, 2021, 2024), use_cache = TRUE)

# District-level totals for Charlotte-Mecklenburg, one row per fetched year.
cms_trend <- enr_cms %>%
  filter(is_district, grepl("Charlotte-Mecklenburg", district_name),
         subgroup == "total_enrollment", grade_level == "TOTAL") %>%
  select(end_year, district_name, n_students) %>%
  # lag() works on row order, so sort by year explicitly instead of relying
  # on the order in which fetch_enr_multi() happens to return rows.
  arrange(end_year) %>%
  # Change versus the previous fetched year (the years are not evenly spaced).
  mutate(change = n_students - lag(n_students))

stopifnot(
  nrow(cms_trend) > 0,
  # The name match must yield a single district per year; otherwise `change`
  # would difference rows belonging to different districts.
  anyDuplicated(cms_trend$end_year) == 0
)
cms_trend

ggplot(cms_trend, aes(x = end_year, y = n_students / 1000)) +
  # Shaded COVID band is drawn first so it sits behind the line and labels.
  annotate("rect", xmin = 2019.5, xmax = 2021.5, ymin = -Inf, ymax = Inf,
           fill = "gray80", alpha = 0.3) +
  annotate("text", x = 2020.5, y = 148, label = "COVID", color = "gray40",
           size = 3) +
  geom_line(color = "#CB181D", linewidth = 1.2) +
  geom_point(color = "#CB181D", size = 3) +
  geom_text(aes(label = scales::comma(n_students)), vjust = -1, size = 3.5) +
  scale_y_continuous(
    labels = scales::label_number(suffix = "K"),
    limits = c(135, 150)
  ) +
  labs(
    title = "Charlotte-Mecklenburg Schools: COVID Enrollment Shock",
    subtitle = "-10,000 students from 2019 to 2021; partial recovery by 2024",
    x = "Year",
    y = "Students (thousands)"
  )

5. Charter schools grew from 26,000 to 144,000 students in 18 years
North Carolina’s charter sector has exploded. In 2006, 99 charter schools served 26,000 students. By 2024, 219 charters serve 144,000 – nearly 10% of all students.
# Statewide 2024 total, used as the denominator for each sector's share.
# Relies on `enr_2024` created in an earlier chunk.
state_total <- enr_2024 %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_state) %>%
  pull(n_students)

# Campus-level rows rolled up by charter flag: school count, student count,
# and share of the statewide total.
charter_summary <- enr_2024 %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_campus) %>%
  group_by(is_charter) %>%
  summarize(
    n_schools = n(),
    students = sum(n_students, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(pct = round(students / state_total * 100, 1))

# Expect one charter row and one non-charter row.
stopifnot(nrow(charter_summary) == 2)
charter_summary

charter_summary %>%
  mutate(label = ifelse(is_charter, "Charter", "Traditional")) %>%
  ggplot(aes(x = label, y = students / 1000, fill = is_charter)) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = paste0(scales::comma(students), "\n(", pct, "%)")),
    size = 4,
    vjust = -0.3
  ) +
  scale_fill_manual(
    values = c("FALSE" = "#2171B5", "TRUE" = "#41AB5D"),
    guide = "none"
  ) +
  scale_y_continuous(
    # Headroom above the bars for the two-line labels.
    expand = expansion(mult = c(0, 0.2)),
    labels = scales::label_number(suffix = "K")
  ) +
  labs(
    title = "Charter Schools Now Serve 9.5% of NC Students",
    subtitle = "219 charter schools with 143,916 students (2024)",
    x = NULL,
    y = "Students (thousands)"
  )

6. Half of NC students are economically disadvantaged
757,944 students – 50.3% of the state – qualify as economically disadvantaged. This is the first year econ_disadv data is available in the package.
# Statewide 2024 counts for all students and for the econ_disadv subgroup.
# Relies on `enr_2024` created in an earlier chunk.
econ_data <- enr_2024 %>%
  filter(is_state, grade_level == "TOTAL",
         subgroup %in% c("total_enrollment", "econ_disadv")) %>%
  select(subgroup, n_students) %>%
  # Divide by the total_enrollment row explicitly rather than by
  # max(n_students), which only works while the total happens to be the
  # largest non-missing value.
  mutate(
    pct = round(
      n_students / n_students[subgroup == "total_enrollment"] * 100,
      1
    )
  )

# Expect exactly the two requested subgroup rows.
stopifnot(nrow(econ_data) == 2)
econ_data

ggplot(
  econ_data,
  aes(x = reorder(subgroup, -n_students), y = n_students / 1000, fill = subgroup)
) +
  geom_col(width = 0.6) +
  geom_text(aes(label = paste0(scales::comma(n_students), "\n(", pct, "%)")),
            vjust = -0.3, size = 4) +
  scale_fill_manual(
    values = c("total_enrollment" = "#2171B5", "econ_disadv" = "#CB181D"),
    guide = "none"
  ) +
  scale_x_discrete(labels = c("total_enrollment" = "All Students",
                              "econ_disadv" = "Econ. Disadvantaged")) +
  scale_y_continuous(
    labels = scales::label_number(suffix = "K"),
    # Headroom above the bars for the two-line labels.
    expand = expansion(mult = c(0, 0.2))
  ) +
  labs(
    title = "Half of NC Students Are Economically Disadvantaged (2024)",
    subtitle = "757,944 students (50.3%) qualify",
    x = NULL,
    y = "Students (thousands)"
  )

7. Durham is nearly a 50/50 Black-Hispanic district
In 2018 Durham was 46% Black and 32% Hispanic. By 2024 the gap had closed to 39% Black and 38% Hispanic. (Shares are computed among White, Black, Hispanic, and Asian students only.) The crossover is imminent.
enr_durham <- fetch_enr_multi(c(2018, 2020, 2024), use_cache = TRUE)

# Per-year shares for district_id "320" across the four listed groups.
# NOTE(review): `total` is the sum of these four subgroups only, so `pct` is a
# share of that sum rather than of total enrollment -- confirm that matches
# the "Share of Enrollment" axis label.
durham_demographics <- enr_durham %>%
  filter(
    subgroup %in% c("white", "black", "hispanic", "asian"),
    grade_level == "TOTAL",
    district_id == "320",
    is_district
  ) %>%
  group_by(end_year) %>%
  mutate(
    total = sum(n_students),
    pct = round(n_students / total * 100, 1)
  ) %>%
  ungroup() %>%
  select(end_year, subgroup, n_students, pct)

# Fail fast if the filter matched no rows.
stopifnot(nrow(durham_demographics) > 0)
durham_demographics

# Stacked area of the shares; labels are applied here so the printed table
# above keeps the raw subgroup codes.
durham_demographics %>%
  mutate(
    subgroup = factor(
      subgroup,
      levels = c("white", "black", "hispanic", "asian"),
      labels = c("White", "Black", "Hispanic", "Asian")
    )
  ) %>%
  ggplot(aes(x = end_year, y = pct, fill = subgroup)) +
  geom_area(alpha = 0.7) +
  scale_fill_manual(
    values = c(
      "White" = "#4292C6",
      "Black" = "#807DBA",
      "Hispanic" = "#41AB5D",
      "Asian" = "#EF6548"
    )
  ) +
  # pct is already on a 0-100 scale, hence scale = 1.
  scale_y_continuous(labels = scales::label_percent(scale = 1)) +
  labs(
    title = "Durham Public Schools: Demographic Transformation",
    subtitle = "Black: 46% to 39% | Hispanic: 32% to 38% -- crossover approaching",
    x = "Year",
    y = "Share of Enrollment",
    fill = "Race/Ethnicity"
  ) +
  theme(legend.position = "bottom")

8. English Learners grew 42% in six years
From 118,569 in 2018 to 168,383 in 2024, NC schools added nearly 50,000 English Learners. LEP students now make up 11.2% of total enrollment.
enr_lep <- fetch_enr_multi(c(2018, 2019, 2020, 2021, 2024), use_cache = TRUE)

# Statewide counts for the "lep" subgroup, with growth relative to the
# earliest fetched year.
lep_trend <- enr_lep %>%
  filter(is_state, grade_level == "TOTAL", subgroup == "lep") %>%
  select(end_year, n_students) %>%
  # first() takes whatever row comes first, so sort by year to guarantee the
  # baseline is the earliest year rather than an accident of fetch order.
  arrange(end_year) %>%
  mutate(pct_change = round((n_students / first(n_students) - 1) * 100, 1))

# Fail fast if the filter matched no rows.
stopifnot(nrow(lep_trend) > 0)
lep_trend

ggplot(lep_trend, aes(x = end_year, y = n_students / 1000)) +
  geom_area(fill = "#41AB5D", alpha = 0.3) +
  geom_line(color = "#41AB5D", linewidth = 1.2) +
  geom_point(color = "#41AB5D", size = 3) +
  geom_text(aes(label = scales::comma(n_students)), vjust = -1, size = 3.5) +
  scale_y_continuous(
    labels = scales::label_number(suffix = "K"),
    limits = c(0, 200)
  ) +
  labs(
    title = "English Learners in North Carolina Schools",
    subtitle = "+42% from 118,569 (2018) to 168,383 (2024)",
    x = "Year",
    y = "English Learners (thousands)"
  )

9. Wake County grew 33% since 2006 but may have peaked
Wake County added nearly 40,000 students from 2006 to 2020, then dipped slightly by 2024. After years of relentless growth, has the Research Triangle’s anchor district plateaued?
enr_wake <- fetch_enr_multi(c(2006, 2010, 2015, 2020, 2024), use_cache = TRUE)

# District-level totals for districts whose name contains "Wake".
wake_trend <- enr_wake %>%
  filter(is_district, subgroup == "total_enrollment", grade_level == "TOTAL",
         grepl("Wake", district_name)) %>%
  select(end_year, district_name, n_students) %>%
  # lag() works on row order, so sort by year explicitly.
  arrange(end_year) %>%
  # Change versus the previous fetched year.
  mutate(change = n_students - lag(n_students))

stopifnot(
  nrow(wake_trend) > 0,
  # "Wake" is a substring match; insist on one district per year so `change`
  # never differences rows from two different districts.
  anyDuplicated(wake_trend$end_year) == 0
)
wake_trend

ggplot(wake_trend, aes(x = end_year, y = n_students / 1000)) +
  geom_area(fill = "#2171B5", alpha = 0.3) +
  geom_line(color = "#2171B5", linewidth = 1.2) +
  geom_point(color = "#2171B5", size = 3) +
  geom_text(aes(label = scales::comma(n_students)), vjust = -1, size = 3.5) +
  scale_y_continuous(
    labels = scales::label_number(suffix = "K"),
    limits = c(100, 175)
  ) +
  labs(
    title = "Wake County Schools: Growth Plateau?",
    subtitle = "+39,000 students since 2006, but -947 since 2020",
    x = "Year",
    y = "Students (thousands)"
  )

10. The coast is growing while the Piedmont shrinks
Coastal counties (New Hanover, Brunswick, Pender) gained students while Piedmont counties (Guilford, Forsyth, Alamance) lost nearly 9,000 since 2015.
enr_regional <- fetch_enr_multi(c(2015, 2024), use_cache = TRUE)

coastal <- c("New Hanover", "Brunswick", "Pender")
piedmont <- c("Guilford", "Forsyth", "Alamance")

# Alternation patterns built once from the county names.
# NOTE(review): these are substring matches on district_name, so any other
# district whose name contains one of these words is counted too -- verify
# against the matched names.
coastal_pattern <- paste(coastal, collapse = "|")
piedmont_pattern <- paste(piedmont, collapse = "|")

# Tag each district with its region, drop the rest, and total by year/region.
regional <- enr_regional %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_district) %>%
  mutate(
    region = case_when(
      grepl(coastal_pattern, district_name) ~ "Coast",
      grepl(piedmont_pattern, district_name) ~ "Piedmont",
      TRUE ~ "Other"
    )
  ) %>%
  filter(region %in% c("Coast", "Piedmont")) %>%
  group_by(end_year, region) %>%
  summarize(total = sum(n_students, na.rm = TRUE), .groups = "drop")

# Two years x two regions.
stopifnot(nrow(regional) == 4)
regional

ggplot(regional, aes(x = factor(end_year), y = total / 1000, fill = region)) +
  geom_col(width = 0.7, position = "dodge") +
  geom_text(
    aes(label = scales::comma(total)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.7),
    size = 3.5,
    vjust = -0.5
  ) +
  scale_fill_manual(values = c("Coast" = "#41AB5D", "Piedmont" = "#EF6548")) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.15)),
    labels = scales::label_number(suffix = "K")
  ) +
  labs(
    title = "Coastal vs Piedmont School Enrollment",
    subtitle = "Coast: +3.7% | Piedmont: -6.1% since 2015",
    x = "Year",
    y = "Students (thousands)",
    fill = "Region"
  ) +
  theme(legend.position = "bottom")

11. Rural eastern NC lost 25% of its students since 2015
Eight tobacco belt counties (Edgecombe, Halifax, Hertford, Northampton, Bertie, Martin, Washington, Tyrrell) dropped from 21,596 to 16,219 students – a 25% decline in under a decade.
enr_multi <- fetch_enr_multi(c(2015, 2024), use_cache = TRUE)

eastern_rural <- c("Edgecombe", "Halifax", "Hertford", "Northampton",
                   "Bertie", "Martin", "Washington", "Tyrrell")

# Sum district totals per year for districts whose name contains any of the
# listed county names, then express each year relative to the first one.
# NOTE(review): substring matching may also pick up other districts that share
# one of these words -- verify against the matched names.
eastern_data <- enr_multi %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_district) %>%
  filter(grepl(paste(eastern_rural, collapse = "|"), district_name)) %>%
  group_by(end_year) %>%
  summarize(total = sum(n_students, na.rm = TRUE), .groups = "drop") %>%
  mutate(pct_change = round((total / first(total) - 1) * 100, 1))

# One row per fetched year.
stopifnot(nrow(eastern_data) == 2)
eastern_data

ggplot(eastern_data, aes(x = factor(end_year), y = total / 1000)) +
  geom_col(width = 0.6, fill = "#8B4513") +
  geom_text(aes(label = scales::comma(total)), size = 4, vjust = -0.5) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.15)),
    labels = scales::label_number(suffix = "K")
  ) +
  labs(
    title = "Eastern NC Rural Counties: Enrollment Decline",
    subtitle = "8 tobacco belt counties lost 24.9% of students since 2015",
    x = "Year",
    y = "Students (thousands)"
  )

12. Union County grew 32% since 2006 on Charlotte suburban boom
Union County Public Schools went from 31,330 to 41,378 students. Weddington, Waxhaw, and Indian Trail fueled the growth, though the pace has slowed since 2015.
enr_union <- fetch_enr_multi(c(2006, 2010, 2015, 2020, 2024), use_cache = TRUE)

# District-level totals for districts whose name contains "Union".
union_trend <- enr_union %>%
  filter(is_district, subgroup == "total_enrollment", grade_level == "TOTAL",
         grepl("Union", district_name)) %>%
  select(end_year, district_name, n_students) %>%
  # Keep the printed table and the plotted line in chronological order.
  arrange(end_year)

stopifnot(
  nrow(union_trend) > 0,
  # "Union" is a substring match; insist on one district per year so the
  # chart never mixes rows from two different districts into one line.
  anyDuplicated(union_trend$end_year) == 0
)
union_trend

ggplot(union_trend, aes(x = end_year, y = n_students / 1000)) +
  geom_area(fill = "#2171B5", alpha = 0.3) +
  geom_line(color = "#2171B5", linewidth = 1.2) +
  geom_point(color = "#2171B5", size = 3) +
  geom_text(aes(label = scales::comma(n_students)), vjust = -1, size = 3.5) +
  scale_y_continuous(
    labels = scales::label_number(suffix = "K"),
    limits = c(20, 50)
  ) +
  labs(
    title = "Union County Public Schools: Charlotte's Growth Engine",
    subtitle = "31,330 to 41,378 students (+32%) since 2006",
    x = "Year",
    y = "Students (thousands)"
  )

13. Asheville-area mountain counties lost 10% of students
Seven mountain counties around Asheville (Buncombe, Henderson, Haywood, Madison, Transylvania, Yancey, Mitchell) dropped from 55,623 to 49,817 students since 2015.
enr_mountain <- fetch_enr_multi(c(2015, 2024), use_cache = TRUE)

mountain <- c("Buncombe", "Henderson", "Haywood", "Madison",
              "Transylvania", "Yancey", "Mitchell")

# Per-year total and number of matched districts for districts whose name
# contains any of the listed county names.
# NOTE(review): n_districts is reported but never asserted; if the substring
# match picks up extra districts the total would silently include them --
# check the printed n_districts against the seven counties listed.
mountain_data <- enr_mountain %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_district) %>%
  filter(grepl(paste(mountain, collapse = "|"), district_name)) %>%
  group_by(end_year) %>%
  summarize(
    total = sum(n_students, na.rm = TRUE),
    n_districts = n(),
    .groups = "drop"
  )

# One row per fetched year.
stopifnot(nrow(mountain_data) == 2)
mountain_data

ggplot(mountain_data, aes(x = factor(end_year), y = total / 1000)) +
  geom_col(width = 0.6, fill = "#4A7C59") +
  geom_text(aes(label = scales::comma(total)), size = 4, vjust = -0.5) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.15)),
    labels = scales::label_number(suffix = "K")
  ) +
  labs(
    title = "Western NC Mountain Counties: Enrollment Trends",
    subtitle = "7 Asheville-area counties: -10.4% as retirees replace families",
    x = "Year",
    y = "Students (thousands)"
  )

14. Special education holds steady at 13-14% of enrollment
NC’s special education population has been remarkably stable at around 13-14% of total enrollment since 2018, even as overall enrollment fluctuated with COVID.
enr_sped <- fetch_enr_multi(c(2018, 2019, 2020, 2021, 2024), use_cache = TRUE)

# One row per year with special_ed and total_enrollment side by side, plus
# the special-ed share and its growth relative to the earliest fetched year.
sped_trend <- enr_sped %>%
  filter(is_state, grade_level == "TOTAL",
         subgroup %in% c("total_enrollment", "special_ed")) %>%
  select(end_year, subgroup, n_students) %>%
  pivot_wider(names_from = subgroup, values_from = n_students) %>%
  # first() takes whatever row comes first, so sort by year to guarantee the
  # baseline is the earliest year rather than an accident of fetch order.
  arrange(end_year) %>%
  mutate(
    pct_sped = round(special_ed / total_enrollment * 100, 1),
    sped_change = round((special_ed / first(special_ed) - 1) * 100, 1)
  )

# Fail fast if the filter matched no rows.
stopifnot(nrow(sped_trend) > 0)
sped_trend

# Years are plotted as discrete categories, as in the other bar charts in
# this document: on a continuous axis a bar width of 1.5 makes the
# consecutive 2018-2021 bars overlap each other.
ggplot(sped_trend, aes(x = factor(end_year))) +
  geom_col(aes(y = special_ed / 1000), fill = "#6A51A3", width = 0.6) +
  geom_text(aes(y = special_ed / 1000, label = paste0(pct_sped, "%")),
            vjust = -0.5, size = 4) +
  scale_y_continuous(
    labels = scales::label_number(suffix = "K"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "Special Education Students in NC (2018-2024)",
    subtitle = "Stable at 13-14% of enrollment; 202,380 students in 2024",
    x = "Year",
    y = "Students with IEPs (thousands)"
  )

15. Triangle vs Triad: diverging metro areas
The Research Triangle (Wake, Durham, Orange, Johnston, Chatham) grew from 237K to 243K while the Piedmont Triad (Guilford, Forsyth, Davidson, Randolph, Alamance) shrank from 185K to 171K since 2015.
enr_metro <- fetch_enr_multi(c(2015, 2020, 2024), use_cache = TRUE)

triangle <- c("Wake", "Durham", "Orange", "Johnston", "Chatham")
triad <- c("Guilford", "Forsyth", "Davidson", "Randolph", "Alamance")

# Alternation patterns built once from the county names.
# NOTE(review): these are substring matches on district_name, so any other
# district whose name contains one of these words is counted too -- verify
# against the matched names.
triangle_pattern <- paste(triangle, collapse = "|")
triad_pattern <- paste(triad, collapse = "|")

# Tag each district with its metro area, drop the rest, and total by
# year/area. Triangle is tested first, so it wins if a name matches both.
metro_data <- enr_metro %>%
  filter(grade_level == "TOTAL", subgroup == "total_enrollment", is_district) %>%
  mutate(
    region = case_when(
      grepl(triangle_pattern, district_name) ~ "Triangle",
      grepl(triad_pattern, district_name) ~ "Triad",
      TRUE ~ "Other"
    )
  ) %>%
  filter(region %in% c("Triangle", "Triad")) %>%
  group_by(end_year, region) %>%
  summarize(total = sum(n_students, na.rm = TRUE), .groups = "drop")

# Three years x two metro areas.
stopifnot(nrow(metro_data) == 6)
metro_data

ggplot(metro_data, aes(x = end_year, y = total / 1000, color = region)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  geom_text(
    aes(label = scales::comma(total)),
    size = 3.5,
    vjust = -1,
    # Keep the text glyph out of the colour legend.
    show.legend = FALSE
  ) +
  scale_color_manual(values = c("Triangle" = "#2171B5", "Triad" = "#CB181D")) +
  scale_y_continuous(labels = scales::label_number(suffix = "K")) +
  labs(
    title = "Triangle vs Triad: Diverging Metro Areas",
    subtitle = "Triangle: +2.7% | Triad: -7.4% since 2015",
    x = "Year",
    y = "Students (thousands)",
    color = "Metro Area"
  ) +
  theme(legend.position = "bottom")

Explore the data yourself
library(ncschooldata)
library(dplyr)  # supplies %>% and filter(), so the snippet runs on its own

# Fetch recent years
enr <- fetch_enr_multi(c(2018, 2019, 2020, 2021, 2024), use_cache = TRUE)

# State totals
enr %>%
  filter(is_state, subgroup == "total_enrollment", grade_level == "TOTAL")

# Your district: is_district restricts the result to district-level rows,
# matching the Wake County example earlier in this document.
enr %>%
  filter(is_district,
         grepl("Wake", district_name),
         subgroup == "total_enrollment",
         grade_level == "TOTAL")