Skip to contents
theme_readme <- function() {
  theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(face = "bold", size = 16),
      plot.subtitle = element_text(color = "gray40"),
      panel.grid.minor = element_blank(),
      legend.position = "bottom"
    )
}

colors <- c("total" = "#2C3E50", "white" = "#3498DB", "black" = "#E74C3C",
            "hispanic" = "#F39C12", "asian" = "#9B59B6")
# Fetch data -- graceful degradation if API returns errors for specific years
enr <- fetch_enr_multi(2016:2024, use_cache = TRUE)

# Use the most recent year available for current-year snapshots
max_year <- max(enr$end_year)
enr_current <- enr %>% filter(end_year == max_year)

if (max_year < 2024) {
  warning("2024 data unavailable from MA DOE API; using ", max_year, " as most recent year")
}

1. Boston lost 6,000+ students since 2016

Boston Public Schools enrollment has fallen from over 51,000 in 2016 to under 46,000 in 2024, a decline of more than 6,000 students.

boston <- enr %>%
  filter(is_district, district_id == "0035",
         subgroup == "total_enrollment", grade_level == "TOTAL")
stopifnot(nrow(boston) > 0)

print(boston %>% select(end_year, district_name, n_students))
#>   end_year district_name n_students
#> 1     2016        Boston      53530
#> 2     2017        Boston      53263
#> 3     2018        Boston      52665
#> 4     2019        Boston      51433
#> 5     2020        Boston      50480
#> 6     2021        Boston      48112
#> 7     2022        Boston      46169
#> 8     2023        Boston      46001
#> 9     2024        Boston      45742

ggplot(boston, aes(x = end_year, y = n_students)) +
  geom_line(linewidth = 1.5, color = colors["total"]) +
  geom_point(size = 3, color = colors["total"]) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  scale_y_continuous(labels = comma, limits = c(0, NA)) +
  labs(title = "Boston Public Schools Enrollment",
       subtitle = "Steady decline over the past decade",
       x = "School Year", y = "Students") +
  theme_readme()

2. Gateway Cities under pressure

Springfield, Worcester, and Lowell – Massachusetts’ “Gateway Cities” – show different enrollment trajectories. Springfield has declined while Worcester and Lowell hold relatively steady.

gateway <- enr %>%
  filter(is_district, district_id %in% c("0281", "0348", "0160"),
         subgroup == "total_enrollment", grade_level == "TOTAL")
stopifnot(nrow(gateway) > 0)

print(gateway %>% select(end_year, district_name, n_students))
#>    end_year district_name n_students
#> 1      2016        Lowell      14152
#> 2      2016   Springfield      25479
#> 3      2016     Worcester      25076
#> 4      2017        Lowell      14416
#> 5      2017   Springfield      25633
#> 6      2017     Worcester      25479
#> 7      2018        Lowell      14436
#> 8      2018   Springfield      25604
#> 9      2018     Worcester      25306
#> 10     2019        Lowell      14548
#> 11     2019   Springfield      25297
#> 12     2019     Worcester      25415
#> 13     2020        Lowell      14434
#> 14     2020   Springfield      25007
#> 15     2020     Worcester      25044
#> 16     2021        Lowell      14023
#> 17     2021   Springfield      24239
#> 18     2021     Worcester      23986
#> 19     2022        Lowell      13991
#> 20     2022   Springfield      23799
#> 21     2022     Worcester      23735
#> 22     2023        Lowell      14130
#> 23     2023   Springfield      23721
#> 24     2023     Worcester      24318
#> 25     2024        Lowell      14274
#> 26     2024   Springfield      23693
#> 27     2024     Worcester      24350

ggplot(gateway, aes(x = end_year, y = n_students, color = district_name)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  scale_y_continuous(labels = comma) +
  labs(title = "Gateway Cities Enrollment",
       subtitle = "Springfield, Worcester, and Lowell",
       x = "School Year", y = "Students", color = "") +
  theme_readme()

3. Massachusetts is diversifying fast

The state has gone from 62% white in 2016 to 53% in 2024. Hispanic students now make up 25% of enrollment.

demo <- enr %>%
  filter(is_state, grade_level == "TOTAL",
         subgroup %in% c("white", "black", "hispanic", "asian")) %>%
  group_by(end_year, subgroup) %>%
  slice_max(n_students, n = 1) %>%
  ungroup()
stopifnot(nrow(demo) > 0)

print(demo %>% mutate(pct = round(pct * 100, 1)) %>% select(end_year, subgroup, n_students, pct))
#> # A tibble: 36 × 4
#>    end_year subgroup n_students   pct
#>       <int> <chr>         <dbl> <dbl>
#>  1     2016 asian         61973   6.5
#>  2     2016 black         83902   8.8
#>  3     2016 hispanic     177338  18.6
#>  4     2016 white        597800  62.7
#>  5     2017 asian         63901   6.7
#>  6     2017 black         84884   8.9
#>  7     2017 hispanic     185027  19.4
#>  8     2017 white        584648  61.3
#>  9     2018 asian         65828   6.9
#> 10     2018 black         85863   9  
#> # ℹ 26 more rows

ggplot(demo, aes(x = end_year, y = pct * 100, color = subgroup)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  scale_color_manual(values = colors,
                     labels = c("Asian", "Black", "Hispanic", "White")) +
  labs(title = "Massachusetts Demographics Shift",
       subtitle = "Percent of student population by race/ethnicity",
       x = "School Year", y = "Percent", color = "") +
  theme_readme()

4. Cape and Islands enrollment declining

Cape Cod and the Islands have seen declining enrollment as seasonal communities age.

cape <- enr %>%
  filter(is_district, grepl("Barnstable|Nauset|Monomoy|Martha|Nantucket", district_name),
         subgroup == "total_enrollment", grade_level == "TOTAL") %>%
  group_by(end_year) %>%
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop")
stopifnot(nrow(cape) > 0)

print(cape)
#> # A tibble: 9 × 2
#>   end_year n_students
#>      <int>      <dbl>
#> 1     2016      11041
#> 2     2017      10983
#> 3     2018      10943
#> 4     2019      10974
#> 5     2020      10917
#> 6     2021      10433
#> 7     2022      10423
#> 8     2023      10571
#> 9     2024      10338

ggplot(cape, aes(x = end_year, y = n_students)) +
  geom_line(linewidth = 1.5, color = colors["total"]) +
  geom_point(size = 3, color = colors["total"]) +
  scale_y_continuous(labels = comma) +
  labs(title = "Cape and Islands Enrollment",
       subtitle = "Combined enrollment for Cape Cod and Island districts",
       x = "School Year", y = "Students") +
  theme_readme()

5. COVID crushed kindergarten

Kindergarten enrollment dropped from 66,000 in 2018 to 57,500 in 2021 and has only partially recovered.

k_trend <- enr %>%
  filter(is_state, subgroup == "total_enrollment",
         grade_level %in% c("K", "01", "06", "12")) %>%
  group_by(end_year, grade_level) %>%
  slice_max(n_students, n = 1) %>%
  ungroup() %>%
  mutate(grade_label = case_when(
    grade_level == "K" ~ "Kindergarten",
    grade_level == "01" ~ "Grade 1",
    grade_level == "06" ~ "Grade 6",
    grade_level == "12" ~ "Grade 12"
  ))
stopifnot(nrow(k_trend) > 0)

print(k_trend %>% select(end_year, grade_level, n_students))
#> # A tibble: 36 × 3
#>    end_year grade_level n_students
#>       <int> <chr>            <dbl>
#>  1     2016 01               69606
#>  2     2016 06               71129
#>  3     2016 12               69298
#>  4     2016 K                66024
#>  5     2017 01               68167
#>  6     2017 06               70494
#>  7     2017 12               69397
#>  8     2017 K                66131
#>  9     2018 01               68039
#> 10     2018 06               71262
#> # ℹ 26 more rows

ggplot(k_trend, aes(x = end_year, y = n_students, color = grade_label)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  scale_y_continuous(labels = comma) +
  labs(title = "COVID Impact on Grade-Level Enrollment",
       subtitle = "Kindergarten hit hardest in 2020-21",
       x = "School Year", y = "Students", color = "") +
  theme_readme()

6. Charter schools serving 47,000+ students

Massachusetts charter enrollment has grown, with 72 charter campuses enrolling over 47,000 students in 2024.

charter_current <- enr_current %>%
  filter(is_charter, is_campus, subgroup == "total_enrollment", grade_level == "TOTAL")
stopifnot(nrow(charter_current) > 0)

print(charter_current %>%
  summarize(
    total_charter = sum(n_students, na.rm = TRUE),
    n_schools = n()
  ))
#>   total_charter n_schools
#> 1         47147        72

charter <- enr %>%
  filter(is_charter, is_campus, subgroup == "total_enrollment", grade_level == "TOTAL") %>%
  group_by(end_year) %>%
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop")

ggplot(charter, aes(x = end_year, y = n_students)) +
  geom_line(linewidth = 1.5, color = colors["total"]) +
  geom_point(size = 3, color = colors["total"]) +
  scale_y_continuous(labels = comma) +
  labs(title = "Massachusetts Charter School Enrollment",
       subtitle = "Total students across all charter schools",
       x = "School Year", y = "Students") +
  theme_readme()

7. Over 42% of students are economically disadvantaged

The economically disadvantaged rate jumped from 27% in 2016 to over 43% in 2022, partly due to a definition change.

econ <- enr %>%
  filter(is_state, subgroup == "econ_disadv", grade_level == "TOTAL") %>%
  group_by(end_year) %>%
  slice_max(n_students, n = 1) %>%
  ungroup()
stopifnot(nrow(econ) > 0)

print(econ %>% mutate(pct = round(pct * 100, 1)) %>% select(end_year, n_students, pct))
#> # A tibble: 9 × 3
#>   end_year n_students   pct
#>      <int>      <dbl> <dbl>
#> 1     2016     260998  27.4
#> 2     2017     288465  30.2
#> 3     2018     305203  32  
#> 4     2019     297120  31.2
#> 5     2020     310873  32.8
#> 6     2021     333843  36.6
#> 7     2022     399140  43.8
#> 8     2023     386060  42.3
#> 9     2024     385697  42.2

ggplot(econ, aes(x = end_year, y = pct * 100)) +
  geom_line(linewidth = 1.5, color = colors["total"]) +
  geom_point(size = 3, color = colors["total"]) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  labs(title = "Economically Disadvantaged Students",
       subtitle = "Percent of MA students classified as economically disadvantaged",
       x = "School Year", y = "Percent") +
  theme_readme()

8. English learners are concentrated in cities

Phoenix Academy Chelsea and Lowell Community Charter have the highest EL rates, but large urban districts like Lynn and Chelsea serve the most EL students by volume.

el <- enr_current %>%
  filter(is_district, subgroup == "lep", grade_level == "TOTAL") %>%
  arrange(desc(n_students)) %>%
  head(10)
stopifnot(nrow(el) > 0)

print(el %>% mutate(pct = round(pct * 100, 1)) %>% select(district_name, n_students, pct))
#>    district_name n_students  pct
#> 1         Boston      15408 33.7
#> 2      Worcester       7413 30.4
#> 3           Lynn       6951 43.4
#> 4       Lawrence       5481 42.1
#> 5       Brockton       4648 31.1
#> 6         Lowell       4092 28.7
#> 7    Springfield       4056 17.1
#> 8     Framingham       3407 37.3
#> 9    New Bedford       3211 25.7
#> 10       Everett       3060 41.7

el <- el %>%
  mutate(district_label = reorder(district_name, n_students))

ggplot(el, aes(x = district_label, y = n_students)) +
  geom_col(fill = colors["total"]) +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(title = "English Learners by District",
       subtitle = "Top 10 districts by number of EL students",
       x = "", y = "English Learner Students") +
  theme_readme()

9. Wealthy suburbs remain stable

Newton, Lexington, and Wellesley have maintained relatively stable enrollment even as urban districts decline.

suburbs <- enr %>%
  filter(is_district, district_id %in% c("0207", "0155", "0317"),
         subgroup == "total_enrollment", grade_level == "TOTAL")
stopifnot(nrow(suburbs) > 0)

print(suburbs %>% select(end_year, district_name, n_students))
#>    end_year district_name n_students
#> 1      2016     Lexington       6925
#> 2      2016        Newton      12670
#> 3      2016     Wellesley       5075
#> 4      2017     Lexington       7072
#> 5      2017        Newton      12827
#> 6      2017     Wellesley       5018
#> 7      2018     Lexington       7246
#> 8      2018        Newton      12928
#> 9      2018     Wellesley       5006
#> 10     2019     Lexington       7259
#> 11     2019        Newton      12883
#> 12     2019     Wellesley       4963
#> 13     2020     Lexington       7190
#> 14     2020        Newton      12779
#> 15     2020     Wellesley       4862
#> 16     2021     Lexington       6901
#> 17     2021        Newton      12024
#> 18     2021     Wellesley       4432
#> 19     2022     Lexington       6790
#> 20     2022        Newton      11974
#> 21     2022     Wellesley       4290
#> 22     2023     Lexington       6845
#> 23     2023        Newton      11882
#> 24     2023     Wellesley       4158
#> 25     2024     Lexington       6805
#> 26     2024        Newton      11752
#> 27     2024     Wellesley       4101

ggplot(suburbs, aes(x = end_year, y = n_students, color = district_name)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  scale_y_continuous(labels = comma) +
  labs(title = "Suburban Ring Enrollment",
       subtitle = "Newton, Lexington, and Wellesley",
       x = "School Year", y = "Students", color = "") +
  theme_readme()

10. Regional districts serve rural Massachusetts

Regional school districts consolidate resources across rural communities. Greater Lowell Voc-Tech leads with over 2,300 students.

regional <- enr_current %>%
  filter(is_district, grepl("Regional", district_name),
         subgroup == "total_enrollment", grade_level == "TOTAL") %>%
  arrange(desc(n_students)) %>%
  head(10)
stopifnot(nrow(regional) > 0)

print(regional %>% select(district_name, n_students))
#>                                           district_name n_students
#> 1          Greater Lowell Regional Vocational Technical       2314
#> 2     Greater New Bedford Regional Vocational Technical       2147
#> 3        Greater Lawrence Regional Vocational Technical       1774
#> 4                      Monomoy Regional School District       1746
#> 5             Mystic Valley Regional Charter (District)       1653
#> 6            Southeastern Regional Vocational Technical       1597
#> 7                Foxborough Regional Charter (District)       1520
#> 8      Greater Fall River Regional Vocational Technical       1443
#> 9            Montachusett Regional Vocational Technical       1428
#> 10 Northeast Metropolitan Regional Vocational Technical       1343

regional <- regional %>%
  mutate(district_label = reorder(district_name, n_students))

ggplot(regional, aes(x = district_label, y = n_students)) +
  geom_col(fill = colors["total"]) +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(title = "Regional School Districts",
       subtitle = "Top 10 regional districts by enrollment",
       x = "", y = "Students") +
  theme_readme()

11. Springfield is 69% Hispanic

Springfield has undergone a dramatic demographic transformation. Hispanic students now make up 69% of enrollment, up from 65% in 2016, while white students have dropped to under 8%.

springfield_demo <- enr %>%
  filter(is_district, district_id == "0281", grade_level == "TOTAL",
         subgroup %in% c("white", "black", "hispanic", "asian"))
stopifnot(nrow(springfield_demo) > 0)

print(springfield_demo %>% mutate(pct = round(pct * 100, 1)) %>% select(end_year, subgroup, pct))
#>    end_year subgroup  pct
#> 1      2016    white 12.3
#> 2      2016    black 19.4
#> 3      2016 hispanic 64.5
#> 4      2016    asian  2.5
#> 5      2017    white 11.8
#> 6      2017    black 19.5
#> 7      2017 hispanic 64.8
#> 8      2017    asian  2.4
#> 9      2018    white 11.2
#> 10     2018    black 19.6
#> 11     2018 hispanic 65.0
#> 12     2018    asian  2.4
#> 13     2019    white 10.2
#> 14     2019    black 18.9
#> 15     2019 hispanic 66.6
#> 16     2019    asian  2.3
#> 17     2020    white  9.7
#> 18     2020    black 18.9
#> 19     2020 hispanic 67.1
#> 20     2020    asian  2.1
#> 21     2021    white  9.1
#> 22     2021    black 18.6
#> 23     2021 hispanic 67.9
#> 24     2021    asian  2.1
#> 25     2022    white  8.7
#> 26     2022    black 18.2
#> 27     2022 hispanic 68.3
#> 28     2022    asian  2.0
#> 29     2023    white  8.3
#> 30     2023    black 17.6
#> 31     2023 hispanic 69.0
#> 32     2023    asian  1.8
#> 33     2024    white  7.9
#> 34     2024    black 17.6
#> 35     2024 hispanic 69.3
#> 36     2024    asian  1.7

ggplot(springfield_demo, aes(x = end_year, y = pct * 100, color = subgroup)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  scale_color_manual(values = colors,
                     labels = c("Asian", "Black", "Hispanic", "White")) +
  labs(title = "Springfield Demographics Transformation",
       subtitle = "Hispanic students now majority in state's third-largest district",
       x = "School Year", y = "Percent", color = "") +
  theme_readme()

12. Fall River and New Bedford: SouthCoast holds steady

The two largest SouthCoast cities have held surprisingly steady since 2016, with New Bedford around 12,500 and Fall River around 10,000-10,600.

southcoast <- enr %>%
  filter(is_district, district_id %in% c("0095", "0201"),  # Fall River, New Bedford
         subgroup == "total_enrollment", grade_level == "TOTAL")
stopifnot(nrow(southcoast) > 0)

print(southcoast %>% select(end_year, district_name, n_students))
#>    end_year district_name n_students
#> 1      2016    Fall River      10123
#> 2      2016   New Bedford      12681
#> 3      2017    Fall River      10163
#> 4      2017   New Bedford      12640
#> 5      2018    Fall River      10128
#> 6      2018   New Bedford      12626
#> 7      2019    Fall River      10120
#> 8      2019   New Bedford      12845
#> 9      2020    Fall River      10229
#> 10     2020   New Bedford      12880
#> 11     2021    Fall River       9998
#> 12     2021   New Bedford      12565
#> 13     2022    Fall River      10268
#> 14     2022   New Bedford      12504
#> 15     2023    Fall River      10447
#> 16     2023   New Bedford      12522
#> 17     2024    Fall River      10656
#> 18     2024   New Bedford      12488

ggplot(southcoast, aes(x = end_year, y = n_students, color = district_name)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  scale_y_continuous(labels = comma) +
  labs(title = "SouthCoast Enrollment Trends",
       subtitle = "Fall River and New Bedford enrollment over time",
       x = "School Year", y = "Students", color = "") +
  theme_readme()

13. Worcester suburbs stable but not growing

Shrewsbury, Westborough, and Grafton have maintained consistent enrollment around 3,000-6,000 students each.

worcester_suburbs <- enr %>%
  filter(is_district, district_id %in% c("0271", "0321", "0110"),  # Shrewsbury, Westborough, Grafton
         subgroup == "total_enrollment", grade_level == "TOTAL")
stopifnot(nrow(worcester_suburbs) > 0)

print(worcester_suburbs %>% select(end_year, district_name, n_students))
#>    end_year district_name n_students
#> 1      2016       Grafton       3206
#> 2      2016    Shrewsbury       6045
#> 3      2016   Westborough       3672
#> 4      2017       Grafton       3189
#> 5      2017    Shrewsbury       6191
#> 6      2017   Westborough       3805
#> 7      2018       Grafton       3155
#> 8      2018    Shrewsbury       6214
#> 9      2018   Westborough       3926
#> 10     2019       Grafton       3173
#> 11     2019    Shrewsbury       6207
#> 12     2019   Westborough       3925
#> 13     2020       Grafton       3205
#> 14     2020    Shrewsbury       6268
#> 15     2020   Westborough       3942
#> 16     2021       Grafton       3121
#> 17     2021    Shrewsbury       5974
#> 18     2021   Westborough       3825
#> 19     2022       Grafton       3138
#> 20     2022    Shrewsbury       5885
#> 21     2022   Westborough       3856
#> 22     2023       Grafton       3080
#> 23     2023    Shrewsbury       5892
#> 24     2023   Westborough       3830
#> 25     2024       Grafton       3050
#> 26     2024    Shrewsbury       5921
#> 27     2024   Westborough       3887

ggplot(worcester_suburbs, aes(x = end_year, y = n_students, color = district_name)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  scale_y_continuous(labels = comma) +
  labs(title = "Worcester Suburban Enrollment",
       subtitle = "Shrewsbury, Westborough, and Grafton",
       x = "School Year", y = "Students", color = "") +
  theme_readme()

14. Western MA faces population decline

The Pioneer Valley has seen persistent enrollment declines. Northampton, Amherst, and Pittsfield combined dropped from 9,400 in 2016 to 8,350 in 2024.

# Major Western MA districts
western_ma <- enr %>%
  filter(is_district,
         district_id %in% c("0210", "0008", "0236"),  # Northampton, Amherst, Pittsfield
         subgroup == "total_enrollment", grade_level == "TOTAL") %>%
  group_by(end_year) %>%
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop")
stopifnot(nrow(western_ma) > 0)

print(western_ma)
#> # A tibble: 9 × 2
#>   end_year n_students
#>      <int>      <dbl>
#> 1     2016       9447
#> 2     2017       9310
#> 3     2018       9268
#> 4     2019       9186
#> 5     2020       9052
#> 6     2021       8620
#> 7     2022       8624
#> 8     2023       8532
#> 9     2024       8354

ggplot(western_ma, aes(x = end_year, y = n_students)) +
  geom_line(linewidth = 1.5, color = colors["total"]) +
  geom_point(size = 3, color = colors["total"]) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  scale_y_continuous(labels = comma) +
  labs(title = "Western Massachusetts Enrollment",
       subtitle = "Combined enrollment for Northampton, Amherst, and Pittsfield",
       x = "School Year", y = "Students") +
  theme_readme()

15. Special education rates vary widely by district

Some charter and vocational schools have special ed rates above 40%, while the statewide average is around 20%.

sped_by_district <- enr_current %>%
  filter(is_district, subgroup == "special_ed", grade_level == "TOTAL") %>%
  arrange(desc(pct)) %>%
  head(15)
stopifnot(nrow(sped_by_district) > 0)

print(sped_by_district %>% mutate(pct = round(pct * 100, 1)) %>%
  select(district_name, n_students, pct))
#>                                                         district_name
#> 1                               Map Academy Charter School (District)
#> 2                   Boston Day and Evening Academy Charter (District)
#> 3                                                           Hawlemont
#> 4                         Lowell Middlesex Academy Charter (District)
#> 5                             Minuteman Regional Vocational Technical
#> 6                           Northampton-Smith Vocational Agricultural
#> 7                      UP Academy Charter School of Boston (District)
#> 8  Phoenix Academy Public Charter High School, Springfield (District)
#> 9                                                                Rowe
#> 10    Phoenix Academy Public Charter High School, Lawrence (District)
#> 11                       Nashoba Valley Regional Vocational Technical
#> 12                         Libertas Academy Charter School (District)
#> 13         Boston Green Academy Horace Mann Charter School (District)
#> 14                                                            Holyoke
#> 15                           Tri-County Regional Vocational Technical
#>    n_students  pct
#> 1         154 55.6
#> 2         140 47.8
#> 3          24 42.9
#> 4          44 41.5
#> 5         271 39.7
#> 6         214 37.6
#> 7          61 36.7
#> 8          55 34.6
#> 9          21 34.4
#> 10         38 33.3
#> 11        250 32.3
#> 12        166 32.0
#> 13        145 31.6
#> 14       1527 31.2
#> 15        298 30.9

sped_by_district <- sped_by_district %>%
  mutate(district_label = reorder(district_name, pct))

ggplot(sped_by_district, aes(x = district_label, y = pct * 100)) +
  geom_col(fill = colors["total"]) +
  coord_flip() +
  labs(title = "Special Education Rates by District",
       subtitle = "Top 15 districts by percent of students receiving SPED services",
       x = "", y = "Percent SPED") +
  theme_readme()

16. Four-year graduation rates climbed from 80% to 90%

Massachusetts’ 4-year graduation rate rose from 80% in 2006 to a peak of 90% in 2022 before dipping to 88% in 2024.

grad <- fetch_graduation_multi(2006:2024, use_cache = TRUE)

grad_max_year <- max(grad$end_year)
if (grad_max_year < 2024) {
  warning("Graduation data for 2024 unavailable; most recent year is ", grad_max_year)
}

grad_state <- grad %>%
  filter(is_state, subgroup == "all", cohort_type == "4-year") %>%
  select(end_year, grad_rate, cohort_count) %>%
  mutate(rate_pct = round(grad_rate * 100, 1))
stopifnot(nrow(grad_state) > 0)

print(grad_state)
#>    end_year grad_rate cohort_count rate_pct
#> 1      2006     0.799        74380     79.9
#> 2      2007     0.809        75912     80.9
#> 3      2008     0.812        77383     81.2
#> 4      2009     0.815        77038     81.5
#> 5      2010     0.821        76308     82.1
#> 6      2011     0.834        74307     83.4
#> 7      2012     0.847        73483     84.7
#> 8      2013     0.850        74537     85.0
#> 9      2014     0.861        73168     86.1
#> 10     2015     0.873        72474     87.3
#> 11     2016     0.875        74045     87.5
#> 12     2017     0.883        73249     88.3
#> 13     2018     0.879        74641     87.9
#> 14     2019     0.880        75067     88.0
#> 15     2020     0.890        74232     89.0
#> 16     2021     0.898        74226     89.8
#> 17     2022     0.901        73901     90.1
#> 18     2023     0.892        72602     89.2
#> 19     2024     0.884        73043     88.4

ggplot(grad_state, aes(x = end_year, y = rate_pct)) +
  geom_line(linewidth = 1.5, color = colors["total"]) +
  geom_point(size = 3, color = colors["total"]) +
  geom_vline(xintercept = 2020.5, linetype = "dashed", color = "red", alpha = 0.5) +
  scale_y_continuous(limits = c(70, 100)) +
  labs(title = "Massachusetts 4-Year Graduation Rate",
       subtitle = "State-level graduation rate trend",
       x = "School Year", y = "Graduation Rate (%)") +
  theme_readme()

17. Urban-suburban graduation gaps persist

Boston (80%) trails Newton (95%) by 15 percentage points, reflecting opportunity gaps across the state.

# Use most recent year available from the multi-year fetch
grad_2024 <- grad %>% filter(end_year == grad_max_year)

grad_districts <- grad_2024 %>%
  filter(is_district,
         district_name %in% c("Boston", "Springfield", "Worcester", "Newton"),
         subgroup == "all",
         cohort_type == "4-year") %>%
  select(district_name, grad_rate, cohort_count) %>%
  mutate(rate_pct = round(grad_rate * 100, 1))
stopifnot(nrow(grad_districts) > 0)

print(grad_districts)
#>   district_name grad_rate cohort_count rate_pct
#> 1        Boston     0.797         3711     79.7
#> 2        Newton     0.954          963     95.4
#> 3   Springfield     0.786         1841     78.6
#> 4     Worcester     0.860         1990     86.0

grad_districts <- grad_districts %>%
  mutate(district_label = reorder(district_name, grad_rate))

ggplot(grad_districts, aes(x = district_label, y = rate_pct)) +
  geom_col(fill = colors["total"]) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 100)) +
  labs(title = "Graduation Rates: Urban vs Suburban",
       subtitle = "2024 4-year graduation rates",
       x = "", y = "Graduation Rate (%)") +
  theme_readme()

18. Special populations face graduation challenges

English learners (67%) and students with disabilities (75%) graduate at lower rates than peers.

grad_special <- grad_2024 %>%
  filter(is_state,
         subgroup %in% c("all", "english_learner", "special_ed", "low_income"),
         cohort_type == "4-year") %>%
  select(subgroup, grad_rate, cohort_count) %>%
  mutate(rate_pct = round(grad_rate * 100, 1))
stopifnot(nrow(grad_special) > 0)

print(grad_special)
#>          subgroup grad_rate cohort_count rate_pct
#> 1             all     0.884        73043     88.4
#> 2      special_ed     0.754        15038     75.4
#> 3      low_income     0.816        39273     81.6
#> 4 english_learner     0.667         7193     66.7

grad_special <- grad_special %>%
  mutate(
    subgroup_label = case_when(
      subgroup == "all" ~ "All Students",
      subgroup == "english_learner" ~ "English Learners",
      subgroup == "special_ed" ~ "Students with Disabilities",
      subgroup == "low_income" ~ "Low Income"
    ),
    subgroup_label = reorder(subgroup_label, grad_rate))

ggplot(grad_special, aes(x = subgroup_label, y = rate_pct)) +
  geom_col(fill = colors["total"]) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 100)) +
  labs(title = "Graduation Rates by Subgroup",
       subtitle = "2024 4-year graduation rates",
       x = "", y = "Graduation Rate (%)") +
  theme_readme()

sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] scales_1.4.0       dplyr_1.2.0        ggplot2_4.0.2      maschooldata_0.1.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.2     tidyselect_1.2.1  
#>  [5] jquerylib_0.1.4    systemfonts_1.3.2  textshaping_1.0.5  yaml_2.3.12       
#>  [9] fastmap_1.2.0      R6_2.6.1           labeling_0.4.3     generics_0.1.4    
#> [13] curl_7.0.0         knitr_1.51         tibble_3.3.1       desc_1.4.3        
#> [17] bslib_0.10.0       pillar_1.11.1      RColorBrewer_1.1-3 rlang_1.1.7       
#> [21] utf8_1.2.6         cachem_1.1.0       xfun_0.56          fs_1.6.7          
#> [25] sass_0.4.10        S7_0.2.1           cli_3.6.5          pkgdown_2.2.0     
#> [29] withr_3.0.2        magrittr_2.0.4     digest_0.6.39      grid_4.5.2        
#> [33] rappdirs_0.3.4     lifecycle_1.0.5    vctrs_0.7.1        evaluate_1.0.5    
#> [37] glue_1.8.0         farver_2.1.2       codetools_0.2-20   ragg_1.5.1        
#> [41] foreign_0.8-90     httr_1.4.8         rmarkdown_2.30     purrr_1.2.1       
#> [45] tools_4.5.2        pkgconfig_2.0.3    htmltools_0.5.9