Skip to contents

This vignette explores Arizona’s public school enrollment data from the Arizona Department of Education.

Data available: 2018, 2019, 2024, 2025, and 2026. Years 2020-2023 are not available because Cloudflare protection on the source site blocks automated Excel downloads. The analyses below use 2018, 2019, 2024, and 2025.


1. Arizona enrollment fell despite population boom

While Arizona’s population grew significantly from 2018-2025, public school enrollment actually declined - from 1.11 million to 1.10 million students. Enrollment peaked in 2019 (1.14M) then fell 3.7% by 2025. This suggests more families are choosing private schools, homeschooling, or leaving the public system.

# Load every year of enrollment data used in this vignette.
# use_cache = TRUE presumably reuses previously downloaded files -- confirm
# against the fetch_enr_multi() documentation.
enr <- fetch_enr_multi(c(2018, 2019, 2024, 2025), use_cache = TRUE)

# Statewide total enrollment per year, plus the change from the previous
# *available* year. lag() is purely positional, so the rows are sorted by
# end_year first instead of relying on whatever order fetch_enr_multi()
# returns them in. The 2019 -> 2024 step spans five school years, not one.
state_enr <- enr |>
  filter(is_state,
         subgroup == "total_enrollment", grade_level == "TOTAL") |>
  select(end_year, n_students) |>
  arrange(end_year) |>
  mutate(change = n_students - lag(n_students),
         pct_change = round(change / lag(n_students) * 100, 1))
# Fail loudly if the filter matched nothing (e.g. a schema change upstream).
stopifnot(nrow(state_enr) > 0)
state_enr
#>   end_year n_students change pct_change
#> 1     2018    1112682     NA         NA
#> 2     2019    1141209  28527        2.6
#> 3     2024    1115111 -26098       -2.3
#> 4     2025    1099529 -15582       -1.4
# Statewide total-enrollment rows, one per available year, for the bar chart.
state_enr_chart <- filter(
  enr,
  is_state,
  grade_level == "TOTAL",
  subgroup == "total_enrollment"
)
stopifnot(nrow(state_enr_chart) > 0)

ggplot(state_enr_chart, aes(x = factor(end_year), y = n_students)) +
  geom_col(fill = "#BF0A30", width = 0.6) +
  # Print the exact count just above each bar.
  geom_text(aes(label = scales::comma(n_students)), vjust = -0.5, size = 4) +
  labs(
    title = "Arizona Public School Enrollment",
    subtitle = "Essentially flat from 2018 to 2025 despite population growth",
    x = "School Year (ending)",
    y = "Total Students"
  ) +
  # The upper limit leaves headroom for the labels above the tallest bar.
  scale_y_continuous(labels = scales::comma, limits = c(0, 1300000))


2. Hispanic students now 49% of Arizona schools

Hispanic students grew from 45.7% in 2018 to 48.8% in 2025, while White students declined from 38.0% to 32.9%. Arizona’s schools are becoming increasingly diverse.

# State-level race/ethnicity counts for every year, each expressed as a share
# of that year's total enrollment.
demographics <- enr |>
  filter(
    is_state,
    grade_level == "TOTAL",
    subgroup %in% c("hispanic", "white", "black", "asian",
                    "native_american", "multiracial", "total_enrollment")
  ) |>
  group_by(end_year) |>
  # The total_enrollment row is kept up to this point so it can act as the
  # per-year denominator; it is dropped once the shares are computed.
  mutate(
    pct = round(n_students / n_students[subgroup == "total_enrollment"] * 100, 1)
  ) |>
  filter(subgroup != "total_enrollment")
stopifnot(nrow(demographics) > 0)

# Largest groups first within each year.
demographics |>
  arrange(end_year, desc(n_students)) |>
  select(end_year, subgroup, n_students, pct)
#> # A tibble: 24 × 4
#> # Groups:   end_year [4]
#>    end_year subgroup        n_students   pct
#>       <dbl> <chr>                <dbl> <dbl>
#>  1     2018 hispanic            508121  45.7
#>  2     2018 white               422414  38  
#>  3     2018 black                58875   5.3
#>  4     2018 native_american      48579   4.4
#>  5     2018 multiracial          33218   3  
#>  6     2018 asian                31283   2.8
#>  7     2019 hispanic            520535  45.6
#>  8     2019 white               430623  37.7
#>  9     2019 black                61568   5.4
#> 10     2019 native_american      49650   4.4
#> # ℹ 14 more rows
# 2025 slice of the demographic shares. The explicit factor levels fix which
# Set2 colour each group receives; bar order is set separately by reorder().
demo_2025 <- demographics |>
  filter(end_year == 2025) |>
  mutate(
    subgroup = factor(
      subgroup,
      levels = c("hispanic", "white", "black",
                 "native_american", "asian", "multiracial")
    )
  )
stopifnot(nrow(demo_2025) > 0)

ggplot(
  demo_2025,
  aes(x = reorder(subgroup, -n_students), y = n_students, fill = subgroup)
) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Arizona School Enrollment by Race/Ethnicity (2025)",
    subtitle = "Hispanic students are the largest group at 49%",
    x = NULL,
    y = "Number of Students"
  ) +
  # The x axis already names each group, so the fill legend is redundant.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )


3. Queen Creek doubled in size while Mesa lost more than 7,100 students

Queen Creek Unified grew 112% (from 7,095 to 15,034 students) as new subdivisions opened in the southeast Valley. Meanwhile, Mesa Unified - the state’s largest district - lost 7,156 students (-11.4%).

# One row per district with its enrollment in each year side by side, plus
# the absolute and percentage change between 2018 and 2025.
growth <- enr |>
  filter(
    is_district,
    grade_level == "TOTAL",
    subgroup == "total_enrollment"
  ) |>
  group_by(end_year, district_name) |>
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop") |>
  # Spread the years into columns named y<end_year> (y2018, y2025, ...).
  pivot_wider(
    names_from = end_year,
    values_from = n_students,
    names_prefix = "y"
  ) |>
  # Keep districts present in both endpoint years that had at least 1,000
  # students in 2018, so tiny baselines do not dominate the percentages.
  filter(!is.na(y2018) & !is.na(y2025) & y2018 >= 1000) |>
  mutate(
    change = y2025 - y2018,
    pct_change = round((y2025 / y2018 - 1) * 100, 1)
  ) |>
  arrange(desc(change))
stopifnot(nrow(growth) > 0)

# Ten largest absolute gains.
growth |>
  head(10) |>
  select(district_name, y2018, y2025, change, pct_change)
#> # A tibble: 10 × 5
#>    district_name                           y2018 y2025 change pct_change
#>    <chr>                                   <dbl> <dbl>  <dbl>      <dbl>
#>  1 Queen Creek Unified District             7095 15034   7939      112. 
#>  2 American Leadership Academy, Inc.        7904 15266   7362       93.1
#>  3 Maricopa Unified School District         6661  9504   2843       42.7
#>  4 Tolleson Union High School District     11152 13901   2749       24.7
#>  5 Leman Academy of Excellence, Inc.        2042  4592   2550      125. 
#>  6 Agua Fria Union High School District     7766 10074   2308       29.7
#>  7 American Virtual Academy                 4227  6289   2062       48.8
#>  8 Saddle Mountain Unified School District  1630  3256   1626       99.8
#>  9 Buckeye Union High School District       4014  5634   1620       40.4
#> 10 GAR, LLC dba Student Choice High School  1054  2370   1316      125.
# The ten districts with the largest absolute enrollment gains. `growth` is
# already sorted by descending `change`, so head(10) picks them directly.
growth_top10 <- growth |>
  head(10) |>
  # Shorten axis labels by cutting everything from " District" or " Unified".
  mutate(district_name = gsub(" District.*$| Unified.*$", "", district_name))
stopifnot(nrow(growth_top10) > 0)
growth_top10 |>
  ggplot(aes(x = reorder(district_name, change), y = change, fill = change > 0)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("TRUE" = "#2E8B57", "FALSE" = "#CD5C5C")) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    # The ranking is by students added, not by growth rate, so the title says
    # "largest gains" (matching the "largest declines" chart) rather than
    # "fastest growing".
    title = "Top 10 Largest Enrollment Gains (2018-2025)",
    subtitle = "Queen Creek added 7,939 students (+112%)",
    x = NULL,
    y = "Change in Enrollment"
  ) +
  theme(legend.position = "none")


4. Mesa, Tucson, and Paradise Valley lead enrollment losses

The three largest enrollment declines in Arizona are all in established urban districts: Mesa (-7,156), Tucson (-5,265), and Paradise Valley (-5,081). These districts face competition from charters and demographic shifts.

# Ascending sort on `change` puts the largest losses first.
growth |>
  arrange(change) |>
  head(10) |>
  select(district_name, y2018, y2025, change, pct_change)
#> # A tibble: 10 × 5
#>    district_name                         y2018 y2025 change pct_change
#>    <chr>                                 <dbl> <dbl>  <dbl>      <dbl>
#>  1 Mesa Unified District                 62756 55600  -7156      -11.4
#>  2 Tucson Unified District               45474 40209  -5265      -11.6
#>  3 Paradise Valley Unified District      31245 26164  -5081      -16.3
#>  4 Glendale Elementary District          12513  8547  -3966      -31.7
#>  5 Cartwright Elementary District        17292 13375  -3917      -22.7
#>  6 Washington Elementary School District 22577 18765  -3812      -16.9
#>  7 Kyrene Elementary District            16773 13247  -3526      -21  
#>  8 Chandler Unified District #80         44429 41349  -3080       -6.9
#>  9 Alhambra Elementary District          12548  9709  -2839      -22.6
#> 10 Phoenix Elementary District            6481  3943  -2538      -39.2
# The ten districts with the largest absolute enrollment losses.
decline_top10 <- growth |>
  arrange(change) |>
  head(10) |>
  # Shorten axis labels by cutting everything from " District" or " Unified".
  mutate(district_name = gsub(" District.*$| Unified.*$", "", district_name))
stopifnot(nrow(decline_top10) > 0)
decline_top10 |>
  # `change` is negative here; it is negated so bars show students lost as a
  # positive length. The bar colour is a constant set in geom_col(), so no
  # fill aesthetic is mapped.
  ggplot(aes(x = reorder(district_name, -change), y = -change)) +
  geom_col(fill = "#CD5C5C") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Top 10 Largest Enrollment Declines (2018-2025)",
    subtitle = "Mesa lost 7,156 students (-11.4%)",
    x = NULL,
    y = "Students Lost"
  ) +
  theme(legend.position = "none")


5. Arizona has 40% more seniors than kindergartners

There are 98,216 12th graders but only 70,164 kindergartners - a 40% difference. This “inverted pyramid” could signal declining birth rates or families with young children leaving public schools.

# Display order for grade levels, pre-kindergarten through 12th grade.
grade_order <- c(
  "PK", "K",
  "01", "02", "03", "04", "05", "06",
  "07", "08", "09", "10", "11", "12"
)

# 2025 enrollment per grade, summed across all district rows.
grades <- enr |>
  filter(
    is_district,
    end_year == 2025,
    subgroup == "total_enrollment",
    grade_level %in% grade_order
  ) |>
  group_by(grade_level) |>
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop") |>
  # Convert to a factor so sorting and plotting follow grade_order instead of
  # alphabetical order.
  mutate(grade_level = factor(grade_level, levels = grade_order))
stopifnot(nrow(grades) > 0)

arrange(grades, grade_level)
#> # A tibble: 14 × 2
#>    grade_level n_students
#>    <fct>            <dbl>
#>  1 PK               19815
#>  2 K                70164
#>  3 01               73928
#>  4 02               77811
#>  5 03               81507
#>  6 04               79398
#>  7 05               82241
#>  8 06               82458
#>  9 07               82350
#> 10 08               83408
#> 11 09               85720
#> 12 10               88332
#> 13 11               89504
#> 14 12               98216
stopifnot(nrow(grades) > 0)
# One bar per grade, with a dashed reference line at the kindergarten count
# so every other grade can be compared against it.
ggplot(grades, aes(x = grade_level, y = n_students)) +
  geom_col(fill = "#4682B4") +
  geom_hline(
    yintercept = with(grades, n_students[grade_level == "K"]),
    linetype = "dashed",
    color = "red",
    linewidth = 1
  ) +
  labs(
    title = "Enrollment by Grade Level (2025)",
    subtitle = "12th grade has 40% more students than kindergarten (dashed line)",
    x = "Grade Level",
    y = "Number of Students"
  ) +
  scale_y_continuous(labels = scales::comma)


6. Top 27 districts educate half of Arizona’s students

Student enrollment is heavily concentrated: just 27 of Arizona’s 630 districts educate 50% of all students. The top 10 districts alone serve 30% of students.

# 2025 districts ranked from largest to smallest, with the running share of
# all district enrollment accounted for at each rank.
concentration <- enr |>
  filter(
    is_district,
    end_year == 2025,
    grade_level == "TOTAL",
    subgroup == "total_enrollment"
  ) |>
  arrange(desc(n_students)) |>
  mutate(
    cum_students = cumsum(n_students),
    cum_pct = round(cum_students / sum(n_students) * 100, 1),
    rank = row_number()
  )
stopifnot(nrow(concentration) > 0)

# Overall totals across the ranked districts.
total_students <- sum(concentration[["n_students"]])
n_districts <- nrow(concentration)

# The fifteen largest districts and their cumulative share.
concentration |>
  head(15) |>
  select(rank, district_name, n_students, cum_pct)
#>    rank                         district_name n_students cum_pct
#> 1     1                 Mesa Unified District      55600     5.1
#> 2     2         Chandler Unified District #80      41349     8.8
#> 3     3               Tucson Unified District      40209    12.5
#> 4     4        Peoria Unified School District      34373    15.6
#> 5     5          Deer Valley Unified District      32221    18.5
#> 6     6              Gilbert Unified District      31508    21.4
#> 7     7      Paradise Valley Unified District      26164    23.8
#> 8     8    Phoenix Union High School District      25760    26.1
#> 9     9               Dysart Unified District      23033    28.2
#> 10   10           Scottsdale Unified District      20645    30.1
#> 11   11 Washington Elementary School District      18765    31.8
#> 12   12   Glendale Union High School District      16061    33.3
#> 13   13     American Leadership Academy, Inc.      15266    34.6
#> 14   14          Queen Creek Unified District      15034    36.0
#> 15   15                 Vail Unified District      14941    37.4
# Only the thirty largest districts are plotted.
conc_top30 <- filter(concentration, rank <= 30)
stopifnot(nrow(conc_top30) > 0)

ggplot(conc_top30, aes(x = rank, y = cum_pct)) +
  geom_line(color = "#BF0A30", linewidth = 1.2) +
  geom_point(color = "#BF0A30", size = 2) +
  # Dashed guides mark the 50% level and rank 27, the crossing point quoted
  # in the annotation and subtitle.
  geom_hline(yintercept = 50, linetype = "dashed", color = "gray40") +
  geom_vline(xintercept = 27, linetype = "dashed", color = "gray40") +
  annotate("text", x = 27, y = 55, label = "27 districts = 50%", hjust = -0.1) +
  labs(
    title = "Cumulative Student Enrollment by District Rank",
    subtitle = "Half of all students are in just 27 of 630 districts",
    x = "District Rank (by enrollment)",
    y = "Cumulative % of Students"
  ) +
  scale_y_continuous(limits = c(0, 100))


7. Charters serve nearly 1 in 4 Arizona students

Charter schools and other non-traditional districts now serve 25% of Arizona’s students (270,000 students across 437 districts). Traditional districts (unified, union, elementary) serve the remaining 75%.

# Split 2025 districts into "Traditional" and "Charter/Other" using the
# district name, then total the enrollment of each group. The source data
# used here has no district-type column, so the split is a name heuristic.
charter_data <- enr |>
  filter(is_district,
         subgroup == "total_enrollment", grade_level == "TOTAL",
         end_year == 2025) |>
  mutate(district_type = case_when(
    # "Elementary (School )?District" also covers names such as
    # "Washington Elementary School District", which the narrower
    # "Elementary District" pattern left in the Charter/Other group.
    # NOTE(review): any printed figures produced with the narrower pattern
    # need to be re-rendered after this change.
    grepl("Unified|Union|Elementary (School )?District|High School District",
          district_name) ~ "Traditional",
    TRUE ~ "Charter/Other"
  )) |>
  group_by(district_type) |>
  summarize(
    n_districts = n(),
    total_students = sum(n_students, na.rm = TRUE),
    # na.rm matches the sum above so one missing count cannot turn the
    # average into NA while the total stays numeric.
    avg_size = round(mean(n_students, na.rm = TRUE), 0),
    .groups = "drop"
  ) |>
  mutate(pct = round(total_students / sum(total_students) * 100, 1))
stopifnot(nrow(charter_data) > 0)

charter_data
#> # A tibble: 2 × 5
#>   district_type n_districts total_students avg_size   pct
#>   <chr>               <int>          <dbl>    <dbl> <dbl>
#> 1 Charter/Other         437         270122      618  24.6
#> 2 Traditional           193         829407     4297  75.4
stopifnot(nrow(charter_data) > 0)
# One bar per district type, labelled with its student count and share.
ggplot(
  charter_data,
  aes(x = district_type, y = total_students, fill = district_type)
) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = paste0(scales::comma(total_students), "\n(", pct, "%)")),
    vjust = -0.3,
    size = 4
  ) +
  # The upper limit leaves room for the two-line labels above the bars.
  scale_y_continuous(labels = scales::comma, limits = c(0, 1000000)) +
  scale_fill_manual(
    values = c("Charter/Other" = "#FF8C00", "Traditional" = "#4682B4")
  ) +
  labs(
    title = "Enrollment by District Type (2025)",
    subtitle = "Charters serve nearly 1 in 4 Arizona students",
    x = NULL,
    y = "Total Students"
  ) +
  theme(legend.position = "none")


8. Chinle is 99% Native American

Arizona has several districts with almost entirely Native American enrollment, reflecting the state’s 22 federally recognized tribes. Chinle Unified is 99.1% Native American, followed by San Carlos (98.8%) and Window Rock (98.2%).

# Native American share of 2025 enrollment per district, highest first.
native_am <- enr |>
  filter(
    is_district,
    end_year == 2025,
    grade_level == "TOTAL",
    subgroup %in% c("native_american", "total_enrollment")
  ) |>
  group_by(district_name, subgroup) |>
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop") |>
  # One column per subgroup: native_american and total_enrollment.
  pivot_wider(names_from = subgroup, values_from = n_students) |>
  # Drop districts with no native_american row and those under 100 students.
  filter(!is.na(native_american) & total_enrollment >= 100) |>
  mutate(pct_native = round(native_american / total_enrollment * 100, 1)) |>
  arrange(desc(pct_native))
stopifnot(nrow(native_am) > 0)

native_am |>
  head(10) |>
  select(district_name, total_enrollment, native_american, pct_native)
#> # A tibble: 10 × 4
#>    district_name                     total_enrollment native_american pct_native
#>    <chr>                                        <dbl>           <dbl>      <dbl>
#>  1 Chinle Unified District                       2908            2882       99.1
#>  2 San Carlos Unified District                   1433            1416       98.8
#>  3 Window Rock Unified District                  1701            1671       98.2
#>  4 Red Mesa Unified District                      427             419       98.1
#>  5 Tuba City Unified School Distric…             1378            1348       97.8
#>  6 Whiteriver Unified District                   2228            2169       97.4
#>  7 Pinon Unified District                        1017             990       97.3
#>  8 Mcnary Elementary District                     183             176       96.2
#>  9 Peach Springs Unified District                 179             172       96.1
#> 10 Kayenta Unified School District …             1540            1461       94.9
# Top ten districts by Native American share, with shortened names for the
# axis (everything from " District" or " Unified" onward is removed).
native_top10 <- mutate(
  head(native_am, 10),
  district_name = gsub(" District.*$| Unified.*$", "", district_name)
)
stopifnot(nrow(native_top10) > 0)

ggplot(
  native_top10,
  aes(x = reorder(district_name, pct_native), y = pct_native)
) +
  geom_col(fill = "#8B4513") +
  geom_text(aes(label = paste0(pct_native, "%")), hjust = -0.1, size = 3.5) +
  coord_flip() +
  labs(
    title = "Districts with Highest Native American Enrollment",
    subtitle = "Chinle is 99.1% Native American",
    x = NULL,
    y = "% Native American"
  ) +
  # Extending the axis past 100 keeps the percentage labels inside the panel.
  scale_y_continuous(limits = c(0, 110))


9. Border districts are over 95% Hispanic

Arizona’s border districts have near-complete Hispanic enrollment. Nogales Unified (99.3%), Gadsden Elementary (99.2%), and Douglas Unified (97.8%) serve predominantly Hispanic communities near the Mexico border.

# Hispanic share of 2025 enrollment per district, highest first.
hispanic_maj <- enr |>
  filter(
    is_district,
    end_year == 2025,
    grade_level == "TOTAL",
    subgroup %in% c("hispanic", "total_enrollment")
  ) |>
  group_by(district_name, subgroup) |>
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop") |>
  # One column per subgroup: hispanic and total_enrollment.
  pivot_wider(names_from = subgroup, values_from = n_students) |>
  # Drop districts with no hispanic row and those under 500 students.
  filter(!is.na(hispanic) & total_enrollment >= 500) |>
  mutate(pct_hispanic = round(hispanic / total_enrollment * 100, 1)) |>
  arrange(desc(pct_hispanic))
stopifnot(nrow(hispanic_maj) > 0)

hispanic_maj |>
  head(10) |>
  select(district_name, total_enrollment, hispanic, pct_hispanic)
#> # A tibble: 10 × 4
#>    district_name                          total_enrollment hispanic pct_hispanic
#>    <chr>                                             <dbl>    <dbl>        <dbl>
#>  1 Nogales Unified District                           5690     5651         99.3
#>  2 Gadsden Elementary District                        5181     5141         99.2
#>  3 Douglas Unified District                           3601     3521         97.8
#>  4 Santa Cruz Valley Unified District                 3606     3516         97.5
#>  5 Western School of Science and Technol…              515      496         96.3
#>  6 Somerton Elementary District                       2967     2843         95.8
#>  7 Pan-American Elementary Charter                    1221     1160         95  
#>  8 Harvest Power Community Development G…             1642     1556         94.8
#>  9 Espiritu Community Development Corp.                697      652         93.5
#> 10 American Basic Schools LLC                          634      590         93.1
# Top ten districts by Hispanic share, with shortened names for the axis
# (a " District..." suffix or a trailing ", Inc." is removed).
hispanic_top10 <- mutate(
  head(hispanic_maj, 10),
  district_name = gsub(" District.*$|, Inc\\.$", "", district_name)
)
stopifnot(nrow(hispanic_top10) > 0)

ggplot(
  hispanic_top10,
  aes(x = reorder(district_name, pct_hispanic), y = pct_hispanic)
) +
  geom_col(fill = "#FF6B35") +
  geom_text(aes(label = paste0(pct_hispanic, "%")), hjust = -0.1, size = 3.5) +
  coord_flip() +
  labs(
    title = "Districts with Highest Hispanic Enrollment",
    subtitle = "Border districts are over 95% Hispanic",
    x = NULL,
    y = "% Hispanic"
  ) +
  # Extending the axis past 100 keeps the percentage labels inside the panel.
  scale_y_continuous(limits = c(0, 110))


10. Mesa Unified is still Arizona’s largest district

Despite losing 7,156 students, Mesa Unified remains Arizona’s largest district with 55,600 students. Chandler (41,349), Tucson (40,209), and Peoria (34,373) round out the top four.

# 2025 districts ordered from largest to smallest total enrollment.
largest <- enr |>
  filter(
    is_district,
    end_year == 2025,
    grade_level == "TOTAL",
    subgroup == "total_enrollment"
  ) |>
  arrange(desc(n_students)) |>
  select(district_name, n_students)
stopifnot(nrow(largest) > 0)
head(largest, 15)
#>                            district_name n_students
#> 1                  Mesa Unified District      55600
#> 2          Chandler Unified District #80      41349
#> 3                Tucson Unified District      40209
#> 4         Peoria Unified School District      34373
#> 5           Deer Valley Unified District      32221
#> 6               Gilbert Unified District      31508
#> 7       Paradise Valley Unified District      26164
#> 8     Phoenix Union High School District      25760
#> 9                Dysart Unified District      23033
#> 10           Scottsdale Unified District      20645
#> 11 Washington Elementary School District      18765
#> 12   Glendale Union High School District      16061
#> 13     American Leadership Academy, Inc.      15266
#> 14          Queen Creek Unified District      15034
#> 15                 Vail Unified District      14941
# The ten largest districts in 2025, with shortened names for the axis
# (everything from " District" or " Unified" onward is removed).
largest_top10 <- enr |>
  filter(
    is_district,
    end_year == 2025,
    grade_level == "TOTAL",
    subgroup == "total_enrollment"
  ) |>
  arrange(desc(n_students)) |>
  head(10) |>
  mutate(
    district_name = gsub(" District.*$| Unified.*$", "", district_name)
  )
stopifnot(nrow(largest_top10) > 0)

ggplot(
  largest_top10,
  aes(x = reorder(district_name, n_students), y = n_students)
) +
  geom_col(fill = "#4682B4") +
  coord_flip() +
  labs(
    title = "10 Largest Districts in Arizona (2025)",
    subtitle = "Mesa remains #1 with 55,600 students",
    x = NULL,
    y = "Number of Students"
  ) +
  scale_y_continuous(labels = scales::comma)


11. Arizona has 630 districts serving 1.1 million students

Arizona’s school system includes 630 separate districts - from Mesa’s 55,600 students down to tiny rural districts with just 11 students. The average district has 1,745 students, but the median is just 414.

# Single-row summary of 2025 district sizes: count, total, mean, median,
# smallest and largest.
district_stats <- enr |>
  filter(
    is_district,
    end_year == 2025,
    grade_level == "TOTAL",
    subgroup == "total_enrollment"
  ) |>
  summarize(
    n_districts = n(),
    total_students = sum(n_students),
    mean_size = round(mean(n_students)),
    median_size = median(n_students),
    min_size = min(n_students),
    max_size = max(n_students)
  )
stopifnot(nrow(district_stats) > 0)

district_stats
#>   n_districts total_students mean_size median_size min_size max_size
#> 1         630        1099529      1745         414       11    55600
# Count 2025 districts by enrollment size bucket.
size_dist <- enr |>
  filter(is_district,
         subgroup == "total_enrollment", grade_level == "TOTAL",
         end_year == 2025) |>
  mutate(size_bucket = cut(n_students,
                           breaks = c(0, 100, 500, 1000, 5000, 10000, 50000, Inf),
                           labels = c("<100", "100-499", "500-999", "1K-5K",
                                      "5K-10K", "10K-50K", ">50K"),
                           # cut() builds right-closed intervals by default,
                           # i.e. (0,100], (100,500], ..., which would put a
                           # 100-student district under "<100" and a
                           # 500-student district under "100-499". Left-closed
                           # intervals [0,100), [100,500), ... match the labels.
                           right = FALSE)) |>
  count(size_bucket)
stopifnot(nrow(size_dist) > 0)
size_dist |>
  ggplot(aes(x = size_bucket, y = n)) +
  geom_col(fill = "#6B8E23") +
  # Show the district count above each bar.
  geom_text(aes(label = n), vjust = -0.5) +
  labs(
    title = "Distribution of District Sizes",
    subtitle = "Most districts are small; a few giants dominate enrollment",
    x = "District Size (students)",
    y = "Number of Districts"
  )


12. Boys outnumber girls 51% to 49%

Male students slightly outnumber female students in Arizona schools: 559,758 boys (50.9%) vs 539,574 girls (49.1%). This 2-point gap is consistent with national patterns.

# 2025 male and female enrollment summed across district rows, with each
# group's share of the two combined.
gender <- enr |>
  filter(
    is_district,
    end_year == 2025,
    grade_level == "TOTAL",
    subgroup %in% c("male", "female")
  ) |>
  group_by(subgroup) |>
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop") |>
  mutate(pct = round(n_students / sum(n_students) * 100, 1))
stopifnot(nrow(gender) > 0)

gender
#> # A tibble: 2 × 3
#>   subgroup n_students   pct
#>   <chr>         <dbl> <dbl>
#> 1 female       539574  49.1
#> 2 male         559758  50.9
stopifnot(nrow(gender) > 0)
# One bar per gender, labelled with its student count and share.
ggplot(gender, aes(x = subgroup, y = n_students, fill = subgroup)) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = paste0(scales::comma(n_students), "\n(", pct, "%)")),
    vjust = -0.3,
    size = 4
  ) +
  # The upper limit leaves room for the two-line labels above the bars.
  scale_y_continuous(labels = scales::comma, limits = c(0, 650000)) +
  scale_fill_manual(values = c("female" = "#CC79A7", "male" = "#0072B2")) +
  labs(
    title = "Enrollment by Gender (2025)",
    subtitle = "Boys outnumber girls 51% to 49%",
    x = NULL,
    y = "Number of Students"
  ) +
  theme(legend.position = "none")


13. Arizona’s largest virtual school serves 6,000+ students

American Virtual Academy is Arizona’s largest virtual school with 6,289 students in 2025. This represents 49% growth from 4,227 students in 2018 (though down from 7,147 in 2024), reflecting the post-pandemic persistence of online learning.

# Districts whose name contains "Virtual", "Online" or "Digital" (any case),
# listed by year with the largest first.
virtual_schools <- enr |>
  filter(
    is_district,
    grade_level == "TOTAL",
    subgroup == "total_enrollment",
    grepl("Virtual|Online|Digital", district_name, ignore.case = TRUE)
  ) |>
  arrange(end_year, desc(n_students)) |>
  select(end_year, district_name, n_students)
stopifnot(nrow(virtual_schools) > 0)
virtual_schools
#>    end_year                   district_name n_students
#> 1      2018        American Virtual Academy       4227
#> 2      2018 ASU Preparatory Academy Digital         38
#> 3      2019        American Virtual Academy       4618
#> 4      2019 ASU Preparatory Academy Digital        276
#> 5      2024        American Virtual Academy       7147
#> 6      2024 ASU Preparatory Academy Digital       3575
#> 7      2024     Premier Prep Online Academy        130
#> 8      2024        Online School of Arizona         41
#> 9      2025        American Virtual Academy       6289
#> 10     2025 ASU Preparatory Academy Digital       3845
#> 11     2025     Premier Prep Online Academy        216
#> 12     2025        Online School of Arizona         45
# Total enrollment of American Virtual Academy in each available year.
virtual <- filter(
  enr,
  is_district,
  grade_level == "TOTAL",
  subgroup == "total_enrollment",
  district_name == "American Virtual Academy"
)
stopifnot(nrow(virtual) > 0)

ggplot(virtual, aes(x = factor(end_year), y = n_students)) +
  geom_col(fill = "#9370DB", width = 0.6) +
  # Print the exact count just above each bar.
  geom_text(aes(label = scales::comma(n_students)), vjust = -0.5, size = 4) +
  labs(
    title = "American Virtual Academy Enrollment",
    subtitle = "Grew 49% from 2018 to 2025",
    x = "School Year",
    y = "Number of Students"
  ) +
  scale_y_continuous(labels = scales::comma, limits = c(0, 8000))


14. Elementary grades are smaller than high school grades

Elementary enrollment (K-5) totals 465,049 students while high school (9-12) has 361,772. But when you look at individual grades, high school grades average 90,443 students while elementary grades average only 77,508 - a 17% difference, suggesting a demographic shift toward smaller incoming cohorts.

# 2025 enrollment rolled up from individual grades into school levels, with
# the number of grades in each level and the average enrollment per grade.
grade_groups <- enr |>
  filter(
    is_district,
    end_year == 2025,
    subgroup == "total_enrollment",
    grade_level %in% grade_order
  ) |>
  group_by(grade_level) |>
  summarize(n_students = sum(n_students, na.rm = TRUE), .groups = "drop") |>
  mutate(
    # Any grade not listed below (PK, given grade_order) falls into "Other".
    level = case_when(
      grade_level %in% c("K", "01", "02", "03", "04", "05") ~ "Elementary (K-5)",
      grade_level %in% c("06", "07", "08") ~ "Middle (6-8)",
      grade_level %in% c("09", "10", "11", "12") ~ "High School (9-12)",
      TRUE ~ "Other"
    )
  ) |>
  group_by(level) |>
  summarize(
    total_students = sum(n_students),
    n_grades = n(),
    avg_per_grade = round(total_students / n_grades),
    .groups = "drop"
  )
stopifnot(nrow(grade_groups) > 0)

grade_groups
#> # A tibble: 4 × 4
#>   level              total_students n_grades avg_per_grade
#>   <chr>                       <dbl>    <int>         <dbl>
#> 1 Elementary (K-5)           465049        6         77508
#> 2 High School (9-12)         361772        4         90443
#> 3 Middle (6-8)               248216        3         82739
#> 4 Other                       19815        1         19815
# Average enrollment per grade for the three school levels ("Other" is
# excluded from the chart).
grade_levels_chart <- grade_groups |>
  filter(level != "Other") |>
  # A character x axis sorts alphabetically, which would place High School
  # between Elementary and Middle. Explicit factor levels keep the bars in
  # school-progression order.
  mutate(level = factor(level,
                        levels = c("Elementary (K-5)", "Middle (6-8)",
                                   "High School (9-12)")))
stopifnot(nrow(grade_levels_chart) > 0)
grade_levels_chart |>
  ggplot(aes(x = level, y = avg_per_grade, fill = level)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = scales::comma(avg_per_grade)), vjust = -0.5, size = 4) +
  scale_fill_brewer(palette = "Set1") +
  # The upper limit leaves headroom for the labels above the tallest bar.
  scale_y_continuous(labels = scales::comma, limits = c(0, 100000)) +
  labs(
    title = "Average Enrollment Per Grade by School Level",
    subtitle = "High school grades average 17% more students than elementary",
    x = NULL,
    y = "Average Students Per Grade"
  ) +
  theme(legend.position = "none")


15. 108 districts are Hispanic-majority

Of Arizona’s 630 districts, 108 have majority Hispanic enrollment (at least 500 students and >50% Hispanic). These districts serve 413,175 students total - about 38% of all students statewide.

# Number of Hispanic-majority districts (among those in `hispanic_maj`, i.e.
# with at least 500 students), their combined enrollment, and that enrollment
# as a share of all 2025 district enrollment in `concentration`.
hispanic_count <- hispanic_maj |>
  # "Majority" means strictly more than half. The test uses the raw counts
  # rather than the rounded pct_hispanic column, so a district at exactly
  # 50%, or at 49.95% rounded up to 50.0, is not counted.
  filter(hispanic / total_enrollment > 0.5) |>
  summarize(
    n_districts = n(),
    total_students = sum(total_enrollment),
    pct_of_state = round(sum(total_enrollment) / sum(concentration$n_students) * 100, 1)
  )
stopifnot(nrow(hispanic_count) > 0)

hispanic_count
#> # A tibble: 1 × 3
#>   n_districts total_students pct_of_state
#>         <int>          <dbl>        <dbl>
#> 1         108         413175         37.6
# District and student counts for Hispanic-majority versus other districts
# (only districts in `hispanic_maj`, i.e. with at least 500 students).
hisp_maj_chart <- hispanic_maj |>
  # "Majority" means strictly more than half. The test uses the raw counts
  # rather than the rounded pct_hispanic column, so a district at exactly
  # 50%, or at 49.95% rounded up to 50.0, is not labelled a majority.
  mutate(majority = ifelse(hispanic / total_enrollment > 0.5,
                           "Hispanic Majority", "Not Hispanic Majority")) |>
  group_by(majority) |>
  summarize(n_districts = n(), total_students = sum(total_enrollment), .groups = "drop")
stopifnot(nrow(hisp_maj_chart) > 0)
hisp_maj_chart |>
  ggplot(aes(x = majority, y = n_districts, fill = majority)) +
  geom_col(width = 0.6) +
  # Two-line label: district count, then the students those districts enroll.
  geom_text(aes(label = paste0(n_districts, " districts\n",
                               scales::comma(total_students), " students")),
            vjust = -0.3, size = 3.5) +
  scale_fill_manual(values = c("Hispanic Majority" = "#FF6B35",
                               "Not Hispanic Majority" = "#4682B4")) +
  scale_y_continuous(limits = c(0, 400)) +
  labs(
    title = "Hispanic-Majority Districts in Arizona",
    subtitle = "108 districts (17%) are majority Hispanic",
    x = NULL,
    y = "Number of Districts"
  ) +
  theme(legend.position = "none")


Data Notes

Source: Arizona Department of Education October 1 Enrollment Reports

URL: https://www.azed.gov/accountability-research

Available years: 2018, 2019, 2024, 2025, 2026

Missing years: 2020-2023 (Cloudflare protection blocks automated downloads)

Important caveats:

- Small counts may be suppressed in the source data (marked with *)
- Virtual and charter schools are counted separately from traditional districts

What’s included:

- State, district, and school level enrollment
- Demographics: Hispanic, White, Black, Asian, Native American, Pacific Islander, Multiracial
- Gender: Male, Female
- Grade levels: PK through 12


Session Info

# Print the R version, platform, locale and attached/loaded package versions
# in effect when this vignette was rendered.
sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] ggplot2_4.0.2      tidyr_1.3.2        dplyr_1.2.0        azschooldata_0.1.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.2     tidyselect_1.2.1  
#>  [5] jquerylib_0.1.4    systemfonts_1.3.2  scales_1.4.0       textshaping_1.0.5 
#>  [9] readxl_1.4.5       yaml_2.3.12        fastmap_1.2.0      R6_2.6.1          
#> [13] labeling_0.4.3     generics_0.1.4     knitr_1.51         tibble_3.3.1      
#> [17] desc_1.4.3         bslib_0.10.0       pillar_1.11.1      RColorBrewer_1.1-3
#> [21] rlang_1.1.7        utf8_1.2.6         cachem_1.1.0       xfun_0.56         
#> [25] fs_1.6.7           sass_0.4.10        S7_0.2.1           cli_3.6.5         
#> [29] withr_3.0.2        pkgdown_2.2.0      magrittr_2.0.4     digest_0.6.39     
#> [33] grid_4.5.2         rappdirs_0.3.4     lifecycle_1.0.5    vctrs_0.7.1       
#> [37] evaluate_1.0.5     glue_1.8.0         cellranger_1.1.0   farver_2.1.2      
#> [41] codetools_0.2-20   ragg_1.5.1         rmarkdown_2.30     purrr_1.2.1       
#> [45] tools_4.5.2        pkgconfig_2.0.3    htmltools_0.5.9