-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstem_analysis.R
More file actions
42 lines (33 loc) · 1.39 KB
/
Copy pathstem_analysis.R
File metadata and controls
42 lines (33 loc) · 1.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
library(readxl)
library(dplyr)
library(tidyr)
# Per-year gender analysis of the "Final" sheet of the STEM employment
# workbook. For every distinct value of `Year` the script prints:
#   1. a chi-square test of independence between Sex and STEM, using the
#      summed `Employed Graduates` counts as the contingency table;
#   2. a two-sample t-test of the median monthly salary by Sex, restricted
#      to rows where STEM == 1.
# A test is skipped (with a message) when the year's data cannot support it,
# so that one sparse year does not abort the remaining years.
data <- read_excel("STEM Employment data & Salary.xlsx", sheet = "Final")
years <- unique(data$Year)

for (yr in years) {
  cat("\n====================\n")
  cat("Year:", yr, "\n")

  # Filter data for the current year
  year_data <- filter(data, Year == yr)

  # Chi-Square Test: STEM Background vs Gender (Total Employed Graduates)
  # `.groups = "drop"` returns a plain tibble and avoids the per-iteration
  # regrouping message from summarise().
  chi_table <- year_data %>%
    filter(!is.na(`Employed Graduates`)) %>%
    group_by(Sex, STEM) %>%
    summarise(
      total_grads = sum(`Employed Graduates`, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    pivot_wider(
      names_from = STEM,
      values_from = total_grads,
      values_fill = 0
    )

  # A test of independence needs at least a 2 x 2 table: the first column
  # of chi_table is Sex, so more than two columns means two or more STEM
  # categories. With a single row or column chisq.test() would silently
  # run a goodness-of-fit test instead.
  chi_ready <- nrow(chi_table) > 1 && ncol(chi_table) > 2
  if (chi_ready) {
    chi_matrix <- as.matrix(chi_table[, -1])
    rownames(chi_matrix) <- chi_table$Sex
    # chisq.test() stops when no entry of the table is positive.
    chi_ready <- sum(chi_matrix) > 0
  }

  if (chi_ready) {
    cat("\nChi-Square Test (Gender vs STEM Graduates):\n")
    print(chisq.test(chi_matrix))
  } else {
    cat("Not enough data for chi-square test in year", yr, "\n")
  }

  # t-Test: Median Monthly Salaries by Gender
  stem_data <- filter(
    year_data,
    STEM == 1,
    !is.na(`Median monthly salaries & wages of employees (RM)`)
  )

  # Count usable observations per gender. table() ignores NA, which matches
  # what the formula interface of t.test() does with missing groups; empty
  # factor levels are dropped for the same reason.
  sex_counts <- table(stem_data$Sex)
  sex_counts <- sex_counts[sex_counts > 0]

  if (length(sex_counts) == 2 && all(sex_counts >= 2)) {
    cat("\nt-Test (Median Salary by Gender in STEM):\n")
    print(
      t.test(
        `Median monthly salaries & wages of employees (RM)` ~ Sex,
        data = stem_data
      )
    )
  } else if (length(sex_counts) == 2) {
    # Both genders are present, but the Welch t-test needs at least two
    # observations in each group and would stop with an error otherwise.
    cat(
      "Not enough observations per gender in STEM data for t-test in year",
      yr, "\n"
    )
  } else {
    cat("Not enough gender diversity in STEM data for t-test in year", yr, "\n")
  }
}