poet/bin/plot_metrics.R

143 lines
4.9 KiB
R

#!/usr/bin/env Rscript
suppressPackageStartupMessages({library(dplyr); library(ggplot2); library(tidyr)})
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) stop("Usage: Rscript plot_mape_stats.R <stats_overview_file1> [stats_overview_file2] ...")
cat("Reading", length(args), "stats file(s)...\n")
# Process all input files
all_data <- lapply(args, function(stats_file) {
if (!file.exists(stats_file)) {
warning("File not found: ", stats_file)
return(NULL)
}
cat(" -", stats_file, "\n")
lines <- readLines(stats_file)
data_lines <- lines[!grepl("^-+$", lines) & nchar(lines) > 0]
parsed <- lapply(data_lines, function(line) {
parts <- strsplit(trimws(line), "\\s+")[[1]]
if (length(parts) >= 5) {
data.frame(
Iteration = as.numeric(parts[1]),
Rollback = as.numeric(parts[2]),
Species = parts[3],
MAPE = as.numeric(parts[4]),
RRMSE = as.numeric(parts[5]),
stringsAsFactors = FALSE
)
}
})
df <- bind_rows(parsed) %>% filter(!is.na(Iteration))
species_list <- c("H", "O", "C", "Ca", "Cl", "Mg", "Calcite", "Dolomite")
#species_list <- "Dolomite"
df_filtered <- df %>%
filter(Species %in% species_list) %>%
group_by(Iteration) %>%
summarise(
MedianMAPE = median(MAPE, na.rm = TRUE),
MaxMAPE = max(MAPE, na.rm = TRUE),
Rollback = first(Rollback),
.groups = "drop"
) %>%
filter(Iteration %% 100 == 0) %>%
mutate(Folder = basename(dirname(stats_file)))
# Detect rollback changes
df_filtered <- df_filtered %>%
arrange(Iteration) %>%
mutate(RollbackChange = Rollback != lag(Rollback, default = first(Rollback)))
return(df_filtered)
})
combined_data <- bind_rows(all_data) %>%
filter(Iteration >= 3000 & Iteration <= 8000) %>%
filter(is.finite(MedianMAPE) & MedianMAPE > 0) %>%
filter(is.finite(MaxMAPE) & MaxMAPE > 0)
# Identify rollback transitions for each folder
rollback_points <- combined_data %>%
filter(RollbackChange == TRUE) %>%
select(Folder, Iteration, Rollback)
cat("\nData summary:\n")
print(head(combined_data))
cat("\nLegend:", unique(combined_data$Folder), "\n")
cat("\nRollback transitions detected:\n")
print(rollback_points)
# A consistent style for both plots
pretty_theme <- theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
axis.title = element_text(face = "bold"),
legend.position = "right",
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "grey85"),
panel.grid.major.y = element_line(color = "grey85"),
axis.line = element_line(linewidth = 0.8, colour = "black"),
axis.ticks = element_line(colour = "black")
)
# Determine nice log-scale breaks (1e-1, 1e-2, 1e-3, etc.)
log_breaks <- 10^seq(max(-6, floor(log10(min(combined_data$MedianMAPE, combined_data$MaxMAPE, na.rm = TRUE)))),
ceiling(log10(max(combined_data$MedianMAPE, combined_data$MaxMAPE, na.rm = TRUE))),
by = 1)
# Common log label formatter
log_labels <- function(x) sprintf("1e%d", log10(x))
# Plot Median MAPE
p1 <- ggplot(combined_data, aes(x = Iteration, y = MedianMAPE, color = Folder)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(data = rollback_points, aes(xintercept = Iteration, color = Folder),
linetype = "dashed", alpha = 0.6, linewidth = 0.8) +
scale_x_continuous(breaks = seq(0, max(combined_data$Iteration), by = 1000)) +
scale_y_log10(breaks = log_breaks, labels = log_labels) +
labs(
title = "Median MAPE Across H, O, C, Ca, Cl, Mg, Calcite, Dolomite",
x = "Iteration",
y = "Median MAPE",
color = "Legend"
) +
pretty_theme
# Plot Max MAPE
p2 <- ggplot(combined_data, aes(x = Iteration, y = MaxMAPE, color = Folder)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(data = rollback_points, aes(xintercept = Iteration, color = Folder),
linetype = "dashed", alpha = 0.6, linewidth = 0.8) +
scale_x_continuous(breaks = seq(0, max(combined_data$Iteration), by = 1000)) +
scale_y_log10(breaks = log_breaks, labels = log_labels, limits = c(1e-5, NA)) +
labs(
title = "Max MAPE Across H, O, C, Ca, Cl, Mg, Calcite, Dolomite",
x = "Iteration",
y = "Max MAPE",
color = "Legend"
) +
pretty_theme
# Save plots
script_dir <- dirname(sub("--file=", "", grep("--file=", commandArgs(trailingOnly = FALSE), value = TRUE)))
if (length(script_dir) == 0 || script_dir == "") script_dir <- getwd()
ggsave(file.path(script_dir, "median_mape.pdf"), p1, width = 10, height = 6)
ggsave(file.path(script_dir, "max_mape.pdf"), p2, width = 10, height = 6)
cat("\nPlots saved:\n")
cat(" -", file.path(script_dir, "median_mape.pdf"), "\n")
cat(" -", file.path(script_dir, "max_mape.pdf"), "\n")
# Also save data
write.csv(combined_data, file.path(script_dir, "mape_summary.csv"), row.names = FALSE)
cat(" -", file.path(script_dir, "mape_summary.csv"), "\n")