using DataFrames, RCall, Statistics, Plots, Base.Threads, CSV, CategoricalArrays

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Name prefix shared by the barite result directories.
prefix = "barite_fgcs"
# Directory that contains the barite result directories.
input_dir = "barite/"
# Per-run file with the serialized timings (read through R's qs2 package).
timing_file = "timings.qs2"
# Per-run CSV with energy measurements; a run without this file gets energy 0.
ema_file = "ema.out.csv"

# Column names of the timing table. The first six entries are filled, in this
# order, by `eval_timings_interp` / `eval_timings_ref`; "Energy" is appended by
# `workhorse_loop`.
relevant_timings = [
    "Chemistry",
    "Phreeqc",
    "PHT_read",
    "PHT_write",
    "DHT_gather",
    "Interpolation",
    "Energy",
]

grid_size = 400 * 400
iterations = 5000
calculations = grid_size * iterations

# `mkpath` is a no-op for an existing directory and also creates missing parents.
output_dir = "timings"
mkpath(output_dir)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

"""
    read_timings(timing_file)

Read the qs2 file at `timing_file` with R's `qs2::qs_read` and return the
object converted to Julia by RCall.

The path is copied into the R session as `timing_file` and the loaded object is
stored there as `res`.
"""
function read_timings(timing_file)
    res = nothing
    @rput timing_file
    R"res <- qs2::qs_read(timing_file)"
    @rget res
    return res
end

"""
    get_count_of_id2(initqsfile)

Read the qs2 file at `initqsfile` through R and return how many entries of its
`ID` column are equal to `2.0`.
"""
function get_count_of_id2(initqsfile)
    res = nothing
    @rput initqsfile
    R"res <- qs2::qs_read(initqsfile)"
    @rget res
    return count(==(2.0), res[!, :ID])
end

"""
    read_ema(ema_file)

Read the energy CSV at `ema_file` into a `DataFrame`. Return an empty
`DataFrame` when the file does not exist.
"""
function read_ema(ema_file)
    isfile(ema_file) || return DataFrame()
    return CSV.read(ema_file, DataFrame)
end

"""
    eval_timings_interp(timings)

Extract the timings of a run that used interpolation.

Return `(time_vec, interp_df)`: `time_vec` holds `simtime` followed by the sums
of `phreeqc_time`, `interp_w`, `interp_r`, `interp_g` and `interp_fc` of
`timings[:chemistry]`; `interp_df` has the columns `Calls` and `Cached` taken
from `interp_calls` and `interp_cached`.
"""
function eval_timings_interp(timings)
    chemistry = timings[:chemistry]
    # NOTE(review): with the column order of `relevant_timings`, `interp_w` ends
    # up in "PHT_read" and `interp_r` in "PHT_write". If `w`/`r` stand for
    # write/read the two are swapped -- confirm against the producer of the file.
    time_vec = [
        chemistry[:simtime],
        sum(chemistry[:phreeqc_time]),
        sum(chemistry[:interp_w]),
        sum(chemistry[:interp_r]),
        sum(chemistry[:interp_g]),
        sum(chemistry[:interp_fc]),
    ]
    interp_df = DataFrame(;
        Calls=chemistry[:interp_calls], Cached=chemistry[:interp_cached]
    )
    return time_vec, interp_df
end

"""
    eval_ema(ema_df)

Sum the `energy` column over all rows whose `device_name` contains
`package-<digits>` and scale the sum by `1e-6 / 3600`. Return `0.0` for a table
without rows.

The scaling presumably converts microjoule to watt-hours (the plots label the
value "Energy [Wh]"); confirm against the tool that writes the CSV.
"""
function eval_ema(ema_df)
    nrow(ema_df) == 0 && return 0.0
    only_package = filter(:device_name => name -> occursin(r"package-\d+", name), ema_df)
    return sum(only_package[!, :energy]) * 1e-6 * (1 / 3600)
end

"""
    eval_timings_ref(timings)

Extract the timings of a reference run: `simtime` and the summed
`phreeqc_time` of `timings[:chemistry]`, followed by four zeros for the
interpolation-related columns.
"""
function eval_timings_ref(timings)
    chemistry = timings[:chemistry]
    return [chemistry[:simtime], sum(chemistry[:phreeqc_time]), 0, 0, 0, 0]
end

"""
    find_all_directories(input_dir, prefix)

Return the paths (joined with `input_dir`) of all directories inside
`input_dir` whose own name contains `prefix`.

Only the entry name is matched, so a parent directory that happens to contain
`prefix` does not select every entry, and plain files are skipped because the
callers expect to open files inside each returned path.
"""
function find_all_directories(input_dir, prefix)
    return filter(readdir(input_dir; join=true)) do path
        isdir(path) && occursin(prefix, basename(path))
    end
end

"""
    workhorse_loop(dirs)

Collect timings and energy for every result directory in `dirs`.

The directory name is split at `_`. If the third part is `ref` the run is
labelled "Reference"; otherwise the third and fourth parts are parsed as the
integers `Aqueous` and `Minerals` and the label is "(Aqueous,Minerals)".

Return `(df_timings, df_interp)`: `df_timings` has one row per directory with
the columns `Type`, `Aqueous`, `Minerals` and `relevant_timings`; `df_interp`
maps `(Aqueous, Minerals)` to the per-run interpolation table. When several
directories share the same key, the table of the last one wins.
"""
function workhorse_loop(dirs)
    df_timings = DataFrame()
    df_interp = Dict{Tuple{Int,Int},DataFrame}()

    for dir in dirs
        metainfo = split(basename(dir), "_")
        energy = eval_ema(read_ema(joinpath(dir, ema_file)))
        timings = read_timings(joinpath(dir, timing_file))

        if metainfo[3] == "ref"
            label, aqueous, minerals = "Reference", 0, 0
            time_vec = eval_timings_ref(timings)
        else
            aqueous = parse(Int, metainfo[3])
            minerals = parse(Int, metainfo[4])
            label = "($aqueous,$minerals)"
            time_vec, interp_df = eval_timings_interp(timings)
            df_interp[(aqueous, minerals)] = interp_df
        end

        # `vcat` instead of `push!`: does not mutate the helper's result and
        # promotes the element type if the timings happen to be integers.
        row_values = vcat(time_vec, energy)
        local_df = hcat(
            DataFrame(; Type=label, Aqueous=aqueous, Minerals=minerals),
            DataFrame(relevant_timings .=> row_values),
        )
        append!(df_timings, local_df)
    end

    return df_timings, df_interp
end

"""
    eval_additional_info(df, interp_dict, grid_size, iterations)

Return `df` extended by three columns:

- `Interp_calls`: summed `Calls` of the matching table in `interp_dict`
  (0 for the "Reference" row),
- `Phreeqc_calls`: `grid_size * iterations - Interp_calls`,
- `Time_per_Phreeqc`: `Phreeqc ./ Phreeqc_calls`.
"""
function eval_additional_info(df, interp_dict, grid_size, iterations)
    interp_full = [
        if row[:Type] != "Reference"
            sum(interp_dict[(row[:Aqueous], row[:Minerals])][!, :Calls])
        else
            0
        end for row in eachrow(df)
    ]

    calculations = grid_size * iterations
    phreeqc_calls = calculations .- interp_full
    time_per_phreeqc = df[!, :Phreeqc] ./ phreeqc_calls

    return hcat(
        df,
        DataFrame(;
            Phreeqc_calls=phreeqc_calls,
            Time_per_Phreeqc=time_per_phreeqc,
            Interp_calls=interp_full,
        ),
    )
end

"""
    color_from_aqueous!(df)

Convert `df.Type` to a `CategoricalArray` and add a `Colors` column: the
"Reference" row gets the first viridis palette colour, every other row the
colour at index `Aqueous`. Return the colour vector.

The palette has four entries, or `maximum(df.Aqueous)` entries if that is
larger, so values above 4 no longer raise a `BoundsError`. A row with
`Aqueous == 1` shares its colour with the reference.
"""
function color_from_aqueous!(df)
    n_colors = max(4, maximum(df[!, :Aqueous]; init=0))
    my_colors = palette(:viridis, n_colors)

    df[!, :Type] = CategoricalArray(df[!, :Type])
    color_vec = [
        row[:Type] == "Reference" ? my_colors[1] : my_colors[row[:Aqueous]] for
        row in eachrow(df)
    ]
    df[!, :Colors] = color_vec
    return color_vec
end

# ---------------------------------------------------------------------------
# Barite evaluation
# ---------------------------------------------------------------------------

dirs = find_all_directories(input_dir, prefix)
df_timings, df_interp = workhorse_loop(dirs)

# Average repeated runs of the same configuration.
# NOTE(review): the dolomite evaluation further down aggregates with `median`;
# confirm that the difference is intended.
group = groupby(df_timings, [:Type, :Aqueous, :Minerals])
combine_df = combine(group, relevant_timings .=> mean .=> relevant_timings)

df_timings = eval_additional_info(combine_df, df_interp, grid_size, iterations)

CSV.write("$output_dir/timings_barite.csv", df_timings)
for (key, value) in df_interp
    CSV.write("$output_dir/interp_barite_$(key[1])_$(key[2]).csv", value)
end

color_from_aqueous!(df_timings)

# Chemistry runtime per simulation type, one coloured bar per type.
bar(
    df_timings[!, :Type],
    df_timings[!, :Chemistry];
    color=df_timings[!, :Colors],
    xguidefontsize=8,
    yguidefontsize=8,
    xtickfontsize=6,
    ytickfontsize=6,
    legend=false,
    ylabel="Time [s]",
    xlabel="Simulation Type",
    formatter=:plain,
    yminorgrid=true,
    ylimits=(0, 60000),
)
savefig("$output_dir/timings_barite.pdf")

# Energy per simulation type.
bar(
    df_timings[!, :Type],
    df_timings[!, :Energy];
    color=df_timings[!, :Colors],
    xguidefontsize=8,
    yguidefontsize=8,
    xtickfontsize=6,
    ytickfontsize=6,
    legend=false,
    formatter=:plain,
    yminorgrid=true,
    ylimits=(0, 12000),
    yticks=0:2000:12000,
    yminorticks=4,
    ylabel="Energy [Wh]",
    xlabel="Simulation Type",
)
savefig("$output_dir/energy_barite.pdf")

# Per-iteration interpolation statistics of the (3,1) run, relative to the
# number of cells with ID 2 in the first reference iteration.
max_count = get_count_of_id2(joinpath(input_dir, "$(prefix)_ref_128_3", "iter_0000.qs2"))

interp_3_1 = df_interp[(3, 1)]
interp_3_1[:, :RelCalls] = interp_3_1[!, :Calls] ./ max_count
interp_3_1[:, :RelCached] = interp_3_1[!, :Cached] ./ interp_3_1[!, :Calls]
interp_3_1[:, :iter] = 1:nrow(interp_3_1)

plot_df = stack(
    interp_3_1, [:RelCalls, :RelCached]; variable_name=:variable, value_name=:value
)

rename_dict = Dict(
    "RelCalls" => "Interpolation Calls", "RelCached" => "O/W Cached Sets"
)
plot_df[!, :variable] = map(x -> rename_dict[x], plot_df[!, :variable])
plot_df[!, :variable] = CategoricalArray(plot_df[!, :variable])
levels!(plot_df[!, :variable], ["Interpolation Calls", "O/W Cached Sets"])

my_colors = palette(:viridis, 3)[2:3]
color_dict = Dict(
    "Interpolation Calls" => my_colors[1], "O/W Cached Sets" => my_colors[2]
)
plot_df[!, :Colors] = map(x -> color_dict[x], plot_df[!, :variable])

# The figure produced here is written to disk by the `savefig` call that follows.
plot(
    plot_df[!, :iter],
    plot_df[!, :value] * 100;
    group=plot_df[!, :variable],
    color=plot_df[!, :Colors],
    xguidefontsize=8,
    yguidefontsize=8,
    xtickfontsize=6,
    ytickfontsize=6,
    ylimits=(0, 100),
    yminorgrid=true,
    yminorticks=4,
    formatter=:plain,
    linewidth=2,
    ylabel="Relative Count [%]",
    xlabel="Iteration",
)
# Write the barite interpolation-call figure created by the preceding `plot` call.
savefig("$output_dir/interp_calls_barite_3_1.pdf")

"""
    relative_saving(case_df, ref_df, column)

Return `1 - case / reference` for `column`, where `case_df` and `ref_df` must
each contain exactly one row (`only` throws otherwise).

Scalar division is used on purpose: `/` applied to two column vectors is a
matrix right division, not an element-wise ratio.
"""
function relative_saving(case_df, ref_df, column)
    return 1 - only(case_df[!, column]) / only(ref_df[!, column])
end

# ---------------------------------------------------------------------------
# Barite: savings relative to the reference run
# ---------------------------------------------------------------------------

interp_3_1 = df_timings[df_timings[!, :Type] .== "(3,1)", :]
interp_ref = df_timings[df_timings[!, :Type] .== "Reference", :]
interp_4_3 = df_timings[df_timings[!, :Type] .== "(4,3)", :]
interp_2_1 = df_timings[df_timings[!, :Type] .== "(2,1)", :]

# Log the values: as bare top-level expressions they would be discarded when
# the file is run as a script.
for (label, case_df) in (("(3,1)", interp_3_1), ("(4,3)", interp_4_3), ("(2,1)", interp_2_1))
    chemistry_saving = relative_saving(case_df, interp_ref, :Chemistry)
    energy_saving = relative_saving(case_df, interp_ref, :Energy)
    @info "Barite savings relative to reference" type=label chemistry=chemistry_saving energy=energy_saving
end

# ---------------------------------------------------------------------------
# Dolomite evaluation
# ---------------------------------------------------------------------------

dirs = find_all_directories("dolomite/", "dolo_fgcs")
df_timings, df_interp = workhorse_loop(dirs)

# NOTE(review): repeated runs are combined with `median` here, whereas the
# barite evaluation earlier in this file uses `mean`; confirm this is intended.
group = groupby(df_timings, [:Type, :Aqueous, :Minerals])
combine_df = combine(group, relevant_timings .=> median .=> relevant_timings)

df_timings = eval_additional_info(combine_df, df_interp, grid_size, iterations)

CSV.write("$output_dir/timings_dolomite.csv", df_timings)
for (key, value) in df_interp
    CSV.write("$output_dir/interp_dolomite_$(key[1])_$(key[2]).csv", value)
end

color_from_aqueous!(df_timings)

# Chemistry runtime per simulation type.
bar(
    df_timings[!, :Type],
    df_timings[!, :Chemistry];
    color=df_timings[!, :Colors],
    xguidefontsize=8,
    yguidefontsize=8,
    xtickfontsize=6,
    ytickfontsize=6,
    legend=false,
    formatter=:plain,
    yminorgrid=true,
    ylimits=(0, 3000),
    ylabel="Time [s]",
    xlabel="Type",
)
savefig("$output_dir/timings_dolomite.pdf")

# Energy per simulation type.
bar(
    df_timings[!, :Type],
    df_timings[!, :Energy];
    color=df_timings[!, :Colors],
    xguidefontsize=8,
    yguidefontsize=8,
    xtickfontsize=6,
    ytickfontsize=6,
    legend=false,
    formatter=:plain,
    yminorgrid=true,
    ylimits=(0, 800),
    yminorticks=4,
    ylabel="Energy [Wh]",
    xlabel="Type",
)
savefig("$output_dir/energy_dolomite.pdf")

# Per-iteration interpolation statistics of the (3,3) run, relative to the
# full grid (`grid_size` is 400 * 400).
interp_3_3 = df_interp[(3, 3)]
max_count = grid_size
interp_3_3[:, :RelCalls] = interp_3_3[!, :Calls] ./ max_count
interp_3_3[:, :RelCached] = interp_3_3[!, :Cached] ./ interp_3_3[!, :Calls]
interp_3_3[:, :iter] = 1:nrow(interp_3_3)

plot_df = stack(
    interp_3_3, [:RelCalls, :RelCached]; variable_name=:variable, value_name=:value
)

rename_dict = Dict(
    "RelCalls" => "Interpolation Calls", "RelCached" => "O/W Cached Sets"
)
plot_df[!, :variable] = map(x -> rename_dict[x], plot_df[!, :variable])
plot_df[!, :variable] = CategoricalArray(plot_df[!, :variable])
levels!(plot_df[!, :variable], ["Interpolation Calls", "O/W Cached Sets"])

my_colors = palette(:viridis, 3)[2:3]
color_dict = Dict(
    "Interpolation Calls" => my_colors[1], "O/W Cached Sets" => my_colors[2]
)
plot_df[!, :Colors] = map(x -> color_dict[x], plot_df[!, :variable])

plot(
    plot_df[!, :iter],
    plot_df[!, :value] * 100;
    group=plot_df[!, :variable],
    color=plot_df[!, :Colors],
    xguidefontsize=8,
    yguidefontsize=8,
    xtickfontsize=6,
    ytickfontsize=6,
    ylimits=(0, 100),
    yminorgrid=true,
    yminorticks=4,
    linewidth=2,
    formatter=:plain,
    ylabel="Relative Count [%]",
    xlabel="Iteration",
)
savefig("$output_dir/interp_calls_dolomite_3_3.pdf")

# ---------------------------------------------------------------------------
# Dolomite: savings of (4,3) and (3,3) relative to the reference run
# ---------------------------------------------------------------------------

interp_4_3 = df_timings[df_timings[!, :Type] .== "(4,3)", :]
interp_ref = df_timings[df_timings[!, :Type] .== "Reference", :]
interp_3_3 = df_timings[df_timings[!, :Type] .== "(3,3)", :]

for (label, case_df) in (("(4,3)", interp_4_3), ("(3,3)", interp_3_3))
    chemistry_saving = relative_saving(case_df, interp_ref, :Chemistry)
    energy_saving = relative_saving(case_df, interp_ref, :Energy)
    @info "Dolomite savings relative to reference" type=label chemistry=chemistry_saving energy=energy_saving
end