From 9e884bcc9923a98e9d1b44dd6ef3b42694dd49e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Max=20L=C3=BCbke?=
Date: Wed, 15 Jan 2025 13:42:27 +0100
Subject: [PATCH] Add convert script and new data set

---
Review note: convert_barite.jl below was revised after the original commit, so
the blob id in its "index" line still refers to the earlier content.

 barite_50_4_corner.h5 |  3 ++
 convert_barite.jl     | 81 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 barite_50_4_corner.h5
 create mode 100755 convert_barite.jl

diff --git a/barite_50_4_corner.h5 b/barite_50_4_corner.h5
new file mode 100644
index 0000000..f55a844
--- /dev/null
+++ b/barite_50_4_corner.h5
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d135c6696d4a0068b442f8e58c77842dcc8c229b79fe9ae0af030cfc3e813bf7
+size 62127814
diff --git a/convert_barite.jl b/convert_barite.jl
new file mode 100755
index 0000000..6830d32
--- /dev/null
+++ b/convert_barite.jl
@@ -0,0 +1,81 @@
+#!/usr/bin/env julia
+
+# Collect the `.qs2` iteration files of a directory into one HDF5 file.
+#
+# Usage: julia convert_barite.jl INPUT_DIR OUTPUT_NAME
+#
+# The `T` and `C` entries of every iteration file are stacked row-wise across
+# iterations. `T` is written to the HDF5 group "design" and `C` to the group
+# "result"; each group holds the column names ("names") and the values as one
+# matrix ("data").
+
+# Validate the command line before loading any package so that usage errors are
+# reported without waiting for the imports.
+if length(ARGS) != 2
+    println(stderr, "Usage: julia convert_barite.jl INPUT_DIR OUTPUT_NAME")
+    exit(1)
+end
+
+to_read_dir = ARGS[1]
+# Append the ".h5" extension unless the caller already supplied it.
+output_file_name = endswith(ARGS[2], ".h5") ? ARGS[2] : ARGS[2] * ".h5"
+
+if !isdir(to_read_dir)
+    println(stderr, "The directory \"$to_read_dir\" does not exist")
+    exit(1)
+end
+
+using HDF5, RCall, DataFrames
+@rlibrary qs2
+
+# Columns removed from both tables before they are written.
+const DROPPED_COLUMNS = [:ID, :Barite_p1, :Celestite_p1]
+
+# An iteration file ends in ".qs2" and has "iter" in its file name. Only the base
+# name is checked for "iter" so that the directory part of the path cannot match.
+is_iteration_file(path) = endswith(path, ".qs2") && occursin("iter", basename(path))
+
+# Store `df` below `parent` as group `name` with the datasets "names" and "data".
+function write_table(parent, name, df)
+    group = create_group(parent, name)
+    group["names"] = names(df)
+    group["data", compress=9] = Matrix(df)
+    return nothing
+end
+
+# `readdir` returns sorted entries; the first match is taken to be iteration 0
+# and is skipped in the loop below.
+# NOTE(review): this relies on the file naming putting iteration 0 first.
+qs_files = filter(is_iteration_file, readdir(to_read_dir; join=true))
+
+if length(qs_files) < 2
+    println(stderr, "No iteration files after iteration 0 found in \"$to_read_dir\"")
+    exit(1)
+end
+
+df_design = DataFrame()
+df_result = DataFrame()
+
+for file in qs_files[2:end]
+    # Read the qs2 file and make the object available in the R session.
+    data = qs_read(file)
+    @rput data
+
+    R"transport <- data$T"
+    R"chemistry <- data$C"
+
+    @rget transport
+    @rget chemistry
+
+    # Stack the rows of this iteration below the ones collected so far.
+    append!(df_design, transport)
+    append!(df_result, chemistry)
+end
+
+select!(df_design, Not(DROPPED_COLUMNS))
+select!(df_result, Not(DROPPED_COLUMNS))
+
+h5open(output_file_name, "w") do fid
+    write_table(fid, "design", df_design)
+    write_table(fid, "result", df_result)
+end