mirror of
https://git.gfz-potsdam.de/naaice/model-training.git
synced 2025-12-15 19:48:23 +01:00
79 lines
1.8 KiB
Julia
Executable File
79 lines
1.8 KiB
Julia
Executable File
#!/usr/bin/env julia
|
|
|
|
# qs_read = []
|
|
|
|
|
|
# # find all the files in 'barite_out'
|
|
# files = readdir("barite_out"; join=true)
|
|
|
|
# # keep only the files that have the extension '.qs2' and contain 'iter'
|
|
# files = filter(x -> occursin(r".*\.qs2", x) && occursin(r"iter", x), files)
|
|
|
|
# # remove first entry as it is iteration 0
|
|
# files = files[2:end]
|
|
|
|
# test1 = qs_read(files[1])
|
|
|
|
# @rput test1
|
|
|
|
# R"test1 <- test1$C"
|
|
|
|
# @rget test1
|
|
|
|
# Command-line handling: exactly two arguments are expected, the directory to
# read the input files from and the name of the HDF5 file to write.
if length(ARGS) != 2
    println(stderr, "Usage: julia convert.jl <directory> <output_file_name>.h5")
    exit(1)
end

to_read_dir = ARGS[1]

# The usage text shows the output name with its ".h5" suffix, so accept the
# argument with or without it and only add the extension when it is missing.
# Appending unconditionally turned "out.h5" into "out.h5.h5".
output_file_name = endswith(ARGS[2], ".h5") ? ARGS[2] : ARGS[2] * ".h5"

# Stop early when the input directory does not exist.
if !isdir(to_read_dir)
    println(stderr, "The directory \"$to_read_dir\" does not exist")
    exit(1)
end
|
|
|
|
using HDF5, RCall, DataFrames
|
|
@rlibrary qs2
|
|
|
|
# Collect the ".qs2" files in `to_read_dir` whose file name contains "iter".
# The test is made on the file name only: with `join=true` every entry carries
# the directory prefix, so matching the whole path would accept every ".qs2"
# file as soon as the directory itself has "iter" in its name. The extension is
# checked at the end of the name so that e.g. "x.qs2.bak" is not picked up.
#
# `readdir` returns its entries sorted; the first match is dropped because it
# is taken to be iteration 0.
# NOTE(review): this relies on the file names sorting in iteration order
# (e.g. zero-padded iteration numbers) — confirm against the producer.
qs_files = filter(
    p -> endswith(basename(p), ".qs2") && occursin("iter", basename(p)),
    readdir(to_read_dir; join=true),
)[2:end]
|
|
|
|
# Accumulators: every file in `qs_files` contributes rows to both tables.
df_design = DataFrame()
df_result = DataFrame()

for qs_path in qs_files
    # Read the qs2 file through the R `qs2` package.
    data = qs_read(qs_path)

    # Make the object available in the embedded R session as `data` ...
    @rput data

    # ... extract its `T` and `C` members on the R side ...
    # (presumably data frames, since they are appended to DataFrames below)
    R"transport <- data$T"
    R"chemistry <- data$C"

    # ... and bring both back into Julia.
    @rget transport chemistry

    # Stack this file's rows under the ones gathered so far.
    append!(df_design, transport)
    append!(df_result, chemistry)
end
|
|
|
|
# Keep every column except ID, Barite_p1 and Celestite_p1 in both tables.
df_design = select(df_design, Not([:ID, :Barite_p1, :Celestite_p1]))
df_result = select(df_result, Not([:ID, :Barite_p1, :Celestite_p1]))
|
|
|
|
|
|
# Write both tables to `output_file_name`, replacing any existing file ("w").
# Each table gets its own group ("design" / "result") holding two datasets:
#   "names" - the column names
#   "data"  - the table contents as a matrix, written with compress=9
h5open(output_file_name, "w") do h5
    # Both groups are created up front, design first, then filled in turn.
    targets = (
        (create_group(h5, "design"), df_design),
        (create_group(h5, "result"), df_result),
    )

    for (grp, df) in targets
        grp["names"] = names(df)
        grp["data", compress=9] = Matrix(df)
    end
end
|