mirror of
https://git.gfz-potsdam.de/naaice/model-training.git
synced 2025-12-13 08:48:22 +01:00
clean up repository
This commit is contained in:
parent
4fe35855f0
commit
d5d836bd98
BIN
barite_50_4_corner.h5
(Stored with Git LFS)
BIN
barite_50_4_corner.h5
(Stored with Git LFS)
Binary file not shown.
@ -1,88 +0,0 @@
|
||||
#!/usr/bin/env julia
#
# Convert a directory of qs2-serialized simulation iteration files into a single
# HDF5 file containing a "design" group (transport data) and a "result" group
# (chemistry data), with the iteration number attached to every row.
#
# Usage: julia convert.jl <directory> <output_file_name>
#        (the ".h5" extension is appended to <output_file_name> automatically)

# Validate the command line before loading any heavy packages, so usage errors
# fail fast.
if length(ARGS) != 2
    # NOTE: ".h5" is appended below, so the user passes the name WITHOUT the
    # extension (the old message wrongly showed "<output_file_name>.h5").
    println("Usage: julia convert.jl <directory> <output_file_name>")
    println("(the .h5 extension is appended automatically)")
    exit(1)
end

to_read_dir = ARGS[1]
output_file_name = ARGS[2] * ".h5"

# Check that the input directory exists before doing any work.
if !isdir(to_read_dir)
    println("The directory \"$to_read_dir\" does not exist")
    exit(1)
end

using HDF5, RCall, DataFrames
@rlibrary qs2

# Extract the iteration number from a file named like "iter_<n>.qs2".
iteration_number(path) = parse(Int, split(split(basename(path), "_")[2], ".")[1])

# Collect all iteration snapshot files: extension ".qs2", file name contains
# "iter". `endswith` is used instead of the old regex r".*\.qs2", which matched
# ".qs2" anywhere in the path.
qs_files = filter(readdir(to_read_dir; join=true)) do f
    endswith(f, ".qs2") && occursin("iter", basename(f))
end

# Drop iteration 0 (the initial state) explicitly and process in numeric order.
# The original code dropped the first entry of the sorted list instead, which
# only removed iteration 0 by accident of lexicographic sort order and would
# silently discard a real iteration if the iter_0 file were missing.
qs_files = sort(filter(f -> iteration_number(f) != 0, qs_files); by=iteration_number)

df_design = DataFrame()
df_result = DataFrame()

for file in qs_files
    # Load the qs2 file; it deserializes to an R list with $T (transport)
    # and $C (chemistry) data frames.
    data = qs_read(file)
    iteration = iteration_number(file)

    # Round-trip through R to pull the two data frames out of the R list.
    @rput data
    R"transport <- data$T"
    R"chemistry <- data$C"
    @rget transport
    @rget chemistry

    # Tag every row with its iteration number so the origin of each row is
    # still recoverable after concatenation.
    transport.iteration = fill(iteration, size(transport, 1))
    chemistry.iteration = fill(iteration, size(chemistry, 1))

    # Accumulate into the combined design/result tables.
    append!(df_design, transport)
    append!(df_result, chemistry)
end

# Remove bookkeeping columns that carry no information for training
# (row ID and the constant *_p1 mineral columns).
df_design = df_design[:, Not([:ID, :Barite_p1, :Celestite_p1])]
df_result = df_result[:, Not([:ID, :Barite_p1, :Celestite_p1])]

# Write both tables (data matrix + column names) to HDF5, gzip level 9.
h5open(output_file_name, "w") do fid
    group_in = create_group(fid, "design")
    group_out = create_group(fid, "result")

    group_in["names"] = names(df_design)
    group_in["data", compress=9] = Matrix(df_design)

    group_out["names"] = names(df_result)
    group_out["data", compress=9] = Matrix(df_result)
end
|
||||
@ -1,60 +0,0 @@
|
||||
using HDF5
using RData
using DataFrames

# Convert per-iteration RDS dumps from the barite transport simulation in
# "barite_out/" into two HDF5 files:
#   - Barite_50_Data_inference.h5: flat names/data of the transport table only
#   - Barite_50_Data_training.h5:  "design" (transport) and "result" (chemistry)
#     groups, each with names + data matrix.

const SRC_DIR = "barite_out"

# Bookkeeping columns dropped from both tables (row ID and the constant
# *_p1 mineral columns). Shared so both tables always agree.
const DROPPED_COLUMNS = Not([:ID, :Barite_p1, :Celestite_p1])

# List all iteration files, excluding the initial state "iter_0.rds"
# explicitly. The original dropped the first entry of the sorted listing
# instead, which only removed iter_0 by accident of sort order and would
# silently discard a real iteration if iter_0 were missing.
rds_files = filter(readdir(SRC_DIR)) do f
    startswith(f, "iter") && f != "iter_0.rds"
end

big_df_in = DataFrame()
big_df_out = DataFrame()

for rds_file in rds_files
    # Each RDS file deserializes to an R list holding the "T" (transport)
    # and "C" (chemistry) data frames for one iteration.
    data = load(joinpath(SRC_DIR, rds_file))

    # Accumulate the per-iteration tables into the combined tables.
    append!(big_df_in, DataFrame(data["T"]))
    append!(big_df_out, DataFrame(data["C"]))
end

big_df_in = big_df_in[:, DROPPED_COLUMNS]
big_df_out = big_df_out[:, DROPPED_COLUMNS]

# Inference file: only the transport (input) table, stored flat.
inference_h5_name = "Barite_50_Data_inference.h5"
h5open(inference_h5_name, "w") do fid
    fid["names"] = names(big_df_in)
    fid["data", compress=9] = Matrix(big_df_in)
end

# Training file: design/result groups, each with column names + data matrix,
# gzip level 9.
training_h5_name = "Barite_50_Data_training.h5"
h5open(training_h5_name, "w") do fid
    group_in = create_group(fid, "design")
    group_out = create_group(fid, "result")

    group_in["names"] = names(big_df_in)
    group_in["data", compress=9] = Matrix(big_df_in)

    group_out["names"] = names(big_df_out)
    group_out["data", compress=9] = Matrix(big_df_out)
end
|
||||
@ -3,19 +3,11 @@
|
||||
### Parameters to optimize
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Saved models
|
||||
|
||||
`./results/model_large_standardization.keras`: Trained on `barite_50_4_corner.h5` dataset with an extended loss function (Huber loss with mass balance) and **standardized data**
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Experiments
|
||||
|
||||
| **Experiment** | **Dataset** | **Model** | **Loss function** | **Activation** | **Preprocessing** |
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user