diff --git a/barite_50_4_corner.h5 b/barite_50_4_corner.h5
deleted file mode 100644
index f55a844..0000000
--- a/barite_50_4_corner.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d135c6696d4a0068b442f8e58c77842dcc8c229b79fe9ae0af030cfc3e813bf7
-size 62127814
diff --git a/convert_barite.jl b/convert_barite.jl
deleted file mode 100755
index d57df53..0000000
--- a/convert_barite.jl
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env julia
-
-# qs_read = []
-
-
-# # find all the files in 'barite_out'
-# files = readdir("barite_out"; join=true)
-
-# # remove files which do not have the extension '.qs2' and contains 'iter'
-# files = filter(x -> occursin(r".*\.qs2", x) && occursin(r"iter", x), files)
-
-# # remove first entry as it is iteration 0
-# files = files[2:end]
-
-test1 = qs_read(files[1])
-
-@rput test1
-
-R"test1 <- test1$C"
-
-@rget test1
-
-# check if ARGS contains 2 elements
-if length(ARGS) != 2
-    println("Usage: julia convert.jl .h5")
-    exit(1)
-end
-
-to_read_dir = ARGS[1]
-output_file_name = ARGS[2] * ".h5"
-
-# check if the directory exists
-if !isdir(to_read_dir)
-    println("The directory \"$to_read_dir\" does not exist")
-    exit(1)
-end
-
-using HDF5, RCall, DataFrames
-@rlibrary qs2
-
-# List all .rds files starting with "iter" in a given directory
-qs_files = filter(x -> occursin(r".*\.qs2", x) && occursin(r"iter", x), readdir(to_read_dir; join=true))[2:end]
-
-df_design = DataFrame()
-df_result = DataFrame()
-
-for file in qs_files
-    # Load the RDS file
-    data = qs_read(file)
-
-    # get basename of the file
-    basename = split(file, "/")[end]
-
-    # get the iteration number by splitting the basename and parse the second element
-    iteration = parse(Int, split(split(basename, "_")[2], ".")[1])
-
-    @rput data
-
-    R"transport <- data$T"
-    R"chemistry <- data$C"
-
-    @rget transport
-    @rget chemistry
-
-    # Add iteration number to the DataFrame
-    transport.iteration = fill(iteration, size(transport, 1))
-    chemistry.iteration = fill(iteration, size(chemistry, 1))
-
-    # Append the DataFrame to the big DataFrame
-    append!(df_design, transport)
-    append!(df_result, chemistry)
-end
-
-# remove ID, Barite_p1, Celestite_p1 columns
-df_design = df_design[:, Not([:ID, :Barite_p1, :Celestite_p1])]
-df_result = df_result[:, Not([:ID, :Barite_p1, :Celestite_p1])]
-
-
-h5open(output_file_name, "w") do fid
-    group_in = create_group(fid, "design")
-    group_out = create_group(fid, "result")
-
-    group_in["names"] = names(df_design)
-    group_in["data", compress=9] = Matrix(df_design)
-
-    group_out["names"] = names(df_result)
-    group_out["data", compress=9] = Matrix(df_result)
-end
diff --git a/convert_data.jl b/convert_data.jl
deleted file mode 100644
index 3cd3cc9..0000000
--- a/convert_data.jl
+++ /dev/null
@@ -1,60 +0,0 @@
-using HDF5
-using RData
-
-using DataFrames
-
-# Load Training Data
-# train_data = load("Barite_50_Data.rds")
-
-# training_h5_name = "Barite_50_Data.h5"
-# h5open(training_h5_name, "w") do fid
-#     for key in keys(train_data)
-#         group = create_group(fid, key)
-#         group["names"] = names(train_data[key])
-#         group["data", compress=3] = Matrix(train_data[key])
-#         # group = create_group(fid, key)
-#         # grou["names"] = coln
-#     end
-# end
-
-# List all .rds files starting with "iter" in a given directory
-rds_files = filter(x -> startswith(x, "iter"), readdir("barite_out/"))
-
-# remove "iter_0.rds" from the list
-rds_files = rds_files[2:end]
-
-big_df_in = DataFrame()
-big_df_out = DataFrame()
-
-for rds_file in rds_files
-    # Load the RDS file
-    data = load("barite_out/$rds_file")
-    # Convert the data to a DataFrame
-    df_T = DataFrame(data["T"])
-    df_C = DataFrame(data["C"])
-    # Append the DataFrame to the big DataFrame
-    append!(big_df_in, df_T)
-    append!(big_df_out, df_C)
-end
-
-# remove ID, Barite_p1, Celestite_p1 columns
-big_df_in = big_df_in[:, Not([:ID, :Barite_p1, :Celestite_p1])]
-big_df_out = big_df_out[:, Not([:ID, :Barite_p1, :Celestite_p1])]
-
-inference_h5_name = "Barite_50_Data_inference.h5"
-h5open(inference_h5_name, "w") do fid
-    fid["names"] = names(big_df_in)
-    fid["data", compress=9] = Matrix(big_df_in)
-end
-
-training_h5_name = "Barite_50_Data_training.h5"
-h5open(training_h5_name, "w") do fid
-    group_in = create_group(fid, "design")
-    group_out = create_group(fid, "result")
-
-    group_in["names"] = names(big_df_in)
-    group_in["data", compress=9] = Matrix(big_df_in)
-
-    group_out["names"] = names(big_df_out)
-    group_out["data", compress=9] = Matrix(big_df_out)
-end
\ No newline at end of file
diff --git a/doc/measurement_plan.md b/doc/measurement_plan.md
index 7faf52b..d972010 100644
--- a/doc/measurement_plan.md
+++ b/doc/measurement_plan.md
@@ -3,19 +3,11 @@
 ### Parameters to optimize
 
 
-
-
-
-
-
 ### Saved models
 
 `./results/model_large_standardization.keras`: Trained on `barite_50_4_corner.h5` dataset with extended Loss function (Huber loss with mass balance) and **standardized data**
 
 
-
-
-
 ### Experiments
 
 | **Experiment** | **Dataset** | **Model** | **Lossfunction** | **Activation** | **Preprocessing** |
diff --git a/model_large.keras b/model_large.keras
deleted file mode 100644
index 8bfca12..0000000
Binary files a/model_large.keras and /dev/null differ
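For context on the data layout the deleted converters produced: below is a minimal sketch, assuming the `design`/`result` groups with `names` and `data` datasets shown in the diff above, of how such a file could be read back into `DataFrame`s with HDF5.jl. The helper name `load_group` is illustrative, and using `barite_50_4_corner.h5` as the path is an assumption based on the dataset referenced in `doc/measurement_plan.md`.

```julia
# Sketch only (not part of the deleted scripts): reload a "design"/"result"
# HDF5 file of the kind convert_barite.jl wrote. Assumes each group holds a
# "names" vector and an n_rows x n_cols "data" matrix, as in the diff above.
using HDF5, DataFrames

function load_group(path::AbstractString, group::AbstractString)
    h5open(path, "r") do fid
        cols = read(fid[group]["names"])   # column names written by the converter
        data = read(fid[group]["data"])    # numeric matrix written by the converter
        DataFrame(data, Symbol.(cols))
    end
end

# Hypothetical usage with the dataset referenced in doc/measurement_plan.md:
# df_design = load_group("barite_50_4_corner.h5", "design")
# df_result = load_group("barite_50_4_corner.h5", "result")
```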