From 9fdda3115106eed2f0bdc750a944d51dc3874ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Tue, 19 Mar 2024 09:47:40 +0000 Subject: [PATCH] Refactor pqc_to_grid function to use matrix instead of data.table --- R_lib/init_r_lib.R | 82 +++++++++------------------------------------- 1 file changed, 15 insertions(+), 67 deletions(-) diff --git a/R_lib/init_r_lib.R b/R_lib/init_r_lib.R index 8879ac703..672e372cc 100644 --- a/R_lib/init_r_lib.R +++ b/R_lib/init_r_lib.R @@ -1,82 +1,30 @@ -input <- readRDS("/home/max/Storage/poet/build/apps/out.rds") - -grid_def <- matrix(c(2, 3), nrow = 2, ncol = 5) - -library(data.table) - pqc_to_grid <- function(pqc_in, grid) { - # Convert the input DataFrame to a data.table - dt <- data.table(pqc_in) + # Convert the input DataFrame to a matrix + dt <- as.matrix(pqc_in) # Flatten the matrix into a vector id_vector <- as.vector(t(grid)) - # Initialize an empty data.table to store the results - result_dt <- data.table() + # Initialize an empty matrix to store the results + result_mat <- matrix(nrow = 0, ncol = ncol(dt)) # Iterate over each ID in the vector for (id_mat in id_vector) { - # Find the matching row in the data.table - matching_dt <- dt[dt$id == id_mat] + # Find the matching row in the matrix + matching_row <- dt[dt[, "ID"] == id_mat, ] - # Append the matching data.table row to the result data.table - result_dt <- rbind(result_dt, matching_dt) + # Append the matching row to the result matrix + result_mat <- rbind(result_mat, matching_row) } + # Convert the result matrix to a data frame + res_df <- as.data.frame(result_mat) + # Remove all columns which only contain NaN - # result_dt <- result_dt[, colSums(is.na(result_dt)) != nrow(result_dt)] + res_df <- res_df[, colSums(is.na(res_df)) != nrow(res_df)] - res_df <- as.data.frame(result_dt) + # Remove row names + rownames(res_df) <- NULL - return(res_df[, colSums(is.na(res_df)) != nrow(res_df)]) + return(res_df) } - -pqc_init <- pqc_to_grid(input, grid_def) -test <- pqc_init - -modify_module_sizes <- function(mod_sizes, pqc_mat, init_grid) { - # Find all unique IDs in init_grid - unique_ids <- unique(init_grid$id) - - # remove rows from pqc_mat that are not in init_grid - pqc_mat <- as.data.frame(pqc_mat) - pqc_mat <- pqc_mat[pqc_mat$id %in% unique_ids, ] - - # Find the column indices where all rows are NaN - na_cols <- which(sapply(pqc_mat, function(x) all(is.na(x)))) - # na_cols <- which(colSums(is.nan(pqc_mat)) == nrow(pqc_mat)) - - # Build cumsum over mod_sizes - cum_mod_sizes <- cumsum(mod_sizes) - - # Find the indices where the value of na_cols is equal to the value of cum_mod_sizes - idx <- which(cum_mod_sizes %in% na_cols) - - # Set the value of mod_sizes to 0 at the indices found in the previous step - mod_sizes[idx] <- 0 - - return(mod_sizes) -} - -# mod_sizes <- c(7, 0, 4, 2, 0) - -# unique_ids <- unique(as.vector(pqc_init$id)) - -# tmp <- as.data.frame(input) -# pqc_test <- tmp[tmp$id %in% unique_ids, ] -# na_cols <- which(colSums(is.na(pqc_test)) == nrow(pqc_test)) -# cum_mod_sizes <- cumsum(mod_sizes) - -# # Get the indices of the columns of cum_mod_sizes where the value of the column is equal to the value of na_cols -# idx <- which(cum_mod_sizes %in% na_cols) -# mod_sizes[idx] <- 0 - -# idx <- which(na_cols == cum_mod_sizes) - - - -# idx <- which(na_cols[1] >= cum_mod_sizes) - -# mod_sizes <- modify_module_sizes(mod_sizes, pqc_init, pqc_init) - -# remove column with all NA