mirror of
https://git.gfz-potsdam.de/naaice/tug.git
synced 2025-12-16 02:48:23 +01:00
fixed avx algorithm and added constexpr to switch avx
This commit is contained in:
parent
001f59fae8
commit
247999f69a
@ -358,23 +358,38 @@ static Eigen::VectorX<T> ThomasAlgorithmAVX(Eigen::SparseMatrix<T> &A,
|
|||||||
const __m256d zero = _mm256_setzero_pd();
|
const __m256d zero = _mm256_setzero_pd();
|
||||||
const __m256d one = _mm256_set1_pd(1.0);
|
const __m256d one = _mm256_set1_pd(1.0);
|
||||||
|
|
||||||
for (Eigen::Index i = 1; i < n; i += avx_size) {
|
/*
|
||||||
// Load vectors into AVX registers
|
c_diag[i] /= b_diag[i] - a_diag[i] * c_diag[i - 1];
|
||||||
__m256d a_diag_avx = _mm256_loadu_pd(&a_diag[i]);
|
x_vec[i] = (x_vec[i] - a_diag[i] * x_vec[i - 1]) / (b_diag[i] - a_diag[i] * c_diag[i - 1]);
|
||||||
__m256d b_diag_avx = _mm256_loadu_pd(&b_diag[i]);
|
*/
|
||||||
__m256d c_diag_avx = _mm256_loadu_pd(&c_diag[i]);
|
|
||||||
__m256d x_vec_avx = _mm256_loadu_pd(&x_vec[i]);
|
|
||||||
|
|
||||||
// Compute intermediate results
|
for (Eigen::Index i = 1; i < n; i+=4) { // Hier verwenden wir 4 Elemente gleichzeitig
|
||||||
__m256d factor = _mm256_div_pd(a_diag_avx, b_diag_avx);
|
__m256d a = _mm256_loadu_pd(&a_diag[i]);
|
||||||
c_diag_avx = _mm256_div_pd(c_diag_avx, _mm256_sub_pd(b_diag_avx, _mm256_mul_pd(a_diag_avx, c_diag_avx)));
|
__m256d b = _mm256_loadu_pd(&b_diag[i]);
|
||||||
x_vec_avx = _mm256_div_pd(_mm256_sub_pd(x_vec_avx, _mm256_mul_pd(a_diag_avx, x_vec_avx)), _mm256_sub_pd(b_diag_avx, _mm256_mul_pd(a_diag_avx, c_diag_avx)));
|
__m256d c = _mm256_loadu_pd(&c_diag[i]);
|
||||||
|
__m256d x = _mm256_loadu_pd(&x_vec[i]);
|
||||||
|
|
||||||
// Store results back to memory
|
__m256d prev_c = _mm256_loadu_pd(&c_diag[i - 1]);
|
||||||
_mm256_storeu_pd(&c_diag[i], c_diag_avx);
|
__m256d prev_x = _mm256_loadu_pd(&x_vec[i - 1]);
|
||||||
_mm256_storeu_pd(&x_vec[i], x_vec_avx);
|
|
||||||
|
__m256d tmp = _mm256_sub_pd(b, _mm256_mul_pd(a, prev_c));
|
||||||
|
|
||||||
|
c = _mm256_div_pd(c, tmp);
|
||||||
|
x = _mm256_div_pd(_mm256_sub_pd(x, _mm256_mul_pd(a, prev_x)), tmp);
|
||||||
|
|
||||||
|
_mm256_storeu_pd(&c_diag[i], c);
|
||||||
|
_mm256_storeu_pd(&x_vec[i], x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remainder: Handle elements that are not multiples of 4
|
||||||
|
for (Eigen::Index i = n - (n%4); i < n; i++) {
|
||||||
|
c_diag[i] /= b_diag[i] - a_diag[i] * c_diag[i - 1];
|
||||||
|
x_vec[i] = (x_vec[i] - a_diag[i] * x_vec[i - 1]) / (b_diag[i] - a_diag[i] * c_diag[i - 1]);
|
||||||
|
}
|
||||||
|
//std::cout << x_vec[0] << std::endl;
|
||||||
|
x_vec[n] = (x_vec[n] - a_diag[n] * x_vec[n - 1]) /
|
||||||
|
(b_diag[n] - a_diag[n] * c_diag[n - 1]);
|
||||||
|
|
||||||
// Perform final back substitution
|
// Perform final back substitution
|
||||||
for (Eigen::Index i = n; i-- > 0;) {
|
for (Eigen::Index i = n; i-- > 0;) {
|
||||||
x_vec[i] -= c_diag[i] * x_vec[i + 1];
|
x_vec[i] -= c_diag[i] * x_vec[i + 1];
|
||||||
@ -501,11 +516,22 @@ void BTCS_LU(Grid<T> &grid, Boundary<T> &bc, T timestep, int numThreads) {
|
|||||||
|
|
||||||
// entry point for Thomas algorithm solver; differentiate 1D and 2D grid
|
// entry point for Thomas algorithm solver; differentiate 1D and 2D grid
|
||||||
template <class T>
|
template <class T>
|
||||||
void BTCS_Thomas(Grid<T> &grid, Boundary<T> &bc, T timestep, int numThreads) {
|
void BTCS_Thomas(Grid<T> &grid, Boundary<T> &bc, T timestep, int numThreads, bool enableAVX) {
|
||||||
if (grid.getDim() == 1) {
|
if (grid.getDim() == 1) {
|
||||||
BTCS_1D(grid, bc, timestep, ThomasAlgorithm);
|
if(enableAVX) {
|
||||||
|
BTCS_1D(grid, bc, timestep, ThomasAlgorithmAVX);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
BTCS_1D(grid, bc, timestep, ThomasAlgorithm);
|
||||||
|
}
|
||||||
} else if (grid.getDim() == 2) {
|
} else if (grid.getDim() == 2) {
|
||||||
BTCS_2D(grid, bc, timestep, ThomasAlgorithm, numThreads);
|
if(enableAVX) {
|
||||||
|
BTCS_2D(grid, bc, timestep, ThomasAlgorithmAVX, numThreads);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
BTCS_2D(grid, bc, timestep, ThomasAlgorithm, numThreads);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
throw_invalid_argument(
|
throw_invalid_argument(
|
||||||
"Error: Only 1- and 2-dimensional grids are defined!");
|
"Error: Only 1- and 2-dimensional grids are defined!");
|
||||||
|
|||||||
@ -46,9 +46,9 @@ enum APPROACH {
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
enum SOLVER {
|
enum SOLVER {
|
||||||
EIGEN_LU_SOLVER, /*!< EigenLU solver */
|
EIGEN_LU_SOLVER, /*!< EigenLU solver */
|
||||||
THOMAS_ALGORITHM_SOLVER /*!< Thomas Algorithm solver; more efficient for
|
THOMAS_ALGORITHM_SOLVER /*!< Thomas Algorithm solver; more efficient for
|
||||||
tridiagonal matrices */
|
tridiagonal matrices */
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -82,6 +82,11 @@ enum TIME_MEASURE {
|
|||||||
TIME_MEASURE_ON /*!< print time measure after last iteration */
|
TIME_MEASURE_ON /*!< print time measure after last iteration */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum AVX {
|
||||||
|
AVX_ENABLED,
|
||||||
|
AVX_DISABLED
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The class forms the interface for performing the diffusion simulations
|
* @brief The class forms the interface for performing the diffusion simulations
|
||||||
* and contains all the methods for controlling the desired parameters, such as
|
* and contains all the methods for controlling the desired parameters, such as
|
||||||
@ -93,7 +98,8 @@ enum TIME_MEASURE {
|
|||||||
* @tparam solver Set the solver to be used
|
* @tparam solver Set the solver to be used
|
||||||
*/
|
*/
|
||||||
template <class T, APPROACH approach = BTCS_APPROACH,
|
template <class T, APPROACH approach = BTCS_APPROACH,
|
||||||
SOLVER solver = THOMAS_ALGORITHM_SOLVER>
|
SOLVER solver = THOMAS_ALGORITHM_SOLVER,
|
||||||
|
AVX enableAVX = AVX_DISABLED>
|
||||||
class Simulation {
|
class Simulation {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
@ -433,8 +439,12 @@ public:
|
|||||||
if (csv_output >= CSV_OUTPUT_VERBOSE) {
|
if (csv_output >= CSV_OUTPUT_VERBOSE) {
|
||||||
printConcentrationsCSV(filename);
|
printConcentrationsCSV(filename);
|
||||||
}
|
}
|
||||||
|
if constexpr (enableAVX == AVX_ENABLED) {
|
||||||
BTCS_Thomas(this->grid, this->bc, this->timestep, this->numThreads);
|
BTCS_Thomas(this->grid, this->bc, this->timestep, this->numThreads, true);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
BTCS_Thomas(this->grid, this->bc, this->timestep, this->numThreads, false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.29)
|
||||||
find_library(DOCTEST_LIB doctest)
|
find_library(DOCTEST_LIB doctest)
|
||||||
|
|
||||||
if(NOT DOCTEST_LIB)
|
if(NOT DOCTEST_LIB)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user