mirror of
https://git.gfz-potsdam.de/naaice/poet.git
synced 2025-12-16 12:54:50 +01:00
579 lines
19 KiB
C++
579 lines
19 KiB
C++
/*
|
|
* Copyright (c), 2017, Adrien Devresse <adrien.devresse@epfl.ch>
|
|
*
|
|
* Distributed under the Boost Software License, Version 1.0.
|
|
* (See accompanying file LICENSE_1_0.txt or copy at
|
|
* http://www.boost.org/LICENSE_1_0.txt)
|
|
*
|
|
*/
|
|
#pragma once
|
|
|
|
#include <cstdlib>
|
|
#include <vector>
|
|
|
|
#include "H5_definitions.hpp"
|
|
#include "H5Utils.hpp"
|
|
#include "convert_size_vector.hpp"
|
|
|
|
#include "../H5PropertyList.hpp"
|
|
#include "h5s_wrapper.hpp"
|
|
|
|
namespace HighFive {
|
|
|
|
class ElementSet {
|
|
public:
|
|
///
|
|
/// \brief Create a list of points of N-dimension for selection.
|
|
///
|
|
/// \param list List of continuous coordinates (e.g.: in 2 dimensions space
|
|
/// `ElementSet{1, 2, 3 ,4}` creates points `(1, 2)` and `(3, 4)`).
|
|
ElementSet(std::initializer_list<std::size_t> list);
|
|
///
|
|
/// \brief Create a list of points of N-dimension for selection.
|
|
///
|
|
/// \param list List of N-dim points.
|
|
ElementSet(std::initializer_list<std::vector<std::size_t>> list);
|
|
///
|
|
/// \brief Create a list of points of N-dimension for selection.
|
|
///
|
|
/// \param element_ids List of continuous coordinates (e.g.: in 2 dimensions space
|
|
/// `ElementSet{1, 2, 3 ,4}` creates points `(1, 2)` and `(3, 4)`).
|
|
explicit ElementSet(const std::vector<std::size_t>& element_ids);
|
|
///
|
|
/// \brief Create a list of points of N-dimension for selection.
|
|
///
|
|
/// \param element_ids List of N-dim points.
|
|
explicit ElementSet(const std::vector<std::vector<std::size_t>>& element_ids);
|
|
|
|
private:
|
|
std::vector<std::size_t> _ids;
|
|
|
|
template <typename Derivate>
|
|
friend class SliceTraits;
|
|
};
|
|
|
|
inline std::vector<hsize_t> toHDF5SizeVector(const std::vector<size_t>& from) {
|
|
return detail::convertSizeVector<hsize_t>(from);
|
|
}
|
|
|
|
inline std::vector<size_t> toSTLSizeVector(const std::vector<hsize_t>& from) {
|
|
return detail::convertSizeVector<size_t>(from);
|
|
}
|
|
|
|
struct RegularHyperSlab {
|
|
RegularHyperSlab() = default;
|
|
|
|
explicit RegularHyperSlab(const std::vector<size_t>& offset_,
|
|
const std::vector<size_t>& count_ = {},
|
|
const std::vector<size_t>& stride_ = {},
|
|
const std::vector<size_t>& block_ = {})
|
|
: offset(toHDF5SizeVector(offset_))
|
|
, count(toHDF5SizeVector(count_))
|
|
, stride(toHDF5SizeVector(stride_))
|
|
, block(toHDF5SizeVector(block_)) {}
|
|
|
|
static RegularHyperSlab fromHDF5Sizes(std::vector<hsize_t> offset_,
|
|
std::vector<hsize_t> count_ = {},
|
|
std::vector<hsize_t> stride_ = {},
|
|
std::vector<hsize_t> block_ = {}) {
|
|
RegularHyperSlab slab;
|
|
slab.offset = std::move(offset_);
|
|
slab.count = std::move(count_);
|
|
slab.stride = std::move(stride_);
|
|
slab.block = std::move(block_);
|
|
|
|
return slab;
|
|
}
|
|
|
|
size_t rank() const {
|
|
return std::max(std::max(offset.size(), count.size()),
|
|
std::max(stride.size(), block.size()));
|
|
}
|
|
|
|
/// Dimensions when all gaps are removed.
|
|
std::vector<size_t> packedDims() const {
|
|
auto n_dims = rank();
|
|
auto dims = std::vector<size_t>(n_dims, 0);
|
|
|
|
for (size_t i = 0; i < n_dims; ++i) {
|
|
dims[i] = count[i] * (block.empty() ? 1 : block[i]);
|
|
}
|
|
|
|
return dims;
|
|
}
|
|
|
|
std::vector<hsize_t> offset;
|
|
std::vector<hsize_t> count;
|
|
std::vector<hsize_t> stride;
|
|
std::vector<hsize_t> block;
|
|
};
|
|
|
|
class HyperSlab {
|
|
public:
|
|
HyperSlab() {
|
|
selects.emplace_back(RegularHyperSlab{}, Op::None);
|
|
};
|
|
|
|
explicit HyperSlab(const RegularHyperSlab& sel) {
|
|
selects.emplace_back(sel, Op::Set);
|
|
}
|
|
|
|
HyperSlab operator|(const RegularHyperSlab& sel) const {
|
|
auto ret = *this;
|
|
ret |= sel;
|
|
return ret;
|
|
}
|
|
|
|
HyperSlab& operator|=(const RegularHyperSlab& sel) {
|
|
selects.emplace_back(sel, Op::Or);
|
|
return *this;
|
|
}
|
|
|
|
HyperSlab operator&(const RegularHyperSlab& sel) const {
|
|
auto ret = *this;
|
|
ret &= sel;
|
|
return ret;
|
|
}
|
|
|
|
HyperSlab& operator&=(const RegularHyperSlab& sel) {
|
|
selects.emplace_back(sel, Op::And);
|
|
return *this;
|
|
}
|
|
|
|
HyperSlab operator^(const RegularHyperSlab& sel) const {
|
|
auto ret = *this;
|
|
ret ^= sel;
|
|
return ret;
|
|
}
|
|
|
|
HyperSlab& operator^=(const RegularHyperSlab& sel) {
|
|
selects.emplace_back(sel, Op::Xor);
|
|
return *this;
|
|
}
|
|
|
|
HyperSlab& notA(const RegularHyperSlab& sel) {
|
|
selects.emplace_back(sel, Op::NotA);
|
|
return *this;
|
|
}
|
|
|
|
HyperSlab& notB(const RegularHyperSlab& sel) {
|
|
selects.emplace_back(sel, Op::NotB);
|
|
return *this;
|
|
}
|
|
|
|
DataSpace apply(const DataSpace& space_) const {
|
|
return apply_impl(space_);
|
|
}
|
|
|
|
private:
|
|
enum class Op {
|
|
Noop,
|
|
Set,
|
|
Or,
|
|
And,
|
|
Xor,
|
|
NotB,
|
|
NotA,
|
|
Append,
|
|
Prepend,
|
|
Invalid,
|
|
None,
|
|
};
|
|
|
|
H5S_seloper_t convert(Op op) const {
|
|
switch (op) {
|
|
case Op::Noop:
|
|
return H5S_SELECT_NOOP;
|
|
case Op::Set:
|
|
return H5S_SELECT_SET;
|
|
case Op::Or:
|
|
return H5S_SELECT_OR;
|
|
case Op::And:
|
|
return H5S_SELECT_AND;
|
|
case Op::Xor:
|
|
return H5S_SELECT_XOR;
|
|
case Op::NotB:
|
|
return H5S_SELECT_NOTB;
|
|
case Op::NotA:
|
|
return H5S_SELECT_NOTA;
|
|
case Op::Append:
|
|
return H5S_SELECT_APPEND;
|
|
case Op::Prepend:
|
|
return H5S_SELECT_PREPEND;
|
|
case Op::Invalid:
|
|
return H5S_SELECT_INVALID;
|
|
default:
|
|
throw DataSpaceException("Invalid HyperSlab operation.");
|
|
}
|
|
}
|
|
|
|
struct Select_: public RegularHyperSlab {
|
|
Select_(const RegularHyperSlab& sel, Op op_)
|
|
: RegularHyperSlab(sel)
|
|
, op(op_) {}
|
|
|
|
Op op;
|
|
};
|
|
|
|
std::vector<Select_> selects;
|
|
|
|
protected:
|
|
DataSpace select_none(const DataSpace& outer_space) const {
|
|
auto space = outer_space.clone();
|
|
detail::h5s_select_none(space.getId());
|
|
return space;
|
|
}
|
|
|
|
void select_hyperslab(DataSpace& space, const Select_& sel) const {
|
|
detail::h5s_select_hyperslab(space.getId(),
|
|
convert(sel.op),
|
|
sel.offset.empty() ? nullptr : sel.offset.data(),
|
|
sel.stride.empty() ? nullptr : sel.stride.data(),
|
|
sel.count.empty() ? nullptr : sel.count.data(),
|
|
sel.block.empty() ? nullptr : sel.block.data());
|
|
}
|
|
|
|
#if H5_VERSION_GE(1, 10, 6)
|
|
/// The length of a stream of `Op::Or` starting at `begin`.
|
|
size_t detect_streak(Select_ const* begin, Select_ const* end, Op op) const {
|
|
assert(op == Op::Or);
|
|
auto const* it =
|
|
std::find_if(begin, end, [op](const Select_& sel) { return sel.op != op; });
|
|
return static_cast<size_t>(it - begin);
|
|
}
|
|
|
|
DataSpace combine_selections(const DataSpace& left_space,
|
|
Op op,
|
|
const DataSpace& right_space) const {
|
|
assert(op == Op::Or);
|
|
|
|
auto left_type = detail::h5s_get_select_type(left_space.getId());
|
|
auto right_type = detail::h5s_get_select_type(right_space.getId());
|
|
|
|
// Since HDF5 doesn't allow `combine_selections` with a None
|
|
// selection, we need to avoid the issue:
|
|
if (left_type == H5S_SEL_NONE) {
|
|
return right_space;
|
|
} else if (right_type == H5S_SEL_NONE) {
|
|
return left_space;
|
|
} else if (left_type == H5S_SEL_ALL) {
|
|
return left_space;
|
|
} else if (right_type == H5S_SEL_ALL) {
|
|
return right_space;
|
|
} else {
|
|
return detail::make_data_space(
|
|
detail::h5s_combine_select(left_space.getId(), convert(op), right_space.getId()));
|
|
}
|
|
}
|
|
|
|
/// Reduce a sequence of `Op::Or` efficiently.
|
|
///
|
|
/// The issue is that `H5Sselect_hyperslab` runs in time that linear of the
|
|
/// number of block in the existing selection. Therefore, a loop that adds
|
|
/// slab-by-slab has quadratic runtime in the number of slabs.
|
|
///
|
|
/// Fortunately, `H5Scombine_select` doesn't suffer from the same problem.
|
|
/// However, it's only available in 1.10.6 and newer.
|
|
///
|
|
/// The solution is to use divide-and-conquer to reduce (long) streaks of
|
|
/// `Op::Or` in what seems to be log-linear time.
|
|
DataSpace reduce_streak(const DataSpace& outer_space,
|
|
Select_ const* begin,
|
|
Select_ const* end,
|
|
Op op) const {
|
|
assert(op == Op::Or);
|
|
|
|
if (begin == end) {
|
|
throw std::runtime_error("Broken logic in 'DataSpace::reduce_streak'.");
|
|
}
|
|
|
|
std::ptrdiff_t distance = end - begin;
|
|
if (distance == 1) {
|
|
auto space = select_none(outer_space);
|
|
select_hyperslab(space, *begin);
|
|
return space;
|
|
}
|
|
|
|
Select_ const* mid = begin + distance / 2;
|
|
auto right_space = reduce_streak(outer_space, begin, mid, op);
|
|
auto left_space = reduce_streak(outer_space, mid, end, op);
|
|
|
|
return combine_selections(left_space, op, right_space);
|
|
}
|
|
|
|
DataSpace apply_impl(const DataSpace& space_) const {
|
|
auto space = space_.clone();
|
|
auto n_selects = selects.size();
|
|
for (size_t i = 0; i < n_selects; ++i) {
|
|
auto const* const begin = selects.data() + i;
|
|
auto const* const end = selects.data() + n_selects;
|
|
|
|
auto n_ors = detect_streak(begin, end, Op::Or);
|
|
|
|
if (n_ors > 1) {
|
|
auto right_space = reduce_streak(space_, begin, begin + n_ors, Op::Or);
|
|
space = combine_selections(space, Op::Or, right_space);
|
|
i += n_ors - 1;
|
|
} else if (selects[i].op == Op::None) {
|
|
detail::h5s_select_none(space.getId());
|
|
} else {
|
|
select_hyperslab(space, selects[i]);
|
|
}
|
|
}
|
|
return space;
|
|
}
|
|
#else
|
|
DataSpace apply_impl(const DataSpace& space_) const {
|
|
auto space = space_.clone();
|
|
for (const auto& sel: selects) {
|
|
if (sel.op == Op::None) {
|
|
detail::h5s_select_none(space.getId());
|
|
} else {
|
|
select_hyperslab(space, sel);
|
|
}
|
|
}
|
|
return space;
|
|
}
|
|
#endif
|
|
};
|
|
|
|
///
|
|
/// \brief Selects the Cartesian product of slices.
|
|
///
|
|
/// Given a one-dimensional dataset one might want to select the union of
|
|
/// multiple, non-overlapping slices. For example,
|
|
///
|
|
/// using Slice = std::array<size_t, 2>;
|
|
/// using Slices = std::vector<Slice>;
|
|
/// auto slices = Slices{{0, 2}, {4, 10}};
|
|
/// dset.select(ProductSet(slices);
|
|
///
|
|
/// to select elements `0`, `1` and `4`, ..., `9` (inclusive).
|
|
///
|
|
/// For a two-dimensional array one which to select the row specified above,
|
|
/// but only columns `2`, `3` and `4`:
|
|
///
|
|
/// dset.select(ProductSet(slices, Slice{2, 5}));
|
|
/// // Analogues with the roles of columns and rows reversed:
|
|
/// dset.select(ProductSet(Slice{2, 5}, slices));
|
|
///
|
|
/// One can generalize once more and allow the unions of slices in both x- and
|
|
/// y-dimension:
|
|
///
|
|
/// auto yslices = Slices{{1, 5}, {7, 8}};
|
|
/// auto xslices = Slices{{0, 3}, {6, 8}};
|
|
/// dset.select(ProductSet(yslices, xslices));
|
|
///
|
|
/// which selects the following from a 11x8 dataset:
|
|
///
|
|
/// . . . . . . . .
|
|
/// x x x . . . x x
|
|
/// x x x . . . x x
|
|
/// x x x . . . x x
|
|
/// x x x . . . x x
|
|
/// . . . . . . . .
|
|
/// . . . . . . . .
|
|
/// x x x . . . x x
|
|
/// . . . . . . . .
|
|
/// . . . . . . . .
|
|
/// . . . . . . . .
|
|
///
|
|
/// Final twist, the selection along and axis may be discrete indices, from
|
|
/// which a vector of, possibly single-element, slices can be constructed. The
|
|
/// corresponding types are `std::vector<size_t>` and `size_t` for multiple or
|
|
/// just a single values. Note, looping over rows or columns one-by-one can be
|
|
/// a very serious performance problem. In particular,
|
|
///
|
|
/// // Avoid:
|
|
/// for(auto i : indices) {
|
|
/// dset.select(i).read<double>();
|
|
/// }
|
|
///
|
|
/// // Use:
|
|
/// std::vector<size_t> tmp(indices.begin(), indices.end());
|
|
/// dset.select(tmp).read<std::vector<double>>();
|
|
///
|
|
/// obvious omit the copy if `indices` already has the correct type.
|
|
///
|
|
/// The solution works analogous in higher dimensions. A selection `sk` along
|
|
/// axis `k` can be interpreted as a subset `S_k` of the natural numbers. The
|
|
/// index `i` is in `S_k` if it's selected by `sk`. The `ProductSet` of `s0`,
|
|
/// ..., `sN` selects the Cartesian product `S_0 x ... x S_N`.
|
|
///
|
|
/// Note that the selections along each axis must be sorted and non-overlapping.
|
|
///
|
|
/// \since 3.0
|
|
class ProductSet {
|
|
public:
|
|
template <class... Slices>
|
|
explicit ProductSet(const Slices&... slices);
|
|
|
|
private:
|
|
HyperSlab slab;
|
|
std::vector<size_t> shape;
|
|
|
|
template <typename Derivate>
|
|
friend class SliceTraits;
|
|
};
|
|
|
|
|
|
template <typename Derivate>
|
|
class SliceTraits {
|
|
public:
|
|
///
|
|
/// \brief Select an \p hyper_slab in the current Slice/Dataset.
|
|
///
|
|
/// HyperSlabs can be either regular or irregular. Irregular hyperslabs are typically generated
|
|
/// by taking the union of regular hyperslabs. An irregular hyperslab, in general, does not fit
|
|
/// nicely into a multi-dimensional array, but only a subset of such an array.
|
|
///
|
|
/// Therefore, the only memspaces supported for general hyperslabs are one-dimensional arrays.
|
|
Selection select(const HyperSlab& hyper_slab) const;
|
|
|
|
///
|
|
/// \brief Select an \p hyper_slab in the current Slice/Dataset.
|
|
///
|
|
/// If the selection can be read into a simple, multi-dimensional dataspace,
|
|
/// then this overload enable specifying the shape of the memory dataspace
|
|
/// with `memspace`. Note, that simple implies no offsets, strides or
|
|
/// number of blocks, just the size of the block in each dimension.
|
|
Selection select(const HyperSlab& hyper_slab, const DataSpace& memspace) const;
|
|
|
|
///
|
|
/// \brief Select a region in the current Slice/Dataset of \p count points at
|
|
/// \p offset separated by \p stride. If strides are not provided they will
|
|
/// default to 1 in all dimensions.
|
|
///
|
|
/// vector offset and count have to be from the same dimension
|
|
///
|
|
Selection select(const std::vector<size_t>& offset,
|
|
const std::vector<size_t>& count,
|
|
const std::vector<size_t>& stride = {},
|
|
const std::vector<size_t>& block = {}) const;
|
|
|
|
///
|
|
/// \brief Select a set of columns in the last dimension of this dataset.
|
|
///
|
|
/// The column indices must be smaller than the dimension size.
|
|
///
|
|
Selection select(const std::vector<size_t>& columns) const;
|
|
|
|
///
|
|
/// \brief Select a region in the current Slice/Dataset out of a list of elements.
|
|
///
|
|
Selection select(const ElementSet& elements) const;
|
|
|
|
///
|
|
/// \brief Select a region consisting of a product of slices.
|
|
///
|
|
/// \since 3.0
|
|
///
|
|
Selection select(const ProductSet& product_set) const;
|
|
|
|
template <typename T>
|
|
T read(const DataTransferProps& xfer_props = DataTransferProps()) const;
|
|
|
|
///
|
|
/// Read the entire dataset into a buffer
|
|
///
|
|
/// An exception is raised is if the numbers of dimension of the buffer and
|
|
/// of the dataset are different.
|
|
///
|
|
/// The array type can be a N-pointer or a N-vector. For plain pointers
|
|
/// not dimensionality checking will be performed, it is the user's
|
|
/// responsibility to ensure that the right amount of space has been
|
|
/// allocated.
|
|
template <typename T>
|
|
void read(T& array, const DataTransferProps& xfer_props = DataTransferProps()) const;
|
|
|
|
///
|
|
/// Read the entire dataset into a raw buffer
|
|
///
|
|
/// No dimensionality checks will be performed, it is the user's
|
|
/// responsibility to ensure that the right amount of space has been
|
|
/// allocated.
|
|
/// \param array: A buffer containing enough space for the data
|
|
/// \param mem_datatype: The type of the data in memory. This prevents deducing it from T.
|
|
/// \param xfer_props: Data Transfer properties
|
|
template <typename T>
|
|
void read_raw(T* array,
|
|
const DataType& mem_datatype,
|
|
const DataTransferProps& xfer_props = DataTransferProps()) const;
|
|
|
|
///
|
|
/// Read the entire dataset into a raw buffer
|
|
///
|
|
/// Same as `read(T*, const DataType&, const DataTransferProps&)`. However,
|
|
/// this overload deduces the HDF5 datatype of the element of `array` from
|
|
/// `T`. Note, that the file datatype is already fixed.
|
|
///
|
|
/// \param array: A buffer containing enough space for the data
|
|
/// \param xfer_props: Data Transfer properties
|
|
template <typename T>
|
|
void read_raw(T* array, const DataTransferProps& xfer_props = DataTransferProps()) const;
|
|
|
|
|
|
///
|
|
/// Write the integrality N-dimension buffer to this dataset
|
|
/// An exception is raised is if the numbers of dimension of the buffer and
|
|
/// of the dataset are different
|
|
///
|
|
/// The array type can be a N-pointer or a N-vector ( e.g int** integer two
|
|
/// dimensional array )
|
|
template <typename T>
|
|
void write(const T& buffer, const DataTransferProps& xfer_props = DataTransferProps());
|
|
|
|
///
|
|
/// Write from a raw pointer into this dataset.
|
|
///
|
|
/// No dimensionality checks will be performed, it is the user's
|
|
/// responsibility to ensure that the buffer holds the right amount of
|
|
/// elements. For n-dimensional matrices the buffer layout follows H5
|
|
/// default conventions.
|
|
///
|
|
/// Note, this is the shallowest wrapper around `H5Dwrite` and should
|
|
/// be used if full control is needed. Generally prefer `write`.
|
|
///
|
|
/// \param buffer: A buffer containing the data to be written
|
|
/// \param dtype: The datatype of `buffer`, i.e. the memory data type.
|
|
/// \param xfer_props: The HDF5 data transfer properties, e.g. collective MPI-IO.
|
|
template <typename T>
|
|
void write_raw(const T* buffer,
|
|
const DataType& mem_datatype,
|
|
const DataTransferProps& xfer_props = DataTransferProps());
|
|
|
|
///
|
|
/// Write from a raw pointer into this dataset.
|
|
///
|
|
/// Same as `write_raw(const T*, const DataTransferProps&)`. However, this
|
|
/// overload attempts to guess the data type of `buffer`, i.e. the memory
|
|
/// datatype. Note that the file datatype is already fixed.
|
|
///
|
|
template <typename T>
|
|
void write_raw(const T* buffer, const DataTransferProps& xfer_props = DataTransferProps());
|
|
|
|
///
|
|
/// \brief Return a `Selection` with `axes` squeezed from the memspace.
|
|
///
|
|
/// Returns a selection in which the memspace has been modified
|
|
/// to not include the axes listed in `axes`.
|
|
///
|
|
/// Throws if any axis to be squeezes has a dimension other than `1`.
|
|
///
|
|
/// \since 3.0
|
|
Selection squeezeMemSpace(const std::vector<size_t>& axes) const;
|
|
|
|
///
|
|
/// \brief Return a `Selection` with a simple memspace with `dims`.
|
|
///
|
|
/// Returns a selection in which the memspace has been modified
|
|
/// to be a simple dataspace with dimensions `dims`.
|
|
///
|
|
/// Throws if the number of elements changes.
|
|
///
|
|
/// \since 3.0
|
|
Selection reshapeMemSpace(const std::vector<size_t>& dims) const;
|
|
};
|
|
|
|
} // namespace HighFive
|