poet/include/highfive/bits/H5Slice_traits.hpp

579 lines
19 KiB
C++

/*
* Copyright (c), 2017, Adrien Devresse <adrien.devresse@epfl.ch>
*
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*
*/
#pragma once
#include <cstdlib>
#include <vector>
#include "H5_definitions.hpp"
#include "H5Utils.hpp"
#include "convert_size_vector.hpp"
#include "../H5PropertyList.hpp"
#include "h5s_wrapper.hpp"
namespace HighFive {
class ElementSet {
public:
///
/// \brief Create a list of points of N-dimension for selection.
///
/// \param list List of continuous coordinates (e.g.: in 2 dimensions space
/// `ElementSet{1, 2, 3 ,4}` creates points `(1, 2)` and `(3, 4)`).
ElementSet(std::initializer_list<std::size_t> list);
///
/// \brief Create a list of points of N-dimension for selection.
///
/// \param list List of N-dim points.
ElementSet(std::initializer_list<std::vector<std::size_t>> list);
///
/// \brief Create a list of points of N-dimension for selection.
///
/// \param element_ids List of continuous coordinates (e.g.: in 2 dimensions space
/// `ElementSet{1, 2, 3 ,4}` creates points `(1, 2)` and `(3, 4)`).
explicit ElementSet(const std::vector<std::size_t>& element_ids);
///
/// \brief Create a list of points of N-dimension for selection.
///
/// \param element_ids List of N-dim points.
explicit ElementSet(const std::vector<std::vector<std::size_t>>& element_ids);
private:
std::vector<std::size_t> _ids;
template <typename Derivate>
friend class SliceTraits;
};
inline std::vector<hsize_t> toHDF5SizeVector(const std::vector<size_t>& from) {
return detail::convertSizeVector<hsize_t>(from);
}
inline std::vector<size_t> toSTLSizeVector(const std::vector<hsize_t>& from) {
return detail::convertSizeVector<size_t>(from);
}
struct RegularHyperSlab {
RegularHyperSlab() = default;
explicit RegularHyperSlab(const std::vector<size_t>& offset_,
const std::vector<size_t>& count_ = {},
const std::vector<size_t>& stride_ = {},
const std::vector<size_t>& block_ = {})
: offset(toHDF5SizeVector(offset_))
, count(toHDF5SizeVector(count_))
, stride(toHDF5SizeVector(stride_))
, block(toHDF5SizeVector(block_)) {}
static RegularHyperSlab fromHDF5Sizes(std::vector<hsize_t> offset_,
std::vector<hsize_t> count_ = {},
std::vector<hsize_t> stride_ = {},
std::vector<hsize_t> block_ = {}) {
RegularHyperSlab slab;
slab.offset = std::move(offset_);
slab.count = std::move(count_);
slab.stride = std::move(stride_);
slab.block = std::move(block_);
return slab;
}
size_t rank() const {
return std::max(std::max(offset.size(), count.size()),
std::max(stride.size(), block.size()));
}
/// Dimensions when all gaps are removed.
std::vector<size_t> packedDims() const {
auto n_dims = rank();
auto dims = std::vector<size_t>(n_dims, 0);
for (size_t i = 0; i < n_dims; ++i) {
dims[i] = count[i] * (block.empty() ? 1 : block[i]);
}
return dims;
}
std::vector<hsize_t> offset;
std::vector<hsize_t> count;
std::vector<hsize_t> stride;
std::vector<hsize_t> block;
};
class HyperSlab {
public:
HyperSlab() {
selects.emplace_back(RegularHyperSlab{}, Op::None);
};
explicit HyperSlab(const RegularHyperSlab& sel) {
selects.emplace_back(sel, Op::Set);
}
HyperSlab operator|(const RegularHyperSlab& sel) const {
auto ret = *this;
ret |= sel;
return ret;
}
HyperSlab& operator|=(const RegularHyperSlab& sel) {
selects.emplace_back(sel, Op::Or);
return *this;
}
HyperSlab operator&(const RegularHyperSlab& sel) const {
auto ret = *this;
ret &= sel;
return ret;
}
HyperSlab& operator&=(const RegularHyperSlab& sel) {
selects.emplace_back(sel, Op::And);
return *this;
}
HyperSlab operator^(const RegularHyperSlab& sel) const {
auto ret = *this;
ret ^= sel;
return ret;
}
HyperSlab& operator^=(const RegularHyperSlab& sel) {
selects.emplace_back(sel, Op::Xor);
return *this;
}
HyperSlab& notA(const RegularHyperSlab& sel) {
selects.emplace_back(sel, Op::NotA);
return *this;
}
HyperSlab& notB(const RegularHyperSlab& sel) {
selects.emplace_back(sel, Op::NotB);
return *this;
}
DataSpace apply(const DataSpace& space_) const {
return apply_impl(space_);
}
private:
enum class Op {
Noop,
Set,
Or,
And,
Xor,
NotB,
NotA,
Append,
Prepend,
Invalid,
None,
};
H5S_seloper_t convert(Op op) const {
switch (op) {
case Op::Noop:
return H5S_SELECT_NOOP;
case Op::Set:
return H5S_SELECT_SET;
case Op::Or:
return H5S_SELECT_OR;
case Op::And:
return H5S_SELECT_AND;
case Op::Xor:
return H5S_SELECT_XOR;
case Op::NotB:
return H5S_SELECT_NOTB;
case Op::NotA:
return H5S_SELECT_NOTA;
case Op::Append:
return H5S_SELECT_APPEND;
case Op::Prepend:
return H5S_SELECT_PREPEND;
case Op::Invalid:
return H5S_SELECT_INVALID;
default:
throw DataSpaceException("Invalid HyperSlab operation.");
}
}
struct Select_: public RegularHyperSlab {
Select_(const RegularHyperSlab& sel, Op op_)
: RegularHyperSlab(sel)
, op(op_) {}
Op op;
};
std::vector<Select_> selects;
protected:
DataSpace select_none(const DataSpace& outer_space) const {
auto space = outer_space.clone();
detail::h5s_select_none(space.getId());
return space;
}
void select_hyperslab(DataSpace& space, const Select_& sel) const {
detail::h5s_select_hyperslab(space.getId(),
convert(sel.op),
sel.offset.empty() ? nullptr : sel.offset.data(),
sel.stride.empty() ? nullptr : sel.stride.data(),
sel.count.empty() ? nullptr : sel.count.data(),
sel.block.empty() ? nullptr : sel.block.data());
}
#if H5_VERSION_GE(1, 10, 6)
/// The length of a stream of `Op::Or` starting at `begin`.
size_t detect_streak(Select_ const* begin, Select_ const* end, Op op) const {
assert(op == Op::Or);
auto const* it =
std::find_if(begin, end, [op](const Select_& sel) { return sel.op != op; });
return static_cast<size_t>(it - begin);
}
DataSpace combine_selections(const DataSpace& left_space,
Op op,
const DataSpace& right_space) const {
assert(op == Op::Or);
auto left_type = detail::h5s_get_select_type(left_space.getId());
auto right_type = detail::h5s_get_select_type(right_space.getId());
// Since HDF5 doesn't allow `combine_selections` with a None
// selection, we need to avoid the issue:
if (left_type == H5S_SEL_NONE) {
return right_space;
} else if (right_type == H5S_SEL_NONE) {
return left_space;
} else if (left_type == H5S_SEL_ALL) {
return left_space;
} else if (right_type == H5S_SEL_ALL) {
return right_space;
} else {
return detail::make_data_space(
detail::h5s_combine_select(left_space.getId(), convert(op), right_space.getId()));
}
}
/// Reduce a sequence of `Op::Or` efficiently.
///
/// The issue is that `H5Sselect_hyperslab` runs in time that linear of the
/// number of block in the existing selection. Therefore, a loop that adds
/// slab-by-slab has quadratic runtime in the number of slabs.
///
/// Fortunately, `H5Scombine_select` doesn't suffer from the same problem.
/// However, it's only available in 1.10.6 and newer.
///
/// The solution is to use divide-and-conquer to reduce (long) streaks of
/// `Op::Or` in what seems to be log-linear time.
DataSpace reduce_streak(const DataSpace& outer_space,
Select_ const* begin,
Select_ const* end,
Op op) const {
assert(op == Op::Or);
if (begin == end) {
throw std::runtime_error("Broken logic in 'DataSpace::reduce_streak'.");
}
std::ptrdiff_t distance = end - begin;
if (distance == 1) {
auto space = select_none(outer_space);
select_hyperslab(space, *begin);
return space;
}
Select_ const* mid = begin + distance / 2;
auto right_space = reduce_streak(outer_space, begin, mid, op);
auto left_space = reduce_streak(outer_space, mid, end, op);
return combine_selections(left_space, op, right_space);
}
DataSpace apply_impl(const DataSpace& space_) const {
auto space = space_.clone();
auto n_selects = selects.size();
for (size_t i = 0; i < n_selects; ++i) {
auto const* const begin = selects.data() + i;
auto const* const end = selects.data() + n_selects;
auto n_ors = detect_streak(begin, end, Op::Or);
if (n_ors > 1) {
auto right_space = reduce_streak(space_, begin, begin + n_ors, Op::Or);
space = combine_selections(space, Op::Or, right_space);
i += n_ors - 1;
} else if (selects[i].op == Op::None) {
detail::h5s_select_none(space.getId());
} else {
select_hyperslab(space, selects[i]);
}
}
return space;
}
#else
DataSpace apply_impl(const DataSpace& space_) const {
auto space = space_.clone();
for (const auto& sel: selects) {
if (sel.op == Op::None) {
detail::h5s_select_none(space.getId());
} else {
select_hyperslab(space, sel);
}
}
return space;
}
#endif
};
///
/// \brief Selects the Cartesian product of slices.
///
/// Given a one-dimensional dataset one might want to select the union of
/// multiple, non-overlapping slices. For example,
///
/// using Slice = std::array<size_t, 2>;
/// using Slices = std::vector<Slice>;
/// auto slices = Slices{{0, 2}, {4, 10}};
/// dset.select(ProductSet(slices);
///
/// to select elements `0`, `1` and `4`, ..., `9` (inclusive).
///
/// For a two-dimensional array one which to select the row specified above,
/// but only columns `2`, `3` and `4`:
///
/// dset.select(ProductSet(slices, Slice{2, 5}));
/// // Analogues with the roles of columns and rows reversed:
/// dset.select(ProductSet(Slice{2, 5}, slices));
///
/// One can generalize once more and allow the unions of slices in both x- and
/// y-dimension:
///
/// auto yslices = Slices{{1, 5}, {7, 8}};
/// auto xslices = Slices{{0, 3}, {6, 8}};
/// dset.select(ProductSet(yslices, xslices));
///
/// which selects the following from a 11x8 dataset:
///
/// . . . . . . . .
/// x x x . . . x x
/// x x x . . . x x
/// x x x . . . x x
/// x x x . . . x x
/// . . . . . . . .
/// . . . . . . . .
/// x x x . . . x x
/// . . . . . . . .
/// . . . . . . . .
/// . . . . . . . .
///
/// Final twist, the selection along and axis may be discrete indices, from
/// which a vector of, possibly single-element, slices can be constructed. The
/// corresponding types are `std::vector<size_t>` and `size_t` for multiple or
/// just a single values. Note, looping over rows or columns one-by-one can be
/// a very serious performance problem. In particular,
///
/// // Avoid:
/// for(auto i : indices) {
/// dset.select(i).read<double>();
/// }
///
/// // Use:
/// std::vector<size_t> tmp(indices.begin(), indices.end());
/// dset.select(tmp).read<std::vector<double>>();
///
/// obvious omit the copy if `indices` already has the correct type.
///
/// The solution works analogous in higher dimensions. A selection `sk` along
/// axis `k` can be interpreted as a subset `S_k` of the natural numbers. The
/// index `i` is in `S_k` if it's selected by `sk`. The `ProductSet` of `s0`,
/// ..., `sN` selects the Cartesian product `S_0 x ... x S_N`.
///
/// Note that the selections along each axis must be sorted and non-overlapping.
///
/// \since 3.0
class ProductSet {
public:
template <class... Slices>
explicit ProductSet(const Slices&... slices);
private:
HyperSlab slab;
std::vector<size_t> shape;
template <typename Derivate>
friend class SliceTraits;
};
template <typename Derivate>
class SliceTraits {
public:
///
/// \brief Select an \p hyper_slab in the current Slice/Dataset.
///
/// HyperSlabs can be either regular or irregular. Irregular hyperslabs are typically generated
/// by taking the union of regular hyperslabs. An irregular hyperslab, in general, does not fit
/// nicely into a multi-dimensional array, but only a subset of such an array.
///
/// Therefore, the only memspaces supported for general hyperslabs are one-dimensional arrays.
Selection select(const HyperSlab& hyper_slab) const;
///
/// \brief Select an \p hyper_slab in the current Slice/Dataset.
///
/// If the selection can be read into a simple, multi-dimensional dataspace,
/// then this overload enable specifying the shape of the memory dataspace
/// with `memspace`. Note, that simple implies no offsets, strides or
/// number of blocks, just the size of the block in each dimension.
Selection select(const HyperSlab& hyper_slab, const DataSpace& memspace) const;
///
/// \brief Select a region in the current Slice/Dataset of \p count points at
/// \p offset separated by \p stride. If strides are not provided they will
/// default to 1 in all dimensions.
///
/// vector offset and count have to be from the same dimension
///
Selection select(const std::vector<size_t>& offset,
const std::vector<size_t>& count,
const std::vector<size_t>& stride = {},
const std::vector<size_t>& block = {}) const;
///
/// \brief Select a set of columns in the last dimension of this dataset.
///
/// The column indices must be smaller than the dimension size.
///
Selection select(const std::vector<size_t>& columns) const;
///
/// \brief Select a region in the current Slice/Dataset out of a list of elements.
///
Selection select(const ElementSet& elements) const;
///
/// \brief Select a region consisting of a product of slices.
///
/// \since 3.0
///
Selection select(const ProductSet& product_set) const;
template <typename T>
T read(const DataTransferProps& xfer_props = DataTransferProps()) const;
///
/// Read the entire dataset into a buffer
///
/// An exception is raised is if the numbers of dimension of the buffer and
/// of the dataset are different.
///
/// The array type can be a N-pointer or a N-vector. For plain pointers
/// not dimensionality checking will be performed, it is the user's
/// responsibility to ensure that the right amount of space has been
/// allocated.
template <typename T>
void read(T& array, const DataTransferProps& xfer_props = DataTransferProps()) const;
///
/// Read the entire dataset into a raw buffer
///
/// No dimensionality checks will be performed, it is the user's
/// responsibility to ensure that the right amount of space has been
/// allocated.
/// \param array: A buffer containing enough space for the data
/// \param mem_datatype: The type of the data in memory. This prevents deducing it from T.
/// \param xfer_props: Data Transfer properties
template <typename T>
void read_raw(T* array,
const DataType& mem_datatype,
const DataTransferProps& xfer_props = DataTransferProps()) const;
///
/// Read the entire dataset into a raw buffer
///
/// Same as `read(T*, const DataType&, const DataTransferProps&)`. However,
/// this overload deduces the HDF5 datatype of the element of `array` from
/// `T`. Note, that the file datatype is already fixed.
///
/// \param array: A buffer containing enough space for the data
/// \param xfer_props: Data Transfer properties
template <typename T>
void read_raw(T* array, const DataTransferProps& xfer_props = DataTransferProps()) const;
///
/// Write the integrality N-dimension buffer to this dataset
/// An exception is raised is if the numbers of dimension of the buffer and
/// of the dataset are different
///
/// The array type can be a N-pointer or a N-vector ( e.g int** integer two
/// dimensional array )
template <typename T>
void write(const T& buffer, const DataTransferProps& xfer_props = DataTransferProps());
///
/// Write from a raw pointer into this dataset.
///
/// No dimensionality checks will be performed, it is the user's
/// responsibility to ensure that the buffer holds the right amount of
/// elements. For n-dimensional matrices the buffer layout follows H5
/// default conventions.
///
/// Note, this is the shallowest wrapper around `H5Dwrite` and should
/// be used if full control is needed. Generally prefer `write`.
///
/// \param buffer: A buffer containing the data to be written
/// \param dtype: The datatype of `buffer`, i.e. the memory data type.
/// \param xfer_props: The HDF5 data transfer properties, e.g. collective MPI-IO.
template <typename T>
void write_raw(const T* buffer,
const DataType& mem_datatype,
const DataTransferProps& xfer_props = DataTransferProps());
///
/// Write from a raw pointer into this dataset.
///
/// Same as `write_raw(const T*, const DataTransferProps&)`. However, this
/// overload attempts to guess the data type of `buffer`, i.e. the memory
/// datatype. Note that the file datatype is already fixed.
///
template <typename T>
void write_raw(const T* buffer, const DataTransferProps& xfer_props = DataTransferProps());
///
/// \brief Return a `Selection` with `axes` squeezed from the memspace.
///
/// Returns a selection in which the memspace has been modified
/// to not include the axes listed in `axes`.
///
/// Throws if any axis to be squeezes has a dimension other than `1`.
///
/// \since 3.0
Selection squeezeMemSpace(const std::vector<size_t>& axes) const;
///
/// \brief Return a `Selection` with a simple memspace with `dims`.
///
/// Returns a selection in which the memspace has been modified
/// to be a simple dataspace with dimensions `dims`.
///
/// Throws if the number of elements changes.
///
/// \since 3.0
Selection reshapeMemSpace(const std::vector<size_t>& dims) const;
};
} // namespace HighFive