I always have thought and known that multidimensional arrays to which indexing is done only once by multiplication is faster than arrays of arrays to which indexing is done by two pointer dereferencing, due to better locality and space saving.
I ran a small test a while ago, and the result was quite surprising. At least my callgrind profiler reported that the same function using array of arrays run slightly faster.
I wonder whether I should change the definition of my matrix class to use an array of arrays internally. This class is used virtually everywhere in my simulation engine (? not exactly sure how to call..), and I do want to find the best way to save a few seconds.
test_matrix has the cost of 350 200 020 and test_array_array has the cost of 325 200 016. The code was compiled with -O3 by clang++. All member functions are inlined according to the profiler.
#include <iostream>
#include <memory>
template<class T>
class BasicArray : public std::unique_ptr<T[]> {
public:
BasicArray() = default;
BasicArray(std::size_t);
};
template<class T>
BasicArray<T>::BasicArray(std::size_t size)
: std::unique_ptr<T[]>(new T[size]) {}
template<class T>
class Matrix : public BasicArray<T> {
public:
Matrix() = default;
Matrix(std::size_t, std::size_t);
T &operator()(std::size_t, std::size_t) const;
std::size_t get_index(std::size_t, std::size_t) const;
std::size_t get_size(std::size_t) const;
private:
std::size_t sizes[2];
};
template<class T>
Matrix<T>::Matrix(std::size_t i, std::size_t j)
: BasicArray<T>(i * j)
, sizes {i, j} {}
template<class T>
T &Matrix<T>::operator()(std::size_t i, std::size_t j) const {
return (*this)[get_index(i, j)];
}
template<class T>
std::size_t Matrix<T>::get_index(std::size_t i, std::size_t j) const {
return i * get_size(2) + j;
}
template<class T>
std::size_t Matrix<T>::get_size(std::size_t d) const {
return sizes[d - 1];
}
template<class T>
class Array : public BasicArray<T> {
public:
Array() = default;
Array(std::size_t);
std::size_t get_size() const;
private:
std::size_t size;
};
template<class T>
Array<T>::Array(std::size_t size)
: BasicArray<T>(size)
, size(size) {}
template<class T>
std::size_t Array<T>::get_size() const {
return size;
}
static void __attribute__((noinline)) test_matrix(const Matrix<int> &m) {
for (std::size_t i = 0; i < m.get_size(1); ++i) {
for (std::size_t j = 0; j < m.get_size(2); ++j) {
static_cast<volatile void>(m(i, j) = i + j);
}
}
}
static void __attribute__((noinline))
test_array_array(const Array<Array<int>> &aa) {
for (std::size_t i = 0; i < aa.get_size(); ++i) {
for (std::size_t j = 0; j < aa[0].get_size(); ++j) {
static_cast<volatile void>(aa[i][j] = i + j);
}
}
}
int main() {
constexpr int N = 1000;
Matrix<int> m(N, N);
Array<Array<int>> aa(N);
for (std::size_t i = 0; i < aa.get_size(); ++i) {
aa[i] = Array<int>(N);
}
test_matrix(m);
test_array_array(aa);
}