Skip to content

Commit 928b269

Browse files
feat(core): Use NUMA-aware allocator for tile data
1 parent 1977eea commit 928b269

3 files changed

Lines changed: 89 additions & 19 deletions

File tree

core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set(SOURCE_FILES
1313
src/utils.cpp
1414
src/performance_counters.cpp
1515
src/target.cpp
16+
src/tile_data.cpp
1617
src/kernels.cpp
1718
src/hyperparameters.cpp
1819
src/cpu/gp_functions.cpp

core/include/gprat/tile_data.hpp

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,59 @@
44
#pragma once
55

66
#include "gprat/detail/config.hpp"
7-
#include "gprat/performance_counters.hpp"
87

98
#include <hpx/serialization/serialize_buffer.hpp>

#include <cstddef>
#include <limits>
#include <new>
#include <span>
1110

1211
GPRAT_NS_BEGIN
1312

13+
namespace detail
{
/**
 * @brief Allocates raw, untyped storage for tile data.
 *
 * @param num_bytes Number of bytes to allocate.
 * @return Pointer to the storage. tile_data_allocator::allocate() treats a
 *         null result as an allocation failure.
 */
void *allocate_tile_data(std::size_t num_bytes);

/**
 * @brief Releases storage for tile data.
 *
 * @param p         Pointer to the storage to release.
 * @param num_bytes Size of the storage in bytes, as requested on allocation.
 */
void deallocate_tile_data(void *p, std::size_t num_bytes);

/**
 * @brief Minimal stateless allocator that routes all tile storage requests
 *        through allocate_tile_data() / deallocate_tile_data().
 *
 * The allocator carries no state, so all instances (for any value type) are
 * interchangeable and compare equal.
 */
template <class T>
struct tile_data_allocator
{
    using value_type = T;

    tile_data_allocator() = default;

    // Rebinding constructor: nothing to copy because the allocator is stateless.
    template <class U>
    constexpr tile_data_allocator(const tile_data_allocator<U> &) noexcept
    { }

    /**
     * @brief Allocates uninitialized storage for @p n objects of type T.
     *
     * @param n Number of objects.
     * @return Pointer to the storage (never null).
     * @throws std::bad_array_new_length if n * sizeof(T) would overflow std::size_t.
     * @throws std::bad_alloc if allocate_tile_data() returns a null pointer.
     */
    [[nodiscard]] T *allocate(std::size_t n)
    {
        // Reject element counts whose byte size cannot be represented.
        if (n > (std::numeric_limits<std::size_t>::max)() / sizeof(T))
        {
            throw std::bad_array_new_length();
        }

        if (auto p = static_cast<T *>(allocate_tile_data(n * sizeof(T))))
        {
            return p;
        }

        throw std::bad_alloc();
    }

    /**
     * @brief Releases storage for @p n objects of type T.
     *
     * @param p Pointer to the storage to release.
     * @param n Number of objects; forwarded as a byte count.
     */
    void deallocate(T *p, std::size_t n) noexcept { deallocate_tile_data(p, n * sizeof(T)); }
};

/// Stateless allocators are always interchangeable, hence always equal.
template <class T, class U>
constexpr bool operator==(const tile_data_allocator<T> &, const tile_data_allocator<U> &) noexcept
{
    return true;
}

/// Counterpart of operator==: two tile_data_allocator instances never differ.
template <class T, class U>
constexpr bool operator!=(const tile_data_allocator<T> &, const tile_data_allocator<U> &) noexcept
{
    return false;
}
}  // namespace detail
59+
1460
/**
1561
* @brief Non-mutable reference-counted dynamic array of a given type T.
1662
* This class represents a simple reference-counted non-resizeable buffer with elements of type T.
@@ -25,7 +71,7 @@ template <typename T>
2571
class const_tile_data
2672
{
2773
protected:
28-
typedef hpx::serialization::serialize_buffer<T> cpu_buffer_type;
74+
typedef hpx::serialization::serialize_buffer<T, detail::tile_data_allocator<T>> cpu_buffer_type;
2975

3076
struct hold_reference
3177
{
@@ -38,25 +84,12 @@ class const_tile_data
3884
cpu_buffer_type data_;
3985
};
4086

41-
// In case we want pooling down the road...
42-
static T *allocate(std::size_t n)
43-
{
44-
track_tile_data_allocation(n);
45-
return new T[n];
46-
}
47-
48-
static void deallocate(T *p) noexcept
49-
{
50-
track_tile_data_deallocation(0); // we don't know here
51-
delete[] p;
52-
}
53-
5487
public:
5588
const_tile_data() = default;
5689

5790
// Create a new (uninitialized) tile_data of the given size.
5891
explicit const_tile_data(std::size_t size) :
59-
cpu_data_(allocate(size), size, cpu_buffer_type::take, &const_tile_data::deallocate)
92+
cpu_data_(size)
6093
{ }
6194

6295
// Create a tile_data which acts as a proxy to a part of the embedded array.
@@ -85,10 +118,12 @@ class const_tile_data
85118
return { cpu_data_.data(), cpu_data_.size() };
86119
}
87120

121+
friend bool operator==(const const_tile_data &a, const const_tile_data &b) noexcept
122+
{
123+
return a.cpu_data_ == b.cpu_data_;
124+
}
125+
88126
protected:
89-
// Serialization support: even if all of the code below runs on one
90-
// locality only, we need to provide an (empty) implementation for the
91-
// serialization as all arguments passed to actions have to support this.
92127
friend class hpx::serialization::access;
93128

94129
template <typename Archive>

core/src/tile_data.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include "gprat/tile_data.hpp"
2+
3+
#include "gprat/performance_counters.hpp"
4+
5+
#include <hpx/runtime_local/runtime_local.hpp>
6+
7+
GPRAT_NS_BEGIN
8+
9+
namespace detail
10+
{
11+
12+
void *allocate_tile_data(std::size_t num_bytes)
13+
{
14+
auto &topology = hpx::get_runtime().get_topology();
15+
const auto bitmap = topology.cpuset_to_nodeset(topology.get_machine_affinity_mask());
16+
17+
track_tile_data_allocation(num_bytes);
18+
return topology.allocate_membind(num_bytes, bitmap, hpx::threads::hpx_hwloc_membind_policy::membind_firsttouch, 0);
19+
}
20+
21+
void deallocate_tile_data(void *p, std::size_t num_bytes)
22+
{
23+
track_tile_data_deallocation(num_bytes);
24+
25+
if (hpx::is_running())
26+
{
27+
auto &topology = hpx::get_runtime().get_topology();
28+
topology.deallocate(p, num_bytes);
29+
}
30+
}
31+
32+
} // namespace detail
33+
34+
GPRAT_NS_END

0 commit comments

Comments
 (0)