*
						
					
					
						
					
				
				
					Das Thema hatten wird im Forum schon ein paar Mal. Damit das für nicht so Erfahrene nachvollziehbar ist, habe ich einen Allokator geschrieben, der das Verhalten eines NUMA-Allokators simuliert. Es werden N gleich große Speicherblöcke alloziert und diese getrennt verwaltet, ganz so wie das auch die libnuma tut.
Die Speicherverwaltung in diesem Demo Allokator ist weder threadsafe noch ist das wirklich viel Aufwand in die effiziente Verwaltung der Speicherblöcke geflossen. Konstruktive Kritik ist also gern gesehen. Die Demo ist mangels Zeit auch noch nicht wirklich fertig, und kann mit doxygen generiert werden.
/*
 * NUMA-AllocatorSimulator.h
 *
 */
#ifndef NUMA_alloc_sim_H_
#define NUMA_alloc_sim_H_
#include <algorithm>
#include <iterator>
#include <cstddef>
#include <iostream>
#include <memory>
#include <new>
#include <vector>
#include <boost/core/noncopyable.hpp>
/// Chunks are used to store all informations for each allocation.
struct Chunk {
	size_t	size_;
	void*	p_;
	/// @param [in] s Size of memory [chunk](@ref Chunk).
	/// @param [in] pointer The pointer to the memory [chunk](@ref Chunk).
	Chunk (const size_t s, void* const pointer) : size_(s), p_(pointer) {}
};
/// @brief This function compares [Chunks](@ref Chunk) by their stored pointer address.
/// @param [in] ck1 Chunk
/// @param [in] ck2 Chunk
/// @return ck1.p_ < ck2.p_
bool operator< (const Chunk& ck1, const Chunk& ck2) {
	return (ck1.p_ < ck2.p_);
}
struct ChunkIndex {
	size_t	size_;
	size_t	index_;
};
/// @brief This function compares [ChunkIndexes](@ref ChunkIndex) by their stored sizes.
/// @param [in] ci1 ChunkIndex
/// @param [in] ci2 ChunkIndex
/// @return ci1.size_ < ci2.size_
bool operator< (const ChunkIndex& ci1, const ChunkIndex& ci2) {
	return ci1.size_ < ci2.size_;
}
namespace {
	using IT = std::vector<Chunk>::iterator;
}
/// @brief The class Node manages the allocations for a simulated NUMA node.
///
///        Each Node allocates a big block of memory, and manages simulated
///        NUMA allocations by itself. Each NUMA allocation is stored in
///        a [chunk](@ref Chunk), which contains the address and the size of
///        the NUMA allocation. Each used [chunk](@ref Chunk) is stored in the
///        vector used_, and each freed [chunk](@ref Chunk) ist stored in the
///        vector free_.
///
class Node : private boost::noncopyable {
	/// The size of the simulated NUMA node memory.
	size_t size_;
	std::vector<Chunk> free_;
	std::vector<Chunk> used_;
	/// The pointer to the allocated simulated NUMA memory.
	std::unique_ptr<char, decltype (&std::free)> p_;
public:
	/// The MemoryPool class allocates a array of Nodes, so we cannot initialize
	/// the instances of class Node directly via the constructor.
	/// @post Both vectors are default constructed size_ ist set to zero and p_ is a nullptr.
	/// 
	Node () : size_(0), free_({}), used_({}), p_(nullptr, free) {}
	/// @brief Allocates the memory for the simulated NUMA node.
	/// @param [in] size Size of the simulated NUMA memory space.
	/// @pre size_ is zero and p_ is a nullptr.
	/// @post The memory is allocated and setup of Node is completed.
	/// @exception If size is zero, or [node](@ref Node) is allready
	///            initialized, or if the memory block is to large, then
	///            std::bad_alloc is thrown.
	/// Or if the init function is called a second time, std::runtime_error is thrown.
	void init (const size_t size) {
		void* temp_pointer = nullptr;
		// check the pre conditions and if the malloc is successfull
		if (0 != size_ || 0 == size) throw std::bad_alloc();
		if (nullptr == (temp_pointer = malloc(size))) throw std::bad_alloc();
		// store pointer and other stuff to meet post conditions
		this->p_ = std::unique_ptr<char, decltype (&std::free)> (static_cast<char*>(temp_pointer), free);
		size_ = size;
		Chunk ck (size, temp_pointer);
		this->free_.push_back(ck);
	}
	/// \brief Helper function which generates a new pointer.
	/// \param [in] pointer The pointer to the allocated memory block.
	/// \param [in] size Size if allocated memory block.
	/// \return New pointer, which points to p+s.
	[[nodiscard]] void* generate_pointer_with_offset (void* const pointer, const size_t size) {
		char* new_pointer = static_cast<char*> (pointer);
		new_pointer += size;
		return static_cast<void*> (new_pointer);
	}
	[[nodiscard]] void* calculate_base_pointer (void* const pointer, const size_t size) {
		char* base_pointer = static_cast<char*> (pointer);
		base_pointer -= size;
		return static_cast<void*> (base_pointer);
	}
	/// @brief
	/// @param [in] size  
	/// @pre   free_ is sorted by size of chunk.size_ 
	/// @post 
	[[nodiscard]] void* allocate (const size_t size) {
		if (0 == size) return nullptr;
		if (size_ < size) throw std::bad_alloc();
		void* new_pointer = nullptr;
		bool found = false;
		const size_t end = free_.size();
		std::vector<Chunk>::size_type pos = 0;
		std::vector<ChunkIndex> freeIndex(free_.size());
		for (size_t i = 0; i < end; ++i) {
			freeIndex[i] = {free_[i].size_, i};
		}
		sort (freeIndex.begin(), freeIndex.end());
		for (size_t i = 0; i < end; ++i) {
			if (freeIndex[i].size_ >= size) {
				pos = i;
				found = true;
				break;
			} 
		}
		if (!found) throw std::bad_alloc();
		IT it = free_.begin() + pos;
		new_pointer = it->p_;
		Chunk ck {size, new_pointer};
		it->p_     = generate_pointer_with_offset (it->p_, size);
		it->size_ -= size;
		IT position = upper_bound (used_.begin(), used_.end(), ck);
		used_.insert (position, ck);
		if (0 == it->size_) free_.erase(it);
		std::cout << " p=" << new_pointer << "\n";
		return new_pointer;
	}
	/// @brief 
	/// @param [in,out] pointer
	/// @param [in] size
	/// @pre The pointer p must be poiting to memory allocated for node \p node. I.e. p must be in the nodes_.used_ vector.
	/// @post Test
	void deallocate (void* pointer, const size_t size) {
		if (nullptr == pointer) return;
		Chunk ck = {size, pointer};
		std::pair<IT,IT> pair = std::equal_range (used_.begin(), used_.end(), ck);
		if (pair.first == used_.end()) {
			throw std::runtime_error ("Node::deallocation failed");
		}
		used_.erase(pair.first);
		IT position = std::upper_bound (free_.begin(), free_.end(), ck);
		free_.insert (position, ck);
		// searching for Chunks which can be merged with this chunk
		recycle();
	}
	/// \brief This function 
	void recycle() {
		if (free_.size() <= 1) return;
		std::vector<Chunk> fs (free_);
		std::sort(fs.begin(), fs.end());
		const size_t e = fs.size();
		for (ssize_t i = (e - 2); i >= 0; --i) {
			char* p = static_cast<char*> (fs[i].p_);
			char* q = p + fs[i].size_;
			if (q == fs[i+1].p_) {
				fs[i].size_ += fs[i+1].size_;
				IT it = next(fs.begin(), (i+1));
				fs.erase(it);
			}
		}
		swap(fs, free_);
		sort(free_.begin(), free_.end());
	}
};
class MemoryPool : private boost::noncopyable {
private:
	int			n_nodes_;
	std::unique_ptr<Node[]>	nodes_;
public:
	~MemoryPool () = default;
	/// \brief This constructor 
	/// \param [in] number_nodes The number of NUMA nodes to simulate.
	/// \param [in] size The memory size of each simulated NUMA node.
	MemoryPool (const int number_nodes, const size_t size) : n_nodes_(number_nodes), nodes_(new Node[number_nodes]) {
		for (int i = 0; i < n_nodes_; ++i) {
			this->nodes_[i].init(size);
		}
	}
	/// \brief This function allocates @p n memory on simulated NUMA node @p node.
	/// \param [in] size
	/// \param [in] node
	/// \return Pointer to newly allocated memory.
	/// @exception If node does not reference an simulated NUMA node or
	[[nodiscard]] void* allocate (const size_t size, const int node) {
		std::cout << "MemoryPool::allocate   node=" << node << " size=" << size;
		if ((0 > node) || (n_nodes_ <= node)) throw std::runtime_error ("allocate failed; node number is incorrect");
		return this->nodes_[node].allocate (size);
	}
	/// \brief
	/// @param [in] pointer
	/// @param [in] size
	/// @param [in] node
	void deallocate (void* pointer, const size_t size, const int node) {
		std::cout << "MemoryPool::deallocate node=" << node << " size=" << size << " p=" << pointer << std::endl;
		this->nodes_[node].deallocate(pointer, size);
	}
};
/// \brief This allocator class simulates
/// \tparam T any possible type
template <typename T>
class NUMA_alloc_sim {
		int node_;
		std::shared_ptr<MemoryPool> smp_;
	public:
		using value_type 		= T;
		using size_type			= size_t;
		//using propagate_on_container_copy_assignment	= std::false_type;
		//using propagate_on_container_move_assignment	= std::false_type;
		//using propagate_on_container_swap				= std::false_type;
		//using is_always_equal							= std::false_type;
		~NUMA_alloc_sim() = default;
		/// @param [in] node ssss
		/// @param [in] smp
		NUMA_alloc_sim (const int node, std::shared_ptr<MemoryPool> smp) noexcept : node_(node), smp_(smp) {}
		NUMA_alloc_sim () noexcept : node_(0), smp_(nullptr) {}
		NUMA_alloc_sim (const NUMA_alloc_sim& rhs) noexcept = default;
		template <class U> NUMA_alloc_sim (const NUMA_alloc_sim<U>& rhs) noexcept;
		NUMA_alloc_sim (NUMA_alloc_sim&&) = default;
		NUMA_alloc_sim& operator= (const NUMA_alloc_sim&) = default;
		NUMA_alloc_sim& operator= (NUMA_alloc_sim&&) = default;
		[[nodiscard]] inline T* allocate (const size_type n) {
			return static_cast<T*> (this->smp_->allocate(sizeof(T)*n, this->node_));
		}
		inline void deallocate (T* p, const size_type n) {
			this->smp_->deallocate(p, n*sizeof(T), this->node_);
		}
		template <typename U> friend bool operator== (const NUMA_alloc_sim<U>&, const NUMA_alloc_sim<U>&);
		template <typename U> friend bool operator!= (const NUMA_alloc_sim<U>&, const NUMA_alloc_sim<U>&);
		template <typename U, typename V> friend constexpr bool operator== (const NUMA_alloc_sim<U>& lhs, const NUMA_alloc_sim<V>& rhs);
		template <typename U, typename V> friend constexpr bool operator!= (const NUMA_alloc_sim<U>& lhs, const NUMA_alloc_sim<V>& rhs);
};
template <typename U>
bool operator== (const NUMA_alloc_sim<U>& lhs, const NUMA_alloc_sim<U>& rhs) {
	return (lhs.node_ == rhs.node_);
}
template <typename U>
bool operator!= (const NUMA_alloc_sim<U>& lhs, const NUMA_alloc_sim<U>& rhs) {
	return (lhs.node_ != rhs.node_);
}
template <class U, class V>
constexpr bool operator== (const NUMA_alloc_sim<U>& lhs, const NUMA_alloc_sim<V>& rhs) {
	return false;
}
template <class U, class V>
constexpr bool operator!= (const NUMA_alloc_sim<U>& lhs, const NUMA_alloc_sim<V>& rhs) {
	return true;
}
#endif /* NUMA_alloc_sim_H_ */
Und dazu ein kleines Testprogramm, was die Problematik aufzeigt. Mit dem Define TEST lässt sich beim Übersetzen zwischen dem Standard Allokator und meinem Allokator umschalten.
/*
 * test4.cc
 *
 */
#include <memory>
#include <vector>
#include "NUMA-AllocatorSimulator.h"
int main () {
#ifdef TEST
	constexpr size_t size = 1024L*1024L*1024L;
	using Allocator = NUMA_alloc_sim<int>;
	using vec = std::vector<int,Allocator>;
	std::shared_ptr<MemoryPool> smp = std::make_shared<MemoryPool> (2, size);
	Allocator na(0, smp), nb(1, smp);
#else
	using Allocator = std::allocator<int>;
	using vec = std::vector<int>;
	Allocator na, nb;
#endif
	using size_type = std::allocator_traits<Allocator>::size_type;
	constexpr size_type m = 10;
	vec va(m, na);
	vec vb(m, nb);
	for (size_t i = 0; i < m; ++i) {
		va[i] = i;
		vb[i] = 2*i;
	}
	swap(va, vb);
}