Expected equality of these values:
h_vectors[i * options.dim + j]
Which is: 1.09825e+12
static_cast<float>(h_keys[i] * 0.00001)
Which is: 1.53826e+14
void test_basic_when_full(size_t max_hbm_for_vectors) {
constexpr uint64_t INIT_CAPACITY = 1024* 1024;//1 * 1024 * 1024UL;
constexpr uint64_t MAX_CAPACITY = INIT_CAPACITY;
constexpr uint64_t KEY_NUM = INIT_CAPACITY;//1 * 1024 * 1024UL;
constexpr uint64_t TEST_TIMES = 1;
K* h_keys;
S* h_scores;
V* h_vectors;
bool* h_found;
TableOptions options;
options.init_capacity = INIT_CAPACITY;
options.max_capacity = MAX_CAPACITY;
options.dim = DIM;
options.max_hbm_for_vectors = nv::merlin::GB(max_hbm_for_vectors);
options.evict_strategy = nv::merlin::EvictStrategy::kCustomized;
CUDA_CHECK(cudaMallocHost(&h_keys, KEY_NUM * sizeof(K)));
CUDA_CHECK(cudaMallocHost(&h_scores, KEY_NUM * sizeof(S)));
CUDA_CHECK(cudaMallocHost(&h_vectors, KEY_NUM * sizeof(V) * options.dim));
CUDA_CHECK(cudaMallocHost(&h_found, KEY_NUM * sizeof(bool)));
CUDA_CHECK(cudaMemset(h_vectors, 0, KEY_NUM * sizeof(V) * options.dim));
test_util::create_random_keys<K, S, V, DIM>(h_keys, h_scores, h_vectors,
KEY_NUM);
K* d_keys;
S* d_scores = nullptr;
V* d_vectors;
V* d_def_val;
V** d_vectors_ptr;
bool* d_found;
CUDA_CHECK(cudaMalloc(&d_keys, KEY_NUM * sizeof(K)));
CUDA_CHECK(cudaMalloc(&d_scores, KEY_NUM * sizeof(S)));
CUDA_CHECK(cudaMalloc(&d_vectors, KEY_NUM * sizeof(V) * options.dim));
CUDA_CHECK(cudaMalloc(&d_def_val, KEY_NUM * sizeof(V) * options.dim));
CUDA_CHECK(cudaMalloc(&d_vectors_ptr, KEY_NUM * sizeof(V*)));
CUDA_CHECK(cudaMalloc(&d_found, KEY_NUM * sizeof(bool)));
CUDA_CHECK(
cudaMemcpy(d_keys, h_keys, KEY_NUM * sizeof(K), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(d_scores, h_scores, KEY_NUM * sizeof(S),
cudaMemcpyHostToDevice));
// CUDA_CHECK(cudaMemset(d_vectors, 1, KEY_NUM * sizeof(V) * options.dim));
CUDA_CHECK(cudaMemcpy(d_vectors, h_vectors,
KEY_NUM * sizeof(V) * options.dim,
cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemset(d_def_val, 2, KEY_NUM * sizeof(V) * options.dim));
CUDA_CHECK(cudaMemset(d_vectors_ptr, 0, KEY_NUM * sizeof(V*)));
CUDA_CHECK(cudaMemset(d_found, 0, KEY_NUM * sizeof(bool)));
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
uint64_t total_size = 0;
for (int i = 0; i < TEST_TIMES; i++) {
std::unique_ptr<Table> table = std::make_unique<Table>();
table->init(options);
total_size = table->size(stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
ASSERT_EQ(total_size, 0);
table->find_or_insert(KEY_NUM, d_keys, d_vectors, d_scores, stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
uint64_t total_size_after_insert = table->size(stream);
std::cout << total_size_after_insert << std::endl;
CUDA_CHECK(cudaStreamSynchronize(stream));
///////////////////////////////////////// my check
{
CUDA_CHECK(cudaMemset(d_def_val, 0, KEY_NUM * sizeof(V) * options.dim));
table->find(KEY_NUM, d_keys, d_def_val, d_found, nullptr, stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
int found_num = 0;
CUDA_CHECK(cudaMemset(h_found, 0, KEY_NUM * sizeof(bool)));
CUDA_CHECK(cudaMemset(h_vectors, 0, KEY_NUM * sizeof(V) * options.dim));
CUDA_CHECK(cudaMemcpy(h_found, d_found, KEY_NUM * sizeof(bool),
cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaMemcpy(h_vectors, d_def_val,
KEY_NUM * sizeof(V) * options.dim,
cudaMemcpyDeviceToHost));
for (int i = 0; i < KEY_NUM; i++) {
if (h_found[i]) {
found_num++;
for (int j = 0; j < options.dim; j++) {
ASSERT_EQ(h_vectors[i * options.dim + j],
static_cast<float>(h_keys[i] * 0.00001));
}
}
}
ASSERT_EQ(total_size_after_insert, found_num);
}
///////////////////////////////////////// my check
table->erase(KEY_NUM, d_keys, stream);
size_t total_size_after_erase = table->size(stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
ASSERT_EQ(total_size_after_erase, 0);
table->find_or_insert(KEY_NUM, d_keys, d_vectors, d_scores, stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
uint64_t total_size_after_reinsert = table->size(stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
ASSERT_EQ(total_size_after_insert, total_size_after_reinsert);
}
CUDA_CHECK(cudaStreamDestroy(stream));
CUDA_CHECK(cudaMemcpy(h_vectors, d_vectors, KEY_NUM * sizeof(V) * options.dim,
cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaFreeHost(h_keys));
CUDA_CHECK(cudaFreeHost(h_scores));
CUDA_CHECK(cudaFreeHost(h_vectors));
CUDA_CHECK(cudaFreeHost(h_found));
CUDA_CHECK(cudaFree(d_keys));
CUDA_CHECK(cudaFree(d_scores));
CUDA_CHECK(cudaFree(d_vectors));
CUDA_CHECK(cudaFree(d_def_val));
CUDA_CHECK(cudaFree(d_vectors_ptr));
CUDA_CHECK(cudaFree(d_found));
CUDA_CHECK(cudaDeviceSynchronize());
CudaCheckError();
}
TEST(FindOrInsertTest, test_basic_when_full) {
// test_basic_when_full(16);
test_basic_when_full(0);
}