I have been trying to use the binary page memory resource and the contiguous memory resource (c_mr) in the traccc CUDA examples. Using the contiguous memory resource works well for file I/O, but when c_mr is used for the CUDA algorithms (the seeding algorithm here), there is an invalid global read error due to a misaligned address.
I also tested with a 2 GB contiguous memory allocation limit, but got the same error.
...
vecmem::host_memory_resource host_mr;
vecmem::cuda::managed_memory_resource mng_mr;
vecmem::contiguous_memory_resource c_mr(mng_mr,pow(2,30));//<--
traccc::clusterization_algorithm ca(mng_mr);
traccc::spacepoint_formation sf(mng_mr);
traccc::seeding_algorithm sa(host_mr);
traccc::track_params_estimation tp(host_mr);
traccc::cuda::seeding_algorithm sa_cuda(c_mr); //<--
traccc::cuda::track_params_estimation tp_cuda(c_mr); //<--
...
traccc::cell_container_types::host cells_per_event =
traccc::read_cells_from_event(
event, i_cfg.cell_directory, common_opts.input_data_format,
surface_transforms, digi_cfg, c_mr);//<--
...
========= Invalid __global__ read of size 8 bytes
========= at 0xeb8 in /home/chamodya/myrepos/recent/traccc/build/_deps/vecmem-src/core/include/vecmem/containers/impl/vector_view.ipp:57:vecmem::data::vector_view<traccc::internal_spacepoint<traccc::spacepoint>>::size_ptr() const
========= by thread (253,0,0) in block (47,0,0)
========= Address 0x7fdf7ec10244 is misaligned
========= Device Frame:/home/chamodya/myrepos/recent/traccc/build/_deps/vecmem-src/core/include/vecmem/containers/impl/device_vector.ipp:20:vecmem::device_vector<traccc::internal_spacepoint<traccc::spacepoint>>::device_vector(const vecmem::data::vector_view<traccc::internal_spacepoint<traccc::spacepoint>> &) [0xe10]
========= Device Frame:/home/chamodya/myrepos/recent/traccc/build/_deps/vecmem-src/core/include/vecmem/containers/impl/jagged_device_vector.ipp:64:vecmem::jagged_device_vector<traccc::internal_spacepoint<traccc::spacepoint>>::at(unsigned long) [0xe10]
========= Device Frame:/home/chamodya/myrepos/recent/traccc/build/_deps/detray-src/core/include/detray/grids/grid2.hpp:187:detray::grid2<detray::attach_populator, detray::axis::circular, detray::axis::regular, detray::serializer2, vecmem::device_vector, vecmem::jagged_device_vector, detray::darray, detray::dtuple, traccc::internal_spacepoint<traccc::spacepoint>, (bool)0, (unsigned int)1>::bin(unsigned long, unsigned long) [0xe10]
========= Device Frame:/home/chamodya/myrepos/recent/traccc/build/_deps/detray-src/core/include/detray/grids/grid2.hpp:203:detray::grid2<detray::attach_populator, detray::axis::circular, detray::axis::regular, detray::serializer2, vecmem::device_vector, vecmem::jagged_device_vector, detray::darray, detray::dtuple, traccc::internal_spacepoint<traccc::spacepoint>, (bool)0, (unsigned int)1>::bin(unsigned long) [0xe10]
========= Device Frame:/home/chamodya/myrepos/recent/traccc/device/common/include/traccc/seeding/device/impl/populate_grid.ipp:51:traccc::device::populate_grid(unsigned long, const traccc::seedfinder_config &, const traccc::container_view<const unsigned long, const traccc::spacepoint> &, const vecmem::data::vector_view<const std::pair<unsigned long, unsigned long>> &, detray::grid2_view<detray::grid2<detray::attach_populator, detray::axis::circular, detray::axis::regular, detray::serializer2, detray::dvector, detray::djagged_vector, detray::darray, detray::dtuple, traccc::internal_spacepoint<traccc::spacepoint>, (bool)0, (unsigned int)1>>) [0xe10]
========= Device Frame:/home/chamodya/myrepos/recent/traccc/device/cuda/src/seeding/spacepoint_binning.cu:43:traccc::cuda::kernels::populate_grid(traccc::seedfinder_config, traccc::container_view<const unsigned long, const traccc::spacepoint>, vecmem::data::vector_view<const std::pair<unsigned long, unsigned long>>, detray::grid2_view<detray::grid2<detray::attach_populator, detray::axis::circular, detray::axis::regular, detray::serializer2, detray::dvector, detray::djagged_vector, detray::darray, detray::dtuple, traccc::internal_spacepoint<traccc::spacepoint>, (bool)0, (unsigned int)1>>) [0xf8]
========= Saved host backtrace up to driver entry point at kernel launch time
...
compute-sanitizer build/bin/traccc_seq_example_cuda --detector_file=tml_detector/trackml-detector.csv --digitization_config_file=tml_detector/default-geometric-config-generic.json --cell_directory=tml_full/ttbar_mu200/ --events=2 --input-binary --run_cpu=0
Moreover, the binary page memory resource did not produce the expected improvements for file I/O (it actually made it worse); I will create another issue after looking into it further.