I am trying to write functions that transform an OpenGL texture into a PyTorch tensor and back in a C++ app. To test that it works, I added 128 to the tensor (basically brightening the image) and then rendered the resulting texture on a quad. It mostly works, but I'm seeing a weird behavior where part of the texture is unaffected.
The first screenshot shows the original texture, and the second shows the texture after adding 128 to every element in the tensor. Note that roughly a quarter of the image is not affected by this operation.
These are the relevant sections of code. textureColorbuffer uses the GL_RGB format (which, if I understood correctly, means a bit depth of 8 per channel).
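For reference, this is roughly how textureColorbuffer is set up (a simplified sketch of a GL_RGB color attachment on a framebuffer; the exact parameters in my code may differ slightly):

// Create the color attachment texture with an unsized GL_RGB internal format.
glGenTextures(1, &textureColorbuffer);
glBindTexture(GL_TEXTURE_2D, textureColorbuffer);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, WIDTH, HEIGHT, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// Attach it to the framebuffer that gets rendered to.
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, textureColorbuffer, 0);

This is where I call the functions and add to the tensor: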
cudaGraphicsGLRegisterImage(&cudaResource, textureColorbuffer, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone);
torch::Tensor tensor = resourceToTensor(cudaResource, WIDTH, HEIGHT);
tensor = tensor + static_cast<unsigned char>(128);
tensorToResource(tensor, cudaResource, WIDTH, HEIGHT);
And these are the functions used:
torch::Tensor resourceToTensor(cudaGraphicsResource* cudaResource, int width, int height) {
    // Map the registered GL texture and get the underlying CUDA array.
    CUDA_CHECK_ERROR(cudaGraphicsMapResources(1, &cudaResource, 0));
    cudaArray* textureArray;
    CUDA_CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&textureArray, cudaResource, 0, 0));

    // Copy the array contents into a temporary linear device buffer (assuming 3 bytes per pixel).
    unsigned char* devicePtr;
    size_t size = width * height * 3 * sizeof(unsigned char);
    CUDA_CHECK_ERROR(cudaMalloc(&devicePtr, size));
    CUDA_CHECK_ERROR(cudaMemcpyFromArray(devicePtr, textureArray, 0, 0, size, cudaMemcpyDeviceToDevice));

    // Wrap the buffer in a HxWx3 uint8 tensor, then clone so the data survives the cudaFree below.
    auto options = torch::TensorOptions().dtype(torch::kUInt8).device(torch::kCUDA);
    torch::Tensor tensor = torch::from_blob(devicePtr, { height, width, 3 }, options);
    CUDA_CHECK_ERROR(cudaGraphicsUnmapResources(1, &cudaResource, 0));
    torch::Tensor clonedTensor = tensor.clone();
    CUDA_CHECK_ERROR(cudaFree(devicePtr));
    return clonedTensor;
}
void tensorToResource(torch::Tensor tensor, cudaGraphicsResource* cudaResource, int width, int height) {
    // Make sure the tensor lives on the GPU before taking its raw device pointer.
    tensor = tensor.to(torch::kCUDA);
    CUDA_CHECK_ERROR(cudaGraphicsMapResources(1, &cudaResource, 0));
    cudaArray* textureArray;
    CUDA_CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&textureArray, cudaResource, 0, 0));

    // Copy the tensor data back into the CUDA array backing the GL texture.
    const unsigned char* devicePtr = tensor.data_ptr<unsigned char>();
    size_t size = width * height * 3 * sizeof(unsigned char);
    CUDA_CHECK_ERROR(cudaMemcpyToArray(textureArray, 0, 0, devicePtr, size, cudaMemcpyDeviceToDevice));
    CUDA_CHECK_ERROR(cudaGraphicsUnmapResources(1, &cudaResource, 0));
}
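For completeness, CUDA_CHECK_ERROR is essentially just a thin wrapper that checks the returned cudaError_t and aborts with a message on failure, along these lines (needs <iostream> and <cstdlib>):

#define CUDA_CHECK_ERROR(call)                                            \
    do {                                                                  \
        cudaError_t err = (call);                                         \
        if (err != cudaSuccess) {                                         \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__  \
                      << ": " << cudaGetErrorString(err) << std::endl;    \
            std::abort();                                                 \
        }                                                                 \
    } while (0)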
Does anybody know what could be the cause of this? Did I make a mistake with the sizes of the buffers and arrays?