I am writing my own 3D engine for Linux, and I have run into a performance problem. The following features were enabled at the time of testing:
- Bloom
- DOF (Depth Of Field)
- Soft shadows
- Point light source
- 1k, 2k textures (ambient, diffuse, specular, normal mapping)
With all of this enabled I get only 48 fps.
Framebuffer size - 1366x768
Shadow map resolution - 1024x1024
Nvidia GeForce 920mx, Intel Core i5-6200u
Here is the order of my actions:
- Rendering to the shadow map (cost: 0.5-1 fps)
- Basic rendering: drawing shadows, lighting, and more
- Blur, horizontally and vertically (each pass is performed 2 times): bloom with a fixed kernel and DOF with a kernel generated in the shader (cost: bloom 10-15 fps, DOF 12-16 fps; about 30 fps in total)
- Blending
Are these results normal for this video card? Are there ways to optimize the rendering, or is this simply what this hardware can deliver? Below is my implementation of the blur.
Fragment blur shader:
#version 330
// The ALGINE_* switches and the *_KERNEL_SIZE constants used below are not
// defined in this file; presumably the engine injects them at the algdef
// marker on the next line before compiling — TODO confirm.
#algdef
// Texture coordinate of the current fragment; every blur tap is offset from it.
in vec2 texCoord;
#ifdef ALGINE_BLOOM_MODE_ENABLED
// Blurred bloom color written by main() (alpha is always 1.0).
layout (location = 0) out vec4 bloomFragColor;
uniform sampler2D image; // bloom input texture
// Bloom weights: index 0 weights the center texel, index i weights the two
// texels i steps away on either side of it.
uniform float bloom_kernel[BLOOM_KERNEL_SIZE];
// Accumulator for the weighted bloom sum built up in main().
vec3 bloom_result;
#endif
#ifdef ALGINE_DOF_MODE_ENABLED
// Blurred dof color written by main(); alpha passes the input alpha through.
layout (location = 1) out vec4 dofFragColor;
uniform sampler2D image2; // dof input texture; its alpha channel drives the blur strength
// main() builds the kernel with sigma = max_sigma * alpha + min_sigma, where
// alpha is the center texel's alpha from image2.
uniform float max_sigma = 8.0;
uniform float min_sigma = 0.0001;
// Dof weights, recomputed by makeDofKernel() on every shader invocation;
// indexed the same way as bloom_kernel (0 = center, i = both neighbours at distance i).
float dof_kernel[DOF_KERNEL_SIZE];
// Accumulator for the weighted dof sum built up in main().
vec3 dof_result;
// Center texel of image2 as sampled by main().
vec4 tmp;
// Alpha of the center texel (tmp.a).
float fdof;
const int DOF_LCR_SIZE = DOF_KERNEL_SIZE * 2 - 1; // width of the full symmetric kernel: left taps, center, right taps (lllcrrr)
// Index of the center tap within that full-width kernel.
const int DOF_MEAN = DOF_LCR_SIZE / 2;
// Fills dof_kernel with one half of a normalized Gaussian of the given sigma.
//
// dof_kernel[0] is the center weight and dof_kernel[i] is the weight shared by
// the two taps at distance i on either side of the center. The weights are
// normalized so that the full symmetric kernel (center + both sides) sums to 1.
//
// sigma: standard deviation in texels; it is used as a divisor, so it must be
//        non-zero.
void makeDofKernel(float sigma) {
    float sum = 0.0; // running total of the half kernel (center included)

    for (int i = 0; i < DOF_KERNEL_SIZE; i++) {
        // Square with a multiply instead of pow(): this runs for every tap of
        // every fragment, and pow() is a general-purpose transcendental.
        float t = float(i) / sigma;
        dof_kernel[i] = exp(-0.5 * t * t);
        sum += dof_kernel[i];
    }

    // Both sides are mirrored, so the full-kernel total is twice the half
    // total minus the center weight, which would otherwise be counted twice.
    sum += sum - dof_kernel[0];

    // Normalize the kernel
    for (int i = 0; i < DOF_KERNEL_SIZE; i++) {
        dof_kernel[i] /= sum;
    }
}
#endif
// Fragment entry point: performs one 1D pass of a separable blur.
//
// The blur axis is fixed at compile time: horizontal when
// ALGINE_BLUS_HORIZONTAL is defined, vertical otherwise. Bloom and dof are
// each compiled in only when their ALGINE_*_MODE_ENABLED switch is defined,
// and each writes its own output.
void main() {
    // Size of a single texel in texture-coordinate units.
#ifdef ALGINE_BLOOM_MODE_ENABLED
    vec2 texOffset = 1.0 / textureSize(image, 0);
#else
    vec2 texOffset = 1.0 / textureSize(image2, 0);
#endif

    // Offset between neighbouring taps along the blur axis.
#ifdef ALGINE_BLUS_HORIZONTAL
    vec2 tapStep = vec2(texOffset.x, 0.0);
#else
    vec2 tapStep = vec2(0.0, texOffset.y);
#endif

#ifdef ALGINE_DOF_MODE_ENABLED
    // The alpha of the center texel selects the Gaussian sigma for this fragment.
    tmp = texture(image2, texCoord);
    fdof = tmp.a;
    makeDofKernel(max_sigma * fdof + min_sigma);

    // Center tap first, then the mirrored pairs on either side of it.
    dof_result = tmp.rgb * dof_kernel[0];
    for (int tap = 1; tap < DOF_KERNEL_SIZE; tap++) {
        vec2 shift = tapStep * float(tap);
        dof_result += dof_kernel[tap] * (
            texture(image2, texCoord + shift).rgb +
            texture(image2, texCoord - shift).rgb
        );
    }

    // Alpha is passed through unchanged.
    dofFragColor = vec4(dof_result, fdof);
#endif

#ifdef ALGINE_BLOOM_MODE_ENABLED
    // Current fragment's contribution, then the mirrored pairs around it.
    bloom_result = texture(image, texCoord).rgb * bloom_kernel[0];
    for (int tap = 1; tap < BLOOM_KERNEL_SIZE; tap++) {
        vec2 shift = tapStep * float(tap);
        bloom_result += bloom_kernel[tap] * (
            texture(image, texCoord + shift).rgb +
            texture(image, texCoord - shift).rgb
        );
    }

    bloomFragColor = vec4(bloom_result, 1.0);
#endif
}
And C++ code:
// configuring textures
for (int i = 0; i < 2; i++) {
glBindFramebuffer(GL_FRAMEBUFFER, pingpongFBO[i]);
// bloom
glBindTexture(GL_TEXTURE_2D, pingpongBuffers[i]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, ALGINE_SCR_W, ALGINE_SCR_H, 0, GL_RGB, GL_FLOAT, NULL);
// dof
glBindTexture(GL_TEXTURE_2D, dofBuffers[i]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, ALGINE_SCR_W, ALGINE_SCR_H, 0, GL_RGBA, GL_FLOAT, NULL);
glClearColor(0, 0, 0, 1);
}
horizontal = true;
firstIteration = true;
for (int i = 0; i < ALGINE_BLUR_AMOUNT; i++) {
glUseProgram(blusPrograms[horizontal].programId);
glBindFramebuffer(GL_FRAMEBUFFER, pingpongFBO[horizontal]);
// bloom
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, firstIteration ? colorBuffers[1] : pingpongBuffers[!horizontal]);
// dof
glActiveTexture(GL_TEXTURE1);
glUniform1i(blusPrograms[horizontal].samplerDof, 1);
glBindTexture(GL_TEXTURE_2D, firstIteration ? colorBuffers[0] : dofBuffers[!horizontal]);
// rendering
renderQuad(blusPrograms[horizontal].inPos, blusPrograms[horizontal].inTexCoord);
horizontal = !horizontal;
if (firstIteration) firstIteration = false;
}
I will be grateful for your answers
I ran glxgears in a window of the same size as the one used to test the engine, and I get about 100 fps. With DOF and bloom disabled, my engine gives about 85 fps. So I think that @MaximusMinimus is right, and the bottleneck is my video card.