What’s the efficient way to render a bunch of layered textures? I have some semitransparent textured rectangles that I position randomly in 3D space and render them from back to front.
Currently I call d3dContext->PSSetShaderResources() to feed the pixel shader with a new texture before each call to d3dContext->DrawIndexed(). I have a feeling that I am copying the texture to the GPU memory before each draw. I might have 10-30 ARGB textures roughly 1024×1024 pixels each and they are associated across 100-200 rectangles that I render on screen. My FPS is OK at 100, but goes pretty bad around 200. I possibly have some inefficiencies elsewhere since this is my first semi-serious D3D code, but I strongly suspect this has to do with copying the textures back and forth. 30*1024*1024*4 is 120MB, which is a bit high for a Metro Style App that should target any Windows 8 device. So putting them all in there might be a stretch, but maybe I could at least cache a few somehow? Any ideas?
*EDIT – Some code snippets added
Constant Buffer
struct ModelViewProjectionConstantBuffer
{
DirectX::XMMATRIX model;
DirectX::XMMATRIX view;
DirectX::XMMATRIX projection;
float opacity;
float3 highlight;
float3 shadow;
float textureTransitionAmount;
};
The Render Method
void RectangleRenderer::Render()
{
// Clear background and depth stencil
const float backgroundColorRGBA[] = { 0.35f, 0.35f, 0.85f, 1.000f };
m_d3dContext->ClearRenderTargetView(
m_renderTargetView.Get(),
backgroundColorRGBA
);
m_d3dContext->ClearDepthStencilView(
m_depthStencilView.Get(),
D3D11_CLEAR_DEPTH,
1.0f,
0
);
// Don't draw anything else until all textures are loaded
if (!m_loadingComplete)
return;
m_d3dContext->OMSetRenderTargets(
1,
m_renderTargetView.GetAddressOf(),
m_depthStencilView.Get()
);
UINT stride = sizeof(BasicVertex);
UINT offset = 0;
// The vertext buffer only has 4 vertices of a rectangle
m_d3dContext->IASetVertexBuffers(
0,
1,
m_vertexBuffer.GetAddressOf(),
&stride,
&offset
);
// The index buffer only has 4 vertices
m_d3dContext->IASetIndexBuffer(
m_indexBuffer.Get(),
DXGI_FORMAT_R16_UINT,
0
);
m_d3dContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
m_d3dContext->IASetInputLayout(m_inputLayout.Get());
FLOAT blendFactors[4] = { 0, };
m_d3dContext->OMSetBlendState(m_blendState.Get(), blendFactors, 0xffffffff);
m_d3dContext->VSSetShader(
m_vertexShader.Get(),
nullptr,
0
);
m_d3dContext->PSSetShader(
m_pixelShader.Get(),
nullptr,
0
);
m_d3dContext->PSSetSamplers(
0, // starting at the first sampler slot
1, // set one sampler binding
m_sampler.GetAddressOf()
);
// number of rectangles is in the 100-200 range
for (int i = 0; i < m_rectangles.size(); i++)
{
// start rendering from the farthest rectangle
int j = (i + m_farthestRectangle) % m_rectangles.size();
m_vsConstantBufferData.model = m_rectangles[j].transform;
m_vsConstantBufferData.opacity = m_rectangles[j].Opacity;
m_vsConstantBufferData.highlight = m_rectangles[j].Highlight;
m_vsConstantBufferData.shadow = m_rectangles[j].Shadow;
m_vsConstantBufferData.textureTransitionAmount = m_rectangles[j].textureTransitionAmount;
m_d3dContext->UpdateSubresource(
m_vsConstantBuffer.Get(),
0,
NULL,
&m_vsConstantBufferData,
0,
0
);
m_d3dContext->VSSetConstantBuffers(
0,
1,
m_vsConstantBuffer.GetAddressOf()
);
m_d3dContext->PSSetConstantBuffers(
0,
1,
m_vsConstantBuffer.GetAddressOf()
);
auto a = m_rectangles[j].textureId;
auto b = m_rectangles[j].targetTextureId;
auto srv1 = m_textures[m_rectangles[j].textureId].textureSRV.GetAddressOf();
auto srv2 = m_textures[m_rectangles[j].targetTextureId].textureSRV.GetAddressOf();
ID3D11ShaderResourceView* srvs[2];
srvs[0] = *srv1;
srvs[1] = *srv2;
m_d3dContext->PSSetShaderResources(
0, // starting at the first shader resource slot
2, // set one shader resource binding
srvs
);
m_d3dContext->DrawIndexed(
m_indexCount,
0,
0
);
}
}
Pixel Shader
cbuffer ModelViewProjectionConstantBuffer : register(b0)
{
matrix model;
matrix view;
matrix projection;
float opacity;
float3 highlight;
float3 shadow;
float textureTransitionAmount;
};
Texture2D baseTexture : register(t0);
Texture2D targetTexture : register(t1);
SamplerState simpleSampler : register(s0);
struct PixelShaderInput
{
float4 pos : SV_POSITION;
float3 norm : NORMAL;
float2 tex : TEXCOORD0;
};
float4 main(PixelShaderInput input) : SV_TARGET
{
float3 lightDirection = normalize(float3(0, 0, -1));
float4 baseTexelColor = baseTexture.Sample(simpleSampler, input.tex);
float4 targetTexelColor = targetTexture.Sample(simpleSampler, input.tex);
float4 texelColor = lerp(baseTexelColor, targetTexelColor, textureTransitionAmount);
float4 shadedColor;
shadedColor.rgb = lerp(shadow.rgb, highlight.rgb, texelColor.r);
shadedColor.a = texelColor.a * opacity;
return shadedColor;
}
As Jeremiah has suggested, you are not probably moving texture from CPU to GPU for each frame as you would have to create new texture for each frame or using “UpdateSubresource” or “Map/UnMap” methods.
I don’t think that instancing is going to help for this specific case, as the number of polygons is extremely low (I would start to worry with several millions of polygons). It is more likely that your application is going to be bandwidth/fillrate limited, as your are performing lots of texture sampling/blending (It depends on tecture fillrate, pixel fillrate and the nunber of ROP on your GPU).
In order to achieve better performance, It is highly recommended to:
don’t have any mipmaps, It will hurt a lot the cache of the GPU. (I also assume that you are using texture.Sample method in HLSL, and not texture.SampleLevel or variants)
like texconv.exe or preferably the sample from “Windows DirectX 11
Texture Converter“.
On a side note, you will probably get more attention for this kind of question on https://gamedev.stackexchange.com/.