Habr already has many articles about using compute shaders with Unity, but it is hard to find one about using compute shaders with the plain Win32 API and DirectX 11. The task is not much more difficult, though; the details are below the cut.
For this we will use:
To count the number of frames per second, we will use the C standard library:
#include <time.h>
The number of frames per second is shown in the window title, so we need to format the corresponding string:
#include <stdio.h>
We will not handle errors in detail; for our purposes it is enough that the debug build of the application aborts and reports where the failure occurred:
#include <assert.h>
Header files for WinAPI:
#define WIN32_LEAN_AND_MEAN #include <tchar.h> #include <Windows.h>
Header files for Direct3D 11:
#include <dxgi.h> #include <d3d11.h>
The resource identifiers for loading the shader. You can instead load the shader object file generated by the HLSL compiler into memory. The creation of the resource file is described later.
#include "resource.h"
Constants common to the shader and the calling part will be declared in a separate header file.
#include "SharedConst.h"
Let's declare the Windows event handling function, which will be defined later:
// Window procedure of the application window. It is only declared here
// because the window class registration refers to it before its definition.
LRESULT CALLBACK WndProc(HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam);
Let's write functions for creating and destroying a window.
int windowWidth, windowHeight; HINSTANCE hInstance; HWND hWnd; void InitWindows() { // hInstance = GetModuleHandle(NULL); windowWidth = 800; windowHeight = 800; WNDCLASS wc; // wc.style = 0; // wc.lpfnWndProc = &WndProc; // wc.cbClsExtra = 0; wc.cbWndExtra = 0; // (), wc.hInstance = hInstance; // wc.hIcon = LoadIcon(hInstance, IDI_APPLICATION); wc.hCursor = LoadCursor(hInstance, IDC_ARROW); // , "" wc.hbrBackground = NULL; // wc.lpszMenuName = NULL; // wc.lpszClassName = _T("WindowClass1"); // ATOM result = RegisterClass(&wc); // , assert(result); // -- , .. DWORD dwStyle = WS_OVERLAPPEDWINDOW; RECT rect; // ( ) rect.left = (GetSystemMetrics(SM_CXSCREEN) - windowWidth) / 2; rect.top = (GetSystemMetrics(SM_CYSCREEN) - windowHeight) / 2; rect.right = rect.left + windowWidth; rect.bottom = rect.top + windowHeight; // . -- AdjustWindowRect(&rect, dwStyle, FALSE); hWnd = CreateWindow( _T("WindowClass1"), _T("WindowName1"), dwStyle, // rect.left, rect.top, // rect.right - rect.left, rect.bottom - rect.top, // // HWND_DESKTOP NULL HWND_DESKTOP, // NULL, // (), hInstance, // NULL); // , assert(hWnd); } void DisposeWindows() { // DestroyWindow(hWnd); // UnregisterClass(_T("WindowClass1"), hInstance); }
Next, initialize the interfaces for accessing the video card (Device and DeviceContext) and the chain of output buffers (SwapChain):
IDXGISwapChain *swapChain; ID3D11Device *device; ID3D11DeviceContext *deviceContext; void InitSwapChain() { HRESULT result; DXGI_SWAP_CHAIN_DESC swapChainDesc; // swapChainDesc.BufferDesc.Width = windowWidth; swapChainDesc.BufferDesc.Height = windowHeight; // // .. , swapChainDesc.BufferDesc.RefreshRate.Numerator = 0; swapChainDesc.BufferDesc.RefreshRate.Denominator = 1; // -- 32- RGBA swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // swapChainDesc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; swapChainDesc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; // swapChainDesc.SampleDesc.Count = 1; swapChainDesc.SampleDesc.Quality = 0; // SwapChain swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; // "" ( ) swapChainDesc.BufferCount = 1; // swapChainDesc.OutputWindow = hWnd; // swapChainDesc.Windowed = TRUE; // swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; swapChainDesc.Flags = 0; // DirectX 11.0, .. D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; // Debug- DirectX #ifndef NDEBUG UINT flags = D3D11_CREATE_DEVICE_DEBUG; #else UINT flags = 0; #endif result = D3D11CreateDeviceAndSwapChain( // - NULL, // D3D_DRIVER_TYPE_HARDWARE, NULL, // . flags, // DirectX &featureLevel, 1, // SDK D3D11_SDK_VERSION, // &swapChainDesc, // , &swapChain, &device, NULL, &deviceContext); // , assert(SUCCEEDED(result)); } void DisposeSwapChain() { deviceContext->Release(); device->Release(); swapChain->Release(); }
Initialization of access from shaders to the buffer in which the drawing will be performed:
ID3D11RenderTargetView *renderTargetView; void InitRenderTargetView() { HRESULT result; ID3D11Texture2D *backBuffer; // "" SwapChain result = swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void **)&backBuffer); assert(SUCCEEDED(result)); // result = device->CreateRenderTargetView(backBuffer, NULL, &renderTargetView); assert(SUCCEEDED(result)); // // , , // .. SwapChain, // Release() backBuffer->Release(); // View deviceContext->OMSetRenderTargets(1, &renderTargetView, NULL); // D3D11_VIEWPORT viewport; viewport.TopLeftX = 0; viewport.TopLeftY = 0; viewport.Width = (FLOAT)windowWidth; viewport.Height = (FLOAT)windowHeight; viewport.MinDepth = 0; viewport.MaxDepth = 1; deviceContext->RSSetViewports(1, &viewport); } void DisposeRenderTargetView() { renderTargetView->Release(); }
Before initializing shaders, you need to create them. Visual Studio can recognize the file extension, so we can simply create a source with the .hlsl
extension, or directly create a shader through the menu. I chose the first method, because all the same, properties will have to be set using Shader Model 5.
Create the vertex and pixel shaders in the same way.
In the vertex shader, we will simply convert the coordinates from a two-dimensional vector (since we have exactly two-dimensional point positions) to a four-dimensional one (taken by a video card):
// Vertex shader: extends the 2D particle position to a 4-component position
// with z = 0 and w = 1.
float4 main(float2 input: POSITION): SV_POSITION
{
    float4 result = float4(input.x, input.y, 0, 1);
    return result;
}
In the pixel shader, we will return white color:
// Pixel shader: every covered pixel is drawn opaque white.
float4 main(float4 input: SV_POSITION): SV_TARGET
{
    const float4 white = float4(1, 1, 1, 1);
    return white;
}
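Now for the compute shader. Let's define the interaction between the points by the following formula: every point $i$ is attracted to every point $j$ that is farther away than $0.25$ and repelled by every point that is closer,
$$\vec a_i = \sum_j k\,\frac{\lvert \vec r_j - \vec r_i \rvert - 0.25}{\lvert \vec r_j - \vec r_i \rvert}\,(\vec r_j - \vec r_i), \qquad k = 10^{-9},$$
with the mass of every point taken to be 1.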
This is what the implementation of this on HLSL will look like:
#include "SharedConst.h"

// Particle positions, bound to UAV slot 0.
RWBuffer<float2> position: register(u0);
// Particle velocities, bound to UAV slot 1.
RWBuffer<float2> velocity: register(u1);

// One thread per particle: sums the pairwise accelerations exerted by all
// particles and advances this particle's position and velocity by one step.
//
// NOTE(review): positions are read and written in the same buffer during a
// single dispatch and nothing separates the reads of the other particles
// from their owners' writes, so a thread may see a mix of old and updated
// positions.
[numthreads(NUMTHREADS, 1, 1)]
void main(uint3 id: SV_DispatchThreadID)
{
    const uint self = id.x;
    // Only this thread writes element `self`, so it can be read once.
    const float2 selfPosition = position[self];

    float2 acceleration = float2(0, 0);
    for (uint other = 0; other < PARTICLE_COUNT; other++)
    {
        // Vector from this particle to the other one.
        float2 offset = position[other] - selfPosition;
        // Clamped so that the division below never sees a zero length
        // (the particle also meets itself in this loop).
        float dist = max(1e-10, length(offset));
        // Positive (attraction) beyond a distance of 0.25, negative below.
        float strength = 1e-9 * (dist - 0.25) / dist;
        acceleration += strength * offset;
    }

    const float2 oldVelocity = velocity[self];
    position[self] = selfPosition + (oldVelocity + 0.5 * acceleration);
    velocity[self] = oldVelocity + acceleration;
}
You may notice that the shader includes the SharedConst.h file. This is the header with the shared constants, and it is also included in main.cpp. Here is the content of this file:
// Constants shared by the C++ host code and the HLSL compute shader.
// Each constant can be overridden by defining it before this file is included.

// Total number of simulated particles.
#ifndef PARTICLE_COUNT
#define PARTICLE_COUNT (1 << 15)
#endif

// Number of threads in one compute thread group.
#ifndef NUMTHREADS
#define NUMTHREADS 64
#endif

// The host dispatches PARTICLE_COUNT / NUMTHREADS thread groups, one thread
// per particle; a remainder would leave the last particles without a thread.
#if (PARTICLE_COUNT % NUMTHREADS) != 0
#error PARTICLE_COUNT must be a multiple of NUMTHREADS
#endif
It just declares the number of particles and the number of threads in one group. We assign one thread to each particle, so the number of groups is PARTICLE_COUNT / NUMTHREADS. This number must be an integer, so the number of particles has to be divisible by the number of threads in a group.
We will load the compiled bytecode shaders using the Windows resource mechanism. To do this, create the following files:
resource.h
, which will contain the corresponding resource ID:
// Identifiers of the compiled shader objects embedded as resources.
#pragma once
// Compute shader bytecode.
#define IDR_BYTECODE_COMPUTE 101
// Vertex shader bytecode.
#define IDR_BYTECODE_VERTEX 102
// Pixel shader bytecode.
#define IDR_BYTECODE_PIXEL 103
And resource.rc
, the file for generating the corresponding resource as follows:
// Embeds the compiled shader objects into the executable as resources of the
// custom type "ShaderObject"; the IDs come from resource.h.
#include "resource.h"
IDR_BYTECODE_COMPUTE ShaderObject "compute.cso"
IDR_BYTECODE_VERTEX ShaderObject "vertex.cso"
IDR_BYTECODE_PIXEL ShaderObject "pixel.cso"
Where ShaderObject
is the resource type, and compute.cso
, vertex.cso
and pixel.cso
are the corresponding Compiled Shader Object file names in the output directory.
For the files to be found, set the path to the project's output directory in the properties of resource.rc:
Visual Studio automatically recognizes the file as a resource script and adds it to the build; you do not need to do this manually.
Now you can write the shader initialization code:
ID3D11ComputeShader *computeShader; ID3D11VertexShader *vertexShader; ID3D11PixelShader *pixelShader; ID3D11InputLayout *inputLayout; void InitShaders() { HRESULT result; HRSRC src; HGLOBAL res; // // // , .. // src = FindResource(hInstance, MAKEINTRESOURCE(IDR_BYTECODE_COMPUTE), _T("ShaderObject")); res = LoadResource(hInstance, src); // result = device->CreateComputeShader( // res, SizeofResource(hInstance, src), // . , .. NULL, // &computeShader); assert(SUCCEEDED(result)); FreeResource(res); // src = FindResource(hInstance, MAKEINTRESOURCE(IDR_BYTECODE_PIXEL), _T("ShaderObject")); res = LoadResource(hInstance, src); result = device->CreatePixelShader(res, SizeofResource(hInstance, src), NULL, &pixelShader); assert(SUCCEEDED(result)); FreeResource(res); // src = FindResource(hInstance, MAKEINTRESOURCE(IDR_BYTECODE_VERTEX), _T("ShaderObject")); res = LoadResource(hInstance, src); result = device->CreateVertexShader(res, SizeofResource(hInstance, src), NULL, &vertexShader); assert(SUCCEEDED(result)); // , // ( ) D3D11_INPUT_ELEMENT_DESC inputDesc; // inputDesc.SemanticName = "POSITION"; // , inputDesc.SemanticIndex = 0; // 32- inputDesc.Format = DXGI_FORMAT_R32G32_FLOAT; // inputDesc.AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT; // inputDesc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; // inputDesc.InputSlot = 0; // inputDesc.InstanceDataStepRate = 0; result = device->CreateInputLayout( // &inputDesc, 1, // res, SizeofResource(hInstance, src), // &inputLayout); assert(SUCCEEDED(result)); FreeResource(res); } void DisposeShaders() { inputLayout->Release(); computeShader->Release(); vertexShader->Release(); pixelShader->Release(); }
Buffer initialization code:
ID3D11Buffer *positionBuffer; ID3D11Buffer *velocityBuffer; void InitBuffers() { HRESULT result; float *data = new float[2 * PARTICLE_COUNT]; // , D3D11_SUBRESOURCE_DATA subresource; // subresource.pSysMem = data; // subresource.SysMemPitch = 0; // subresource.SysMemSlicePitch = 0; // D3D11_BUFFER_DESC desc; // desc.ByteWidth = sizeof(float[2 * PARTICLE_COUNT]); // desc.Usage = D3D11_USAGE_DEFAULT; // , desc.BindFlags = D3D11_BIND_VERTEX_BUFFER | D3D11_BIND_UNORDERED_ACCESS; // desc.CPUAccessFlags = 0; // desc.MiscFlags = 0; // desc.StructureByteStride = sizeof(float[2]); // for (int i = 0; i < 2 * PARTICLE_COUNT; i++) data[i] = 2.0f * rand() / RAND_MAX - 1.0f; // result = device->CreateBuffer(&desc, &subresource, &positionBuffer); assert(SUCCEEDED(result)); // desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS; // for (int i = 0; i < 2 * PARTICLE_COUNT; i++) data[i] = 0.0f; // result = device->CreateBuffer(&desc, &subresource, &velocityBuffer); assert(SUCCEEDED(result)); // , delete[] data; } void DisposeBuffers() { positionBuffer->Release(); velocityBuffer->Release(); }
And the initialization code for accessing the buffers from the compute shader:
ID3D11UnorderedAccessView *positionUAV; ID3D11UnorderedAccessView *velocityUAV; void InitUAV() { HRESULT result; // D3D11_UNORDERED_ACCESS_VIEW_DESC desc; // 32- desc.Format = DXGI_FORMAT_R32G32_FLOAT; // , desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; // desc.Buffer.FirstElement = 0; // desc.Buffer.NumElements = PARTICLE_COUNT; // desc.Buffer.Flags = 0; // result = device->CreateUnorderedAccessView(positionBuffer, &desc, &positionUAV); assert(!result); // result = device->CreateUnorderedAccessView(velocityBuffer, &desc, &velocityUAV); assert(!result); } void DisposeUAV() { positionUAV->Release(); velocityUAV->Release(); }
Next, you need to tell the driver to use the created shaders and the buffer bindings:
void InitBindings() { // // deviceContext->CSSetShader(computeShader, NULL, 0); // deviceContext->VSSetShader(vertexShader, NULL, 0); // deviceContext->PSSetShader(pixelShader, NULL, 0); // deviceContext->CSSetUnorderedAccessViews(1, 1, &velocityUAV, NULL); // deviceContext->IASetInputLayout(inputLayout); // deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); }
To calculate the average frame time, we will use the following code:
// Number of most recent frame timestamps that are remembered.
const int FRAME_TIME_COUNT = 128;
// Ring buffer of clock() values, one per frame.
clock_t frameTime[FRAME_TIME_COUNT];
// Slot of the ring buffer that the next call will overwrite.
int currentFrame = 0;

// Records the current time and returns the average duration of a frame, in
// seconds, measured against the oldest timestamp still in the ring buffer.
//
// Until the buffer has been filled once, the oldest slot still holds its
// zero initial value, so the first FRAME_TIME_COUNT - 1 results are
// clock() / CLOCKS_PER_SEC / FRAME_TIME_COUNT rather than a true average.
float AverageFrameTime() {
    const clock_t now = clock();
    frameTime[currentFrame] = now;

    // After advancing, the index refers to the oldest stored timestamp.
    currentFrame = (currentFrame + 1) % FRAME_TIME_COUNT;
    const clock_t elapsed = now - frameTime[currentFrame];

    return (float)elapsed / CLOCKS_PER_SEC / FRAME_TIME_COUNT;
}
The following function is executed on every frame:
void Frame() { float frameTime = AverageFrameTime(); // char buf[256]; sprintf_s(buf, "average framerate: %.1f", 1.0f / frameTime); SetWindowTextA(hWnd, buf); // float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f }; deviceContext->ClearRenderTargetView(renderTargetView, clearColor); // 32- UINT stride = sizeof(float[2]); UINT offset = 0; ID3D11Buffer *nullBuffer = NULL; ID3D11UnorderedAccessView *nullUAV = NULL; // deviceContext->IASetVertexBuffers(0, 1, &nullBuffer, &stride, &offset); // deviceContext->CSSetUnorderedAccessViews(0, 1, &positionUAV, NULL); // deviceContext->Dispatch(PARTICLE_COUNT / NUMTHREADS, 1, 1); // deviceContext->CSSetUnorderedAccessViews(0, 1, &nullUAV, NULL); // deviceContext->IASetVertexBuffers(0, 1, &positionBuffer, &stride, &offset); // deviceContext->Draw(PARTICLE_COUNT, 0); // swapChain->Present(0, 0); }
In case the window size has changed, we also need to change the size of the render buffers:
void ResizeSwapChain() { HRESULT result; RECT rect; // GetClientRect(hWnd, &rect); windowWidth = rect.right - rect.left; windowHeight = rect.bottom - rect.top; // , , // "" DisposeRenderTargetView(); // result = swapChain->ResizeBuffers( // 0, // windowWidth, windowHeight, // DXGI_FORMAT_UNKNOWN, 0); assert(SUCCEEDED(result)); // "" InitRenderTargetView(); }
Finally, you can define a message handling function:
// Window procedure.
//
// WM_CLOSE and the Escape key request application exit, WM_SIZE resizes the
// swap chain; these messages (and any other WM_KEYDOWN) return 0. Everything
// else is forwarded to DefWindowProc().
LRESULT CALLBACK WndProc(HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam) {
    if (Msg == WM_CLOSE) {
        PostQuitMessage(0);
        return 0;
    }

    if (Msg == WM_KEYDOWN) {
        if (wParam == VK_ESCAPE)
            PostQuitMessage(0);
        return 0;
    }

    if (Msg == WM_SIZE) {
        ResizeSwapChain();
        return 0;
    }

    return DefWindowProc(hWnd, Msg, wParam, lParam);
}
And the main
function:
int main() { InitWindows(); InitSwapChain(); InitRenderTargetView(); InitShaders(); InitBuffers(); InitUAV(); InitBindings(); ShowWindow(hWnd, SW_SHOW); bool shouldExit = false; while (!shouldExit) { Frame(); MSG msg; while (!shouldExit && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { TranslateMessage(&msg); DispatchMessage(&msg); if (msg.message == WM_QUIT) shouldExit = true; } } DisposeUAV(); DisposeBuffers(); DisposeShaders(); DisposeRenderTargetView(); DisposeSwapChain(); DisposeWindows(); }
A screenshot of the running program can be seen in the title of the article.
→ The project is completely posted on GitHub
Source: https://habr.com/ru/post/430202/
All Articles