開発環境 Microsoft Visual Studio Community 2019
実行環境 Microsoft Windows 10 Home (64bit)
プロジェクト テンプレート 空のプロジェクト(C++)
プロジェクト名 dx11compute2

cppファイルの追加


dx11compute2.cpp
#pragma comment(lib, "d3d11")
#pragma comment(lib, "d3dcompiler")
 
#include <d3d11.h>
#include <d3dcompiler.h>
#include <stdio.h>
#include <string.h>
 
// Calls Release() on a non-null COM-style pointer and resets it to nullptr.
// Wrapped in do { } while (0) so the macro expands to a single statement and
// can be used safely in an unbraced if/else. Note that p is evaluated more
// than once, so pass a plain variable.
#define SAFE_RELEASE(p)	do { if (p) { (p)->Release(); (p)=nullptr; } } while (0)
 
// Number of BufType elements held by each input/output buffer; main also
// passes it as the X thread-group count when dispatching the shader.
const UINT NUM_ELEMENTS = 1;
 
// Function prototypes
// Creates the D3D11 device and immediate context (stored in globals).
HRESULT CreateComputeDevice();
// Compiles the given HLSL entry point and creates the compute shader (stored in g_pCS).
HRESULT CreateComputeShader(LPCWSTR pSrcFile, LPCSTR pFunctionName);
// Creates a structured buffer of uCount elements, optionally filled from pInitData.
HRESULT CreateStructuredBuffer(UINT uElementSize, UINT uCount, void* pInitData,
	ID3D11Buffer** ppBufOut);
// Creates a shader resource view covering the whole buffer.
HRESULT CreateBufferSRV(ID3D11Buffer* pBuffer, ID3D11ShaderResourceView** ppSRVOut);
// Creates an unordered access view covering the whole buffer.
HRESULT CreateBufferUAV(ID3D11Buffer* pBuffer, ID3D11UnorderedAccessView** ppUAVOut);
// Creates a CPU-readable staging copy of pBuffer; the caller releases it.
ID3D11Buffer* CreateAndCopyToDebugBuf(ID3D11Buffer* pBuffer);
// Binds the shader and views, dispatches X*Y*Z thread groups, then unbinds.
void RunComputeShader(UINT nNumViews, ID3D11ShaderResourceView** pShaderResourceViews,
	ID3D11UnorderedAccessView* pUnorderedAccessView, UINT X, UINT Y, UINT Z);
 
// Global variables
// Device and immediate context are filled in by CreateComputeDevice();
// the compute shader is filled in by CreateComputeShader().
// All three are released at the end of main().
ID3D11Device* g_pDevice = nullptr;
ID3D11DeviceContext* g_pContext = nullptr;
ID3D11ComputeShader* g_pCS = nullptr;
 
// One element of the structured buffers exchanged with the compute shader:
// a pair of doubles. In this program x carries the large leading value and
// y the much smaller correction term of a double-double number.
struct BufType {
	double x;
	double y;
};

// sizeof(BufType) is used as the structured-buffer stride, so the host layout
// must be exactly two tightly packed 8-byte doubles.
static_assert(sizeof(BufType) == 16, "BufType must be two packed 64-bit doubles");
 
int main()
{
	ID3D11Buffer* pBuf0 = nullptr;
	ID3D11Buffer* pBuf1 = nullptr;
	ID3D11Buffer* pBufResult = nullptr;
	ID3D11ShaderResourceView* pBuf0SRV = nullptr;
	ID3D11ShaderResourceView* pBuf1SRV = nullptr;
	ID3D11UnorderedAccessView* pBufResultUAV = nullptr;
 
	printf("Creating device...");
	HRESULT hr = CreateComputeDevice();
	if (FAILED(hr)) return 1;
	printf("done\n");
 
	printf("Creating Compute Shader...");
	hr = CreateComputeShader(L"dx11compute2.hlsl", "CSMain");
	if (FAILED(hr)) return 1;
	printf("done\n");
 
	__int64 ax = 0x485714add755dc6a;
	__int64 ay = 0x44df12e6473c9b3e;
	__int64 bx = 0x4853f88517b9c2dd;
	__int64 by = 0xc4e52215f59737a1;
	printf("%016llx\n", ax);
	printf("%016llx\n", ay);
	printf("%016llx\n", bx);
	printf("%016llx\n", by);
 
	BufType vBuf0[NUM_ELEMENTS];
	BufType vBuf1[NUM_ELEMENTS];
	for (int i = 0; i < NUM_ELEMENTS; i++) {
		vBuf0[i].x = *(double*)&ax;
		vBuf0[i].y = *(double*)&ay;
		vBuf1[i].x = *(double*)&bx;
		vBuf1[i].y = *(double*)&by;
	}
 
	CreateStructuredBuffer(sizeof BufType, NUM_ELEMENTS, vBuf0, &pBuf0);
	CreateStructuredBuffer(sizeof BufType, NUM_ELEMENTS, vBuf1, &pBuf1);
	CreateStructuredBuffer(sizeof BufType, NUM_ELEMENTS, nullptr, &pBufResult);
 
	CreateBufferSRV(pBuf0, &pBuf0SRV);
	CreateBufferSRV(pBuf1, &pBuf1SRV);
	CreateBufferUAV(pBufResult, &pBufResultUAV);
 
	ID3D11ShaderResourceView* aRViews[] = { pBuf0SRV, pBuf1SRV };
	RunComputeShader(2, aRViews, pBufResultUAV, NUM_ELEMENTS, 1, 1);
 
	{
		ID3D11Buffer* debugbuf = CreateAndCopyToDebugBuf(pBufResult);
		D3D11_MAPPED_SUBRESOURCE MappedResource;
		g_pContext->Map(debugbuf, 0, D3D11_MAP_READ, 0, &MappedResource);
		BufType* p = (BufType*)MappedResource.pData;
		for (int i = 0; i < NUM_ELEMENTS; i++) {
			printf("%016llx\n", *(__int64*)&p[i].x);
			printf("%016llx\n", *(__int64*)&p[i].y);
		}
		g_pContext->Unmap(debugbuf, 0);
		SAFE_RELEASE(debugbuf);
	}
 
	SAFE_RELEASE(pBuf0SRV);
	SAFE_RELEASE(pBuf1SRV);
	SAFE_RELEASE(pBufResultUAV);
	SAFE_RELEASE(pBuf0);
	SAFE_RELEASE(pBuf1);
	SAFE_RELEASE(pBufResult);
 
	SAFE_RELEASE(g_pCS);
	SAFE_RELEASE(g_pContext);
	SAFE_RELEASE(g_pDevice);
}
 
// Creates a hardware D3D11 device and its immediate context, storing them in
// g_pDevice and g_pContext. Returns the HRESULT of D3D11CreateDevice.
HRESULT CreateComputeDevice()
{
	// Feature levels to try, in order of preference.
	D3D_FEATURE_LEVEL requestedLevels[] = {
		D3D_FEATURE_LEVEL_11_1,
		D3D_FEATURE_LEVEL_11_0,
	};
	const UINT levelCount = sizeof(requestedLevels) / sizeof(requestedLevels[0]);

	// Receives the level actually obtained; not used afterwards.
	D3D_FEATURE_LEVEL obtainedLevel;

	return D3D11CreateDevice(
		nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr,
		D3D11_CREATE_DEVICE_SINGLETHREADED,
		requestedLevels, levelCount, D3D11_SDK_VERSION,
		&g_pDevice, &obtainedLevel, &g_pContext);
}
 
// Compiles entry point pFunctionName of the HLSL file pSrcFile as cs_5_0 and
// creates the compute shader object in g_pCS.
//   pSrcFile      - path of the HLSL source file.
//   pFunctionName - name of the shader entry point.
// Returns the failing HRESULT of the compile or create step, or the success
// code of shader creation. Compiler diagnostics are printed on failure.
HRESULT CreateComputeShader(LPCWSTR pSrcFile, LPCSTR pFunctionName)
{
	// Both blobs must start out null: they are tested and released below even
	// when the compiler returned early without writing to them.
	ID3DBlob* pBlob = nullptr;
	ID3DBlob* pErrorBlob = nullptr;

	const UINT flags = D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_IEEE_STRICTNESS;
	HRESULT hr = D3DCompileFromFile(pSrcFile, nullptr, D3D_COMPILE_STANDARD_FILE_INCLUDE,
		pFunctionName, "cs_5_0", flags, 0, &pBlob, &pErrorBlob);
	if (FAILED(hr)) {
		if (pErrorBlob) {
			printf("\n%s\n", static_cast<const char*>(pErrorBlob->GetBufferPointer()));
		}
	}
	else {
		hr = g_pDevice->CreateComputeShader(
			pBlob->GetBufferPointer(), pBlob->GetBufferSize(), nullptr, &g_pCS);
	}

	SAFE_RELEASE(pErrorBlob);
	SAFE_RELEASE(pBlob);
	return hr;
}
 
// Creates a structured buffer that can be bound both as a shader resource
// and as an unordered access view.
//   uElementSize - size of one element in bytes (used as the structure stride).
//   uCount       - number of elements.
//   pInitData    - optional initial contents; nullptr creates the buffer without them.
//   ppBufOut     - receives the created buffer.
// Returns the HRESULT of ID3D11Device::CreateBuffer.
HRESULT CreateStructuredBuffer(UINT uElementSize, UINT uCount, void* pInitData,
	ID3D11Buffer** ppBufOut)
{
	D3D11_BUFFER_DESC bufferDesc = {};
	bufferDesc.ByteWidth = uElementSize * uCount;
	bufferDesc.StructureByteStride = uElementSize;
	bufferDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
	bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;

	if (!pInitData) {
		return g_pDevice->CreateBuffer(&bufferDesc, nullptr, ppBufOut);
	}

	D3D11_SUBRESOURCE_DATA seed;
	seed.pSysMem = pInitData;
	return g_pDevice->CreateBuffer(&bufferDesc, &seed, ppBufOut);
}
 
// Creates a shader resource view over pBuffer starting at element 0.
// For a structured buffer the view spans ByteWidth / StructureByteStride
// elements; for any other buffer the format and element count stay zero.
// Returns the HRESULT of ID3D11Device::CreateShaderResourceView.
HRESULT CreateBufferSRV(ID3D11Buffer* pBuffer, ID3D11ShaderResourceView** ppSRVOut)
{
	D3D11_BUFFER_DESC bufferDesc = {};
	pBuffer->GetDesc(&bufferDesc);

	const bool isStructured =
		(bufferDesc.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_STRUCTURED) != 0;

	D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc = {};
	viewDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
	viewDesc.BufferEx.FirstElement = 0;
	if (isStructured) {
		viewDesc.Format = DXGI_FORMAT_UNKNOWN;
		viewDesc.BufferEx.NumElements = bufferDesc.ByteWidth / bufferDesc.StructureByteStride;
	}

	return g_pDevice->CreateShaderResourceView(pBuffer, &viewDesc, ppSRVOut);
}
 
// Creates an unordered access view over pBuffer starting at element 0.
// For a structured buffer the view spans ByteWidth / StructureByteStride
// elements; for any other buffer the format and element count stay zero.
// Returns the HRESULT of ID3D11Device::CreateUnorderedAccessView.
HRESULT CreateBufferUAV(ID3D11Buffer* pBuffer, ID3D11UnorderedAccessView** ppUAVOut)
{
	D3D11_BUFFER_DESC bufferDesc = {};
	pBuffer->GetDesc(&bufferDesc);

	const bool isStructured =
		(bufferDesc.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_STRUCTURED) != 0;

	D3D11_UNORDERED_ACCESS_VIEW_DESC viewDesc = {};
	viewDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
	viewDesc.Buffer.FirstElement = 0;
	if (isStructured) {
		viewDesc.Format = DXGI_FORMAT_UNKNOWN;
		viewDesc.Buffer.NumElements = bufferDesc.ByteWidth / bufferDesc.StructureByteStride;
	}

	return g_pDevice->CreateUnorderedAccessView(pBuffer, &viewDesc, ppUAVOut);
}
 
// Creates a staging buffer with CPU read access, using pBuffer's description
// with bind and misc flags cleared, and copies pBuffer into it.
// Returns the staging buffer (to be released by the caller); if creation
// fails no copy is made and the pointer is returned as CreateBuffer left it.
ID3D11Buffer* CreateAndCopyToDebugBuf(ID3D11Buffer* pBuffer)
{
	D3D11_BUFFER_DESC stagingDesc = {};
	pBuffer->GetDesc(&stagingDesc);
	stagingDesc.Usage = D3D11_USAGE_STAGING;
	stagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
	stagingDesc.BindFlags = 0;
	stagingDesc.MiscFlags = 0;

	ID3D11Buffer* pStaging = nullptr;
	const HRESULT hr = g_pDevice->CreateBuffer(&stagingDesc, nullptr, &pStaging);
	if (FAILED(hr)) {
		return pStaging;
	}

	g_pContext->CopyResource(pStaging, pBuffer);
	return pStaging;
}
 
// Runs the compute shader g_pCS once.
//   nNumViews            - number of entries in pShaderResourceViews.
//   pShaderResourceViews - input views, bound to slots 0 .. nNumViews-1.
//   pUnorderedAccessView - output view, bound to UAV slot 0.
//   X, Y, Z              - thread-group counts passed to Dispatch.
// After the dispatch the shader, the UAV slot, every SRV slot bound here and
// constant-buffer slot 0 are reset to null.
void RunComputeShader(UINT nNumViews, ID3D11ShaderResourceView** pShaderResourceViews,
	ID3D11UnorderedAccessView* pUnorderedAccessView, UINT X, UINT Y, UINT Z)
{
	g_pContext->CSSetShader(g_pCS, nullptr, 0);
	g_pContext->CSSetShaderResources(0, nNumViews, pShaderResourceViews);
	g_pContext->CSSetUnorderedAccessViews(0, 1, &pUnorderedAccessView, nullptr);

	g_pContext->Dispatch(X, Y, Z);
	g_pContext->CSSetShader(nullptr, nullptr, 0);

	ID3D11UnorderedAccessView* ppUAVnullptr[] = { nullptr };
	g_pContext->CSSetUnorderedAccessViews(0, 1, ppUAVnullptr, nullptr);

	// Unbind as many SRV slots as were bound above rather than a fixed two,
	// so no input view stays bound when more than two views are passed.
	ID3D11ShaderResourceView* ppSRVnullptr[D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT] = {};
	const UINT nUnbind = (nNumViews < D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT)
		? nNumViews : D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT;
	g_pContext->CSSetShaderResources(0, nUnbind, ppSRVnullptr);

	ID3D11Buffer* ppCBnullptr[] = { nullptr };
	g_pContext->CSSetConstantBuffers(0, 1, ppCBnullptr);
}
 

hlslファイルの追加


プロジェクトのディレクトリなど、実行時のカレントディレクトリに置く。

dx11compute2.hlsl
// Pair of doubles used as the element type of all three structured buffers
// and as the value type of the double-double helpers below.
struct vec2
{
	double x;
	double y;
};
 
// Knuth's two-sum: returns the rounded sum a + b in x and a correction term
// in y computed from the intermediate differences below. The operation order
// is significant and must not be rearranged.
vec2 two_sum(double a, double b)
{
	vec2 v;
	v.x = a + b;
	// c is the part of the rounded sum attributed to b.
	double c = v.x - a;
	v.y = (a - (v.x - c)) + (b - c);
	return v;
}
 
// Two-product (attributed to Dekker in the original note; this variant uses
// fma rather than splitting the operands): returns the rounded product a * b
// in x and fma(a, b, -x) in y, which is intended to capture the rounding
// error of the product.
vec2 two_product(double a, double b)
{
	vec2 v;
	v.x = a * b;
	v.y = fma(a, b, -v.x);
	return v;
}
 
// Double-double addition: sums the x parts with two_sum, folds both y parts
// into the correction term, then renormalizes with a second two_sum.
vec2 dd_add(vec2 a, vec2 b)
{
	vec2 v = two_sum(a.x, b.x);
	v.y += a.y + b.y;
	return two_sum(v.x, v.y);
}
 
// Double-double multiplication: multiplies the x parts with two_product,
// adds the three remaining cross products to the correction term, then
// renormalizes with two_sum.
vec2 dd_mul(vec2 a, vec2 b)
{
	vec2 v = two_product(a.x, b.x);
	v.y += a.x * b.y + a.y * b.x + a.y * b.y;
	return two_sum(v.x, v.y);
}
 
// Resource bindings: two read-only inputs in SRV slots t0 and t1, and one
// writable output in UAV slot u0.
StructuredBuffer<vec2> Buffer0 : register(t0);
StructuredBuffer<vec2> Buffer1 : register(t1);
RWStructuredBuffer<vec2> BufferOut : register(u0);
 
// Entry point, one thread per group: each thread multiplies the pair of
// input elements selected by its dispatch-thread X index and stores the
// product at the same index of the output buffer.
[numthreads(1, 1, 1)]
void CSMain(uint3 DTid : SV_DispatchThreadID)
{
	const vec2 lhs = Buffer0[DTid.x];
	const vec2 rhs = Buffer1[DTid.x];
	BufferOut[DTid.x] = dd_mul(lhs, rhs);
}
 

実行



検算



----+----|----+----|----+----|----+----|
a:
31415926535897932384626433832795028841971
31415926535897932384626433832795060043776
x: 3.141592653589793e+40
y: 5.869679826411012e+23
485714add755dc6a
44df12e6473c9b3e
----+----|----+----|----+----|----+----|
b:
27182818284590452353602874713526624977572
27182818284590452353602874713526583164928
x: 2.7182818284590453e+40
y: -7.983877422070236e+23
4853f88517b9c2dd
c4e52215f59737a1
----+----|----+----|----+----|----+----|
a * b:
853973422267356706546355086954657449503455193948303503303487013041299071007274412
853973422267356706546355086954660278301394411090851369074414155864427082148741120
x: 8.539734222673567e+80
y: 4.81331677893069e+63
50bccf0ef45e68a9
4d2766ab348b092c
 
最終更新:2020年11月05日 11:46
添付ファイル