onnxruntime
275 lines · 10.7 KB
#include "testPch.h"

#include "imageTestHelper.h"
#include "robuffer.h"
#include "winrt/Windows.Storage.h"
#include "winrt/Windows.Storage.Streams.h"

#include <cstdlib>

#include <d3dx12.h>
#include <MemoryBuffer.h>
#include <wil\Resource.h>
12#define FENCE_SIGNAL_VALUE 113
14using namespace winrt;15using namespace winml;16using namespace wfc;17using namespace wm;18using namespace wgi;19
20namespace ImageTestHelper {21BitmapPixelFormat GetPixelFormat(const std::wstring& inputPixelFormat) {22// Return corresponding BitmapPixelFormat according to input string23if (L"Bgra8" == inputPixelFormat || L"Bgr8" == inputPixelFormat) {24return BitmapPixelFormat::Bgra8;25} else if (L"Rgba8" == inputPixelFormat || L"Rgb8" == inputPixelFormat) {26return BitmapPixelFormat::Rgba8;27} else if (L"Gray8" == inputPixelFormat) {28return BitmapPixelFormat::Gray8;29} else {30throw std::invalid_argument("Unsupported pixelFormat");31}32}
33
34TensorFloat LoadInputImageFromCPU(SoftwareBitmap softwareBitmap, const std::wstring& modelPixelFormat) {35softwareBitmap = SoftwareBitmap::Convert(softwareBitmap, BitmapPixelFormat::Bgra8);36BYTE* pData = nullptr;37UINT32 size = 0;38wgi::BitmapBuffer spBitmapBuffer(softwareBitmap.LockBuffer(wgi::BitmapBufferAccessMode::Read));39wf::IMemoryBufferReference reference = spBitmapBuffer.CreateReference();40auto spByteAccess = reference.as<::Windows::Foundation::IMemoryBufferByteAccess>();41spByteAccess->GetBuffer(&pData, &size);42uint32_t height = softwareBitmap.PixelHeight();43uint32_t width = softwareBitmap.PixelWidth();44
45// TODO: Need modification for Gray846std::vector<int64_t> shape = {1, 3, height, width};47float* pCPUTensor;48uint32_t uCapacity;49TensorFloat tf = TensorFloat::Create(shape);50com_ptr<ITensorNative> itn = tf.as<ITensorNative>();51itn->GetBuffer(reinterpret_cast<BYTE**>(&pCPUTensor), &uCapacity);52if (BitmapPixelFormat::Bgra8 == GetPixelFormat(modelPixelFormat)) {53// loop condition is i < size - 2 to avoid potential for extending past the memory buffer54for (UINT32 i = 0; i < size - 2; i += 4) {55UINT32 pixelInd = i / 4;56pCPUTensor[pixelInd] = (float)pData[i];57pCPUTensor[(height * width) + pixelInd] = (float)pData[i + 1];58pCPUTensor[(height * width * 2) + pixelInd] = (float)pData[i + 2];59}60} else if (BitmapPixelFormat::Rgba8 == GetPixelFormat(modelPixelFormat)) {61for (UINT32 i = 0; i < size - 2; i += 4) {62UINT32 pixelInd = i / 4;63pCPUTensor[pixelInd] = (float)pData[i + 2];64pCPUTensor[(height * width) + pixelInd] = (float)pData[i + 1];65pCPUTensor[(height * width * 2) + pixelInd] = (float)pData[i];66}67}68// else if()69// TODO: for Gray870else {71std::cerr << "Unsupported pixelFormat";72}73return tf;74}
75
76TensorFloat LoadInputImageFromGPU(SoftwareBitmap softwareBitmap, const std::wstring& modelPixelFormat) {77softwareBitmap = SoftwareBitmap::Convert(softwareBitmap, BitmapPixelFormat::Bgra8);78BYTE* pData = nullptr;79UINT32 size = 0;80BitmapBuffer spBitmapBuffer(softwareBitmap.LockBuffer(wgi::BitmapBufferAccessMode::Read));81wf::IMemoryBufferReference reference = spBitmapBuffer.CreateReference();82com_ptr<::Windows::Foundation::IMemoryBufferByteAccess> spByteAccess =83reference.as<::Windows::Foundation::IMemoryBufferByteAccess>();84spByteAccess->GetBuffer(&pData, &size);85
86std::vector<int64_t> shape = {1, 3, softwareBitmap.PixelHeight(), softwareBitmap.PixelWidth()};87float* pCPUTensor;88uint32_t uCapacity;89
90// CPU tensor initialization91TensorFloat tf = TensorFloat::Create(shape);92com_ptr<ITensorNative> itn = tf.as<ITensorNative>();93itn->GetBuffer(reinterpret_cast<BYTE**>(&pCPUTensor), &uCapacity);94
95uint32_t height = softwareBitmap.PixelHeight();96uint32_t width = softwareBitmap.PixelWidth();97if (BitmapPixelFormat::Bgra8 == GetPixelFormat(modelPixelFormat)) {98// loop condition is i < size - 2 to avoid potential for extending past the memory buffer99for (UINT32 i = 0; i < size - 2; i += 4) {100UINT32 pixelInd = i / 4;101pCPUTensor[pixelInd] = (float)pData[i];102pCPUTensor[(height * width) + pixelInd] = (float)pData[i + 1];103pCPUTensor[(height * width * 2) + pixelInd] = (float)pData[i + 2];104}105} else if (BitmapPixelFormat::Rgba8 == GetPixelFormat(modelPixelFormat)) {106for (UINT32 i = 0; i < size - 2; i += 4) {107UINT32 pixelInd = i / 4;108pCPUTensor[pixelInd] = (float)pData[i + 2];109pCPUTensor[(height * width) + pixelInd] = (float)pData[i + 1];110pCPUTensor[(height * width * 2) + pixelInd] = (float)pData[i];111}112}113// else if()114// TODO: for Gray8115else {116std::cerr << "unsupported pixelFormat";117}118
119// create the d3d device.120com_ptr<ID3D12Device> pD3D12Device = nullptr;121WINML_EXPECT_NO_THROW(D3D12CreateDevice(122nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), reinterpret_cast<void**>(&pD3D12Device)123));124
125// create the command queue.126com_ptr<ID3D12CommandQueue> dxQueue = nullptr;127D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};128commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;129pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue), reinterpret_cast<void**>(&dxQueue));130com_ptr<ILearningModelDeviceFactoryNative> devicefactory =131get_activation_factory<LearningModelDevice, ILearningModelDeviceFactoryNative>();132com_ptr<ITensorStaticsNative> tensorfactory = get_activation_factory<TensorFloat, ITensorStaticsNative>();133com_ptr<::IUnknown> spUnk;134devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put());135
136// Create ID3D12GraphicsCommandList and Allocator137D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type;138com_ptr<ID3D12CommandAllocator> alloctor;139com_ptr<ID3D12GraphicsCommandList> cmdList;140
141pD3D12Device->CreateCommandAllocator(queuetype, winrt::guid_of<ID3D12CommandAllocator>(), alloctor.put_void());142
143pD3D12Device->CreateCommandList(1440, queuetype, alloctor.get(), nullptr, winrt::guid_of<ID3D12CommandList>(), cmdList.put_void()145);146
147// Create Committed Resource148// 3 is number of channels we use. R G B without alpha.149UINT64 bufferbytesize = 3 * sizeof(float) * softwareBitmap.PixelWidth() * softwareBitmap.PixelHeight();150D3D12_HEAP_PROPERTIES heapProperties = {151D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0152};153D3D12_RESOURCE_DESC resourceDesc = {154D3D12_RESOURCE_DIMENSION_BUFFER,1550,156bufferbytesize,1571,1581,1591,160DXGI_FORMAT_UNKNOWN,161{1, 0},162D3D12_TEXTURE_LAYOUT_ROW_MAJOR,163D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
164};165
166com_ptr<ID3D12Resource> pGPUResource = nullptr;167com_ptr<ID3D12Resource> imageUploadHeap;168pD3D12Device->CreateCommittedResource(169&heapProperties,170D3D12_HEAP_FLAG_NONE,171&resourceDesc,172D3D12_RESOURCE_STATE_COMMON,173nullptr,174__uuidof(ID3D12Resource),175pGPUResource.put_void()176);177
178// Create the GPU upload buffer.179auto heap_properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);180auto buffer_desc = CD3DX12_RESOURCE_DESC::Buffer(bufferbytesize);181WINML_EXPECT_NO_THROW(pD3D12Device->CreateCommittedResource(182&heap_properties,183D3D12_HEAP_FLAG_NONE,184&buffer_desc,185D3D12_RESOURCE_STATE_GENERIC_READ,186nullptr,187__uuidof(ID3D12Resource),188imageUploadHeap.put_void()189));190
191// Copy from Cpu to GPU192D3D12_SUBRESOURCE_DATA CPUData = {};193CPUData.pData = reinterpret_cast<BYTE*>(pCPUTensor);194CPUData.RowPitch = static_cast<LONG_PTR>(bufferbytesize);195CPUData.SlicePitch = static_cast<LONG_PTR>(bufferbytesize);196UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData);197
198// Close the command list and execute it to begin the initial GPU setup.199WINML_EXPECT_NO_THROW(cmdList->Close());200ID3D12CommandList* ppCommandLists[] = {cmdList.get()};201dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);202
203//Create Event204HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);205wil::unique_event hDirectEvent(directEvent);206
207//Create Fence208::Microsoft::WRL::ComPtr<ID3D12Fence> spDirectFence = nullptr;209WINML_EXPECT_HRESULT_SUCCEEDED(210pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf()))211);212//Adds fence to queue213WINML_EXPECT_HRESULT_SUCCEEDED(dxQueue->Signal(spDirectFence.Get(), FENCE_SIGNAL_VALUE));214WINML_EXPECT_HRESULT_SUCCEEDED(spDirectFence->SetEventOnCompletion(FENCE_SIGNAL_VALUE, hDirectEvent.get()));215
216//Wait for signal217DWORD retVal = WaitForSingleObject(hDirectEvent.get(), INFINITE);218if (retVal != WAIT_OBJECT_0) {219WINML_EXPECT_HRESULT_SUCCEEDED(E_UNEXPECTED);220}221
222// GPU tensorize223com_ptr<::IUnknown> spUnkTensor;224TensorFloat input1imagetensor(nullptr);225int64_t shapes[4] = {1, 3, softwareBitmap.PixelWidth(), softwareBitmap.PixelHeight()};226tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), shapes, 4, spUnkTensor.put());227spUnkTensor.try_as(input1imagetensor);228
229return input1imagetensor;230}
231
232bool VerifyHelper(VideoFrame actual, VideoFrame expected) {233// Verify two input ImageFeatureValues are identified.234auto softwareBitmapActual = actual.SoftwareBitmap();235auto softwareBitmapExpected = expected.SoftwareBitmap();236WINML_EXPECT_TRUE(softwareBitmapActual.PixelHeight() == softwareBitmapExpected.PixelHeight());237WINML_EXPECT_TRUE(softwareBitmapActual.PixelWidth() == softwareBitmapExpected.PixelWidth());238WINML_EXPECT_TRUE(softwareBitmapActual.BitmapPixelFormat() == softwareBitmapExpected.BitmapPixelFormat());239
240uint32_t size = 4 * softwareBitmapActual.PixelHeight() * softwareBitmapActual.PixelWidth();241
242ws::Streams::Buffer actualOutputBuffer(size);243ws::Streams::Buffer expectedOutputBuffer(size);244
245softwareBitmapActual.CopyToBuffer(actualOutputBuffer);246softwareBitmapExpected.CopyToBuffer(expectedOutputBuffer);247
248byte* actualBytes;249actualOutputBuffer.try_as<::Windows::Storage::Streams::IBufferByteAccess>()->Buffer(&actualBytes);250byte* expectedBytes;251expectedOutputBuffer.try_as<::Windows::Storage::Streams::IBufferByteAccess>()->Buffer(&expectedBytes);252
253byte* pActualByte = actualBytes;254byte* pExpectedByte = expectedBytes;255
256// hard code, might need to be modified later.257const float cMaxErrorRate = 0.4f;258int8_t epsilon = 20;259
260// Even given two same ImageFeatureValues, the comparison cannot exactly match.261// So we use error rate.262UINT errors = 0;263for (uint32_t i = 0; i < size; i++, pActualByte++, pExpectedByte++) {264// Only the check the first three channels, which are (B, G, R)265if ((i + 1) % 4 == 0)266continue;267auto diff = (*pActualByte - *pExpectedByte);268if (diff > epsilon) {269errors++;270}271}272std::cerr << "total errors is " << errors << "/" << size << ", errors rate is " << (float)errors / size << std::endl;273return (float)errors / size < cMaxErrorRate;274}
275} // namespace ImageTestHelper276