diff --git a/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc b/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc index a033cc1ecc..3b4b05cfc3 100644 --- a/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc +++ b/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc @@ -1,3 +1,4 @@ #include ShaderOpArithTable.xml DATASOURCE_XML "ShaderOpArithTable.xml" +LongVectorOp DATASOURCE_XML "LongVectorOp.xml" diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp index 7c50c07943..7a59baff85 100644 --- a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp +++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp @@ -77,6 +77,20 @@ static UINT getD3D12SDKVersion(std::wstring SDKPath) { return SDKVersion; } +// Simple wrapper to free the loaded module on scope exit. +struct DllWrapper { + HMODULE Module = NULL; // NOLINT + + ~DllWrapper() { Close(); } + + void Close() { + if (Module) { + FreeLibrary(Module); + Module = NULL; + } + } +}; + static bool createDevice( ID3D12Device **D3DDevice, D3D_SHADER_MODEL TestModel, bool SkipUnsupported, std::function @@ -108,20 +122,7 @@ static bool createDevice( // load. To force this to be used, we make sure that this DLL is loaded // before attempting to create the device. - struct WarpDll { - HMODULE Module = NULL; // NOLINT - - ~WarpDll() { Close(); } - - void Close() { - if (Module) { - FreeLibrary(Module); - Module = NULL; - } - } - }; - - WarpDll ExplicitlyLoadedWarpDll; + DllWrapper ExplicitlyLoadedWarpDll; WEX::Common::String WarpDllPath; if (SUCCEEDED(WEX::TestExecution::RuntimeParameters::TryGetValue( L"WARP_DLL", WarpDllPath))) { @@ -212,6 +213,53 @@ static bool createDevice( return true; } +// Read a resource embedded into a dll via an .rc file and wrap it in a DXC +// read-only stream +void readEmbeddedHlslDataIntoNewStream( + LPCWSTR ResourceName, // Resource name in rc file. e.g. 
L"LongVectorOp" + IStream **TestXML, dxc::SpecificDllLoader &Support) { + + DllWrapper Dll; + Dll.Module = LoadLibraryEx(TEXT("ExecHLSLTests.dll"), nullptr, + LOAD_LIBRARY_AS_DATAFILE); + + // 1. Locate the resource + HRSRC ResInfo = FindResourceW(Dll.Module, ResourceName, L"DATASOURCE_XML"); + if (!ResInfo) + VERIFY_SUCCEEDED(HRESULT_FROM_WIN32(::GetLastError())); + + // 2. Load the resource + HGLOBAL ResData = LoadResource(Dll.Module, ResInfo); + if (!ResData) + VERIFY_SUCCEEDED(HRESULT_FROM_WIN32(::GetLastError())); + VERIFY_SUCCEEDED(HRESULT_FROM_WIN32(::GetLastError())); + + // 3. Access the resource bytes + const void *Data = LockResource(ResData); + VERIFY_IS_NOT_NULL(Data); + + // Sanity + const DWORD Size = SizeofResource(Dll.Module, ResInfo); + VERIFY_IS_FALSE(0 == Size); + + VERIFY_SUCCEEDED( + Support.InitializeForDll(dxc::kDxCompilerLib, "DxcCreateInstance")); + + CComPtr Library; + VERIFY_SUCCEEDED(Support.CreateInstance(CLSID_DxcLibrary, &Library)); + + // 4. Create a DXC blob from the resource data + CComPtr Blob; + VERIFY_SUCCEEDED( + Library->CreateBlobWithEncodingFromPinned(Data, Size, CP_UTF8, &Blob)); + + // 5. 
Create a read-only stream from the DXC blob + CComPtr Stream; + VERIFY_SUCCEEDED(Library->CreateStreamFromBlobReadOnly(Blob, &Stream)); + + *TestXML = Stream.Detach(); +} + void readHlslDataIntoNewStream(LPCWSTR RelativePath, IStream **Stream, dxc::SpecificDllLoader &Support) { VERIFY_SUCCEEDED( diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h index b663bbc1be..a8b2d91251 100644 --- a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h +++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h @@ -39,6 +39,8 @@ class D3D12SDKSelector { bool SkipUnsupported = true); }; +void readEmbeddedHlslDataIntoNewStream(LPCWSTR ResourceName, IStream **Stream, + dxc::SpecificDllLoader &Support); void readHlslDataIntoNewStream(LPCWSTR RelativePath, IStream **Stream, dxc::SpecificDllLoader &Support); diff --git a/tools/clang/unittests/HLSLExec/LongVectorOp.xml b/tools/clang/unittests/HLSLExec/LongVectorOp.xml new file mode 100644 index 0000000000..3f113e4467 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectorOp.xml @@ -0,0 +1,891 @@ + + + + UAV(u0), UAV(u1) + + + + + + + + + + + data; + }; + RWStructuredBuffer InputVector : register(u0); + RWStructuredBuffer OutputVector: register(u1); + #else + RWByteAddressBuffer InputVector : register(u0); + RWByteAddressBuffer OutputVector : register(u1); + #endif + + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + #if USE_STRUCTURED_BUFFER + OutputVector[0].data = InputVector[0].data; + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif + }; + ]]> + + + + + SRV(t0), UAV(u1) + + + + + + + + + + + data; + }; + StructuredBuffer InputVector : register(t0); + RWStructuredBuffer OutputVector : register(u1); + #else + ByteAddressBuffer InputVector : register(t0); + RWByteAddressBuffer OutputVector : register(u1); + #endif + + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + #if 
USE_STRUCTURED_BUFFER + OutputVector[0].data = InputVector[0].data; + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif + }; + ]]> + + + + + DescriptorTable(UAV(u0, numDescriptors=2)) + + + + + + + + + + + + + + + data; + }; + RWStructuredBuffer InputVector : register(u0); + RWStructuredBuffer OutputVector: register(u1); + #else + RWByteAddressBuffer InputVector : register(u0); + RWByteAddressBuffer OutputVector : register(u1); + #endif + + + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + + #if USE_STRUCTURED_BUFFER + OutputVector[0].data = InputVector[0].data; + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif + + }; + ]]> + + + + + DescriptorTable(SRV(t0, numDescriptors=1), UAV(u0, numDescriptors=1)) + + + + + + + + + + + + + + + data; + }; + StructuredBuffer InputVector : register(t0); + RWStructuredBuffer OutputVector: register(u0); + #else + ByteAddressBuffer InputVector : register(t0); + RWByteAddressBuffer OutputVector : register(u0); + #endif + + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + #if USE_STRUCTURED_BUFFER + OutputVector[0].data = InputVector[0].data; + #else + vector Input = InputVector.Load< vector >(0); + OutputVector.Store< vector >(0, Input); + #endif + }; + ]]> + + + + + RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) + + + + + + + + + + + + + data; + }; + #endif + + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + + #if USE_STRUCTURED_BUFFER + StructuredBuffer InputVector = ResourceDescriptorHeap[0]; + RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; + OutputVector[0].data = InputVector[0].data; + #else + ByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; + RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; + + vector Input = InputVector.Load< vector >(0); + + OutputVector.Store< vector >(0, Input); + #endif + }; + ]]> + + + + + 
RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) + + + + + + + + + + + + + data; + }; + #endif + + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + #if USE_STRUCTURED_BUFFER + RWStructuredBuffer InputVector = ResourceDescriptorHeap[0]; + RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; + OutputVector[0].data = InputVector[0].data; + #else + RWByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; + RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; + + vector Input = InputVector.Load< vector >(0); + + OutputVector.Store< vector >(0, Input); + #endif + }; + ]]> + + + + + UAV(u0), UAV(u1), UAV(u2), UAV(u3) + + + + + + + + + + + + + + TestInitialize(vector Vector) + { + vector VectorCopy = Vector; + return VectorCopy; + } + #endif + + #ifdef FUNC_TEST_CAST + vector TestCast(vector Vector) + { + return (vector)Vector; + } + #endif + + #ifdef FUNC_TERNARY_ASSIGNMENT + vector TestTernaryAssignment(vector Vector, + vector Vector2)) + { + return (TERNARY_CONDITION ? Vector : Vector2); + } + #endif + + #ifdef FUNC_ASUINT_SPLITDOUBLE + vector TestAsUintSplitDouble(vector Vector) + { + vector LowBits; + vector HighBits; + asuint(Vector, LowBits, HighBits); + + // Store the high bits in the second half of the output vector. + // Because we know the outputs of asuint are always 32 bits, we can + // use 4 bytes per element for our offset. + g_OutputVector.Store< vector >(4 * NUM, HighBits); + + // Generic store logic in main handles storing LowBits in + // g_OutputVector. + return LowBits; + } + #endif + + #ifdef FUNC_FREXP + vector TestFrexp(vector Vector) + { + vector Mantissa; + vector Exponent; + + Mantissa = frexp(Vector, Exponent); + + // Store the exponent outputs in the second half of the output vector. + // Exponent values are always floats, so we can use 4 bytes per + // element for our offset. 
+ g_OutputVector.Store< vector >(4 * NUM, Exponent); + + return Mantissa; + } + #endif + + #ifdef FUNC_WAVE_ACTIVE_MIN + vector TestWaveActiveMin(vector Vector) + { + Vector += WaveGetLaneIndex(); + return WaveActiveMin(Vector); + } + #endif + + #ifdef FUNC_WAVE_ACTIVE_MAX + vector TestWaveActiveMax(vector Vector) + { + Vector += WaveGetLaneIndex(); + return WaveActiveMax(Vector); + } + #endif + + #ifdef FUNC_WAVE_ACTIVE_PRODUCT + vector TestWaveActiveProduct(vector Vector) + { + uint LaneIndex = WaveGetLaneIndex(); + if(LaneIndex == (WaveGetLaneCount() - 1)) + { + Vector = LaneIndex; + } + return WaveActiveProduct(Vector); + } + #endif + + #ifdef FUNC_WAVE_ACTIVE_BIT_AND + vector TestWaveActiveBitAnd(vector Vector) + { + if(WaveGetLaneIndex() == (WaveGetLaneCount() - 1)) + { + // Clear the LSB on the last lane only. + Vector = Vector & ~((OUT_TYPE)1); + } + return WaveActiveBitAnd(Vector); + } + #endif + + #ifdef FUNC_WAVE_ACTIVE_BIT_OR + vector TestWaveActiveBitOr(vector Vector) + { + if(WaveGetLaneIndex() == (WaveGetLaneCount() - 1)) + { + // Set the LSB on the last lane only. + Vector = Vector | ((OUT_TYPE)1); + } + return WaveActiveBitOr(Vector); + } + #endif + + #ifdef FUNC_WAVE_ACTIVE_BIT_XOR + vector TestWaveActiveBitXor(vector Vector) + { + const uint isChosen = (WaveGetLaneIndex() == 0) ? 1 : 0; + // Clear the LSB for all lanes except lane 0, which sets it to 1. 
+ Vector = (Vector & ~((OUT_TYPE)1)) | (OUT_TYPE)isChosen; + + return WaveActiveBitOr(Vector); + } + #endif + + #ifdef FUNC_WAVE_ACTIVE_ALL_EQUAL + bool MakeDifferent(bool A) { return !A; } + uint MakeDifferent(uint A) { return A ^ 1; } + uint64_t MakeDifferent(uint64_t A) { return A ^ 1; } + int MakeDifferent(int A) { return A ^ 1; } + int64_t MakeDifferent(int64_t A) { return A ^ 1; } + half MakeDifferent(half A) { return A + (half)1.0h; } + float MakeDifferent(float A) { return A + 1.0f; } + double MakeDifferent(double A) { return A + 1.0; } + + #if __HLSL_ENABLE_16_BIT + uint16_t MakeDifferent(uint16_t A) { return A ^ 1; } + int16_t MakeDifferent(int16_t A) { return A ^ 1; } + #endif + + vector TestWaveActiveAllEqual(vector Vector) + { + if(WaveGetLaneIndex() == (WaveGetLaneCount() - 1)) + { + // We just want to set the last element to any different value. + Vector[NUM - 1] = MakeDifferent(Vector[NUM - 1]); + } + + return WaveActiveAllEqual(Vector); + } + #endif + + #ifdef FUNC_WAVE_READ_LANE_AT + vector TestWaveReadLaneAt(vector Vector) + { + // Keep it simple and just read the last lane. 
+ const uint LaneToRead = WaveGetLaneCount() - 1; + if(WaveGetLaneIndex() == LaneToRead) + { + [unroll] + for(uint i = 1; i < NUM; ++i) + { + Vector[i] = Vector[0]; + } + } + return WaveReadLaneAt(Vector, LaneToRead); + } + #endif + + #ifdef FUNC_WAVE_READ_LANE_FIRST + vector TestWaveReadLaneFirst(vector Vector) + { + if(WaveGetLaneIndex() == 0) + { + [unroll] + for(uint i = 1; i < NUM; ++i) + { + Vector[i] = Vector[0]; + } + } + return WaveReadLaneFirst(Vector); + } + #endif + + #ifdef FUNC_WAVE_PREFIX_SUM + void TestWavePrefixSum(vector Vector) + { + const uint LaneCount = WaveGetLaneCount(); + const uint MidLane = LaneCount/2; + + Vector = WavePrefixSum(Vector); + if(WaveGetLaneIndex() == MidLane) + { + g_OutputVector.Store< vector >(0, Vector); + } + } + #endif + + #ifdef FUNC_WAVE_PREFIX_PRODUCT + void TestWavePrefixProduct(vector Vector) + { + Vector = WavePrefixProduct(Vector); + if(WaveGetLaneIndex() == 2) + { + g_OutputVector.Store< vector >(0, Vector); + } + } + #endif + + #ifdef FUNC_WAVE_MULTI_PREFIX_SUM + void TestWaveMultiPrefixSum(vector Vector) + { + uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; + + // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in + // other (Key=0). + uint4 Mask = WaveMatch(Key); + + if(WaveGetLaneIndex() == 0) + { + // Lane 0 isn't in the mask. Shove in a value to make sure it + // doesn't contribute to the result. + Vector = 1; + } + + if(WaveGetLaneIndex() >= 3) + { + // Lane 3 is the last lane in the mask. We want to make sure + // it doesn't contribute to the result as this is a prefix op. + Vector = 10; + } + + Vector = WaveMultiPrefixSum(Vector, Mask); + if(WaveGetLaneIndex() == 3) + { + // Lane 3 is the last lane in the mask that we care about. Store the + // result from it. 
+ g_OutputVector.Store< vector >(0, Vector); + } + } + #endif + + #ifdef FUNC_WAVE_MULTI_PREFIX_PRODUCT + void TestWaveMultiPrefixProduct(vector Vector) + { + uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; + + // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in + // other (Key=0). + uint4 Mask = WaveMatch(Key); + + if(WaveGetLaneIndex() == 0) + { + // Lane 0 isn't in the mask. Shove in a value to make sure it + // doesn't contribute to the result. + Vector = 4; + } + + if(WaveGetLaneIndex() == 3) + { + // Lane 3 is the last lane in the mask. We want to make sure + // it doesn't contribute to the result as this is a prefix op. + Vector = 10; + } + + Vector = WaveMultiPrefixProduct(Vector, Mask); + if(WaveGetLaneIndex() == 3) + { + // Lane 3 is the last lane in the mask. Store the result from it. + g_OutputVector.Store< vector >(0, Vector); + } + } + #endif + + #ifdef FUNC_WAVE_MULTI_PREFIX_BIT_AND + void TestWaveMultiPrefixBitAnd(vector Vector) + { + uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; + + // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in + // other (Key=0). + uint4 Mask = WaveMatch(Key); + + if(WaveGetLaneIndex() == 0 || WaveGetLaneIndex() == 3) + { + // Clear LSB on lane 0 and lane 3. Lane 0 isn't in the mask so + // shouldn't participate. Lane 3 is the output lane for this prefix + // op, so we set distinctive bits to verify it doesn't affect its own result. + Vector = Vector & ~((OUT_TYPE)0x1); + } + else // Lanes 1,2 (active contributors to the prefix operation) + { + // Keep only bits 1 and 2 (0x6 = 0b0110) to create predictable AND patterns + Vector = (Vector & ((OUT_TYPE)0x6)); + } + + Vector = WaveMultiPrefixBitAnd(Vector, Mask); + if(WaveGetLaneIndex() == 3) + { + // Lane 3 is the last lane in the mask. Store the result from it. 
+ g_OutputVector.Store< vector >(0, Vector); + } + } + #endif + + #ifdef FUNC_WAVE_MULTI_PREFIX_BIT_OR + void TestWaveMultiPrefixBitOr(vector Vector) + { + uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; + + // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in + // other (Key=0). + uint4 Mask = WaveMatch(Key); + + if(WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) + { + // Lanes 1,2,3 (inside the mask): Clear bit 1 (0x2) to create + // predictable OR patterns + Vector = Vector & ~((OUT_TYPE)0x2); + } + else + { + // Lane 0 (outside the mask): Set bit 1 to verify this lane + // doesn't contribute to the result + Vector = Vector | ((OUT_TYPE)0x2); + } + + if(WaveGetLaneIndex() == 3) + { + // Lane 3 is the output lane: Set all bits to verify it doesn't + // affect its own prefix result (since prefix excludes current lane) + Vector = Vector | ~((OUT_TYPE)0x0); + } + + Vector = WaveMultiPrefixBitOr(Vector, Mask); + if(WaveGetLaneIndex() == 3) + { + // Lane 3 is the last lane in the mask. Store the result from it. + g_OutputVector.Store< vector >(0, Vector); + } + } + #endif + + #ifdef FUNC_WAVE_MULTI_PREFIX_BIT_XOR + void TestWaveMultiPrefixBitXor(vector Vector) + { + uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; + + // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in + // other (Key=0). + uint4 Mask = WaveMatch(Key); + + if(WaveGetLaneIndex() == 0) + { + // Lane 0 is not in the mask, so these values should have no effect + // on the prefix result. Set to 0 to verify exclusion. + Vector = 0; + } + + if(WaveGetLaneIndex() == 2) + { + // Lane 2: Create a specific pattern for XOR testing. + // Zero the lower half of the vector to create predictable XOR results. 
+ [unroll] + for(uint I = 0; I < NUM/2; ++I) + { + Vector[I] = 0; + } + + // Also zero the last element to test edge cases + Vector[NUM - 1] = 0; + } + // Lane 1 and 3: Keep original input values + // Lane 3 will store the result (lane 1 XOR lane 2 prefix) + + Vector = WaveMultiPrefixBitXor(Vector, Mask); + if(WaveGetLaneIndex() == 3) + { + // Store result from lane 3 (last lane in mask) + g_OutputVector.Store< vector >(0, Vector); + } + } + #endif + + #ifdef FUNC_WAVE_MATCH + void TestWaveMatch(vector Vector) + { + if(WaveGetLaneIndex() == 0) + { + if(Vector[0] == (TYPE)0) + Vector[0] = (TYPE) 1; + else if(Vector[0] == (TYPE)1) + Vector[0] = (TYPE) 0; + else + Vector[0] = (TYPE) 1; + } + uint4 result = WaveMatch(Vector); + uint index = WaveGetLaneIndex(); + + g_OutputVector.Store(index * sizeof(uint4), result); + } + #endif + + #ifdef FUNC_TEST_SELECT + vector TestSelect(vector Vector1, + vector Vector2, + vector Vector3) + { + vector VectorCond = (Vector1 != 0); + return select(VectorCond, Vector2, Vector3); + } + #endif + + #ifdef FUNC_TEST_MODF + vector TestModF(vector Vector) + { + vector Mantissa; + vector Exponent; + + Mantissa = modf(Vector, Exponent); + + g_OutputVector.Store< vector >(sizeof(OUT_TYPE) * NUM, Exponent); + + return Mantissa; + } + #endif + + #ifdef FUNC_SHUFFLE_VECTOR + vector TestShuffleVector(TYPE Scalar) + { + vector Vector = Scalar; + return Vector; + } + #endif + + #ifdef FUNC_TEST_DERIVATIVE + void TestDerivative(vector Vector) + { + // 0 == upper-left lane in quad + // 1 == upper-right lane in quad + // 2 == lower-left lane in quad + // 3 == lower-right lane in quad + + const uint LaneIndex = WaveGetLaneIndex(); + + // We need to make sure the values are unique across lanes used in the + // partial derivative calculation so we can get a non-zero partial + // derivative. Multiplying the lane index by 2 is a simple way to + // ensure that. 
And we do this on all lanes so this function can be + // used generically for coarse and fine partial derivatives. + Vector += ((TYPE)(LaneIndex * 2)); + + vector Result = DERIVATIVE_FUNC(Vector); + + // For coarse derivatives, all lanes in the quad get the same result. + // But for fine derivatives, each lane gets a different result. To + // keep things generic we only store in the third lane as thats the + // lane we arbitrarily chose for validation with fine derivatives. + if(LaneIndex == 3) + { + g_OutputVector.Store< vector >(0, Result); + } + } + #endif + + #ifdef FUNC_TEST_QUAD_READ + void TestQuadRead(vector Vector) + { + const uint LaneIndex = WaveGetLaneIndex(); + + // Fill the long vector with something different on SOURCE_LANE_ID. + // We choose the 3rd element arbitrarily because it makes it easy + // to compute expected values CPU side. + [unroll] + for(uint i = 0; i < NUM; ++i) + { + Vector[i] = (LaneIndex == SOURCE_LANE_ID) ? Vector[2] : Vector[i]; + } + + #if IS_BINARY_OP + // QuadReadLaneAt + vector Result = QUAD_READ_FUNC(Vector, SOURCE_LANE_ID); + #else + // QuadReadAcross* + vector Result = QUAD_READ_FUNC(Vector); + #endif + + if(LaneIndex == 3) + { + g_OutputVector.Store< vector >(0, Result); + } + } + #endif + + #ifdef NUMTHREADS_XYZ + #define NUMTHREADS_ATTR [numthreads(NUMTHREADS_XYZ)] + #else + #define NUMTHREADS_ATTR [numthreads(1, 1, 1)] + #endif + + #ifdef WAVE_SIZE + #define WAVE_SIZE_ATTR [WaveSize(WAVE_SIZE)] + #else + #define WAVE_SIZE_ATTR + #endif + + WAVE_SIZE_ATTR + NUMTHREADS_ATTR + void main(uint GI : SV_GroupIndex) { + + #ifdef FUNC_SHUFFLE_VECTOR + // For shuffle vector, the input is a scalar, not a vector. + TYPE Input1 = g_InputVector1.Load(0); + #else + // For all other basic op types the first input is always a vector. 
+ vector Input1 = g_InputVector1.Load< vector >(0); + #endif + + #if (IS_BINARY_OP || IS_TERNARY_OP) + vector Input2 = g_InputVector2.Load< vector >(0); + #endif + + #if IS_TERNARY_OP + vector Input3 = g_InputVector3.Load< vector >(0); + #endif + + #ifdef IS_REDUCTION_OP + const uint32_t OutNum = 1; + #else + const uint32_t OutNum = NUM; + #endif + + vector OutputVector; + #ifdef OP_STORES_RESULT_ON_SPECIFIC_LANE + FUNC(Input1); + return; + #elif TEST_ARRAY_OPERATOR + // This test case is for testing array operator []. + // It tests static array access with a compile time constant index array. + // Or dynamic access, by introducing a runtime dependency that prevents the + // index array from being a compile time constant. + const uint IndexCount = 6; + const uint IndexList[IndexCount] = { + 0, + OutNum - 1, + 1, + OutNum - 2, + OutNum / 2, + OutNum / 2 + 1 + }; + + OutputVector = 0; + uint End = min(OutNum, IndexCount); + + #if DYNAMIC_ACCESS + const uint Zero = (uint) Input2[0]; + #endif + + [unroll]for(uint i = 0; i < End; ++i) { + #if DYNAMIC_ACCESS + uint index = (uint)(IndexList[i] + Zero); + #else + uint index = (uint)(IndexList[i]); + #endif + OutputVector[index] = Input1[index]; + } + #elif IS_UNARY_OP + OutputVector = FUNC(Input1); + #elif IS_BINARY_OP + OutputVector = FUNC(Input1 OPERATOR Input2); + #elif IS_TERNARY_OP + // Ternary ops don't bother expanding OPERATOR because its + // always going to be comma for these test cases. 
+ OutputVector = FUNC(Input1, Input2, Input3); + #endif + + g_OutputVector.Store< vector >(0, OutputVector); + }; + ]]> + + + diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 4b21206c7c..b461c09c55 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -415,7 +415,8 @@ runTest(ID3D12Device *D3DDevice, bool VerboseLogging, dxc::SpecificDllLoader DxilDllLoader; CComPtr TestXML; - readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxilDllLoader); + readEmbeddedHlslDataIntoNewStream(L"LongVectorOp", &TestXML, DxilDllLoader); + auto ShaderOpSet = std::make_shared(); st::ParseShaderOpSetFromStream(TestXML, ShaderOpSet.get()); diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 2cfeb1f225..a2a6c9331c 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -3716,893 +3716,4 @@ void MSMain(uint GID : SV_GroupIndex, ]]> - - - UAV(u0), UAV(u1) - - - - - - - - - - - data; - }; - RWStructuredBuffer InputVector : register(u0); - RWStructuredBuffer OutputVector: register(u1); - #else - RWByteAddressBuffer InputVector : register(u0); - RWByteAddressBuffer OutputVector : register(u1); - #endif - - [numthreads(1,1,1)] - void main(uint GI : SV_GroupIndex) { - #if USE_STRUCTURED_BUFFER - OutputVector[0].data = InputVector[0].data; - #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); - #endif - }; - ]]> - - - - - SRV(t0), UAV(u1) - - - - - - - - - - - data; - }; - StructuredBuffer InputVector : register(t0); - RWStructuredBuffer OutputVector : register(u1); - #else - ByteAddressBuffer InputVector : register(t0); - RWByteAddressBuffer OutputVector : register(u1); - #endif - - [numthreads(1,1,1)] - void main(uint GI : SV_GroupIndex) { - #if USE_STRUCTURED_BUFFER - 
OutputVector[0].data = InputVector[0].data; - #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); - #endif - }; - ]]> - - - - - DescriptorTable(UAV(u0, numDescriptors=2)) - - - - - - - - - - - - - - - data; - }; - RWStructuredBuffer InputVector : register(u0); - RWStructuredBuffer OutputVector: register(u1); - #else - RWByteAddressBuffer InputVector : register(u0); - RWByteAddressBuffer OutputVector : register(u1); - #endif - - - [numthreads(1,1,1)] - void main(uint GI : SV_GroupIndex) { - - #if USE_STRUCTURED_BUFFER - OutputVector[0].data = InputVector[0].data; - #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); - #endif - - }; - ]]> - - - - - DescriptorTable(SRV(t0, numDescriptors=1), UAV(u0, numDescriptors=1)) - - - - - - - - - - - - - - - data; - }; - StructuredBuffer InputVector : register(t0); - RWStructuredBuffer OutputVector: register(u0); - #else - ByteAddressBuffer InputVector : register(t0); - RWByteAddressBuffer OutputVector : register(u0); - #endif - - [numthreads(1,1,1)] - void main(uint GI : SV_GroupIndex) { - #if USE_STRUCTURED_BUFFER - OutputVector[0].data = InputVector[0].data; - #else - vector Input = InputVector.Load< vector >(0); - OutputVector.Store< vector >(0, Input); - #endif - }; - ]]> - - - - - RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) - - - - - - - - - - - - - data; - }; - #endif - - [numthreads(1,1,1)] - void main(uint GI : SV_GroupIndex) { - - #if USE_STRUCTURED_BUFFER - StructuredBuffer InputVector = ResourceDescriptorHeap[0]; - RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; - OutputVector[0].data = InputVector[0].data; - #else - ByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; - RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - - vector Input = InputVector.Load< vector >(0); - - OutputVector.Store< vector >(0, Input); - #endif - }; - ]]> - - - - - RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) - - - - - - - 
- - - - - - data; - }; - #endif - - [numthreads(1,1,1)] - void main(uint GI : SV_GroupIndex) { - #if USE_STRUCTURED_BUFFER - RWStructuredBuffer InputVector = ResourceDescriptorHeap[0]; - RWStructuredBuffer OutputVector = ResourceDescriptorHeap[1]; - OutputVector[0].data = InputVector[0].data; - #else - RWByteAddressBuffer InputVector = ResourceDescriptorHeap[0]; - RWByteAddressBuffer OutputVector = ResourceDescriptorHeap[1]; - - vector Input = InputVector.Load< vector >(0); - - OutputVector.Store< vector >(0, Input); - #endif - }; - ]]> - - - - - UAV(u0), UAV(u1), UAV(u2), UAV(u3) - - - - - - - - - - - - - - TestInitialize(vector Vector) - { - vector VectorCopy = Vector; - return VectorCopy; - } - #endif - - #ifdef FUNC_TEST_CAST - vector TestCast(vector Vector) - { - return (vector)Vector; - } - #endif - - #ifdef FUNC_TERNARY_ASSIGNMENT - vector TestTernaryAssignment(vector Vector, - vector Vector2)) - { - return (TERNARY_CONDITION ? Vector : Vector2); - } - #endif - - #ifdef FUNC_ASUINT_SPLITDOUBLE - vector TestAsUintSplitDouble(vector Vector) - { - vector LowBits; - vector HighBits; - asuint(Vector, LowBits, HighBits); - - // Store the high bits in the second half of the output vector. - // Because we know the outputs of asuint are always 32 bits, we can - // use 4 bytes per element for our offset. - g_OutputVector.Store< vector >(4 * NUM, HighBits); - - // Generic store logic in main handles storing LowBits in - // g_OutputVector. - return LowBits; - } - #endif - - #ifdef FUNC_FREXP - vector TestFrexp(vector Vector) - { - vector Mantissa; - vector Exponent; - - Mantissa = frexp(Vector, Exponent); - - // Store the exponent outputs in the second half of the output vector. - // Exponent values are always floats, so we can use 4 bytes per - // element for our offset. 
- g_OutputVector.Store< vector >(4 * NUM, Exponent); - - return Mantissa; - } - #endif - - #ifdef FUNC_WAVE_ACTIVE_MIN - vector TestWaveActiveMin(vector Vector) - { - Vector += WaveGetLaneIndex(); - return WaveActiveMin(Vector); - } - #endif - - #ifdef FUNC_WAVE_ACTIVE_MAX - vector TestWaveActiveMax(vector Vector) - { - Vector += WaveGetLaneIndex(); - return WaveActiveMax(Vector); - } - #endif - - #ifdef FUNC_WAVE_ACTIVE_PRODUCT - vector TestWaveActiveProduct(vector Vector) - { - uint LaneIndex = WaveGetLaneIndex(); - if(LaneIndex == (WaveGetLaneCount() - 1)) - { - Vector = LaneIndex; - } - return WaveActiveProduct(Vector); - } - #endif - - #ifdef FUNC_WAVE_ACTIVE_BIT_AND - vector TestWaveActiveBitAnd(vector Vector) - { - if(WaveGetLaneIndex() == (WaveGetLaneCount() - 1)) - { - // Clear the LSB on the last lane only. - Vector = Vector & ~((OUT_TYPE)1); - } - return WaveActiveBitAnd(Vector); - } - #endif - - #ifdef FUNC_WAVE_ACTIVE_BIT_OR - vector TestWaveActiveBitOr(vector Vector) - { - if(WaveGetLaneIndex() == (WaveGetLaneCount() - 1)) - { - // Set the LSB on the last lane only. - Vector = Vector | ((OUT_TYPE)1); - } - return WaveActiveBitOr(Vector); - } - #endif - - #ifdef FUNC_WAVE_ACTIVE_BIT_XOR - vector TestWaveActiveBitXor(vector Vector) - { - const uint isChosen = (WaveGetLaneIndex() == 0) ? 1 : 0; - // Clear the LSB for all lanes except lane 0, which sets it to 1. 
- Vector = (Vector & ~((OUT_TYPE)1)) | (OUT_TYPE)isChosen; - - return WaveActiveBitOr(Vector); - } - #endif - - #ifdef FUNC_WAVE_ACTIVE_ALL_EQUAL - bool MakeDifferent(bool A) { return !A; } - uint MakeDifferent(uint A) { return A ^ 1; } - uint64_t MakeDifferent(uint64_t A) { return A ^ 1; } - int MakeDifferent(int A) { return A ^ 1; } - int64_t MakeDifferent(int64_t A) { return A ^ 1; } - half MakeDifferent(half A) { return A + (half)1.0h; } - float MakeDifferent(float A) { return A + 1.0f; } - double MakeDifferent(double A) { return A + 1.0; } - - #if __HLSL_ENABLE_16_BIT - uint16_t MakeDifferent(uint16_t A) { return A ^ 1; } - int16_t MakeDifferent(int16_t A) { return A ^ 1; } - #endif - - vector TestWaveActiveAllEqual(vector Vector) - { - if(WaveGetLaneIndex() == (WaveGetLaneCount() - 1)) - { - // We just want to set the last element to any different value. - Vector[NUM - 1] = MakeDifferent(Vector[NUM - 1]); - } - - return WaveActiveAllEqual(Vector); - } - #endif - - #ifdef FUNC_WAVE_READ_LANE_AT - vector TestWaveReadLaneAt(vector Vector) - { - // Keep it simple and just read the last lane. 
- const uint LaneToRead = WaveGetLaneCount() - 1; - if(WaveGetLaneIndex() == LaneToRead) - { - [unroll] - for(uint i = 1; i < NUM; ++i) - { - Vector[i] = Vector[0]; - } - } - return WaveReadLaneAt(Vector, LaneToRead); - } - #endif - - #ifdef FUNC_WAVE_READ_LANE_FIRST - vector TestWaveReadLaneFirst(vector Vector) - { - if(WaveGetLaneIndex() == 0) - { - [unroll] - for(uint i = 1; i < NUM; ++i) - { - Vector[i] = Vector[0]; - } - } - return WaveReadLaneFirst(Vector); - } - #endif - - #ifdef FUNC_WAVE_PREFIX_SUM - void TestWavePrefixSum(vector Vector) - { - const uint LaneCount = WaveGetLaneCount(); - const uint MidLane = LaneCount/2; - - Vector = WavePrefixSum(Vector); - if(WaveGetLaneIndex() == MidLane) - { - g_OutputVector.Store< vector >(0, Vector); - } - } - #endif - - #ifdef FUNC_WAVE_PREFIX_PRODUCT - void TestWavePrefixProduct(vector Vector) - { - Vector = WavePrefixProduct(Vector); - if(WaveGetLaneIndex() == 2) - { - g_OutputVector.Store< vector >(0, Vector); - } - } - #endif - - #ifdef FUNC_WAVE_MULTI_PREFIX_SUM - void TestWaveMultiPrefixSum(vector Vector) - { - uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; - - // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in - // other (Key=0). - uint4 Mask = WaveMatch(Key); - - if(WaveGetLaneIndex() == 0) - { - // Lane 0 isn't in the mask. Shove in a value to make sure it - // doesn't constribute to the result. - Vector = 1; - } - - if(WaveGetLaneIndex() >= 3) - { - // Lane 3 is the last lane in the mask. We want to make sure - // it doesn't contribute to the result as this is a prefix op. - Vector = 10; - } - - Vector = WaveMultiPrefixSum(Vector, Mask); - if(WaveGetLaneIndex() == 3) - { - // Lane 3 is the last lane in the mask that we care about. Store the - // result from it. 
- g_OutputVector.Store< vector >(0, Vector); - } - } - #endif - - #ifdef FUNC_WAVE_MULTI_PREFIX_PRODUCT - void TestWaveMultiPrefixProduct(vector Vector) - { - uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; - - // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in - // other (Key=0). - uint4 Mask = WaveMatch(Key); - - if(WaveGetLaneIndex() == 0) - { - // Lane 0 isn't in the mask. Shove in a value to make sure it - // doesn't constribute to the result. - Vector = 4; - } - - if(WaveGetLaneIndex() == 3) - { - // Lane 3 is the last lane in the mask. We want to make sure - // it doesn't contribute to the result as this is a prefix op. - Vector = 10; - } - - Vector = WaveMultiPrefixProduct(Vector, Mask); - if(WaveGetLaneIndex() == 3) - { - // Lane 3 is the last lane in the mask. Store the result from it. - g_OutputVector.Store< vector >(0, Vector); - } - } - #endif - - #ifdef FUNC_WAVE_MULTI_PREFIX_BIT_AND - void TestWaveMultiPrefixBitAnd(vector Vector) - { - uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; - - // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in - // other (Key=0). - uint4 Mask = WaveMatch(Key); - - if(WaveGetLaneIndex() == 0 || WaveGetLaneIndex() == 3) - { - // Clear LSB on lane 0 and lane 3. Lane 0 isn't in the mask so - // shouldn't participate. Lane 3 is the output lane for this prefix - // op, so we set distinctive bits to verify it doesn't affect its own result. - Vector = Vector & ~((OUT_TYPE)0x1); - } - else // Lanes 1,2 (active contributors to the prefix operation) - { - // Keep only bits 1 and 2 (0x6 = 0b0110) to create predictable AND patterns - Vector = (Vector & ((OUT_TYPE)0x6)); - } - - Vector = WaveMultiPrefixBitAnd(Vector, Mask); - if(WaveGetLaneIndex() == 3) - { - // Lane 3 is the last lane in the mask. Store the result from it. 
- g_OutputVector.Store< vector >(0, Vector); - } - } - #endif - - #ifdef FUNC_WAVE_MULTI_PREFIX_BIT_OR - void TestWaveMultiPrefixBitOr(vector Vector) - { - uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; - - // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in - // other (Key=0). - uint4 Mask = WaveMatch(Key); - - if(WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) - { - // Lanes 1,2,3 (inside the mask): Clear bit 1 (0x2) to create - // predictable OR patterns - Vector = Vector & ~((OUT_TYPE)0x2); - } - else - { - // Lane 0 (outside the mask): Set bit 1 to verify this lane - // doesn't contribute to the result - Vector = Vector | ((OUT_TYPE)0x2); - } - - if(WaveGetLaneIndex() == 3) - { - // Lane 3 is the output lane: Set all bits to verify it doesn't - // affect its own prefix result (since prefix excludes current lane) - Vector = Vector | ~((OUT_TYPE)0x0); - } - - Vector = WaveMultiPrefixBitOr(Vector, Mask); - if(WaveGetLaneIndex() == 3) - { - // Lane 3 is the last lane in the mask. Store the result from it. - g_OutputVector.Store< vector >(0, Vector); - } - } - #endif - - #ifdef FUNC_WAVE_MULTI_PREFIX_BIT_XOR - void TestWaveMultiPrefixBitXor(vector Vector) - { - uint Key = (WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) ? 1u : 0u; - - // Two groups. Lanes 1,2,3 in one group (Key=1), Lanes 0,(4..N) in - // other (Key=0). - uint4 Mask = WaveMatch(Key); - - if(WaveGetLaneIndex() == 0) - { - // Lane 0 is not in the mask, so these values should have no effect - // on the prefix result. Set to 0 to verify exclusion. - Vector = 0; - } - - if(WaveGetLaneIndex() == 2) - { - // Lane 2: Create a specific pattern for XOR testing. - // Zero the lower half of the vector to create predictable XOR results. 
- [unroll] - for(uint I = 0; I < NUM/2; ++I) - { - Vector[I] = 0; - } - - // Also zero the last element to test edge cases - Vector[NUM - 1] = 0; - } - // Lane 1 and 3: Keep original input values - // Lane 3 will store the result (lane 1 XOR lane 2 prefix) - - Vector = WaveMultiPrefixBitXor(Vector, Mask); - if(WaveGetLaneIndex() == 3) - { - // Store result from lane 3 (last lane in mask) - g_OutputVector.Store< vector >(0, Vector); - } - } - #endif - - #ifdef FUNC_WAVE_MATCH - void TestWaveMatch(vector Vector) - { - if(WaveGetLaneIndex() == 0) - { - if(Vector[0] == (TYPE)0) - Vector[0] = (TYPE) 1; - else if(Vector[0] == (TYPE)1) - Vector[0] = (TYPE) 0; - else - Vector[0] = (TYPE) 1; - } - uint4 result = WaveMatch(Vector); - uint index = WaveGetLaneIndex(); - - g_OutputVector.Store(index * sizeof(uint4), result); - } - #endif - - #ifdef FUNC_TEST_SELECT - vector TestSelect(vector Vector1, - vector Vector2, - vector Vector3) - { - vector VectorCond = (Vector1 != 0); - return select(VectorCond, Vector2, Vector3); - } - #endif - - #ifdef FUNC_TEST_MODF - vector TestModF(vector Vector) - { - vector Mantissa; - vector Exponent; - - Mantissa = modf(Vector, Exponent); - - g_OutputVector.Store< vector >(sizeof(OUT_TYPE) * NUM, Exponent); - - return Mantissa; - } - #endif - - #ifdef FUNC_SHUFFLE_VECTOR - vector TestShuffleVector(TYPE Scalar) - { - vector Vector = Scalar; - return Vector; - } - #endif - - #ifdef FUNC_TEST_DERIVATIVE - void TestDerivative(vector Vector) - { - // 0 == upper-left lane in quad - // 1 == upper-right lane in quad - // 2 == lower-left lane in quad - // 3 == lower-right lane in quad - - const uint LaneIndex = WaveGetLaneIndex(); - - // We need to make sure the values are unique across lanes used in the - // partial derivative calculation so we can get a non-zero partial - // derivative. Multiplying the lane index by 2 is a simple way to - // ensure that. 
And we do this on all lanes so this function can be - // used generically for coarse and fine partial derivatives. - Vector += ((TYPE)(LaneIndex * 2)); - - vector Result = DERIVATIVE_FUNC(Vector); - - // For coarse derivatives, all lanes in the quad get the same result. - // But for fine derivatives, each lane gets a different result. To - // keep things generic we only store in the third lane as thats the - // lane we arbitrarily chose for validation with fine derivatives. - if(LaneIndex == 3) - { - g_OutputVector.Store< vector >(0, Result); - } - } - #endif - - #ifdef FUNC_TEST_QUAD_READ - void TestQuadRead(vector Vector) - { - const uint LaneIndex = WaveGetLaneIndex(); - - // Fill the long vector with something different on SOURCE_LANE_ID. - // We choose the 3rd element arbitrarily because it makes it easy - // to compute expected values CPU side. - [unroll] - for(uint i = 0; i < NUM; ++i) - { - Vector[i] = (LaneIndex == SOURCE_LANE_ID) ? Vector[2] : Vector[i]; - } - - #if IS_BINARY_OP - // QuadReadLaneAt - vector Result = QUAD_READ_FUNC(Vector, SOURCE_LANE_ID); - #else - // QuadReadAcross* - vector Result = QUAD_READ_FUNC(Vector); - #endif - - if(LaneIndex == 3) - { - g_OutputVector.Store< vector >(0, Result); - } - } - #endif - - #ifdef NUMTHREADS_XYZ - #define NUMTHREADS_ATTR [numthreads(NUMTHREADS_XYZ)] - #else - #define NUMTHREADS_ATTR [numthreads(1, 1, 1)] - #endif - - #ifdef WAVE_SIZE - #define WAVE_SIZE_ATTR [WaveSize(WAVE_SIZE)] - #else - #define WAVE_SIZE_ATTR - #endif - - WAVE_SIZE_ATTR - NUMTHREADS_ATTR - void main(uint GI : SV_GroupIndex) { - - #ifdef FUNC_SHUFFLE_VECTOR - // For shuffle vector, the input is a scalar, not a vector. - TYPE Input1 = g_InputVector1.Load(0); - #else - // For all other basic op types the first input is always a vector. 
- vector Input1 = g_InputVector1.Load< vector >(0); - #endif - - #if (IS_BINARY_OP || IS_TERNARY_OP) - vector Input2 = g_InputVector2.Load< vector >(0); - #endif - - #if IS_TERNARY_OP - vector Input3 = g_InputVector3.Load< vector >(0); - #endif - - #ifdef IS_REDUCTION_OP - const uint32_t OutNum = 1; - #else - const uint32_t OutNum = NUM; - #endif - - vector OutputVector; - #ifdef OP_STORES_RESULT_ON_SPECIFIC_LANE - FUNC(Input1); - return; - #elif TEST_ARRAY_OPERATOR - // This test case is for testing array operator []. - // It tests static array access with a compile time constant index array. - // Or dynamic access, by introducing a runtime dependency that prevents the - // index array from being a compile time constant. - const uint IndexCount = 6; - const uint IndexList[IndexCount] = { - 0, - OutNum - 1, - 1, - OutNum - 2, - OutNum / 2, - OutNum / 2 + 1 - }; - - OutputVector = 0; - uint End = min(OutNum, IndexCount); - - #if DYNAMIC_ACCESS - const uint Zero = (uint) Input2[0]; - #endif - - [unroll]for(uint i = 0; i < End; ++i) { - #if DYNAMIC_ACCESS - uint index = (uint)(IndexList[i] + Zero); - #else - uint index = (uint)(IndexList[i]); - #endif - OutputVector[index] = Input1[index]; - } - #elif IS_UNARY_OP - OutputVector = FUNC(Input1); - #elif IS_BINARY_OP - OutputVector = FUNC(Input1 OPERATOR Input2); - #elif IS_TERNARY_OP - // Ternary ops don't bother expanding OPERATOR because its - // always going to be comma for these test cases. - OutputVector = FUNC(Input1, Input2, Input3); - #endif - - g_OutputVector.Store< vector >(0, OutputVector); - }; - ]]> - -