@@ -1597,9 +1597,11 @@ template <typename T> struct ExpectedBuilder<OpType::WaveMatch, T> {
15971597 static std::vector<UINT> buildExpected (Op<OpType::WaveMatch, T, 1 > &,
15981598 const InputSets<T> &,
15991599 const UINT WaveSize) {
1600- // For this test, the shader arranges it so that lane 0 is different from
1601- // all the other lanes. Besides that all other lines write their result of
1602- // WaveMatch as well.
1600+ // For this test, the shader arranges it so that lanes 0, WAVE_SIZE/2 and
1601+ // WAVE_SIZE-1 are different from all the other lanes. Those lanes also
1602+ // modify the vector at positions 0, WAVE_SIZE/2 and WAVE_SIZE-1
1603+ // respectively, if the input vector has enough elements. Besides that, all
1604+ // other lanes write their result of WaveMatch as well.
16031605
16041606 std::vector<UINT> Expected;
16051607 Expected.assign (WaveSize * 4 , 0 );
@@ -1613,21 +1615,52 @@ template <typename T> struct ExpectedBuilder<OpType::WaveMatch, T> {
16131615 const uint64_t HighWaveMask =
16141616 (HighWaves < 64 ) ? (1ULL << HighWaves) - 1 : ~0ULL ;
16151617
1616- const uint64_t LowExpected = ~ 1ULL & LowWaveMask ;
1617- const uint64_t HighExpected = ~ 0ULL & HighWaveMask ;
1618+ const UINT MidBit = WaveSize / 2 ;
1619+ const UINT LastBit = WaveSize - 1 ;
16181620
1619- Expected[0 ] = 1 ;
1620- Expected[1 ] = 0 ;
1621- Expected[2 ] = 0 ;
1622- Expected[3 ] = 0 ;
1621+ uint64_t LowUnchangedLanes = ~1ULL ; // Clear bit 0
1622+ uint64_t HighUnchangedLanes = ~0ULL ;
16231623
1624- // all lanes other than the first one have the same result
1625- for (UINT I = 1 ; I < WaveSize; ++I) {
1624+ if (MidBit < 64 )
1625+ LowUnchangedLanes &= ~(1ULL << MidBit);
1626+ else
1627+ HighUnchangedLanes &= ~(1ULL << (MidBit - 64 ));
1628+
1629+ if (LastBit < 64 )
1630+ LowUnchangedLanes &= ~(1ULL << LastBit);
1631+ else
1632+ HighUnchangedLanes &= ~(1ULL << (LastBit - 64 ));
1633+
1634+ // Removing bits outside the wave size.
1635+ LowUnchangedLanes &= LowWaveMask;
1636+ HighUnchangedLanes &= HighWaveMask;
1637+
1638+ for (UINT I = 0 ; I < WaveSize; ++I) {
16261639 const UINT Index = I * 4 ;
1627- Expected[Index] = static_cast <UINT>(LowExpected);
1628- Expected[Index + 1 ] = static_cast <UINT>(LowExpected >> 32 );
1629- Expected[Index + 2 ] = static_cast <UINT>(HighExpected);
1630- Expected[Index + 3 ] = static_cast <UINT>(HighExpected >> 32 );
1640+
1641+ if (I == 0 || MidBit == I || LastBit == I) {
1642+ uint64_t LowChangedLanes = 0ULL ;
1643+ uint64_t HighChangedLanes = 0ULL ;
1644+
1645+ if (I < 64 )
1646+ LowChangedLanes = (1ULL << I);
1647+ else
1648+ HighChangedLanes = (1ULL << (I - 64 ));
1649+
1650+ LowChangedLanes &= LowWaveMask;
1651+ HighChangedLanes &= HighWaveMask;
1652+
1653+ Expected[Index] = static_cast <UINT>(LowChangedLanes);
1654+ Expected[Index + 1 ] = static_cast <UINT>(LowChangedLanes >> 32 );
1655+ Expected[Index + 2 ] = static_cast <UINT>(HighChangedLanes);
1656+ Expected[Index + 3 ] = static_cast <UINT>(HighChangedLanes >> 32 );
1657+ continue ;
1658+ }
1659+
1660+ Expected[Index] = static_cast <UINT>(LowUnchangedLanes);
1661+ Expected[Index + 1 ] = static_cast <UINT>(LowUnchangedLanes >> 32 );
1662+ Expected[Index + 2 ] = static_cast <UINT>(HighUnchangedLanes);
1663+ Expected[Index + 3 ] = static_cast <UINT>(HighUnchangedLanes >> 32 );
16311664 }
16321665
16331666 return Expected;
0 commit comments