Skip to content

Commit 5cf3ecb

Browse files
committed
regen CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
1 parent 728a6c9 commit 5cf3ecb

File tree

1 file changed

+54
-54
lines changed

1 file changed

+54
-54
lines changed

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll

Lines changed: 54 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -10188,8 +10188,8 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32
1018810188
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
1018910189
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
1019010190
; SI-NEXT: v_mov_b32_e32 v1, s4
10191-
; SI-NEXT: v_readlane_b32 s30, v18, 28
1019210191
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
10192+
; SI-NEXT: v_readlane_b32 s30, v18, 28
1019310193
; SI-NEXT: v_readlane_b32 s31, v18, 29
1019410194
; SI-NEXT: v_readlane_b32 s85, v18, 27
1019510195
; SI-NEXT: v_readlane_b32 s84, v18, 26
@@ -10640,8 +10640,8 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32
1064010640
; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
1064110641
; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0
1064210642
; VI-NEXT: v_mov_b32_e32 v1, s4
10643-
; VI-NEXT: v_readlane_b32 s30, v18, 18
1064410643
; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
10644+
; VI-NEXT: v_readlane_b32 s30, v18, 18
1064510645
; VI-NEXT: v_readlane_b32 s31, v18, 19
1064610646
; VI-NEXT: v_readlane_b32 s67, v18, 17
1064710647
; VI-NEXT: v_readlane_b32 s66, v18, 16
@@ -11063,8 +11063,8 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32
1106311063
; GFX9-NEXT: s_or_b32 s4, s4, s5
1106411064
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
1106511065
; GFX9-NEXT: v_mov_b32_e32 v1, s4
11066-
; GFX9-NEXT: v_readlane_b32 s30, v18, 14
1106711066
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
11067+
; GFX9-NEXT: v_readlane_b32 s30, v18, 14
1106811068
; GFX9-NEXT: v_readlane_b32 s31, v18, 15
1106911069
; GFX9-NEXT: v_readlane_b32 s55, v18, 13
1107011070
; GFX9-NEXT: v_readlane_b32 s54, v18, 12
@@ -11423,12 +11423,12 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32
1142311423
; GFX11-NEXT: s_or_b32 s3, s4, s5
1142411424
; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1
1142511425
; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3
11426-
; GFX11-NEXT: v_readlane_b32 s30, v17, 7
1142711426
; GFX11-NEXT: s_clause 0x3
1142811427
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
1142911428
; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16
1143011429
; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32
1143111430
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48
11431+
; GFX11-NEXT: v_readlane_b32 s30, v17, 7
1143211432
; GFX11-NEXT: v_readlane_b32 s31, v17, 8
1143311433
; GFX11-NEXT: v_readlane_b32 s48, v17, 6
1143411434
; GFX11-NEXT: v_readlane_b32 s39, v17, 5
@@ -25294,8 +25294,8 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3
2529425294
; SI-NEXT: v_or_b32_e32 v2, v3, v2
2529525295
; SI-NEXT: v_or_b32_e32 v1, v1, v2
2529625296
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
25297-
; SI-NEXT: v_readlane_b32 s30, v40, 28
2529825297
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
25298+
; SI-NEXT: v_readlane_b32 s30, v40, 28
2529925299
; SI-NEXT: v_readlane_b32 s31, v40, 29
2530025300
; SI-NEXT: v_readlane_b32 s85, v40, 27
2530125301
; SI-NEXT: v_readlane_b32 s84, v40, 26
@@ -26621,12 +26621,12 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3
2662126621
; GFX11-NEXT: v_or_b32_e32 v2, v4, v10
2662226622
; GFX11-NEXT: v_or_b32_e32 v3, v11, v7
2662326623
; GFX11-NEXT: v_or_b32_e32 v4, v12, v8
26624-
; GFX11-NEXT: v_readlane_b32 s30, v40, 8
2662526624
; GFX11-NEXT: s_clause 0x3
2662626625
; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off
2662726626
; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16
2662826627
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32
2662926628
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48
26629+
; GFX11-NEXT: v_readlane_b32 s30, v40, 8
2663026630
; GFX11-NEXT: v_readlane_b32 s31, v40, 9
2663126631
; GFX11-NEXT: v_readlane_b32 s49, v40, 7
2663226632
; GFX11-NEXT: v_readlane_b32 s48, v40, 6
@@ -39802,8 +39802,8 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in
3980239802
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
3980339803
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
3980439804
; SI-NEXT: v_mov_b32_e32 v1, s4
39805-
; SI-NEXT: v_readlane_b32 s30, v18, 28
3980639805
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
39806+
; SI-NEXT: v_readlane_b32 s30, v18, 28
3980739807
; SI-NEXT: v_readlane_b32 s31, v18, 29
3980839808
; SI-NEXT: v_readlane_b32 s85, v18, 27
3980939809
; SI-NEXT: v_readlane_b32 s84, v18, 26
@@ -40254,8 +40254,8 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in
4025440254
; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
4025540255
; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0
4025640256
; VI-NEXT: v_mov_b32_e32 v1, s4
40257-
; VI-NEXT: v_readlane_b32 s30, v18, 18
4025840257
; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
40258+
; VI-NEXT: v_readlane_b32 s30, v18, 18
4025940259
; VI-NEXT: v_readlane_b32 s31, v18, 19
4026040260
; VI-NEXT: v_readlane_b32 s67, v18, 17
4026140261
; VI-NEXT: v_readlane_b32 s66, v18, 16
@@ -40677,8 +40677,8 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in
4067740677
; GFX9-NEXT: s_or_b32 s4, s4, s5
4067840678
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
4067940679
; GFX9-NEXT: v_mov_b32_e32 v1, s4
40680-
; GFX9-NEXT: v_readlane_b32 s30, v18, 14
4068140680
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
40681+
; GFX9-NEXT: v_readlane_b32 s30, v18, 14
4068240682
; GFX9-NEXT: v_readlane_b32 s31, v18, 15
4068340683
; GFX9-NEXT: v_readlane_b32 s55, v18, 13
4068440684
; GFX9-NEXT: v_readlane_b32 s54, v18, 12
@@ -41037,12 +41037,12 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in
4103741037
; GFX11-NEXT: s_or_b32 s3, s4, s5
4103841038
; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1
4103941039
; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3
41040-
; GFX11-NEXT: v_readlane_b32 s30, v17, 7
4104141040
; GFX11-NEXT: s_clause 0x3
4104241041
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
4104341042
; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16
4104441043
; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32
4104541044
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48
41045+
; GFX11-NEXT: v_readlane_b32 s30, v17, 7
4104641046
; GFX11-NEXT: v_readlane_b32 s31, v17, 8
4104741047
; GFX11-NEXT: v_readlane_b32 s48, v17, 6
4104841048
; GFX11-NEXT: v_readlane_b32 s39, v17, 5
@@ -53428,8 +53428,8 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32
5342853428
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
5342953429
; SI-NEXT: s_waitcnt expcnt(0)
5343053430
; SI-NEXT: v_mov_b32_e32 v1, s4
53431-
; SI-NEXT: v_readlane_b32 s30, v40, 30
5343253431
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
53432+
; SI-NEXT: v_readlane_b32 s30, v40, 30
5343353433
; SI-NEXT: v_readlane_b32 s31, v40, 31
5343453434
; SI-NEXT: v_readlane_b32 s87, v40, 29
5343553435
; SI-NEXT: v_readlane_b32 s86, v40, 28
@@ -53876,8 +53876,8 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32
5387653876
; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
5387753877
; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0
5387853878
; VI-NEXT: v_mov_b32_e32 v1, s4
53879-
; VI-NEXT: v_readlane_b32 s30, v40, 18
5388053879
; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
53880+
; VI-NEXT: v_readlane_b32 s30, v40, 18
5388153881
; VI-NEXT: v_readlane_b32 s31, v40, 19
5388253882
; VI-NEXT: v_readlane_b32 s67, v40, 17
5388353883
; VI-NEXT: v_readlane_b32 s66, v40, 16
@@ -54293,8 +54293,8 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32
5429354293
; GFX9-NEXT: s_or_b32 s4, s4, s5
5429454294
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
5429554295
; GFX9-NEXT: v_mov_b32_e32 v1, s4
54296-
; GFX9-NEXT: v_readlane_b32 s30, v40, 14
5429754296
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
54297+
; GFX9-NEXT: v_readlane_b32 s30, v40, 14
5429854298
; GFX9-NEXT: v_readlane_b32 s31, v40, 15
5429954299
; GFX9-NEXT: v_readlane_b32 s55, v40, 13
5430054300
; GFX9-NEXT: v_readlane_b32 s54, v40, 12
@@ -54671,12 +54671,12 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32
5467154671
; GFX11-NEXT: v_or_b32_e32 v3, v3, v2
5467254672
; GFX11-NEXT: v_mov_b32_e32 v2, s0
5467354673
; GFX11-NEXT: v_mov_b32_e32 v4, s1
54674-
; GFX11-NEXT: v_readlane_b32 s30, v33, 8
5467554674
; GFX11-NEXT: s_clause 0x3
5467654675
; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off
5467754676
; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:16
5467854677
; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
5467954678
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48
54679+
; GFX11-NEXT: v_readlane_b32 s30, v33, 8
5468054680
; GFX11-NEXT: v_readlane_b32 s31, v33, 9
5468154681
; GFX11-NEXT: v_readlane_b32 s49, v33, 7
5468254682
; GFX11-NEXT: v_readlane_b32 s48, v33, 6
@@ -66748,11 +66748,9 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6674866748
; SI-NEXT: v_writelane_b32 v21, s17, 13
6674966749
; SI-NEXT: .LBB97_3: ; %end
6675066750
; SI-NEXT: v_readlane_b32 s18, v21, 0
66751-
; SI-NEXT: v_readlane_b32 s19, v21, 1
66751+
; SI-NEXT: s_and_b32 s16, s40, 0xff
6675266752
; SI-NEXT: s_lshl_b32 s17, s18, 8
6675366753
; SI-NEXT: v_readlane_b32 s18, v21, 2
66754-
; SI-NEXT: s_and_b32 s16, s40, 0xff
66755-
; SI-NEXT: v_readlane_b32 s19, v21, 3
6675666754
; SI-NEXT: s_or_b32 s16, s16, s17
6675766755
; SI-NEXT: s_and_b32 s17, s18, 0xff
6675866756
; SI-NEXT: v_readlane_b32 s18, v21, 4
@@ -66774,9 +66772,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6677466772
; SI-NEXT: v_mov_b32_e32 v2, s16
6677566773
; SI-NEXT: v_readlane_b32 s16, v21, 6
6677666774
; SI-NEXT: s_and_b32 s14, s14, 0xff
66777-
; SI-NEXT: v_readlane_b32 s17, v21, 7
6677866775
; SI-NEXT: s_lshl_b32 s16, s16, 8
66779-
; SI-NEXT: v_readlane_b32 s19, v21, 5
66776+
; SI-NEXT: v_readlane_b32 s17, v21, 7
6678066777
; SI-NEXT: s_or_b32 s14, s14, s16
6678166778
; SI-NEXT: v_readlane_b32 s16, v21, 8
6678266779
; SI-NEXT: v_readlane_b32 s17, v21, 9
@@ -66808,8 +66805,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6680866805
; SI-NEXT: v_mov_b32_e32 v2, s14
6680966806
; SI-NEXT: v_readlane_b32 s14, v21, 12
6681066807
; SI-NEXT: s_and_b32 s10, s10, 0xff
66811-
; SI-NEXT: v_readlane_b32 s15, v21, 13
6681266808
; SI-NEXT: s_lshl_b32 s14, s14, 8
66809+
; SI-NEXT: v_readlane_b32 s15, v21, 13
6681366810
; SI-NEXT: s_or_b32 s10, s10, s14
6681466811
; SI-NEXT: v_readlane_b32 s14, v21, 14
6681566812
; SI-NEXT: v_readlane_b32 s15, v21, 15
@@ -66960,17 +66957,20 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6696066957
; SI-NEXT: s_and_b32 s5, s89, 0xff
6696166958
; SI-NEXT: s_lshl_b32 s5, s5, 16
6696266959
; SI-NEXT: s_lshl_b32 s6, s91, 24
66960+
; SI-NEXT: v_readlane_b32 s19, v21, 1
6696366961
; SI-NEXT: s_and_b32 s4, s4, 0xffff
6696466962
; SI-NEXT: s_or_b32 s5, s6, s5
66963+
; SI-NEXT: v_readlane_b32 s19, v21, 3
6696566964
; SI-NEXT: v_add_i32_e32 v1, vcc, 56, v0
6696666965
; SI-NEXT: s_or_b32 s4, s4, s5
66966+
; SI-NEXT: v_readlane_b32 s19, v21, 5
6696766967
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
6696866968
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
6696966969
; SI-NEXT: v_mov_b32_e32 v1, s4
66970-
; SI-NEXT: v_readlane_b32 s30, v20, 34
6697166970
; SI-NEXT: v_readlane_b32 s19, v21, 11
6697266971
; SI-NEXT: v_readlane_b32 s17, v21, 17
6697366972
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
66973+
; SI-NEXT: v_readlane_b32 s30, v20, 34
6697466974
; SI-NEXT: v_readlane_b32 s31, v20, 35
6697566975
; SI-NEXT: v_readlane_b32 s99, v20, 33
6697666976
; SI-NEXT: v_readlane_b32 s98, v20, 32
@@ -67018,6 +67018,28 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6701867018
; SI-NEXT: v_writelane_b32 v21, s4, 0
6701967019
; SI-NEXT: v_writelane_b32 v21, s5, 1
6702067020
; SI-NEXT: ; implicit-def: $sgpr4
67021+
; SI-NEXT: v_writelane_b32 v21, s4, 2
67022+
; SI-NEXT: v_writelane_b32 v21, s5, 3
67023+
; SI-NEXT: ; implicit-def: $sgpr4
67024+
; SI-NEXT: v_writelane_b32 v21, s4, 4
67025+
; SI-NEXT: v_writelane_b32 v21, s5, 5
67026+
; SI-NEXT: ; implicit-def: $sgpr4
67027+
; SI-NEXT: v_writelane_b32 v21, s4, 6
67028+
; SI-NEXT: v_writelane_b32 v21, s5, 7
67029+
; SI-NEXT: ; implicit-def: $sgpr4
67030+
; SI-NEXT: v_writelane_b32 v21, s4, 8
67031+
; SI-NEXT: v_writelane_b32 v21, s5, 9
67032+
; SI-NEXT: ; implicit-def: $sgpr4
67033+
; SI-NEXT: v_writelane_b32 v21, s4, 10
67034+
; SI-NEXT: v_writelane_b32 v21, s5, 11
67035+
; SI-NEXT: ; implicit-def: $sgpr4
67036+
; SI-NEXT: v_writelane_b32 v21, s4, 12
67037+
; SI-NEXT: v_writelane_b32 v21, s5, 13
67038+
; SI-NEXT: ; implicit-def: $sgpr4
67039+
; SI-NEXT: v_writelane_b32 v21, s4, 14
67040+
; SI-NEXT: v_writelane_b32 v21, s5, 15
67041+
; SI-NEXT: ; implicit-def: $sgpr4
67042+
; SI-NEXT: v_writelane_b32 v21, s4, 16
6702167043
; SI-NEXT: ; implicit-def: $sgpr40
6702267044
; SI-NEXT: ; implicit-def: $sgpr60
6702367045
; SI-NEXT: ; implicit-def: $sgpr74
@@ -67045,6 +67067,7 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6704567067
; SI-NEXT: ; implicit-def: $sgpr79
6704667068
; SI-NEXT: ; implicit-def: $sgpr89
6704767069
; SI-NEXT: ; implicit-def: $sgpr91
67070+
; SI-NEXT: v_writelane_b32 v21, s5, 17
6704867071
; SI-NEXT: ; implicit-def: $sgpr42
6704967072
; SI-NEXT: ; implicit-def: $sgpr66
6705067073
; SI-NEXT: ; implicit-def: $sgpr64
@@ -67061,33 +67084,10 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6706167084
; SI-NEXT: ; implicit-def: $sgpr30
6706267085
; SI-NEXT: ; implicit-def: $sgpr94
6706367086
; SI-NEXT: ; implicit-def: $sgpr92
67087+
; SI-NEXT: ; implicit-def: $sgpr4
6706467088
; SI-NEXT: ; implicit-def: $sgpr90
6706567089
; SI-NEXT: ; implicit-def: $sgpr88
6706667090
; SI-NEXT: ; implicit-def: $sgpr78
67067-
; SI-NEXT: v_writelane_b32 v21, s4, 2
67068-
; SI-NEXT: v_writelane_b32 v21, s5, 3
67069-
; SI-NEXT: ; implicit-def: $sgpr4
67070-
; SI-NEXT: v_writelane_b32 v21, s4, 4
67071-
; SI-NEXT: v_writelane_b32 v21, s5, 5
67072-
; SI-NEXT: ; implicit-def: $sgpr4
67073-
; SI-NEXT: v_writelane_b32 v21, s4, 6
67074-
; SI-NEXT: v_writelane_b32 v21, s5, 7
67075-
; SI-NEXT: ; implicit-def: $sgpr4
67076-
; SI-NEXT: v_writelane_b32 v21, s4, 8
67077-
; SI-NEXT: v_writelane_b32 v21, s5, 9
67078-
; SI-NEXT: ; implicit-def: $sgpr4
67079-
; SI-NEXT: v_writelane_b32 v21, s4, 10
67080-
; SI-NEXT: v_writelane_b32 v21, s5, 11
67081-
; SI-NEXT: ; implicit-def: $sgpr4
67082-
; SI-NEXT: v_writelane_b32 v21, s4, 12
67083-
; SI-NEXT: v_writelane_b32 v21, s5, 13
67084-
; SI-NEXT: ; implicit-def: $sgpr4
67085-
; SI-NEXT: v_writelane_b32 v21, s4, 14
67086-
; SI-NEXT: v_writelane_b32 v21, s5, 15
67087-
; SI-NEXT: ; implicit-def: $sgpr4
67088-
; SI-NEXT: v_writelane_b32 v21, s4, 16
67089-
; SI-NEXT: v_writelane_b32 v21, s5, 17
67090-
; SI-NEXT: ; implicit-def: $sgpr4
6709167091
; SI-NEXT: s_branch .LBB97_2
6709267092
;
6709367093
; VI-LABEL: bitcast_v32i16_to_v64i8_scalar:
@@ -67519,8 +67519,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6751967519
; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
6752067520
; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0
6752167521
; VI-NEXT: v_mov_b32_e32 v1, s4
67522-
; VI-NEXT: v_readlane_b32 s30, v18, 18
6752367522
; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
67523+
; VI-NEXT: v_readlane_b32 s30, v18, 18
6752467524
; VI-NEXT: v_readlane_b32 s31, v18, 19
6752567525
; VI-NEXT: v_readlane_b32 s67, v18, 17
6752667526
; VI-NEXT: v_readlane_b32 s66, v18, 16
@@ -68414,12 +68414,12 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6841468414
; GFX11-NEXT: v_or_b32_e32 v2, v4, v10
6841568415
; GFX11-NEXT: v_or_b32_e32 v3, v11, v7
6841668416
; GFX11-NEXT: v_or_b32_e32 v4, v12, v8
68417-
; GFX11-NEXT: v_readlane_b32 s30, v40, 8
6841868417
; GFX11-NEXT: s_clause 0x3
6841968418
; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off
6842068419
; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16
6842168420
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32
6842268421
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48
68422+
; GFX11-NEXT: v_readlane_b32 s30, v40, 8
6842368423
; GFX11-NEXT: v_readlane_b32 s31, v40, 9
6842468424
; GFX11-NEXT: v_readlane_b32 s49, v40, 7
6842568425
; GFX11-NEXT: v_readlane_b32 s48, v40, 6
@@ -79546,8 +79546,8 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32
7954679546
; SI-NEXT: v_or_b32_e32 v1, v2, v1
7954779547
; SI-NEXT: v_or_b32_e32 v1, s4, v1
7954879548
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
79549-
; SI-NEXT: v_readlane_b32 s30, v40, 4
7955079549
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
79550+
; SI-NEXT: v_readlane_b32 s30, v40, 4
7955179551
; SI-NEXT: v_readlane_b32 s31, v40, 5
7955279552
; SI-NEXT: v_readlane_b32 s37, v40, 3
7955379553
; SI-NEXT: v_readlane_b32 s36, v40, 2
@@ -80950,12 +80950,12 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32
8095080950
; GFX11-NEXT: v_or_b32_e32 v2, v4, v10
8095180951
; GFX11-NEXT: v_or_b32_e32 v3, v11, v7
8095280952
; GFX11-NEXT: v_or_b32_e32 v4, v12, v8
80953-
; GFX11-NEXT: v_readlane_b32 s30, v40, 8
8095480953
; GFX11-NEXT: s_clause 0x3
8095580954
; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off
8095680955
; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16
8095780956
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32
8095880957
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48
80958+
; GFX11-NEXT: v_readlane_b32 s30, v40, 8
8095980959
; GFX11-NEXT: v_readlane_b32 s31, v40, 9
8096080960
; GFX11-NEXT: v_readlane_b32 s49, v40, 7
8096180961
; GFX11-NEXT: v_readlane_b32 s48, v40, 6
@@ -88414,8 +88414,8 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
8841488414
; SI-NEXT: s_lshr_b64 s[4:5], s[74:75], 24
8841588415
; SI-NEXT: s_waitcnt expcnt(0)
8841688416
; SI-NEXT: v_writelane_b32 v41, s4, 0
88417-
; SI-NEXT: v_writelane_b32 v41, s5, 1
8841888417
; SI-NEXT: v_readfirstlane_b32 s4, v6
88418+
; SI-NEXT: v_writelane_b32 v41, s5, 1
8841988419
; SI-NEXT: s_lshr_b32 s5, s4, 16
8842088420
; SI-NEXT: v_readfirstlane_b32 s4, v7
8842188421
; SI-NEXT: s_lshr_b64 s[60:61], s[4:5], 16
@@ -88895,9 +88895,9 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
8889588895
; SI-NEXT: v_or_b32_e32 v1, s5, v1
8889688896
; SI-NEXT: v_or_b32_e32 v1, s4, v1
8889788897
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
88898-
; SI-NEXT: v_readlane_b32 s30, v40, 34
8889988898
; SI-NEXT: v_readlane_b32 s75, v41, 1
8890088899
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
88900+
; SI-NEXT: v_readlane_b32 s30, v40, 34
8890188901
; SI-NEXT: v_readlane_b32 s31, v40, 35
8890288902
; SI-NEXT: v_readlane_b32 s99, v40, 33
8890388903
; SI-NEXT: v_readlane_b32 s98, v40, 32
@@ -89735,8 +89735,8 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
8973589735
; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
8973689736
; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0
8973789737
; VI-NEXT: v_mov_b32_e32 v1, s4
89738-
; VI-NEXT: v_readlane_b32 s30, v18, 26
8973989738
; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
89739+
; VI-NEXT: v_readlane_b32 s30, v18, 26
8974089740
; VI-NEXT: v_readlane_b32 s31, v18, 27
8974189741
; VI-NEXT: v_readlane_b32 s83, v18, 25
8974289742
; VI-NEXT: v_readlane_b32 s82, v18, 24
@@ -90519,8 +90519,8 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
9051990519
; GFX9-NEXT: s_or_b32 s4, s4, s5
9052090520
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
9052190521
; GFX9-NEXT: v_mov_b32_e32 v1, s4
90522-
; GFX9-NEXT: v_readlane_b32 s30, v18, 14
9052390522
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
90523+
; GFX9-NEXT: v_readlane_b32 s30, v18, 14
9052490524
; GFX9-NEXT: v_readlane_b32 s31, v18, 15
9052590525
; GFX9-NEXT: v_readlane_b32 s55, v18, 13
9052690526
; GFX9-NEXT: v_readlane_b32 s54, v18, 12
@@ -91258,12 +91258,12 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
9125891258
; GFX11-NEXT: s_or_b32 s3, s4, s5
9125991259
; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1
9126091260
; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3
91261-
; GFX11-NEXT: v_readlane_b32 s30, v17, 9
9126291261
; GFX11-NEXT: s_clause 0x3
9126391262
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
9126491263
; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16
9126591264
; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32
9126691265
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48
91266+
; GFX11-NEXT: v_readlane_b32 s30, v17, 9
9126791267
; GFX11-NEXT: v_readlane_b32 s31, v17, 10
9126891268
; GFX11-NEXT: v_readlane_b32 s51, v17, 8
9126991269
; GFX11-NEXT: v_readlane_b32 s50, v17, 7

0 commit comments

Comments
 (0)