# bf16/f16/f32:int4:bf16/f16/f32
# 2D block wei scale (bf16/f16/f32), 2D block wei zp (s4)

# bf16 case: bf16 at the first and third --dt positions, a 4-bit integer type
# at the second, with grouped scales and zero points attached to the weights.
--reset
# Two data-type triples; they differ only in the 4-bit type (s4 vs u4).
--dt=bf16:s4:bf16,bf16:u4:bf16
--stag=abc
# Weights scales: per_ocic policy, bf16 values, 32x1 groups.
--attr-scales=wei:per_ocic:bf16:32x1
# Weights zero points: per_ocic policy, s4 values, 32x1 groups (same group
# shape as the scales). The zero-point type is s4 for the u4 triple as well.
--attr-zero-points=wei:per_ocic:s4:32x1
# NOTE(review): the trailing `:true` is presumably the flag that extends the
# bf16 math mode to the integer weights -- confirm against benchdnn docs.
--attr-fpmath=bf16:true
# Problems come in pairs sharing the same first operand. The second problem
# of each pair has a leading dimension of 1 in the second operand instead of
# a matching 7 or 3 (presumably a broadcast batch dimension -- confirm).
# The dimension shared by both operands is 32 or 96, i.e. 1x or 3x the
# group size of 32.
7x6x32:7x32x64
7x6x32:1x32x64
3x6x96:3x96x64
3x6x96:1x96x64

# f16 case: f16 at the first and third --dt positions, a 4-bit integer type
# at the second, with grouped scales and zero points attached to the weights.
--reset
# Two data-type triples; they differ only in the 4-bit type (s4 vs u4).
--dt=f16:s4:f16,f16:u4:f16
--stag=abc
# Weights scales: per_ocic policy, f16 values, 32x1 groups.
--attr-scales=wei:per_ocic:f16:32x1
# Weights zero points: per_ocic policy, s4 values, 32x1 groups (same group
# shape as the scales). The zero-point type is s4 for the u4 triple as well.
--attr-zero-points=wei:per_ocic:s4:32x1
# NOTE(review): the trailing `:true` is presumably the flag that extends the
# f16 math mode to the integer weights -- confirm against benchdnn docs.
--attr-fpmath=f16:true
# Problems come in pairs sharing the same first operand. The second problem
# of each pair has a leading dimension of 1 in the second operand instead of
# a matching 7 or 3 (presumably a broadcast batch dimension -- confirm).
# The dimension shared by both operands is 32 or 96, i.e. 1x or 3x the
# group size of 32.
7x6x32:7x32x64
7x6x32:1x32x64
3x6x96:3x96x64
3x6x96:1x96x64

# f32 case: f32 at the first and third --dt positions, a 4-bit integer type
# at the second, with grouped scales and zero points attached to the weights.
--reset
# Two data-type triples; they differ only in the 4-bit type (s4 vs u4).
--dt=f32:s4:f32,f32:u4:f32
--stag=abc
# Weights scales: per_ocic policy, f32 values, 32x1 groups.
--attr-scales=wei:per_ocic:f32:32x1
# Weights zero points: per_ocic policy, s4 values, 32x1 groups (same group
# shape as the scales). The zero-point type is s4 for the u4 triple as well.
--attr-zero-points=wei:per_ocic:s4:32x1
# Unlike the bf16 and f16 cases, the math mode here is `strict`.
# NOTE(review): the trailing `:true` is presumably the flag that applies the
# math mode to the integer weights -- confirm against benchdnn docs.
--attr-fpmath=strict:true
# Problems come in pairs sharing the same first operand. The second problem
# of each pair has a leading dimension of 1 in the second operand instead of
# a matching 7 or 3 (presumably a broadcast batch dimension -- confirm).
# The dimension shared by both operands is 32 or 96, i.e. 1x or 3x the
# group size of 32.
7x6x32:7x32x64
7x6x32:1x32x64
3x6x96:3x96x64
3x6x96:1x96x64

# fp8:fp8:fp8
# common/per_oc wei, dst scale; common/grouped (per_ocic, 1x256) src scale (bf16/f32)

# All-fp8 case with scales on src, wei and dst.
--reset 
# Four entries: two mixed e5m2/e4m3 triples followed by the single-type
# forms f8_e5m2 and f8_e4m3 (presumably one type for all tensors -- confirm).
--dt=f8_e5m2:f8_e4m3:f8_e5m2,f8_e4m3:f8_e5m2:f8_e4m3,f8_e5m2,f8_e4m3 
# Four scale configurations, one per physical line of the option below:
#   1. bf16: per_oc wei + per_ocic src with 1x256 groups + per_oc dst
#   2. bf16: common value 2 on wei, src and dst
#   3. f32:  same policies as configuration 1
#   4. f32:  same policies as configuration 2
# Do not insert comments between the continuation lines.
--attr-scales=wei:per_oc:bf16+src:per_ocic:bf16:1x256+dst:per_oc:bf16,\
                      wei:common:2:bf16+src:common:2:bf16+dst:common:2:bf16,\
                      wei:per_oc:f32+src:per_ocic:f32:1x256+dst:per_oc:f32,\
                      wei:common:2:f32+src:common:2:f32+dst:common:2:f32
# Plain 2D `ab` tags for source, weights and destination.
--stag=ab 
--wtag=ab 
--dtag=ab 
# Two 2D problems with a shared dimension of 256, which equals the 256 in
# the 1x256 src group. The second operand's last dimension is 128 or 2.
# The quoted text after `n` appears to be a problem label -- confirm.
2x256:256x128n"DLRM:2*1"
2x256:256x2n"DLRM:7*1"

# fp8:fp8:f32/f16/bf16
# common/per_oc wei scale; common/grouped (per_ocic, 1x256) src scale (bf16/f32)

# fp8 at the first two --dt positions, a wider floating-point type at the
# third, with scales on src and wei only.
--reset 
# Three triples; the third position is f32, f16 and bf16 respectively.
--dt=f8_e5m2:f8_e4m3:f32,f8_e4m3:f8_e5m2:f16,f8_e5m2:f8_e4m3:bf16
# Four scale configurations, one per physical line of the option below:
#   1. bf16: per_oc wei + per_ocic src with 1x256 groups
#   2. bf16: common value 2 on wei and src
#   3. f32:  same policies as configuration 1
#   4. f32:  same policies as configuration 2
# No dst scale is set. Do not insert comments between the continuation lines.
--attr-scales=wei:per_oc:bf16+src:per_ocic:bf16:1x256,\
                      wei:common:2:bf16+src:common:2:bf16,\
                      wei:per_oc:f32+src:per_ocic:f32:1x256,\
                      wei:common:2:f32+src:common:2:f32
# Plain 2D `ab` tags for source, weights and destination.
--stag=ab 
--wtag=ab 
--dtag=ab 
# Two 2D problems with a shared dimension of 256, which equals the 256 in
# the 1x256 src group. The second operand's last dimension is 128 or 2.
# The quoted text after `n` appears to be a problem label -- confirm.
2x256:256x128n"DLRM:2*1"
2x256:256x2n"DLRM:7*1"

# bf16:int4:fp8
# 2D block wei scale (bf16)
# common/per_oc dst scale (bf16)

# bf16 at the first --dt position, 4-bit integer at the second, fp8 at the
# third; scales on wei and dst, zero points on wei.
--reset
# Two triples: s4 is paired with f8_e5m2, u4 with f8_e4m3.
--dt=bf16:s4:f8_e5m2,bf16:u4:f8_e4m3
--stag=abc
# Two scale configurations sharing the same grouped weights scale
# (per_ocic, bf16, 32x1 groups); they differ only in the dst scale:
#   1. dst per_oc, bf16
#   2. dst common value 2, bf16
--attr-scales=wei:per_ocic:bf16:32x1+dst:per_oc:bf16,wei:per_ocic:bf16:32x1+dst:common:2:bf16
# Weights zero points: per_ocic policy, s4 values, 32x1 groups.
--attr-zero-points=wei:per_ocic:s4:32x1
# NOTE(review): the trailing `:true` is presumably the flag that extends the
# bf16 math mode to the integer weights -- confirm against benchdnn docs.
--attr-fpmath=bf16:true
# Problems come in pairs sharing the same first operand. The second problem
# of each pair has a leading dimension of 1 in the second operand instead of
# a matching 7 or 3 (presumably a broadcast batch dimension -- confirm).
7x6x32:7x32x64
7x6x32:1x32x64
3x6x96:3x96x64
3x6x96:1x96x64

# fp8:int4:bf16
# 2D block wei scale (bf16)
# common/per_oc src scale (bf16)

# fp8 at the first --dt position, 4-bit integer at the second, bf16 at the
# third; scales on wei and src, zero points on wei.
--reset
# Two triples: f8_e5m2 is paired with s4, f8_e4m3 with u4.
--dt=f8_e5m2:s4:bf16,f8_e4m3:u4:bf16
--stag=abc
# Two scale configurations sharing the same grouped weights scale
# (per_ocic, bf16, 32x1 groups); they differ only in the src scale:
#   1. src per_oc, bf16, with a 1x32 group
#   2. src common value 2, bf16
# NOTE(review): configuration 1 attaches a group (1x32) to the per_oc
# policy, whereas the fp8:fp8 sections in this file use per_ocic for a
# grouped src scale -- confirm that per_oc is intended here.
--attr-scales=wei:per_ocic:bf16:32x1+src:per_oc:bf16:1x32,wei:per_ocic:bf16:32x1+src:common:2:bf16
# Weights zero points: per_ocic policy, s4 values, 32x1 groups.
--attr-zero-points=wei:per_ocic:s4:32x1
# NOTE(review): the trailing `:true` is presumably the flag that extends the
# bf16 math mode to the integer weights -- confirm against benchdnn docs.
--attr-fpmath=bf16:true
# Only problems with a shared dimension of 32 are listed, matching the 32 in
# both the 32x1 wei group and the 1x32 src group. The second problem uses a
# leading dimension of 1 in the second operand (presumably broadcast).
7x6x32:7x32x64
7x6x32:1x32x64

# fp8:int4:fp8
# 2D block wei scale (fp8/f32)
# common/per_oc src scale (f32); no dst scale is set in this section

# fp8 at the first and third --dt positions, 4-bit integer at the second;
# scales on wei and src, zero points on wei.
--reset
# Two triples: f8_e5m2 is paired with s4, f8_e4m3 with u4.
--dt=f8_e5m2:s4:f8_e5m2,f8_e4m3:u4:f8_e4m3
--stag=abc
# Three scale configurations. All use a per_ocic weights scale with 32x1
# groups and an f32 src scale; they differ in the weights scale data type
# and the src policy:
#   1. wei scale f8_e5m2, src per_oc with a 1x32 group
#   2. wei scale f8_e4m3, src common value 2
#   3. wei scale f32,     src common value 2
# None of the three sets a dst scale.
# NOTE(review): configuration 1 attaches a group (1x32) to the per_oc
# policy, whereas the fp8:fp8 sections in this file use per_ocic for a
# grouped src scale -- confirm that per_oc is intended here.
--attr-scales=wei:per_ocic:f8_e5m2:32x1+src:per_oc:f32:1x32,wei:per_ocic:f8_e4m3:32x1+src:common:2:f32,wei:per_ocic:f32:32x1+src:common:2:f32
# Weights zero points: per_ocic policy, s4 values, 32x1 groups.
--attr-zero-points=wei:per_ocic:s4:32x1
# NOTE(review): the trailing `:true` is presumably the flag that extends the
# bf16 math mode to the integer weights -- confirm against benchdnn docs.
--attr-fpmath=bf16:true
# Only problems with a shared dimension of 32 are listed, matching the 32 in
# both the 32x1 wei group and the 1x32 src group. The second problem uses a
# leading dimension of 1 in the second operand (presumably broadcast).
7x6x32:7x32x64
7x6x32:1x32x64
