5279 lines
227 KiB
ArmAsm
5279 lines
227 KiB
ArmAsm
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
|
# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-n1 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
|
|
|
|
# G01: LD1, one register (.1d/.2d/.2s/.4h/.4s), immediate post-index on x27.
# Every load is followed by an add that reads the base register x27.
# LLVM-MCA-BEGIN G01
|
|
ld1 { v1.1d }, [x27], #8
|
|
add x0, x27, 1
|
|
ld1 { v1.2d }, [x27], #16
|
|
add x0, x27, 1
|
|
ld1 { v1.2s }, [x27], #8
|
|
add x0, x27, 1
|
|
ld1 { v1.4h }, [x27], #8
|
|
add x0, x27, 1
|
|
ld1 { v1.4s }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G02: LD1, one register: .8b/.8h/.16b with immediate post-index, then
# .1d/.2d with register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G02
|
|
ld1 { v1.8b }, [x27], #8
|
|
add x0, x27, 1
|
|
ld1 { v1.8h }, [x27], #16
|
|
add x0, x27, 1
|
|
ld1 { v1.16b }, [x27], #16
|
|
add x0, x27, 1
|
|
ld1 { v1.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G03: LD1, one register (.2s/.4h/.4s/.8b/.8h), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G03
|
|
ld1 { v1.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G04: LD1 one register .16b with register (x28) post-index, then LD1 two
# registers (.1d/.2d/.2s/.4h) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G04
|
|
ld1 { v1.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.1d, v2.1d }, [x27], #16
|
|
add x0, x27, 1
|
|
ld1 { v1.2d, v2.2d }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.2s, v2.2s }, [x27], #16
|
|
add x0, x27, 1
|
|
ld1 { v1.4h, v2.4h }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G05: LD1, two registers: .4s/.8b/.8h/.16b with immediate post-index, then
# .1d with register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G05
|
|
ld1 { v1.4s, v2.4s }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.8b, v2.8b }, [x27], #16
|
|
add x0, x27, 1
|
|
ld1 { v1.8h, v2.8h }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.16b, v2.16b }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.1d, v2.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G06: LD1, two registers (.2d/.2s/.4h/.4s/.8b), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G06
|
|
ld1 { v1.2d, v2.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.2s, v2.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.4h, v2.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.4s, v2.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.8b, v2.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G07: LD1 two registers (.8h/.16b) with register (x28) post-index, then LD1
# three registers (.1d/.2d/.2s) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G07
|
|
ld1 { v1.8h, v2.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.16b, v2.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
add x0, x27, 1
|
|
ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
add x0, x27, 1
|
|
ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G08: LD1, three registers (.4h/.4s/.8b/.8h/.16b), immediate post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G08
|
|
ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
add x0, x27, 1
|
|
ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
add x0, x27, 1
|
|
ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
add x0, x27, 1
|
|
ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
add x0, x27, 1
|
|
ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G09: LD1, three registers (.1d/.2d/.2s/.4h/.4s), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G09
|
|
ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G10: LD1 three registers (.8b/.8h/.16b) with register (x28) post-index, then
# LD1 four registers (.1d/.2d) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G10
|
|
ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G11: LD1, four registers (.2s/.4h/.4s/.8b/.8h), immediate post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G11
|
|
ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
add x0, x27, 1
|
|
ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
add x0, x27, 1
|
|
ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G12: LD1, four registers: .16b with immediate post-index, then
# .1d/.2d/.2s/.4h with register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G12
|
|
ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
add x0, x27, 1
|
|
ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G13: LD1 four registers (.4s/.8b/.8h/.16b) with register (x28) post-index,
# then LD1 single lane .b[0] with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G13
|
|
ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.b }[0], [x27], #1
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G14: LD1 single lane: .b[8] immediate post-index, .b[0]/.b[8] register (x28)
# post-index, .h[0]/.h[4] immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G14
|
|
ld1 { v1.b }[8], [x27], #1
|
|
add x0, x27, 1
|
|
ld1 { v1.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.h }[0], [x27], #2
|
|
add x0, x27, 1
|
|
ld1 { v1.h }[4], [x27], #2
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G15: LD1 single lane: .h[0]/.h[4] register (x28) post-index, .s[0] with
# immediate and register post-index, .d[0] immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G15
|
|
ld1 { v1.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.s }[0], [x27], #4
|
|
add x0, x27, 1
|
|
ld1 { v1.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld1 { v1.d }[0], [x27], #8
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G16: LD1 single lane .d[0] with register (x28) post-index, then LD1R
# (.1d/.2d/.2s/.4h) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G16
|
|
ld1 { v1.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld1r { v1.1d }, [x27], #8
|
|
add x0, x27, 1
|
|
ld1r { v1.2d }, [x27], #8
|
|
add x0, x27, 1
|
|
ld1r { v1.2s }, [x27], #4
|
|
add x0, x27, 1
|
|
ld1r { v1.4h }, [x27], #2
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G17: LD1R: .4s/.8b/.8h/.16b with immediate post-index, then .1d with
# register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G17
|
|
ld1r { v1.4s }, [x27], #4
|
|
add x0, x27, 1
|
|
ld1r { v1.8b }, [x27], #1
|
|
add x0, x27, 1
|
|
ld1r { v1.8h }, [x27], #2
|
|
add x0, x27, 1
|
|
ld1r { v1.16b }, [x27], #1
|
|
add x0, x27, 1
|
|
ld1r { v1.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G18: LD1R (.2d/.2s/.4h/.4s/.8b), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G18
|
|
ld1r { v1.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1r { v1.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1r { v1.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1r { v1.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1r { v1.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G19: LD1R (.8h/.16b) with register (x28) post-index, then LD2 multiple
# structures (.2d/.2s/.4h) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G19
|
|
ld1r { v1.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld1r { v1.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.2d, v2.2d }, [x27], #32
|
|
add x0, x27, 1
|
|
ld2 { v1.2s, v2.2s }, [x27], #16
|
|
add x0, x27, 1
|
|
ld2 { v1.4h, v2.4h }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G20: LD2 multiple structures: .4s/.8b/.8h/.16b with immediate post-index,
# then .2d with register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G20
|
|
ld2 { v1.4s, v2.4s }, [x27], #32
|
|
add x0, x27, 1
|
|
ld2 { v1.8b, v2.8b }, [x27], #16
|
|
add x0, x27, 1
|
|
ld2 { v1.8h, v2.8h }, [x27], #32
|
|
add x0, x27, 1
|
|
ld2 { v1.16b, v2.16b }, [x27], #32
|
|
add x0, x27, 1
|
|
ld2 { v1.2d, v2.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G21: LD2 multiple structures (.2s/.4h/.4s/.8b/.8h), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G21
|
|
ld2 { v1.2s, v2.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.4h, v2.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.4s, v2.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.8b, v2.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.8h, v2.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G22: LD2 .16b with register (x28) post-index, then LD2 single lane
# .b[0]/.b[8] with immediate and register post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G22
|
|
ld2 { v1.16b, v2.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.b, v2.b }[0], [x27], #2
|
|
add x0, x27, 1
|
|
ld2 { v1.b, v2.b }[8], [x27], #2
|
|
add x0, x27, 1
|
|
ld2 { v1.b, v2.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.b, v2.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G23: LD2 single lane: .h[0]/.h[4] with immediate and register (x28)
# post-index, then .s[0] with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G23
|
|
ld2 { v1.h, v2.h }[0], [x27], #4
|
|
add x0, x27, 1
|
|
ld2 { v1.h, v2.h }[4], [x27], #4
|
|
add x0, x27, 1
|
|
ld2 { v1.h, v2.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.h, v2.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.s, v2.s }[0], [x27], #8
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G24: LD2 single lane (.s[0] register post-index; .d[0] immediate and register
# post-index), then LD2R (.1d/.2d) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G24
|
|
ld2 { v1.s, v2.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld2 { v1.d, v2.d }[0], [x27], #16
|
|
add x0, x27, 1
|
|
ld2 { v1.d, v2.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld2r { v1.1d, v2.1d }, [x27], #16
|
|
add x0, x27, 1
|
|
ld2r { v1.2d, v2.2d }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G25: LD2R (.2s/.4h/.4s/.8b/.8h), immediate post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G25
|
|
ld2r { v1.2s, v2.2s }, [x27], #8
|
|
add x0, x27, 1
|
|
ld2r { v1.4h, v2.4h }, [x27], #4
|
|
add x0, x27, 1
|
|
ld2r { v1.4s, v2.4s }, [x27], #8
|
|
add x0, x27, 1
|
|
ld2r { v1.8b, v2.8b }, [x27], #2
|
|
add x0, x27, 1
|
|
ld2r { v1.8h, v2.8h }, [x27], #4
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G26: LD2R: .16b with immediate post-index, then .1d/.2d/.2s/.4h with
# register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G26
|
|
ld2r { v1.16b, v2.16b }, [x27], #2
|
|
add x0, x27, 1
|
|
ld2r { v1.1d, v2.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2r { v1.2d, v2.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2r { v1.2s, v2.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2r { v1.4h, v2.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G27: LD2R (.4s/.8b/.8h/.16b) with register (x28) post-index, then LD3
# multiple structures .2d with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G27
|
|
ld2r { v1.4s, v2.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2r { v1.8b, v2.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2r { v1.8h, v2.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld2r { v1.16b, v2.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G28: LD3 multiple structures (.2s/.4h/.4s/.8b/.8h), immediate post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G28
|
|
ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
add x0, x27, 1
|
|
ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
add x0, x27, 1
|
|
ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
add x0, x27, 1
|
|
ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
add x0, x27, 1
|
|
ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G29: LD3 multiple structures: .16b with immediate post-index, then
# .2d/.2s/.4h/.4s with register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G29
|
|
ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
add x0, x27, 1
|
|
ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G30: LD3 multiple structures (.8b/.8h/.16b) with register (x28) post-index,
# then LD3 single lane .b[0]/.b[8] with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G30
|
|
ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
|
|
add x0, x27, 1
|
|
ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G31: LD3 single lane: .b[0]/.b[8] register (x28) post-index, .h[0]/.h[4]
# immediate post-index, .h[0] register post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G31
|
|
ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
|
|
add x0, x27, 1
|
|
ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
|
|
add x0, x27, 1
|
|
ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G32: LD3 single lane: .h[4] register (x28) post-index, then .s[0] and .d[0]
# each with immediate and register post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G32
|
|
ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
|
|
add x0, x27, 1
|
|
ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
|
|
add x0, x27, 1
|
|
ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G33: LD3R (.1d/.2d/.2s/.4h/.4s), immediate post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G33
|
|
ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
add x0, x27, 1
|
|
ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
|
|
add x0, x27, 1
|
|
ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
|
|
add x0, x27, 1
|
|
ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
|
|
add x0, x27, 1
|
|
ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G34: LD3R: .8b/.8h/.16b with immediate post-index, then .1d/.2d with
# register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G34
|
|
ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
|
|
add x0, x27, 1
|
|
ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
|
|
add x0, x27, 1
|
|
ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
|
|
add x0, x27, 1
|
|
ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G35: LD3R (.2s/.4h/.4s/.8b/.8h), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G35
|
|
ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G36: LD3R .16b with register (x28) post-index, then LD4 multiple structures
# (.2d/.2s/.4h/.4s) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G36
|
|
ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
add x0, x27, 1
|
|
ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
add x0, x27, 1
|
|
ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
add x0, x27, 1
|
|
ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G37: LD4 multiple structures: .8b/.8h/.16b with immediate post-index, then
# .2d/.2s with register (x28) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G37
|
|
ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
add x0, x27, 1
|
|
ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
add x0, x27, 1
|
|
ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
add x0, x27, 1
|
|
ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G38: LD4 multiple structures (.4h/.4s/.8b/.8h/.16b), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G38
|
|
ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G39: LD4 single lane: .b[0]/.b[8] with immediate and register (x28)
# post-index, then .h[0] with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G39
|
|
ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
|
|
add x0, x27, 1
|
|
ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
|
|
add x0, x27, 1
|
|
ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G40: LD4 single lane: .h[4] immediate post-index, .h[0]/.h[4] register (x28)
# post-index, .s[0] with immediate and register post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G40
|
|
ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
|
|
add x0, x27, 1
|
|
ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
|
|
add x0, x27, 1
|
|
ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G41: LD4 single lane .d[0] with immediate and register (x28) post-index, then
# LD4R (.1d/.2d/.2s) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G41
|
|
ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
|
|
add x0, x27, 1
|
|
ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
add x0, x27, 1
|
|
ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
|
|
add x0, x27, 1
|
|
ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G42: LD4R (.4h/.4s/.8b/.8h/.16b), immediate post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G42
|
|
ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
|
|
add x0, x27, 1
|
|
ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
|
|
add x0, x27, 1
|
|
ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
|
|
add x0, x27, 1
|
|
ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
|
|
add x0, x27, 1
|
|
ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G43: LD4R (.1d/.2d/.2s/.4h/.4s), register (x28) post-index on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G43
|
|
ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G44: LD4R (.8b/.8h/.16b) with register (x28) post-index, then LDP of FP
# registers (s, d pairs) with immediate post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G44
|
|
ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
ldp s1, s2, [x27], #248
|
|
add x0, x27, 1
|
|
ldp d1, d2, [x27], #496
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G45: LDP: q pair post-index, s/d/q pairs pre-index ("[x27, #imm]!"), then
# w pair post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G45
|
|
ldp q1, q2, [x27], #992
|
|
add x0, x27, 1
|
|
ldp s1, s2, [x27, #248]!
|
|
add x0, x27, 1
|
|
ldp d1, d2, [x27, #496]!
|
|
add x0, x27, 1
|
|
ldp q1, q2, [x27, #992]!
|
|
add x0, x27, 1
|
|
ldp w1, w2, [x27], #248
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G46: LDP x pair post-index, LDP w/x pairs pre-index, then LDPSW post-index
# and pre-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G46
|
|
ldp x1, x2, [x27], #496
|
|
add x0, x27, 1
|
|
ldp w1, w2, [x27, #248]!
|
|
add x0, x27, 1
|
|
ldp x1, x2, [x27, #496]!
|
|
add x0, x27, 1
|
|
ldpsw x1, x2, [x27], #248
|
|
add x0, x27, 1
|
|
ldpsw x1, x2, [x27, #248]!
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G47: LDR of FP/SIMD registers (b/h/s/d/q), post-index #254 on x27.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G47
|
|
ldr b1, [x27], #254
|
|
add x0, x27, 1
|
|
ldr h1, [x27], #254
|
|
add x0, x27, 1
|
|
ldr s1, [x27], #254
|
|
add x0, x27, 1
|
|
ldr d1, [x27], #254
|
|
add x0, x27, 1
|
|
ldr q1, [x27], #254
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G48: LDR of FP/SIMD registers (b/h/s/d/q), pre-index "[x27, #254]!".
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G48
|
|
ldr b1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldr h1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldr s1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldr d1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldr q1, [x27, #254]!
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G49: LDR of general registers (w/x) post-index and pre-index, then LDRB
# post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G49
|
|
ldr w1, [x27], #254
|
|
add x0, x27, 1
|
|
ldr x1, [x27], #254
|
|
add x0, x27, 1
|
|
ldr w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldr x1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldrb w1, [x27], #254
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G50: LDRB pre-index, LDRH post-index and pre-index, then LDRSB (w and x
# destinations) post-index. Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G50
|
|
ldrb w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldrh w1, [x27], #254
|
|
add x0, x27, 1
|
|
ldrh w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldrsb w1, [x27], #254
|
|
add x0, x27, 1
|
|
ldrsb x1, [x27], #254
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G51: LDRSB (w/x) pre-index, LDRSH (w/x) post-index, then LDRSH w pre-index.
# Each load is followed by an add reading x27.
# LLVM-MCA-BEGIN G51
|
|
ldrsb w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldrsb x1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldrsh w1, [x27], #254
|
|
add x0, x27, 1
|
|
ldrsh x1, [x27], #254
|
|
add x0, x27, 1
|
|
ldrsh w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G52: LDRSH x pre-index, LDRSW post-index and pre-index, then the first stores:
# ST1 one register (.1d/.2d) with immediate post-index. Each access is followed by an add reading x27.
# LLVM-MCA-BEGIN G52
|
|
ldrsh x1, [x27, #254]!
|
|
add x0, x27, 1
|
|
ldrsw x1, [x27], #254
|
|
add x0, x27, 1
|
|
ldrsw x1, [x27, #254]!
|
|
add x0, x27, 1
|
|
st1 { v1.1d }, [x27], #8
|
|
add x0, x27, 1
|
|
st1 { v1.2d }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G53: ST1, one register (.2s/.4h/.4s/.8b/.8h), immediate post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G53
|
|
st1 { v1.2s }, [x27], #8
|
|
add x0, x27, 1
|
|
st1 { v1.4h }, [x27], #8
|
|
add x0, x27, 1
|
|
st1 { v1.4s }, [x27], #16
|
|
add x0, x27, 1
|
|
st1 { v1.8b }, [x27], #8
|
|
add x0, x27, 1
|
|
st1 { v1.8h }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G54: ST1, one register: .16b with immediate post-index, then
# .1d/.2d/.2s/.4h with register (x28) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G54
|
|
st1 { v1.16b }, [x27], #16
|
|
add x0, x27, 1
|
|
st1 { v1.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G55: ST1 one register (.4s/.8b/.8h/.16b) with register (x28) post-index, then
# ST1 two registers .1d with immediate post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G55
|
|
st1 { v1.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.1d, v2.1d }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G56: ST1, two registers (.2d/.2s/.4h/.4s/.8b), immediate post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G56
|
|
st1 { v1.2d, v2.2d }, [x27], #32
|
|
add x0, x27, 1
|
|
st1 { v1.2s, v2.2s }, [x27], #16
|
|
add x0, x27, 1
|
|
st1 { v1.4h, v2.4h }, [x27], #16
|
|
add x0, x27, 1
|
|
st1 { v1.4s, v2.4s }, [x27], #32
|
|
add x0, x27, 1
|
|
st1 { v1.8b, v2.8b }, [x27], #16
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G57: ST1, two registers: .8h/.16b with immediate post-index, then
# .1d/.2d/.2s with register (x28) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G57
|
|
st1 { v1.8h, v2.8h }, [x27], #32
|
|
add x0, x27, 1
|
|
st1 { v1.16b, v2.16b }, [x27], #32
|
|
add x0, x27, 1
|
|
st1 { v1.1d, v2.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.2d, v2.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.2s, v2.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G58: ST1, two registers (.4h/.4s/.8b/.8h/.16b), register (x28) post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G58
|
|
st1 { v1.4h, v2.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.4s, v2.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.8b, v2.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.8h, v2.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.16b, v2.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G59: ST1, three registers (.1d/.2d/.2s/.4h/.4s), immediate post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G59
|
|
st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
add x0, x27, 1
|
|
st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
add x0, x27, 1
|
|
st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
add x0, x27, 1
|
|
st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
add x0, x27, 1
|
|
st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G60: ST1, three registers: .8b/.8h/.16b with immediate post-index, then
# .1d/.2d with register (x28) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G60
|
|
st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
add x0, x27, 1
|
|
st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
add x0, x27, 1
|
|
st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
add x0, x27, 1
|
|
st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G61: ST1, three registers (.2s/.4h/.4s/.8b/.8h), register (x28) post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G61
|
|
st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G62: ST1 three registers .16b with register (x28) post-index, then ST1 four
# registers (.1d/.2d/.2s/.4h) with immediate post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G62
|
|
st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
add x0, x27, 1
|
|
st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
add x0, x27, 1
|
|
st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
add x0, x27, 1
|
|
st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G63: ST1, four registers: .4s/.8b/.8h/.16b with immediate post-index, then
# .1d with register (x28) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G63
|
|
st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
add x0, x27, 1
|
|
st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
add x0, x27, 1
|
|
st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
add x0, x27, 1
|
|
st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
add x0, x27, 1
|
|
st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G64: ST1, four registers (.2d/.2s/.4h/.4s/.8b), register (x28) post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G64
|
|
st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G65: ST1 four registers (.8h/.16b) with register (x28) post-index, then ST1
# single lane .b[0]/.b[8] immediate post-index and .b[0] register post-index.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G65
|
|
st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.b }[0], [x27], #1
|
|
add x0, x27, 1
|
|
st1 { v1.b }[8], [x27], #1
|
|
add x0, x27, 1
|
|
st1 { v1.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G66: ST1 single lane: .b[8] register (x28) post-index, then .h[0]/.h[4] with
# immediate and register post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G66
|
|
st1 { v1.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.h }[0], [x27], #2
|
|
add x0, x27, 1
|
|
st1 { v1.h }[4], [x27], #2
|
|
add x0, x27, 1
|
|
st1 { v1.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G67: ST1 single lane .s[0] and .d[0], each with immediate and register (x28)
# post-index, then ST2 multiple structures .2d with immediate post-index.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G67
|
|
st1 { v1.s }[0], [x27], #4
|
|
add x0, x27, 1
|
|
st1 { v1.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st1 { v1.d }[0], [x27], #8
|
|
add x0, x27, 1
|
|
st1 { v1.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.2d, v2.2d }, [x27], #32
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G68: ST2 multiple structures (.2s/.4h/.4s/.8b/.8h), immediate post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G68
|
|
st2 { v1.2s, v2.2s }, [x27], #16
|
|
add x0, x27, 1
|
|
st2 { v1.4h, v2.4h }, [x27], #16
|
|
add x0, x27, 1
|
|
st2 { v1.4s, v2.4s }, [x27], #32
|
|
add x0, x27, 1
|
|
st2 { v1.8b, v2.8b }, [x27], #16
|
|
add x0, x27, 1
|
|
st2 { v1.8h, v2.8h }, [x27], #32
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G69: ST2 multiple structures: .16b with immediate post-index, then
# .2d/.2s/.4h/.4s with register (x28) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G69
|
|
st2 { v1.16b, v2.16b }, [x27], #32
|
|
add x0, x27, 1
|
|
st2 { v1.2d, v2.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.2s, v2.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.4h, v2.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.4s, v2.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G70: ST2 multiple structures (.8b/.8h/.16b) with register (x28) post-index,
# then ST2 single lane .b[0]/.b[8] with immediate post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G70
|
|
st2 { v1.8b, v2.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.8h, v2.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.16b, v2.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.b, v2.b }[0], [x27], #2
|
|
add x0, x27, 1
|
|
st2 { v1.b, v2.b }[8], [x27], #2
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G71: ST2 single lane: .b[0]/.b[8] register (x28) post-index, .h[0]/.h[4]
# immediate post-index, .h[0] register post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G71
|
|
st2 { v1.b, v2.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.b, v2.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.h, v2.h }[0], [x27], #4
|
|
add x0, x27, 1
|
|
st2 { v1.h, v2.h }[4], [x27], #4
|
|
add x0, x27, 1
|
|
st2 { v1.h, v2.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G72: ST2 single lane: .h[4] register (x28) post-index, then .s[0] and .d[0]
# each with immediate and register post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G72
|
|
st2 { v1.h, v2.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.s, v2.s }[0], [x27], #8
|
|
add x0, x27, 1
|
|
st2 { v1.s, v2.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st2 { v1.d, v2.d }[0], [x27], #16
|
|
add x0, x27, 1
|
|
st2 { v1.d, v2.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G73: ST3 multiple structures (.2d/.2s/.4h), immediate post-index on x27.
# This region holds three store/add pairs; each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G73
|
|
st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
add x0, x27, 1
|
|
st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
add x0, x27, 1
|
|
st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G74: ST3 multiple structures: .4s/.8b/.8h/.16b with immediate post-index,
# then .2d with register (x28) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G74
|
|
st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
add x0, x27, 1
|
|
st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
add x0, x27, 1
|
|
st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
add x0, x27, 1
|
|
st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
add x0, x27, 1
|
|
st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G75: ST3 multiple structures (.2s/.4h/.4s/.8b/.8h), register (x28) post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G75
|
|
st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G76: ST3 .16b with register (x28) post-index, then ST3 single lane
# .b[0]/.b[8] with immediate and register post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G76
|
|
st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.b, v2.b, v3.b }[0], [x27], #3
|
|
add x0, x27, 1
|
|
st3 { v1.b, v2.b, v3.b }[8], [x27], #3
|
|
add x0, x27, 1
|
|
st3 { v1.b, v2.b, v3.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.b, v2.b, v3.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G77: ST3 single lane: .h[0]/.h[4] with immediate and register (x28)
# post-index, then .s[0] with immediate post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G77
|
|
st3 { v1.h, v2.h, v3.h }[0], [x27], #6
|
|
add x0, x27, 1
|
|
st3 { v1.h, v2.h, v3.h }[4], [x27], #6
|
|
add x0, x27, 1
|
|
st3 { v1.h, v2.h, v3.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.h, v2.h, v3.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.s, v2.s, v3.s }[0], [x27], #12
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G78: ST3 single lane (.s[0] register post-index; .d[0] immediate and register
# post-index), then ST4 multiple structures (.2d/.2s) with immediate post-index.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G78
|
|
st3 { v1.s, v2.s, v3.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st3 { v1.d, v2.d, v3.d }[0], [x27], #24
|
|
add x0, x27, 1
|
|
st3 { v1.d, v2.d, v3.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
add x0, x27, 1
|
|
st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G79: ST4 multiple structures (.4h/.4s/.8b/.8h/.16b), immediate post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G79
|
|
st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
add x0, x27, 1
|
|
st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
add x0, x27, 1
|
|
st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
add x0, x27, 1
|
|
st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
add x0, x27, 1
|
|
st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G80: ST4 multiple structures (.2d/.2s/.4h/.4s/.8b), register (x28) post-index on x27.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G80
|
|
st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G81: ST4 multiple structures (.8h/.16b) with register (x28) post-index, then
# ST4 single lane .b[0]/.b[8] immediate post-index and .b[0] register post-index.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G81
|
|
st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
|
|
add x0, x27, 1
|
|
st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
|
|
add x0, x27, 1
|
|
st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G82: ST4 single lane: .b[8] register (x28) post-index, then .h[0]/.h[4] with
# immediate and register post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G82
|
|
st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
|
|
add x0, x27, 1
|
|
st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
|
|
add x0, x27, 1
|
|
st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G83: ST4 single lane .s[0] and .d[0], each with immediate and register (x28)
# post-index. This region holds four store/add pairs; each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G83
|
|
st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
|
|
add x0, x27, 1
|
|
st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
|
|
add x0, x27, 1
|
|
st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
|
|
add x0, x27, 1
|
|
st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G84: STP of FP registers (s and d pairs), immediate post-index on x27.
# This region holds two store/add pairs; each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G84
|
|
stp s1, s2, [x27], #248
|
|
add x0, x27, 1
|
|
stp d1, d2, [x27], #496
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G85: STP: q pair post-index, s/d/q pairs pre-index ("[x27, #imm]!"), then
# w pair post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G85
|
|
stp q1, q2, [x27], #992
|
|
add x0, x27, 1
|
|
stp s1, s2, [x27, #248]!
|
|
add x0, x27, 1
|
|
stp d1, d2, [x27, #496]!
|
|
add x0, x27, 1
|
|
stp q1, q2, [x27, #992]!
|
|
add x0, x27, 1
|
|
stp w1, w2, [x27], #248
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G86: STP x pair post-index, STP w/x pairs pre-index, then STR of FP registers
# (b, h) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G86
|
|
stp x1, x2, [x27], #496
|
|
add x0, x27, 1
|
|
stp w1, w2, [x27, #248]!
|
|
add x0, x27, 1
|
|
stp x1, x2, [x27, #496]!
|
|
add x0, x27, 1
|
|
str b1, [x27], #254
|
|
add x0, x27, 1
|
|
str h1, [x27], #254
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G87: STR of FP/SIMD registers: s/d/q post-index, then b/h pre-index.
# Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G87
|
|
str s1, [x27], #254
|
|
add x0, x27, 1
|
|
str d1, [x27], #254
|
|
add x0, x27, 1
|
|
str q1, [x27], #254
|
|
add x0, x27, 1
|
|
str b1, [x27, #254]!
|
|
add x0, x27, 1
|
|
str h1, [x27, #254]!
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G88: STR of FP/SIMD registers s/d/q pre-index, then STR of general registers
# (w, x) post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G88
|
|
str s1, [x27, #254]!
|
|
add x0, x27, 1
|
|
str d1, [x27, #254]!
|
|
add x0, x27, 1
|
|
str q1, [x27, #254]!
|
|
add x0, x27, 1
|
|
str w1, [x27], #254
|
|
add x0, x27, 1
|
|
str x1, [x27], #254
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G89: STR of general registers (w, x) pre-index, STRB post-index and pre-index,
# then STRH post-index. Each store is followed by an add reading x27.
# LLVM-MCA-BEGIN G89
|
|
str w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
str x1, [x27, #254]!
|
|
add x0, x27, 1
|
|
strb w1, [x27], #254
|
|
add x0, x27, 1
|
|
strb w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
strh w1, [x27], #254
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G90: a single STRH pre-index store on x27, followed by an add reading x27.
# LLVM-MCA-BEGIN G90
|
|
strh w1, [x27, #254]!
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# G91: two chained post-index LDRs. The first loads x1 from [x27]; the second
# uses that x1 as its own post-indexed base register. Both adds still read x27.
# LLVM-MCA-BEGIN G91
|
|
ldr x1, [x27], #254
|
|
add x0, x27, 1
|
|
ldr x2, [x1], #254
|
|
add x0, x27, 1
|
|
# LLVM-MCA-END
|
|
|
|
# CHECK: [0] Code Region - G01
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ld1 { v1.1d }, [x27], #8
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.2d }, [x27], #16
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeeER.. ld1 { v1.2s }, [x27], #8
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.4h }, [x27], #8
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeeER ld1 { v1.4s }, [x27], #16
|
|
# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.5 <total>
|
|
|
|
# CHECK: [1] Code Region - G02
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ld1 { v1.8b }, [x27], #8
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.8h }, [x27], #16
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeeER.. ld1 { v1.16b }, [x27], #16
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeeER ld1 { v1.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.5 <total>
|
|
|
|
# CHECK: [2] Code Region - G03
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ld1 { v1.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeeER.. ld1 { v1.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeeER ld1 { v1.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.5 <total>
|
|
|
|
# CHECK: [3] Code Region - G04
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 1900
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.75
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 4.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ld1 { v1.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.1d, v2.1d }, [x27], #16
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeER.. ld1 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.5 <total>
|
|
|
|
# CHECK: [4] Code Region - G05
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.94
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ld1 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeER.. ld1 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.5 <total>
|
|
|
|
# CHECK: [5] Code Region - G06
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.94
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ld1 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeER.. ld1 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeER ld1 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.5 <total>
|
|
|
|
# CHECK: [6] Code Region - G07
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 707
|
|
# CHECK-NEXT: Total uOps: 2300
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.25
|
|
# CHECK-NEXT: IPC: 1.41
|
|
# CHECK-NEXT: Block RThroughput: 6.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER . . ld1 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.5 0.2 1.8 <total>
|
|
|
|
# CHECK: [7] Code Region - G08
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 757
|
|
# CHECK-NEXT: Total uOps: 2500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.30
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01234
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 2.0 <total>
|
|
|
|
# CHECK: [8] Code Region - G09
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 757
|
|
# CHECK-NEXT: Total uOps: 2500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.30
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01234
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 2.0 <total>
|
|
|
|
# CHECK: [9] Code Region - G10
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 856
|
|
# CHECK-NEXT: Total uOps: 2700
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.15
|
|
# CHECK-NEXT: IPC: 1.17
|
|
# CHECK-NEXT: Block RThroughput: 8.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01234
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 2.0 <total>
|
|
|
|
# CHECK: [10] Code Region - G11
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1006
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.98
|
|
# CHECK-NEXT: IPC: 0.99
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . .. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeER. .. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: [0,3] .D=eE----R. .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D==eeeeeeER .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: [0,5] . D===eE----R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==eeeeeeER .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: [0,7] . D===eE----R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D====eeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 5.0 2.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.1 0.5 2.0 <total>
|
|
|
|
# CHECK: [11] Code Region - G12
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1006
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.98
|
|
# CHECK-NEXT: IPC: 0.99
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeER. .. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE----R. .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D==eeeeeeER .. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D===eE----R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==eeeeeeER .. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D===eE----R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D====eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 5.0 2.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.1 0.5 2.0 <total>
|
|
|
|
# CHECK: [12] Code Region - G13
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1209
|
|
# CHECK-NEXT: Total uOps: 2800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.32
|
|
# CHECK-NEXT: IPC: 0.83
|
|
# CHECK-NEXT: Block RThroughput: 8.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789
|
|
# CHECK-NEXT: Index 0123456789 0
|
|
|
|
# CHECK: [0,0] DeeeeeeER . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE----R . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeER. . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE----R. . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D==eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D===eE----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==eeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D===eE----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=======eeeeeeeER ld1 { v1.b }[0], [x27], #1
|
|
# CHECK-NEXT: [0,9] . D========eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 8.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
|
|
# CHECK-NEXT: 9. 1 9.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.7 0.3 2.1 <total>
|
|
|
|
# CHECK: [13] Code Region - G14
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 3503
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 0.57
|
|
# CHECK-NEXT: IPC: 0.29
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 01234567
|
|
# CHECK-NEXT: Index 0123456789 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.b }[8], [x27], #1
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1 { v1.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,3] D========eE-----R . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,5] .D==============eE-----R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1 { v1.h }[0], [x27], #2
|
|
# CHECK-NEXT: [0,7] .D=====================eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==========================eeeeeeeER ld1 { v1.h }[4], [x27], #2
|
|
# CHECK-NEXT: [0,9] . D===========================eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
|
|
# CHECK-NEXT: 3. 1 9.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
|
|
# CHECK-NEXT: 5. 1 15.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
|
|
# CHECK-NEXT: 7. 1 22.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
|
|
# CHECK-NEXT: 9. 1 28.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 14.7 0.1 2.5 <total>
|
|
|
|
# CHECK: [14] Code Region - G15
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 3503
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 0.57
|
|
# CHECK-NEXT: IPC: 0.29
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 01234567
|
|
# CHECK-NEXT: Index 0123456789 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1 { v1.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,3] D========eE-----R . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4
|
|
# CHECK-NEXT: [0,5] .D==============eE-----R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1 { v1.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,7] .D=====================eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==========================eeeeeeeER ld1 { v1.d }[0], [x27], #8
|
|
# CHECK-NEXT: [0,9] . D===========================eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
|
|
# CHECK-NEXT: 3. 1 9.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
|
|
# CHECK-NEXT: 5. 1 15.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
|
|
# CHECK-NEXT: 7. 1 22.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
|
|
# CHECK-NEXT: 9. 1 28.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 14.7 0.1 2.5 <total>
|
|
|
|
# CHECK: [15] Code Region - G16
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1103
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.81
|
|
# CHECK-NEXT: IPC: 0.91
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.1d }, [x27], #8
|
|
# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld1r { v1.2d }, [x27], #8
|
|
# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeeeER. ld1r { v1.2s }, [x27], #4
|
|
# CHECK-NEXT: [0,7] .D===eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeER ld1r { v1.4h }, [x27], #2
|
|
# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 2.5 <total>
|
|
|
|
# CHECK: [16] Code Region - G17
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 509
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.93
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld1r { v1.4s }, [x27], #4
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.8b }, [x27], #1
|
|
# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld1r { v1.8h }, [x27], #2
|
|
# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeeeER. ld1r { v1.16b }, [x27], #1
|
|
# CHECK-NEXT: [0,7] .D===eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeER ld1r { v1.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 2.5 <total>
|
|
|
|
# CHECK: [17] Code Region - G18
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 509
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.93
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld1r { v1.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld1r { v1.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeeeER. ld1r { v1.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeER ld1r { v1.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 2.5 <total>
|
|
|
|
# CHECK: [18] Code Region - G19
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 509
|
|
# CHECK-NEXT: Total uOps: 2600
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.11
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 4.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld1r { v1.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: [0,7] . D==eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.1 2.5 <total>
|
|
|
|
# CHECK: [19] Code Region - G20
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 509
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.89
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld2 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [20] Code Region - G21
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 509
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.89
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld2 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [21] Code Region - G22
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 2909
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.03
|
|
# CHECK-NEXT: IPC: 0.34
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 01234567
|
|
# CHECK-NEXT: Index 0123456789 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
|
|
# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
|
|
# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.b, v2.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
|
|
# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
|
|
# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
|
|
# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
|
|
# CHECK-NEXT: 9. 1 26.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 13.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [22] Code Region - G23
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 3503
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 0.86
|
|
# CHECK-NEXT: IPC: 0.29
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 01234567
|
|
# CHECK-NEXT: Index 0123456789 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
|
|
# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.h, v2.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8
|
|
# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
|
|
# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
|
|
# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
|
|
# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
|
|
# CHECK-NEXT: 9. 1 26.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 13.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [23] Code Region - G24
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 2303
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.30
|
|
# CHECK-NEXT: IPC: 0.43
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789
|
|
# CHECK-NEXT: Index 0123456789 012345
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . . ld2 { v1.s, v2.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .D======eeeeeeeER . . ld2 { v1.d, v2.d }[0], [x27], #16
|
|
# CHECK-NEXT: [0,3] .D=======eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D============eeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=============eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D============eeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16
|
|
# CHECK-NEXT: [0,7] . D=============eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D============eeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . D=============eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
|
|
# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
|
|
# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
|
|
# CHECK-NEXT: 7. 1 14.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 13.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 14.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 9.9 0.1 2.5 <total>
|
|
|
|
# CHECK: [24] Code Region - G25
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 509
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.89
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld2r { v1.2s, v2.2s }, [x27], #8
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8
|
|
# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2
|
|
# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4
|
|
# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [25] Code Region - G26
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 509
|
|
# CHECK-NEXT: Total uOps: 3000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.89
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld2r { v1.16b, v2.16b }, [x27], #2
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [26] Code Region - G27
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 609
|
|
# CHECK-NEXT: Total uOps: 3200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.25
|
|
# CHECK-NEXT: IPC: 1.64
|
|
# CHECK-NEXT: Block RThroughput: 5.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01234
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld2r { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 2.6 <total>
|
|
|
|
# CHECK: [27] Code Region - G28
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 759
|
|
# CHECK-NEXT: Total uOps: 4000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.27
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeeER .. ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: [0,3] .D=eE------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeeeER .. ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: [0,5] . D==eE------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeeeER.. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: [0,7] . D==eE------R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 3.0 <total>
|
|
|
|
# CHECK: [28] Code Region - G29
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 759
|
|
# CHECK-NEXT: Total uOps: 4000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.27
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeeER .. ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeeeER .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeeeER.. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE------R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 3.0 <total>
|
|
|
|
# CHECK: [29] Code Region - G30
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1909
|
|
# CHECK-NEXT: Total uOps: 3800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.99
|
|
# CHECK-NEXT: IPC: 0.52
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789
|
|
# CHECK-NEXT: Index 0123456789 01234567
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE------R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE------R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D========eeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
|
|
# CHECK-NEXT: [0,7] . D=========eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==============eeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
|
|
# CHECK-NEXT: [0,9] . D===============eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
|
|
# CHECK-NEXT: 7. 1 10.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 15.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
|
|
# CHECK-NEXT: 9. 1 16.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 6.1 0.2 2.8 <total>
|
|
|
|
# CHECK: [30] Code Region - G31
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 3503
|
|
# CHECK-NEXT: Total uOps: 3500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.00
|
|
# CHECK-NEXT: IPC: 0.29
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 01234567
|
|
# CHECK-NEXT: Index 0123456789 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
|
|
# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
|
|
# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
|
|
# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
|
|
# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
|
|
# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
|
|
# CHECK-NEXT: 9. 1 26.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 13.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [31] Code Region - G32
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 3503
|
|
# CHECK-NEXT: Total uOps: 3500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.00
|
|
# CHECK-NEXT: IPC: 0.29
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 01234567
|
|
# CHECK-NEXT: Index 0123456789 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
|
|
# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
|
|
# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
|
|
# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
|
|
# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
|
|
# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
|
|
# CHECK-NEXT: 9. 1 26.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 13.5 0.1 2.5 <total>
|
|
|
|
# CHECK: [32] Code Region - G33
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 758
|
|
# CHECK-NEXT: Total uOps: 3500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.62
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
|
|
# CHECK-NEXT: [0,5] . D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
|
|
# CHECK-NEXT: [0,7] . D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
|
|
# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 2.5 <total>
|
|
|
|
# CHECK: [33] Code Region - G34
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 758
|
|
# CHECK-NEXT: Total uOps: 3500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.62
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
|
|
# CHECK-NEXT: [0,5] . D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 2.5 <total>
|
|
|
|
# CHECK: [34] Code Region - G35
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 758
|
|
# CHECK-NEXT: Total uOps: 3500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.62
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeeER . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE-----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 2.5 <total>
|
|
|
|
# CHECK: [35] Code Region - G36
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 960
|
|
# CHECK-NEXT: Total uOps: 4500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.69
|
|
# CHECK-NEXT: IPC: 1.04
|
|
# CHECK-NEXT: Block RThroughput: 9.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: [0,3] . DeE--------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeeER . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: [0,5] . DeE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeeeeER . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: [0,7] . .DeE------R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . DeeeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: [0,9] . . DeE--------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: 9. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.1 0.4 3.3 <total>
|
|
|
|
# CHECK: [36] Code Region - G37
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1009
|
|
# CHECK-NEXT: Total uOps: 4800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.76
|
|
# CHECK-NEXT: IPC: 0.99
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345678
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeeeeeeER. . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: [0,3] . DeE--------R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: [0,5] . DeE--------R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . .DeeeeeeeeeeER ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . . DeE--------R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: 5. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.0 0.5 3.6 <total>
|
|
|
|
# CHECK: [37] Code Region - G38
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1011
|
|
# CHECK-NEXT: Total uOps: 4800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.75
|
|
# CHECK-NEXT: IPC: 0.99
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789
|
|
# CHECK-NEXT: Index 0123456789 0
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeeeeeeER. . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,3] . DeE--------R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . .DeeeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . . DeE--------R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . DeeeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . . DeE--------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 1.0 0.0 8.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.0 0.5 3.6 <total>
|
|
|
|
# CHECK: [38] Code Region - G39
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 4003
|
|
# CHECK-NEXT: Total uOps: 5000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.25
|
|
# CHECK-NEXT: IPC: 0.25
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 0123456789
|
|
# CHECK-NEXT: Index 0123456789 0123456789 012
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
|
|
# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
|
|
# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
|
|
# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
|
|
# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
|
|
# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
|
|
# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
|
|
# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 13.0 0.1 3.0 <total>
|
|
|
|
# CHECK: [39] Code Region - G40
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 4003
|
|
# CHECK-NEXT: Total uOps: 5000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 1.25
|
|
# CHECK-NEXT: IPC: 0.25
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789 0123456789
|
|
# CHECK-NEXT: Index 0123456789 0123456789 012
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
|
|
# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
|
|
# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
|
|
# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
|
|
# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
|
|
# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
|
|
# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 13.0 0.1 3.0 <total>
|
|
|
|
# CHECK: [40] Code Region - G41
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 2103
|
|
# CHECK-NEXT: Total uOps: 5000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.38
|
|
# CHECK-NEXT: IPC: 0.48
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456789
|
|
# CHECK-NEXT: Index 0123456789 0123
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
|
|
# CHECK-NEXT: [0,1] .DeE------R . . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,3] . D======eE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=====eeeeeeeeER. . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
# CHECK-NEXT: [0,5] . D=====eE------R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . .D======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
|
|
# CHECK-NEXT: [0,7] . . D======eE------R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . D=====eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . . D=====eE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
|
|
# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 6.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 6.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 7.0 2.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 7.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 6.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 6.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 5.4 0.3 3.0 <total>
|
|
|
|
# CHECK: [41] Code Region - G42
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1009
|
|
# CHECK-NEXT: Total uOps: 5000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.96
|
|
# CHECK-NEXT: IPC: 0.99
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345678
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
|
|
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
|
|
# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
|
|
# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
|
|
# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
|
|
# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
|
|
# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
|
|
# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
|
|
# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.0 0.5 3.0 <total>
|
|
|
|
# CHECK: [42] Code Region - G43
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1009
|
|
# CHECK-NEXT: Total uOps: 5000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.96
|
|
# CHECK-NEXT: IPC: 0.99
|
|
# CHECK-NEXT: Block RThroughput: 10.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345678
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.0 0.5 3.0 <total>
|
|
|
|
# CHECK: [43] Code Region - G44
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 808
|
|
# CHECK-NEXT: Total uOps: 3800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.70
|
|
# CHECK-NEXT: IPC: 1.24
|
|
# CHECK-NEXT: Block RThroughput: 8.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] .DeE------R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] . DeE------R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeeER. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . DeE------R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeE-R. ldp s1, s2, [x27], #248
|
|
# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . .D==eeeeeER ldp d1, d2, [x27], #496
|
|
# CHECK-NEXT: [0,9] . .D===eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 1.0 1.0 ldp s1, s2, [x27], #248
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ldp d1, d2, [x27], #496
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.8 0.5 2.6 <total>
|
|
|
|
# CHECK: [44] Code Region - G45
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 508
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.94
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . ldp q1, q2, [x27], #992
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeE-R. . ldp s1, s2, [x27, #248]!
|
|
# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeER. . ldp d1, d2, [x27, #496]!
|
|
# CHECK-NEXT: [0,5] .D==eE---R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeeeER ldp q1, q2, [x27, #992]!
|
|
# CHECK-NEXT: [0,7] .D===eE-----R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeE--R ldp w1, w2, [x27], #248
|
|
# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 1.0 ldp s1, s2, [x27, #248]!
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp d1, d2, [x27, #496]!
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldp q1, q2, [x27, #992]!
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 2.0 ldp w1, w2, [x27], #248
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 2.4 <total>
|
|
|
|
# CHECK: [45] Code Region - G46
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.94
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 4.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496
|
|
# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER .. ldp w1, w2, [x27, #248]!
|
|
# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeER .. ldp x1, x2, [x27, #496]!
|
|
# CHECK-NEXT: [0,5] .D==eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ldpsw x1, x2, [x27], #248
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeER ldpsw x1, x2, [x27, #248]!
|
|
# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]!
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp x1, x2, [x27, #496]!
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.2 <total>
|
|
|
|
# CHECK: [46] Code Region - G47
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ldr b1, [x27], #254
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ldr h1, [x27], #254
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeeER.. ldr s1, [x27], #254
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ldr d1, [x27], #254
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeeER ldr q1, [x27], #254
|
|
# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27], #254
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27], #254
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldr q1, [x27], #254
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.5 <total>
|
|
|
|
# CHECK: [47] Code Region - G48
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. ldr b1, [x27, #254]!
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeeER .. ldr h1, [x27, #254]!
|
|
# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeeER.. ldr s1, [x27, #254]!
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeeER. ldr d1, [x27, #254]!
|
|
# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeeER ldr q1, [x27, #254]!
|
|
# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27, #254]!
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27, #254]!
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27, #254]!
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldr q1, [x27, #254]!
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.5 <total>
|
|
|
|
# CHECK: [48] Code Region - G49
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 506
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER . ldr x1, [x27], #254
|
|
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeER. ldr x1, [x27, #254]!
|
|
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeER ldrb w1, [x27], #254
|
|
# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]!
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr x1, [x27, #254]!
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrb w1, [x27], #254
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
|
|
|
|
# CHECK: [49] Code Region - G50
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 506
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254
|
|
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeER. ldrsb w1, [x27], #254
|
|
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeER ldrsb x1, [x27], #254
|
|
# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]!
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsb w1, [x27], #254
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsb x1, [x27], #254
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
|
|
|
|
# CHECK: [50] Code Region - G51
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 506
|
|
# CHECK-NEXT: Total uOps: 1500
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 2.96
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]!
|
|
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254
|
|
# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeER. ldrsh x1, [x27], #254
|
|
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeeeER ldrsh w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]!
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsh x1, [x27], #254
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsh w1, [x27, #254]!
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
|
|
|
|
# CHECK: [51] Code Region - G52
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 1700
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.37
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]!
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER. ldrsw x1, [x27], #254
|
|
# CHECK-NEXT: [0,3] D==eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] D==eeeeER ldrsw x1, [x27, #254]!
|
|
# CHECK-NEXT: [0,5] .D==eE--R add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeE-R st1 { v1.1d }, [x27], #8
|
|
# CHECK-NEXT: [0,7] .D===eE-R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] .D===eeER st1 { v1.2d }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]!
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 1.0 st1 { v1.1d }, [x27], #8
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.9 0.1 0.8 <total>
|
|
|
|
# CHECK: [52] Code Region - G53
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.97
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h }, [x27], #8
|
|
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4s }, [x27], #16
|
|
# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8b }, [x27], #8
|
|
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeER st1 { v1.8h }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s }, [x27], #16
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], #8
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
|
|
|
|
# CHECK: [53] Code Region - G54
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.97
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeER st1 { v1.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2d }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2s }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
|
|
|
|
# CHECK: [54] Code Region - G55
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2100
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.17
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeER st1 { v1.1d, v2.1d }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.8h }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
|
|
|
|
# CHECK: [55] Code Region - G56
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2700
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.36
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeER . st1 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeER. st1 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeER st1 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 0.0 <total>
|
|
|
|
# CHECK: [56] Code Region - G57
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.56
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeER . st1 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeER . st1 { v1.1d, v2.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeER. st1 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeER st1 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 0.0 <total>
|
|
|
|
# CHECK: [57] Code Region - G58
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.56
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 5.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeER . st1 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeER . st1 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeER. st1 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . DeeER st1 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.1 0.0 <total>
|
|
|
|
# CHECK: [58] Code Region - G59
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 755
|
|
# CHECK-NEXT: Total uOps: 3700
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.90
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeER . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
# CHECK-NEXT: [0,1] D=eE-R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeER . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: [0,5] . D==eE-R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeER. . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: [0,7] . D==eE-R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 0.7 <total>
|
|
|
|
# CHECK: [59] Code Region - G60
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 755
|
|
# CHECK-NEXT: Total uOps: 3800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.03
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeER . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: [0,1] D=eE-R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeER . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE-R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 0.8 <total>
|
|
|
|
# CHECK: [60] Code Region - G61
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 755
|
|
# CHECK-NEXT: Total uOps: 3700
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.90
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE-R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE-R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeER. . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE-R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 0.7 <total>
|
|
|
|
# CHECK: [61] Code Region - G62
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 704
|
|
# CHECK-NEXT: Total uOps: 3600
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.11
|
|
# CHECK-NEXT: IPC: 1.42
|
|
# CHECK-NEXT: Block RThroughput: 6.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeE-R . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
# CHECK-NEXT: [0,3] .D=eE-R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeER st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: [0,5] . D=eE---R add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeE--R st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: [0,7] . D==eE--R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 1.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 2.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.2 0.3 1.1 <total>
|
|
|
|
# CHECK: [62] Code Region - G63
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 804
|
|
# CHECK-NEXT: Total uOps: 4200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.22
|
|
# CHECK-NEXT: IPC: 1.24
|
|
# CHECK-NEXT: Block RThroughput: 8.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: [0,1] .DeE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeE--R .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: [0,3] .D=eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeER. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: [0,5] . D=eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeER st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: [0,7] . DeE---R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 2.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.8 0.4 1.3 <total>
|
|
|
|
# CHECK: [63] Code Region - G64
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 705
|
|
# CHECK-NEXT: Total uOps: 3800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.39
|
|
# CHECK-NEXT: IPC: 1.42
|
|
# CHECK-NEXT: Block RThroughput: 7.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,1] .DeE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeE--R .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeER .. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eER .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeER st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eE---R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeE--R st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 2.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 2.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.9 0.2 1.4 <total>
|
|
|
|
# CHECK: [64] Code Region - G65
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 706
|
|
# CHECK-NEXT: Total uOps: 3200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.53
|
|
# CHECK-NEXT: IPC: 1.42
|
|
# CHECK-NEXT: Block RThroughput: 5.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,1] .DeE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeER. . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,3] . DeE---R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeER . st1 { v1.b }[0], [x27], #1
|
|
# CHECK-NEXT: [0,5] . D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeER. st1 { v1.b }[8], [x27], #1
|
|
# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.b }[0], [x27], #1
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.1 0.3 1.2 <total>
|
|
|
|
# CHECK: [65] Code Region - G66
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 506
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.95
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.h }[0], [x27], #2
|
|
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeER . st1 { v1.h }[4], [x27], #2
|
|
# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeER. st1 { v1.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.0 <total>
|
|
|
|
# CHECK: [66] Code Region - G67
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 2200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.34
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER .. st1 { v1.s }[0], [x27], #4
|
|
# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER .. st1 { v1.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeER .. st1 { v1.d }[0], [x27], #8
|
|
# CHECK-NEXT: [0,5] .D==eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeER.. st1 { v1.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,7] .D===eE--R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeER st2 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.1 <total>
|
|
|
|
# CHECK: [67] Code Region - G68
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 2400
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.73
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 3.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER .. st2 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER .. st2 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeeER.. st2 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeER.. st2 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: [0,7] . D==eE--R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeeeeER st2 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: [0,9] . D==eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.1 1.2 <total>
|
|
|
|
# CHECK: [68] Code Region - G69
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 507
|
|
# CHECK-NEXT: Total uOps: 2600
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.13
|
|
# CHECK-NEXT: IPC: 1.97
|
|
# CHECK-NEXT: Block RThroughput: 4.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER .. st2 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER .. st2 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeER .. st2 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eE--R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeER.. st2 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE--R.. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeeeeER st2 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D==eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.9 0.1 1.3 <total>
|
|
|
|
# CHECK: [69] Code Region - G70
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 506
|
|
# CHECK-NEXT: Total uOps: 2400
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.74
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . st2 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER . st2 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeER. st2 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeER. st2 { v1.b, v2.b }[0], [x27], #2
|
|
# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeeeER st2 { v1.b, v2.b }[8], [x27], #2
|
|
# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.7 0.1 1.2 <total>
|
|
|
|
# CHECK: [70] Code Region - G71
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 506
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.95
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeER . st2 { v1.h, v2.h }[0], [x27], #4
|
|
# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.h, v2.h }[4], [x27], #4
|
|
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st2 { v1.h, v2.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.0 <total>
|
|
|
|
# CHECK: [71] Code Region - G72
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 506
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.95
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8
|
|
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeeeER . st2 { v1.s, v2.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.d, v2.d }[0], [x27], #16
|
|
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st2 { v1.d, v2.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 1.0 <total>
|
|
|
|
# CHECK: [72] Code Region - G73
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 600
|
|
# CHECK-NEXT: Total Cycles: 407
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.91
|
|
# CHECK-NEXT: IPC: 1.47
|
|
# CHECK-NEXT: Block RThroughput: 3.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: [0,1] D=eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: [0,3] .D=eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: [0,5] . D==eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.8 0.3 1.7 <total>
|
|
|
|
# CHECK: [73] Code Region - G74
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 708
|
|
# CHECK-NEXT: Total uOps: 3800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.37
|
|
# CHECK-NEXT: IPC: 1.41
|
|
# CHECK-NEXT: Block RThroughput: 7.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 01234
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 1.9 <total>
|
|
|
|
# CHECK: [74] Code Region - G75
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 707
|
|
# CHECK-NEXT: Total uOps: 3400
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.81
|
|
# CHECK-NEXT: IPC: 1.41
|
|
# CHECK-NEXT: Block RThroughput: 6.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeER . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . DeeeeeER . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.7 0.2 1.7 <total>
|
|
|
|
# CHECK: [75] Code Region - G76
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 755
|
|
# CHECK-NEXT: Total uOps: 4000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.30
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeE-R . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
|
|
# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeER. . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
|
|
# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeER . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 1.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 1.4 <total>
|
|
|
|
# CHECK: [76] Code Region - G77
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 755
|
|
# CHECK-NEXT: Total uOps: 4000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.30
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
|
|
# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
|
|
# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeER. . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12
|
|
# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 1.0 <total>
|
|
|
|
# CHECK: [77] Code Region - G78
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 808
|
|
# CHECK-NEXT: Total uOps: 4200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.20
|
|
# CHECK-NEXT: IPC: 1.24
|
|
# CHECK-NEXT: Block RThroughput: 8.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
|
|
# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeER . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeER . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: [0,7] . D=eE----R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.0 0.3 1.7 <total>
|
|
|
|
# CHECK: [78] Code Region - G79
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1207
|
|
# CHECK-NEXT: Total uOps: 5800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.81
|
|
# CHECK-NEXT: IPC: 0.83
|
|
# CHECK-NEXT: Block RThroughput: 12.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 012345678
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeER. . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: [0,1] D=eE-----R. . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: [0,3] . DeE-------R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeeeeeeER . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: [0,5] . D=eE-----R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D==eeeeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: [0,7] . D==eE-------R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . .D=eeeeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: [0,9] . . D=eE-------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 7.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 2.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 7.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
|
|
# CHECK-NEXT: 9. 1 2.0 0.0 7.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.8 0.4 3.1 <total>
|
|
|
|
# CHECK: [79] Code Region - G80
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1007
|
|
# CHECK-NEXT: Total uOps: 4800
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.77
|
|
# CHECK-NEXT: IPC: 0.99
|
|
# CHECK-NEXT: Block RThroughput: 9.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeER . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeeeER .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: [0,3] . D=eE-----R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeeeER .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: [0,5] . D==eE-----R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeeeeeER st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: [0,7] . D=eE-------R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . .D=eeeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: [0,9] . .D==eE-----R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.9 0.4 2.6 <total>
|
|
|
|
# CHECK: [80] Code Region - G81
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 1057
|
|
# CHECK-NEXT: Total uOps: 5200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.92
|
|
# CHECK-NEXT: IPC: 0.95
|
|
# CHECK-NEXT: Block RThroughput: 10.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123456
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeeeeeER .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: [0,1] .DeE-------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] . DeeeeeeeeeER .. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: [0,3] . DeE-------R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D==eeeeeER .. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
|
|
# CHECK-NEXT: [0,5] . D===eE---R .. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D===eeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
|
|
# CHECK-NEXT: [0,7] . D====eE---R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . .D===eeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
|
|
# CHECK-NEXT: [0,9] . .D====eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
|
|
# CHECK-NEXT: 1. 1 1.0 0.0 7.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
|
|
# CHECK-NEXT: 3. 1 1.0 0.0 7.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 3.0 3.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
|
|
# CHECK-NEXT: 5. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
|
|
# CHECK-NEXT: 7. 1 5.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
|
|
# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.9 0.6 2.3 <total>
|
|
|
|
# CHECK: [81] Code Region - G82
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 756
|
|
# CHECK-NEXT: Total uOps: 4000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.29
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 7.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0123
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
|
|
# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
|
|
# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
|
|
# CHECK-NEXT: [0,5] . D==eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
|
|
# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.3 0.3 1.5 <total>
|
|
|
|
# CHECK: [82] Code Region - G83
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 800
|
|
# CHECK-NEXT: Total Cycles: 605
|
|
# CHECK-NEXT: Total uOps: 3200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 5.29
|
|
# CHECK-NEXT: IPC: 1.32
|
|
# CHECK-NEXT: Block RThroughput: 6.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
|
|
# CHECK-NEXT: [0,1] D=eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
|
|
# CHECK-NEXT: [0,3] .D=eE---R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . D=eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
|
|
# CHECK-NEXT: [0,5] . D==eE--R. add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
|
|
# CHECK-NEXT: [0,7] . D==eE--R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.0 0.3 1.3 <total>
|
|
|
|
# CHECK: [83] Code Region - G84
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 400
|
|
# CHECK-NEXT: Total Cycles: 204
|
|
# CHECK-NEXT: Total uOps: 1000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.90
|
|
# CHECK-NEXT: IPC: 1.96
|
|
# CHECK-NEXT: Block RThroughput: 1.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345
|
|
|
|
# CHECK: [0,0] DeeER. stp s1, s2, [x27], #248
|
|
# CHECK-NEXT: [0,1] D=eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeER stp d1, d2, [x27], #496
|
|
# CHECK-NEXT: [0,3] .D=eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp d1, d2, [x27], #496
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.3 0.0 <total>
|
|
|
|
# CHECK: [84] Code Region - G85
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 703
|
|
# CHECK-NEXT: Total uOps: 3100
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.41
|
|
# CHECK-NEXT: IPC: 1.42
|
|
# CHECK-NEXT: Block RThroughput: 6.5
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeER . stp q1, q2, [x27], #992
|
|
# CHECK-NEXT: [0,1] D=eE-R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeeER . stp s1, s2, [x27, #248]!
|
|
# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeeER . stp d1, d2, [x27, #496]!
|
|
# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeeER stp q1, q2, [x27, #992]!
|
|
# CHECK-NEXT: [0,7] . D==eE-R add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eE-R stp w1, w2, [x27], #248
|
|
# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp s1, s2, [x27, #248]!
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stp d1, d2, [x27, #496]!
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 1.0 0.0 stp q1, q2, [x27, #992]!
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 1.0 stp w1, w2, [x27], #248
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.9 0.2 0.3 <total>
|
|
|
|
# CHECK: [85] Code Region - G86
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2300
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.56
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 4.0
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] .DeER. . stp w1, w2, [x27, #248]!
|
|
# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] . DeER . stp x1, x2, [x27, #496]!
|
|
# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeER. str b1, [x27], #254
|
|
# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D=eeER str h1, [x27], #254
|
|
# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp w1, w2, [x27, #248]!
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stp x1, x2, [x27, #496]!
|
|
# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 str b1, [x27], #254
|
|
# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 2.0 0.0 0.0 str h1, [x27], #254
|
|
# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.8 0.1 0.0 <total>
|
|
|
|
# CHECK: [86] Code Region - G87
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.37
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . str s1, [x27], #254
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254
|
|
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27], #254
|
|
# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eeER. str b1, [x27, #254]!
|
|
# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eeER str h1, [x27, #254]!
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27], #254
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 str b1, [x27, #254]!
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27, #254]!
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.5 0.1 0.0 <total>
|
|
|
|
# CHECK: [87] Code Region - G88
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2200
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.37
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeeER. . str s1, [x27, #254]!
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eeER . str d1, [x27, #254]!
|
|
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27, #254]!
|
|
# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] . D=eER . str w1, [x27], #254
|
|
# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eER. str x1, [x27], #254
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]!
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27, #254]!
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 2.0 0.0 0.0 str w1, [x27], #254
|
|
# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str x1, [x27], #254
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.5 0.1 0.0 <total>
|
|
|
|
# CHECK: [88] Code Region - G89
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 1000
|
|
# CHECK-NEXT: Total Cycles: 504
|
|
# CHECK-NEXT: Total uOps: 2000
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.97
|
|
# CHECK-NEXT: IPC: 1.98
|
|
# CHECK-NEXT: Block RThroughput: 3.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 012345678
|
|
|
|
# CHECK: [0,0] DeER . . str w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]!
|
|
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,4] .D=eER . strb w1, [x27], #254
|
|
# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
|
|
# CHECK-NEXT: [0,6] .D==eER . strb w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
|
|
# CHECK-NEXT: [0,8] . D==eER. strh w1, [x27], #254
|
|
# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]!
|
|
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 4. 1 2.0 0.0 0.0 strb w1, [x27], #254
|
|
# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 6. 1 3.0 0.0 0.0 strb w1, [x27, #254]!
|
|
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 8. 1 3.0 0.0 0.0 strh w1, [x27], #254
|
|
# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
|
|
|
|
# CHECK: [89] Code Region - G90
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 200
|
|
# CHECK-NEXT: Total Cycles: 104
|
|
# CHECK-NEXT: Total uOps: 400
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 3.85
|
|
# CHECK-NEXT: IPC: 1.92
|
|
# CHECK-NEXT: Block RThroughput: 0.7
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: Index 01234
|
|
|
|
# CHECK: [0,0] DeER. strh w1, [x27, #254]!
|
|
# CHECK-NEXT: [0,1] D=eER add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
|
|
|
|
# CHECK: [90] Code Region - G91
|
|
|
|
# CHECK: Iterations: 100
|
|
# CHECK-NEXT: Instructions: 400
|
|
# CHECK-NEXT: Total Cycles: 142
|
|
# CHECK-NEXT: Total uOps: 600
|
|
|
|
# CHECK: Dispatch Width: 8
|
|
# CHECK-NEXT: uOps Per Cycle: 4.23
|
|
# CHECK-NEXT: IPC: 2.82
|
|
# CHECK-NEXT: Block RThroughput: 1.3
|
|
|
|
# CHECK: Timeline view:
|
|
# CHECK-NEXT: 0
|
|
# CHECK-NEXT: Index 0123456789
|
|
|
|
# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
|
|
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
|
|
# CHECK-NEXT: [0,2] D====eeeeER ldr x2, [x1], #254
|
|
# CHECK-NEXT: [0,3] D=eE------R add x0, x27, #1
|
|
|
|
# CHECK: Average Wait times (based on the timeline view):
|
|
# CHECK-NEXT: [0]: Executions
|
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
|
|
|
# CHECK: [0] [1] [2] [3]
|
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
|
|
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
|
|
# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254
|
|
# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
|
|
# CHECK-NEXT: 1 2.5 0.3 2.0 <total>
|