pmu-tools/mtl_cmt_ratios.py at master · clayne/pmu-tools

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

# -*- coding: latin-1 -*-

#

# auto generated TopDown/TMA 4 description for Intel 14th gen Core (code name Meteorlake) with Crestmont

# Please see http://ark.intel.com for more details on these CPUs.

#

# References:

# http://bit.ly/tma-ispass14

# http://halobates.de/blog/p/262

# https://sites.google.com/site/analysismethods/yasin-pubs

# https://download.01.org/perfmon/

# https://github.com/andikleen/pmu-tools/wiki/toplev-manual

#

# Helpers

def print_error(msg):
    """Placeholder error reporter: ignores `msg` and returns False."""
    return False


# Module-level defaults read by the formulas and nodes below.
version = "4"
base_frequency = -1.0
Memory = 0
Average_Frequency = 0.0
# Topology defaults: a single core, thread and socket.
num_cores = num_threads = num_sockets = 1

def handle_error(obj, msg):
    """Record a failed node computation on `obj`.

    Reports `msg` through print_error, increments obj.errcount, and resets
    obj.val to 0 and obj.thresh to False.
    """
    print_error(msg)
    obj.errcount += 1
    obj.val = 0
    obj.thresh = False

def handle_error_metric(obj, msg):
    """Record a failed metric computation on `obj`.

    Reports `msg` through print_error, increments obj.errcount, and resets
    obj.val to 0. Unlike handle_error, obj.thresh is left untouched.
    """
    print_error(msg)
    obj.errcount += 1
    obj.val = 0

# Constants

# Multiplier applied to the core clock count in SLOTS() to obtain issue slots.
Pipeline_Width = 6

# Aux. formulas

def CLKS(self, EV, level):
    """Return the CPU_CLK_UNHALTED.CORE count read through EV at `level`."""
    return EV("CPU_CLK_UNHALTED.CORE", level)

def SLOTS(self, EV, level):
    """Return the issue-slot count: Pipeline_Width times CLKS at `level`."""
    return Pipeline_Width * CLKS(self, EV, level)

def DurationTimeInSeconds(self, EV, level):
    """Return the "interval-ms" value divided by 1000.

    The interval is always read at level 0; the `level` argument is unused.
    """
    interval_ms = EV("interval-ms", 0)
    return interval_ms / 1000

# Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound

def PCT_Mem_Exec_Bound_Cycles(self, EV, level):
    """Return 100 * LD_HEAD.ANY_AT_RET / CLKS."""
    stalled = EV("LD_HEAD.ANY_AT_RET", level)
    return 100 * stalled / CLKS(self, EV, level)

# Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound

def PCT_Load_Miss_Bound_Cycles(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_LOAD.ALL / CLKS."""
    stalled = EV("MEM_BOUND_STALLS_LOAD.ALL", level)
    return 100 * stalled / CLKS(self, EV, level)

# Percentage of time that retirement is stalled due to a first level data TLB miss

def PCT_DTLB_Miss_Bound_Cycles(self, EV, level):
    """Return 100 * (LD_HEAD.DTLB_MISS_AT_RET + LD_HEAD.PGWALK_AT_RET) / CLKS."""
    stalled = EV("LD_HEAD.DTLB_MISS_AT_RET", level) + EV("LD_HEAD.PGWALK_AT_RET", level)
    return 100 * stalled / CLKS(self, EV, level)

# Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound

def PCT_IFetch_Miss_Bound_Cycles(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_IFETCH.ALL / CLKS."""
    stalled = EV("MEM_BOUND_STALLS_IFETCH.ALL", level)
    return 100 * stalled / CLKS(self, EV, level)

# Instructions Per Cycle

def IPC(self, EV, level):
    """Return INST_RETIRED.ANY / CLKS."""
    retired = EV("INST_RETIRED.ANY", level)
    return retired / CLKS(self, EV, level)

# Cycles Per Instruction

def CPI(self, EV, level):
    """Return CLKS / INST_RETIRED.ANY."""
    clocks = CLKS(self, EV, level)
    return clocks / EV("INST_RETIRED.ANY", level)

# Uops Per Instruction

def UPI(self, EV, level):
    """Return TOPDOWN_RETIRING.ALL_P / INST_RETIRED.ANY."""
    uops = EV("TOPDOWN_RETIRING.ALL_P", level)
    instructions = EV("INST_RETIRED.ANY", level)
    return uops / instructions

# Floating Point Operations Per Cycle

def FLOPc(self, EV, level):
    """Return FP_FLOPS_RETIRED.ALL / CLKS."""
    flops = EV("FP_FLOPS_RETIRED.ALL", level)
    return flops / CLKS(self, EV, level)

# Instructions per Branch (lower number means higher occurrence rate)

def IpBranch(self, EV, level):
    """Return INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES."""
    instructions = EV("INST_RETIRED.ANY", level)
    branches = EV("BR_INST_RETIRED.ALL_BRANCHES", level)
    return instructions / branches

# Instructions per (near) call (lower number means higher occurrence rate)

def IpCall(self, EV, level):
    """Return INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL."""
    instructions = EV("INST_RETIRED.ANY", level)
    calls = EV("BR_INST_RETIRED.NEAR_CALL", level)
    return instructions / calls

# Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]

def IpFarBranch(self, EV, level):
    """Return INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:USER."""
    instructions = EV("INST_RETIRED.ANY", level)
    far_branches = EV("BR_INST_RETIRED.FAR_BRANCH:USER", level)
    return instructions / far_branches

# Instructions per retired Branch Misprediction

def IpMispredict(self, EV, level):
    """Return INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES."""
    instructions = EV("INST_RETIRED.ANY", level)
    mispredicts = EV("BR_MISP_RETIRED.ALL_BRANCHES", level)
    return instructions / mispredicts

# Instructions per retired conditional Branch Misprediction where the branch was not taken

def IpMisp_Cond_Ntaken(self, EV, level):
    """Return INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)."""
    instructions = EV("INST_RETIRED.ANY", level)
    # Not-taken mispredicts are derived as all conditional minus taken.
    not_taken = EV("BR_MISP_RETIRED.COND", level) - EV("BR_MISP_RETIRED.COND_TAKEN", level)
    return instructions / not_taken

# Instructions per retired conditional Branch Misprediction where the branch was taken

def IpMisp_Cond_Taken(self, EV, level):
    """Return INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN."""
    instructions = EV("INST_RETIRED.ANY", level)
    taken = EV("BR_MISP_RETIRED.COND_TAKEN", level)
    return instructions / taken

# Instructions per retired return Branch Misprediction

def IpMisp_Ret(self, EV, level):
    """Return INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN."""
    instructions = EV("INST_RETIRED.ANY", level)
    returns = EV("BR_MISP_RETIRED.RETURN", level)
    return instructions / returns

# Instructions per retired indirect call or jump Branch Misprediction

def IpMisp_Indirect(self, EV, level):
    """Return INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT."""
    instructions = EV("INST_RETIRED.ANY", level)
    indirect = EV("BR_MISP_RETIRED.INDIRECT", level)
    return instructions / indirect

# Instructions per Floating Point (FP) Operation

def IpFLOP(self, EV, level):
    """Return INST_RETIRED.ANY / FP_FLOPS_RETIRED.ALL."""
    instructions = EV("INST_RETIRED.ANY", level)
    flops = EV("FP_FLOPS_RETIRED.ALL", level)
    return instructions / flops

# Instructions per FP Arithmetic Scalar Single-Precision instruction

def IpFPArith_Scalar_SP(self, EV, level):
    """Return INST_RETIRED.ANY / FP_INST_RETIRED.32B_SP."""
    instructions = EV("INST_RETIRED.ANY", level)
    scalar_sp = EV("FP_INST_RETIRED.32B_SP", level)
    return instructions / scalar_sp

# Instructions per FP Arithmetic Scalar Double-Precision instruction

def IpFPArith_Scalar_DP(self, EV, level):
    """Return INST_RETIRED.ANY / FP_INST_RETIRED.64B_DP."""
    instructions = EV("INST_RETIRED.ANY", level)
    scalar_dp = EV("FP_INST_RETIRED.64B_DP", level)
    return instructions / scalar_dp

# Instructions per FP Arithmetic AVX/SSE 128-bit instruction

def IpFPArith_AVX128(self, EV, level):
    """Return INST_RETIRED.ANY / (FP_INST_RETIRED.128B_DP + FP_INST_RETIRED.128B_SP)."""
    instructions = EV("INST_RETIRED.ANY", level)
    packed_128 = EV("FP_INST_RETIRED.128B_DP", level) + EV("FP_INST_RETIRED.128B_SP", level)
    return instructions / packed_128

# Percentage of all uops which are microcode ops

def Microcode_Uop_Ratio(self, EV, level):
    """Return 100 * UOPS_RETIRED.MS / TOPDOWN_RETIRING.ALL_P."""
    ms_uops = EV("UOPS_RETIRED.MS", level)
    all_uops = EV("TOPDOWN_RETIRING.ALL_P", level)
    return 100 * ms_uops / all_uops

# Percentage of all uops which are FPDiv uops

def FPDiv_Uop_Ratio(self, EV, level):
    """Return 100 * UOPS_RETIRED.FPDIV / TOPDOWN_RETIRING.ALL_P."""
    fpdiv_uops = EV("UOPS_RETIRED.FPDIV", level)
    all_uops = EV("TOPDOWN_RETIRING.ALL_P", level)
    return 100 * fpdiv_uops / all_uops

# Percentage of all uops which are IDiv uops

def IDiv_Uop_Ratio(self, EV, level):
    """Return 100 * UOPS_RETIRED.IDIV / TOPDOWN_RETIRING.ALL_P."""
    idiv_uops = EV("UOPS_RETIRED.IDIV", level)
    all_uops = EV("TOPDOWN_RETIRING.ALL_P", level)
    return 100 * idiv_uops / all_uops

# Percentage of all uops which are x87 uops

def X87_Uop_Ratio(self, EV, level):
    """Return 100 * UOPS_RETIRED.X87 / TOPDOWN_RETIRING.ALL_P."""
    x87_uops = EV("UOPS_RETIRED.X87", level)
    all_uops = EV("TOPDOWN_RETIRING.ALL_P", level)
    return 100 * x87_uops / all_uops

# Instructions per Load

def IpLoad(self, EV, level):
    """Return INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS."""
    instructions = EV("INST_RETIRED.ANY", level)
    loads = EV("MEM_UOPS_RETIRED.ALL_LOADS", level)
    return instructions / loads

# Instructions per Store

def IpStore(self, EV, level):
    """Return INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES."""
    instructions = EV("INST_RETIRED.ANY", level)
    stores = EV("MEM_UOPS_RETIRED.ALL_STORES", level)
    return instructions / stores

# Ratio of mem load uops to all uops

def MemLoad_Ratio(self, EV, level):
    """Return 1000 * MEM_UOPS_RETIRED.ALL_LOADS / TOPDOWN_RETIRING.ALL_P.

    Note the scale factor here is 1000, not the 100 used by the
    percentage-style ratios around it.
    """
    loads = EV("MEM_UOPS_RETIRED.ALL_LOADS", level)
    all_uops = EV("TOPDOWN_RETIRING.ALL_P", level)
    return 1000 * loads / all_uops

# Percentage of total non-speculative loads that are splits

def Load_Splits_Ratio(self, EV, level):
    """Return 100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS."""
    split_loads = EV("MEM_UOPS_RETIRED.SPLIT_LOADS", level)
    loads = EV("MEM_UOPS_RETIRED.ALL_LOADS", level)
    return 100 * split_loads / loads

# Percentage of total non-speculative loads that perform one or more locks

def Load_Locks_Ratio(self, EV, level):
    """Return 100 * MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_LOADS."""
    lock_loads = EV("MEM_UOPS_RETIRED.LOCK_LOADS", level)
    loads = EV("MEM_UOPS_RETIRED.ALL_LOADS", level)
    return 100 * lock_loads / loads

# Percentage of total non-speculative loads with a store forward or unknown store address block

def PCT_Loads_with_StoreFwdBlk(self, EV, level):
    """Return 100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS."""
    blocked = EV("LD_BLOCKS.DATA_UNKNOWN", level)
    loads = EV("MEM_UOPS_RETIRED.ALL_LOADS", level)
    return 100 * blocked / loads

# Percentage of total non-speculative loads with an address aliasing block

def PCT_Loads_with_AdressAliasing(self, EV, level):
    """Return 100 * LD_BLOCKS.ADDRESS_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS."""
    aliased = EV("LD_BLOCKS.ADDRESS_ALIAS", level)
    loads = EV("MEM_UOPS_RETIRED.ALL_LOADS", level)
    return 100 * aliased / loads

# Percentage of Memory Execution Bound due to a second level TLB miss

def PCT_LoadHead_with_STLBHit(self, EV, level):
    """Return 100 * LD_HEAD.DTLB_MISS_AT_RET / LD_HEAD.ANY_AT_RET."""
    dtlb_miss = EV("LD_HEAD.DTLB_MISS_AT_RET", level)
    any_block = EV("LD_HEAD.ANY_AT_RET", level)
    return 100 * dtlb_miss / any_block

# Percentage of Memory Execution Bound due to a pagewalk

def PCT_LoadHead_with_Pagewalk(self, EV, level):
    """Return 100 * LD_HEAD.PGWALK_AT_RET / LD_HEAD.ANY_AT_RET."""
    pagewalk = EV("LD_HEAD.PGWALK_AT_RET", level)
    any_block = EV("LD_HEAD.ANY_AT_RET", level)
    return 100 * pagewalk / any_block

# Percentage of Memory Execution Bound due to a store forward address match

def PCT_LoadHead_with_StoreFwding(self, EV, level):
    """Return 100 * LD_HEAD.ST_ADDR_AT_RET / LD_HEAD.ANY_AT_RET."""
    store_addr = EV("LD_HEAD.ST_ADDR_AT_RET", level)
    any_block = EV("LD_HEAD.ANY_AT_RET", level)
    return 100 * store_addr / any_block

# Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc

def PCT_LoadHead_with_OtherPipelineBlks(self, EV, level):
    """Return 100 * LD_HEAD.OTHER_AT_RET / LD_HEAD.ANY_AT_RET."""
    other = EV("LD_HEAD.OTHER_AT_RET", level)
    any_block = EV("LD_HEAD.ANY_AT_RET", level)
    return 100 * other / any_block

# Percentage of Memory Execution Bound due to a first level data cache miss

def PCT_LoadHead_with_L1miss(self, EV, level):
    """Return 100 * LD_HEAD.L1_MISS_AT_RET / LD_HEAD.ANY_AT_RET."""
    l1_miss = EV("LD_HEAD.L1_MISS_AT_RET", level)
    any_block = EV("LD_HEAD.ANY_AT_RET", level)
    return 100 * l1_miss / any_block

# Counts the number of cycles the core is stalled due to store buffer full

def Store_Bound(self, EV, level):
    """Return 100 * (ST_BUF share of MEM_SCHEDULER_BLOCK) * (MEM_SCHEDULER share of SLOTS).

    The first factor is MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL,
    the second is TOPDOWN_BE_BOUND.MEM_SCHEDULER / SLOTS.
    """
    # Evaluate the first quotient before reading the second pair of events,
    # matching the left-to-right evaluation of the single-expression form.
    st_buf_share = EV("MEM_SCHEDULER_BLOCK.ST_BUF", level) / EV("MEM_SCHEDULER_BLOCK.ALL", level)
    mem_sched_share = EV("TOPDOWN_BE_BOUND.MEM_SCHEDULER", level) / SLOTS(self, EV, level)
    return 100 * st_buf_share * mem_sched_share

# Counts the number of cycles that the oldest load of the load buffer is stalled at retirement

def Load_Bound(self, EV, level):
    """Return 100 * (LD_HEAD.L1_BOUND_AT_RET + MEM_BOUND_STALLS_LOAD.ALL) / CLKS."""
    stalled = EV("LD_HEAD.L1_BOUND_AT_RET", level) + EV("MEM_BOUND_STALLS_LOAD.ALL", level)
    return 100 * stalled / CLKS(self, EV, level)

# Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a pipeline block

def L1_Bound(self, EV, level):
    """Return 100 * LD_HEAD.L1_BOUND_AT_RET / CLKS."""
    stalled = EV("LD_HEAD.L1_BOUND_AT_RET", level)
    return 100 * stalled / CLKS(self, EV, level)

# Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2

def PCT_LoadMissBound_with_L2Hit(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_LOAD.L2_HIT / MEM_BOUND_STALLS_LOAD.ALL."""
    l2_hit = EV("MEM_BOUND_STALLS_LOAD.L2_HIT", level)
    all_stalls = EV("MEM_BOUND_STALLS_LOAD.ALL", level)
    return 100 * l2_hit / all_stalls

# Percentage of memory bound stalls where retirement is stalled due to an L1 miss that subsequently misses in the L2

def PCT_LoadMissBound_with_L2Miss(self, EV, level):
    """Return 100 * (LLC_HIT + LLC_MISS) / ALL for MEM_BOUND_STALLS_LOAD."""
    beyond_l2 = EV("MEM_BOUND_STALLS_LOAD.LLC_HIT", level) + EV("MEM_BOUND_STALLS_LOAD.LLC_MISS", level)
    all_stalls = EV("MEM_BOUND_STALLS_LOAD.ALL", level)
    return 100 * beyond_l2 / all_stalls

# Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3

def PCT_LoadMissBound_with_L3Hit(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_LOAD.LLC_HIT / MEM_BOUND_STALLS_LOAD.ALL."""
    llc_hit = EV("MEM_BOUND_STALLS_LOAD.LLC_HIT", level)
    all_stalls = EV("MEM_BOUND_STALLS_LOAD.ALL", level)
    return 100 * llc_hit / all_stalls

# Percentage of memory bound stalls where retirement is stalled due to an L1 miss that subsequently misses the L3

def PCT_LoadMissBound_with_L3Miss(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_LOAD.LLC_MISS / MEM_BOUND_STALLS_LOAD.ALL."""
    llc_miss = EV("MEM_BOUND_STALLS_LOAD.LLC_MISS", level)
    all_stalls = EV("MEM_BOUND_STALLS_LOAD.ALL", level)
    return 100 * llc_miss / all_stalls

# Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2

def PCT_IfetchMissBound_with_L2Hit(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_IFETCH.L2_HIT / MEM_BOUND_STALLS_IFETCH.ALL."""
    l2_hit = EV("MEM_BOUND_STALLS_IFETCH.L2_HIT", level)
    all_stalls = EV("MEM_BOUND_STALLS_IFETCH.ALL", level)
    return 100 * l2_hit / all_stalls

# Percentage of ifetch miss bound stalls, where the ifetch miss doesn't hit in the L2

def PCT_IfetchMissBound_with_L2Miss(self, EV, level):
    """Return 100 * (LLC_HIT + LLC_MISS) / ALL for MEM_BOUND_STALLS_IFETCH."""
    beyond_l2 = EV("MEM_BOUND_STALLS_IFETCH.LLC_HIT", level) + EV("MEM_BOUND_STALLS_IFETCH.LLC_MISS", level)
    all_stalls = EV("MEM_BOUND_STALLS_IFETCH.ALL", level)
    return 100 * beyond_l2 / all_stalls

# Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3

def PCT_IfetchMissBound_with_L3Hit(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_IFETCH.LLC_HIT / MEM_BOUND_STALLS_IFETCH.ALL."""
    llc_hit = EV("MEM_BOUND_STALLS_IFETCH.LLC_HIT", level)
    all_stalls = EV("MEM_BOUND_STALLS_IFETCH.ALL", level)
    return 100 * llc_hit / all_stalls

# Percentage of ifetch miss bound stalls, where the ifetch miss subsequently misses in the L3

def PCT_IfetchMissBound_with_L3Miss(self, EV, level):
    """Return 100 * MEM_BOUND_STALLS_IFETCH.LLC_MISS / MEM_BOUND_STALLS_IFETCH.ALL."""
    llc_miss = EV("MEM_BOUND_STALLS_IFETCH.LLC_MISS", level)
    all_stalls = EV("MEM_BOUND_STALLS_IFETCH.ALL", level)
    return 100 * llc_miss / all_stalls

# Ratio of all branches which mispredict

def Branch_Mispredict_Ratio(self, EV, level):
    """Return BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES (unscaled)."""
    mispredicts = EV("BR_MISP_RETIRED.ALL_BRANCHES", level)
    branches = EV("BR_INST_RETIRED.ALL_BRANCHES", level)
    return mispredicts / branches

# Ratio between Mispredicted branches and unknown branches

def Branch_Mispredict_to_Unknown_Branch_Ratio(self, EV, level):
    """Return BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY."""
    mispredicts = EV("BR_MISP_RETIRED.ALL_BRANCHES", level)
    baclears = EV("BACLEARS.ANY", level)
    return mispredicts / baclears

# Counts the number of machine clears relative to thousands of instructions retired, due to floating point assists

def Machine_Clears_FP_Assist_PKI(self, EV, level):
    """Return 1000 * MACHINE_CLEARS.FP_ASSIST / INST_RETIRED.ANY."""
    clears = EV("MACHINE_CLEARS.FP_ASSIST", level)
    instructions = EV("INST_RETIRED.ANY", level)
    return 1000 * clears / instructions

# Counts the number of machine clears relative to thousands of instructions retired, due to page faults

def Machine_Clears_Page_Fault_PKI(self, EV, level):
    """Return 1000 * MACHINE_CLEARS.PAGE_FAULT / INST_RETIRED.ANY."""
    clears = EV("MACHINE_CLEARS.PAGE_FAULT", level)
    instructions = EV("INST_RETIRED.ANY", level)
    return 1000 * clears / instructions

# Counts the number of machine clears relative to thousands of instructions retired, due to self-modifying code

def Machine_Clears_SMC_PKI(self, EV, level):
    """Return 1000 * MACHINE_CLEARS.SMC / INST_RETIRED.ANY."""
    clears = EV("MACHINE_CLEARS.SMC", level)
    instructions = EV("INST_RETIRED.ANY", level)
    return 1000 * clears / instructions

# Percentage of time that allocation is stalled due to store buffer full

def PCT_Store_Buffer_Stall_Cycles(self, EV, level):
    """Return 100 * MEM_SCHEDULER_BLOCK.ST_BUF / CLKS."""
    blocked = EV("MEM_SCHEDULER_BLOCK.ST_BUF", level)
    return 100 * blocked / CLKS(self, EV, level)

# Percentage of time that allocation is stalled due to load buffer full

def PCT_Load_Buffer_Stall_Cycles(self, EV, level):
    """Return 100 * MEM_SCHEDULER_BLOCK.LD_BUF / CLKS."""
    blocked = EV("MEM_SCHEDULER_BLOCK.LD_BUF", level)
    return 100 * blocked / CLKS(self, EV, level)

# Percentage of time that allocation is stalled due to memory reservation stations full

def PCT_Mem_RSV_Stall_Cycles(self, EV, level):
    """Return 100 * MEM_SCHEDULER_BLOCK.RSV / CLKS."""
    blocked = EV("MEM_SCHEDULER_BLOCK.RSV", level)
    return 100 * blocked / CLKS(self, EV, level)

# Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction

def PCT_Tpause_Cycles(self, EV, level):
    """Return 100 * SERIALIZATION.C01_MS_SCB / SLOTS.

    Note the denominator is SLOTS, not CLKS as in the neighbouring
    stall-cycle percentages.
    """
    paused = EV("SERIALIZATION.C01_MS_SCB", level)
    return 100 * paused / SLOTS(self, EV, level)

# Average Frequency Utilization relative to nominal frequency

def Turbo_Utilization(self, EV, level):
    """Return CLKS / CPU_CLK_UNHALTED.REF_TSC."""
    clocks = CLKS(self, EV, level)
    return clocks / EV("CPU_CLK_UNHALTED.REF_TSC", level)

# Fraction of cycles spent in Kernel mode

def Kernel_Utilization(self, EV, level):
    """Return CPU_CLK_UNHALTED.CORE_P:sup / CLKS."""
    kernel_clocks = EV("CPU_CLK_UNHALTED.CORE_P:sup", level)
    return kernel_clocks / CLKS(self, EV, level)

# Average CPU Utilization

def CPU_Utilization(self, EV, level):
    """Return CPU_CLK_UNHALTED.REF_TSC / msr/tsc/.

    The reference clocks are read at `level`; "msr/tsc/" is always read at
    level 0.
    """
    ref_clocks = EV("CPU_CLK_UNHALTED.REF_TSC", level)
    tsc = EV("msr/tsc/", 0)
    return ref_clocks / tsc

# Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width

def GFLOPs(self, EV, level):
    """Return FP_FLOPS_RETIRED.ALL / (interval-s * 1000000000).

    The FLOP count is read at `level`; "interval-s" is always read at level 0.
    """
    flops = EV("FP_FLOPS_RETIRED.ALL", level)
    seconds = EV("interval-s", 0)
    return flops / (seconds * 1000000000)

# Event groups

class Frontend_Bound:
    """Level-1 node: TOPDOWN_FE_BOUND.ALL_P as a fraction of SLOTS."""
    name = "Frontend_Bound"
    domain = "Slots"
    area = "FE"
    level = 1
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh is True when val exceeds 0.20. A ZeroDivisionError is routed
        to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.ALL_P", 1) / SLOTS(self, EV, 1)
            self.thresh = (self.val > 0.20)
        except ZeroDivisionError:
            handle_error(self, "Frontend_Bound zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to frontend stalls."""

class IFetch_Latency:
    """Level-2 node: TOPDOWN_FE_BOUND.FRONTEND_LATENCY as a fraction of SLOTS."""
    name = "IFetch_Latency"
    domain = "Slots"
    area = "FE"
    level = 2
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.15 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.FRONTEND_LATENCY", 2) / SLOTS(self, EV, 2)
            self.thresh = (self.val > 0.15) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "IFetch_Latency zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to frontend latency restrictions due to

icache misses, itlb misses, branch detection, and resteer

limitations."""

class ICache_Misses:
    """Level-3 node: TOPDOWN_FE_BOUND.ICACHE as a fraction of SLOTS."""
    name = "ICache_Misses"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.ICACHE", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "ICache_Misses zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to instruction cache misses."""

class ITLB_Misses:
    """Level-3 node: TOPDOWN_FE_BOUND.ITLB_MISS as a fraction of SLOTS."""
    name = "ITLB_Misses"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.ITLB_MISS", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "ITLB_Misses zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to Instruction Table Lookaside Buffer

(ITLB) misses."""

class Branch_Detect:
    """Level-3 node: TOPDOWN_FE_BOUND.BRANCH_DETECT as a fraction of SLOTS."""
    name = "Branch_Detect"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.BRANCH_DETECT", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Branch_Detect zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to BACLEARS, which occurs when the Branch

Target Buffer (BTB) prediction or lack thereof, was

corrected by a later branch predictor in the frontend.

Includes BACLEARS due to all branch types including

conditional and unconditional jumps, returns, and indirect

branches."""

class Branch_Resteer:
    """Level-3 node: TOPDOWN_FE_BOUND.BRANCH_RESTEER as a fraction of SLOTS."""
    name = "Branch_Resteer"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.BRANCH_RESTEER", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Branch_Resteer zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to BTCLEARS, which occurs when the Branch

Target Buffer (BTB) predicts a taken branch."""

class IFetch_Bandwidth:
    """Level-2 node: TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH as a fraction of SLOTS."""
    name = "IFetch_Bandwidth"
    domain = "Slots"
    area = "FE"
    level = 2
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.10 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", 2) / SLOTS(self, EV, 2)
            self.thresh = (self.val > 0.10) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "IFetch_Bandwidth zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to frontend bandwidth restrictions due to

decode, predecode, cisc, and other limitations."""

class Cisc:
    """Level-3 node: TOPDOWN_FE_BOUND.CISC as a fraction of SLOTS."""
    name = "Cisc"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.CISC", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Cisc zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to the microcode sequencer (MS)."""

class Decode:
    """Level-3 node: TOPDOWN_FE_BOUND.DECODE as a fraction of SLOTS."""
    name = "Decode"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.DECODE", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Decode zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to decode stalls."""

class Predecode:
    """Level-3 node: TOPDOWN_FE_BOUND.PREDECODE as a fraction of SLOTS."""
    name = "Predecode"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.PREDECODE", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Predecode zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to wrong predecodes."""

class Other_FB:
    """Level-3 node: TOPDOWN_FE_BOUND.OTHER as a fraction of SLOTS."""
    name = "Other_FB"
    domain = "Slots"
    area = "FE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_FE_BOUND.OTHER", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Other_FB zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not delivered by

the frontend due to other common frontend stalls not

categorized."""

class Bad_Speculation:
    """Level-1 node: TOPDOWN_BAD_SPECULATION.ALL_P as a fraction of SLOTS."""
    name = "Bad_Speculation"
    domain = "Slots"
    area = "BAD"
    level = 1
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh is True when val exceeds 0.15. A ZeroDivisionError is routed
        to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BAD_SPECULATION.ALL_P", 1) / SLOTS(self, EV, 1)
            self.thresh = (self.val > 0.15)
        except ZeroDivisionError:
            handle_error(self, "Bad_Speculation zero division")
        return self.val

    desc = """

Counts the total number of issue slots that were not

consumed by the backend because allocation is stalled due to

a mispredicted jump or a machine clear. Only issue slots

wasted due to fast nukes such as memory ordering nukes are

counted. Other nukes are not accounted for. Counts all issue

slots blocked during this recovery window including relevant

microcode flows and while uops are not yet available in the

instruction queue (IQ). Also includes the issue slots that

were consumed by the backend but were thrown away because

they were younger than the mispredict or machine clear."""

class Branch_Mispredicts:
    """Level-2 node: TOPDOWN_BAD_SPECULATION.MISPREDICT as a fraction of SLOTS."""
    name = "Branch_Mispredicts"
    domain = "Slots"
    area = "BAD"
    level = 2
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BAD_SPECULATION.MISPREDICT", 2) / SLOTS(self, EV, 2)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Branch_Mispredicts zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to branch mispredicts"""

class Machine_Clears:
    """Level-2 node: TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS as a fraction of SLOTS."""
    name = "Machine_Clears"
    domain = "Slots"
    area = "BAD"
    level = 2
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", 2) / SLOTS(self, EV, 2)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Machine_Clears zero division")
        return self.val

    desc = """

Counts the total number of issue slots that were not

consumed by the backend because allocation is stalled due to

a machine clear (nuke) of any kind including memory ordering

and memory disambiguation"""

class Nuke:
    """Level-3 node: TOPDOWN_BAD_SPECULATION.NUKE as a fraction of SLOTS."""
    name = "Nuke"
    domain = "Slots"
    area = "BAD"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BAD_SPECULATION.NUKE", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Nuke zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to a machine clear that requires the use of

microcode (slow nuke)"""

class Fast_Nuke:
    """Level-3 node: TOPDOWN_BAD_SPECULATION.FASTNUKE as a fraction of SLOTS."""
    name = "Fast_Nuke"
    domain = "Slots"
    area = "BAD"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.05 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BAD_SPECULATION.FASTNUKE", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.05) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Fast_Nuke zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to a machine clear that does not require the

use of microcode, classified as a fast nuke, due to memory

ordering, memory disambiguation and memory renaming"""

class Backend_Bound:
    """Level-1 node: TOPDOWN_BE_BOUND.ALL_P as a fraction of SLOTS."""
    name = "Backend_Bound"
    domain = "Slots"
    area = "BE"
    level = 1
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh is True when val exceeds 0.10. A ZeroDivisionError is routed
        to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BE_BOUND.ALL_P", 1) / SLOTS(self, EV, 1)
            self.thresh = (self.val > 0.10)
        except ZeroDivisionError:
            handle_error(self, "Backend_Bound zero division")
        return self.val

    desc = """

Counts the total number of issue slots that were not

consumed by the backend due to backend stalls. Note that

uops must be available for consumption in order for this

event to count. If a uop is not available (IQ is empty),

this event will not count"""

class Core_Bound:
    """Level-2 node: TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS as a fraction of SLOTS."""
    name = "Core_Bound"
    domain = "Slots"
    area = "BE"
    level = 2
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.10 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", 2) / SLOTS(self, EV, 2)
            self.thresh = (self.val > 0.10) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Core_Bound zero division")
        return self.val

    desc = """

Counts the number of cycles due to backend bound stalls that

are bounded by core restrictions and not attributed to an

outstanding load or stores, or resource limitation"""

class Allocation_Restriction:
    """Level-3 node: TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS as a fraction of SLOTS."""
    name = "Allocation_Restriction"
    domain = "Slots"
    area = "BE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.10 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.10) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Allocation_Restriction zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to certain allocation restrictions"""

class Resource_Bound:
    """Level-2 node: TOPDOWN_BE_BOUND.ALL_P / SLOTS minus the Core_Bound value."""
    name = "Resource_Bound"
    domain = "Slots"
    area = "BE"
    level = 2
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        The value subtracts self.Core_Bound.compute(EV) from the backend-bound
        fraction. thresh requires val > 0.20 and self.parent.thresh. Neither
        `Core_Bound` nor `parent` is defined in this class; both must be
        attached by external code. A ZeroDivisionError is routed to
        handle_error().
        """
        try:
            self.val = ((EV("TOPDOWN_BE_BOUND.ALL_P", 2) / SLOTS(self, EV, 2)) - self.Core_Bound.compute(EV))
            self.thresh = (self.val > 0.20) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Resource_Bound zero division")
        return self.val

    desc = """

Counts the number of cycles the core is stalled due to a

resource limitation"""

class Mem_Scheduler:
    """Level-3 node: TOPDOWN_BE_BOUND.MEM_SCHEDULER as a fraction of SLOTS."""
    name = "Mem_Scheduler"
    domain = "Slots"
    area = "BE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.10 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BE_BOUND.MEM_SCHEDULER", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.10) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Mem_Scheduler zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to memory reservation stalls in which a

scheduler is not able to accept uops"""

class Non_Mem_Scheduler:
    """Level-3 node: TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER as a fraction of SLOTS."""
    name = "Non_Mem_Scheduler"
    domain = "Slots"
    area = "BE"
    level = 3
    htoff = False
    sample = []
    errcount = 0
    sibling = None
    metricgroup = frozenset([])
    maxval = None

    def compute(self, EV):
        """Set self.val and self.thresh from events read via EV; return self.val.

        thresh requires val > 0.10 and self.parent.thresh; `parent` is not
        defined in this class and must be attached by external code. A
        ZeroDivisionError is routed to handle_error().
        """
        try:
            self.val = EV("TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", 3) / SLOTS(self, EV, 3)
            self.thresh = (self.val > 0.10) and self.parent.thresh
        except ZeroDivisionError:
            handle_error(self, "Non_Mem_Scheduler zero division")
        return self.val

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to IEC or FPC RAT stalls, which can be due

to FIQ or IEC reservation stalls in which the integer,

floating point or SIMD scheduler is not able to accept uops"""

class Register:
    """Level-3 "BE" node: TOPDOWN_BE_BOUND.REGISTER relative to SLOTS."""

    # Identity and placement of the node.
    name = "Register"
    area = "BE"
    level = 3
    domain = "Slots"
    metricgroup = frozenset([])

    # Remaining per-node attributes; their consumers are outside this block.
    htoff = False
    maxval = None
    sibling = None
    sample = []
    errcount = 0

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to the physical register file unable to

accept an entry (marble stalls)"""

    def compute(self, EV):
        """Set self.val and self.thresh, then return self.val.

        EV is invoked as EV(event_name, level). A ZeroDivisionError is
        routed to handle_error instead of propagating.
        """
        try:
            register_stalls = EV("TOPDOWN_BE_BOUND.REGISTER", 3)
            self.val = register_stalls / SLOTS(self, EV, 3)
            # Flagged only above 10% and only when the parent is flagged too.
            exceeds = self.val > 0.10
            self.thresh = exceeds and self.parent.thresh
        except ZeroDivisionError:
            # NOTE(review): handle_error presumably leaves self.val defined - confirm.
            handle_error(self, "Register zero division")
        return self.val

class Reorder_Buffer:
    """Level-3 "BE" node: TOPDOWN_BE_BOUND.REORDER_BUFFER relative to SLOTS."""

    # Identity and placement of the node.
    name = "Reorder_Buffer"
    area = "BE"
    level = 3
    domain = "Slots"
    metricgroup = frozenset([])

    # Remaining per-node attributes; their consumers are outside this block.
    htoff = False
    maxval = None
    sibling = None
    sample = []
    errcount = 0

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to the reorder buffer being full (ROB

stalls)"""

    def compute(self, EV):
        """Set self.val and self.thresh, then return self.val.

        EV is invoked as EV(event_name, level). A ZeroDivisionError is
        routed to handle_error instead of propagating.
        """
        try:
            rob_stalls = EV("TOPDOWN_BE_BOUND.REORDER_BUFFER", 3)
            self.val = rob_stalls / SLOTS(self, EV, 3)
            # Flagged only above 10% and only when the parent is flagged too.
            exceeds = self.val > 0.10
            self.thresh = exceeds and self.parent.thresh
        except ZeroDivisionError:
            # NOTE(review): handle_error presumably leaves self.val defined - confirm.
            handle_error(self, "Reorder_Buffer zero division")
        return self.val

class Serialization:
    """Level-3 "BE" node: TOPDOWN_BE_BOUND.SERIALIZATION relative to SLOTS."""

    # Identity and placement of the node.
    name = "Serialization"
    area = "BE"
    level = 3
    domain = "Slots"
    metricgroup = frozenset([])

    # Remaining per-node attributes; their consumers are outside this block.
    htoff = False
    maxval = None
    sibling = None
    sample = []
    errcount = 0

    desc = """

Counts the number of issue slots that were not consumed by

the backend due to scoreboards from the instruction queue

(IQ), jump execution unit (JEU), or microcode sequencer (MS)"""

    def compute(self, EV):
        """Set self.val and self.thresh, then return self.val.

        EV is invoked as EV(event_name, level). A ZeroDivisionError is
        routed to handle_error instead of propagating.
        """
        try:
            serialized = EV("TOPDOWN_BE_BOUND.SERIALIZATION", 3)
            self.val = serialized / SLOTS(self, EV, 3)
            # Flagged only above 10% and only when the parent is flagged too.
            exceeds = self.val > 0.10
            self.thresh = exceeds and self.parent.thresh
        except ZeroDivisionError:
            # NOTE(review): handle_error presumably leaves self.val defined - confirm.
            handle_error(self, "Serialization zero division")
        return self.val

class Retiring:
    """Level-1 "RET" node: TOPDOWN_RETIRING.ALL_P relative to SLOTS."""

    # Identity and placement of the node.
    name = "Retiring"
    area = "RET"
    level = 1
    domain = "Slots"
    metricgroup = frozenset([])

    # Remaining per-node attributes; their consumers are outside this block.
    htoff = False
    maxval = None
    sibling = None
    sample = []
    errcount = 0

    desc = """

Counts the number of issue slots that result in retirement

slots"""

    def compute(self, EV):
        """Set self.val and self.thresh, then return self.val.

        EV is invoked as EV(event_name, level). No parent threshold is
        consulted here. A ZeroDivisionError is routed to handle_error
        instead of propagating.
        """
        try:
            retired = EV("TOPDOWN_RETIRING.ALL_P", 1)
            self.val = retired / SLOTS(self, EV, 1)
            # Flagged when more than 75% of slots retire.
            self.thresh = self.val > 0.75
        except ZeroDivisionError:
            # NOTE(review): handle_error presumably leaves self.val defined - confirm.
            handle_error(self, "Retiring zero division")
        return self.val

class Metric_PCT_Mem_Exec_Bound_Cycles:
    """Info.Bottleneck metric wrapping the PCT_Mem_Exec_Bound_Cycles helper."""

    # Identity and grouping of the metric.
    name = "PCT_Mem_Exec_Bound_Cycles"
    area = "Info.Bottleneck"
    domain = "Cycles"
    metricgroup = frozenset(['Mem_Exec'])

    # Remaining per-metric attributes; their consumers are outside this block.
    sibling = None
    maxval = 0
    errcount = 0

    desc = """

Percentage of time that retirement is stalled by the Memory

Cluster due to a pipeline stall. See Info.Mem_Exec_Bound"""

    def compute(self, EV):
        """Store the helper's result in self.val and mark the metric as flagged.

        Nothing is returned. A ZeroDivisionError is routed to
        handle_error_metric instead of propagating.
        """
        try:
            # The bare name resolves to the module-level helper, not this class.
            result = PCT_Mem_Exec_Bound_Cycles(self, EV, 0)
            self.val = result
            self.thresh = True
        except ZeroDivisionError:
            handle_error_metric(self, "PCT_Mem_Exec_Bound_Cycles zero division")

class Metric_PCT_Load_Miss_Bound_Cycles:
    """Info.Bottleneck metric wrapping the PCT_Load_Miss_Bound_Cycles helper."""

    # Identity and grouping of the metric.
    name = "PCT_Load_Miss_Bound_Cycles"
    area = "Info.Bottleneck"
    domain = "Cycles"
    metricgroup = frozenset(['Load_Store_Miss'])

    # Remaining per-metric attributes; their consumers are outside this block.
    sibling = None
    maxval = 0
    errcount = 0

    desc = """

Percentage of time that retirement is stalled due to an L1

miss. See Info.Load_Miss_Bound"""

    def compute(self, EV):
        """Store the helper's result in self.val and mark the metric as flagged.

        Nothing is returned. A ZeroDivisionError is routed to
        handle_error_metric instead of propagating.
        """
        try:
            # The bare name resolves to the module-level helper, not this class.
            result = PCT_Load_Miss_Bound_Cycles(self, EV, 0)
            self.val = result
            self.thresh = True
        except ZeroDivisionError:
            handle_error_metric(self, "PCT_Load_Miss_Bound_Cycles zero division")

class Metric_PCT_DTLB_Miss_Bound_Cycles:
    """Info.Bottleneck metric wrapping the PCT_DTLB_Miss_Bound_Cycles helper."""

    # Identity and grouping of the metric (no metric group is assigned).
    name = "PCT_DTLB_Miss_Bound_Cycles"
    area = "Info.Bottleneck"
    domain = "Cycles"
    metricgroup = frozenset([])

    # Remaining per-metric attributes; their consumers are outside this block.
    sibling = None
    maxval = 0
    errcount = 0

    desc = """

Percentage of time that retirement is stalled due to a first

level data TLB miss"""

    def compute(self, EV):
        """Store the helper's result in self.val and mark the metric as flagged.

        Nothing is returned. A ZeroDivisionError is routed to
        handle_error_metric instead of propagating.
        """
        try:
            # The bare name resolves to the module-level helper, not this class.
            result = PCT_DTLB_Miss_Bound_Cycles(self, EV, 0)
            self.val = result
            self.thresh = True
        except ZeroDivisionError:
            handle_error_metric(self, "PCT_DTLB_Miss_Bound_Cycles zero division")

class Metric_PCT_IFetch_Miss_Bound_Cycles:

name = "PCT_IFetch_Miss_Bound_Cycles"

domain = "Cycles"

maxval = 0

errcount = 0

area = "Info.Bottleneck"

metricgroup = frozenset(['Ifetch'])

sibling = None

def compute(self, EV):

try:

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

mtl_cmt_ratios.py

mtl_cmt_ratios.py

Files

mtl_cmt_ratios.py

Latest commit

History

mtl_cmt_ratios.py

File metadata and controls