diff --git a/libnvsochwpm/test/hwpm_record_format.h b/libnvsochwpm/test/hwpm_record_format.h index b8c9d90..c815b0e 100644 --- a/libnvsochwpm/test/hwpm_record_format.h +++ b/libnvsochwpm/test/hwpm_record_format.h @@ -137,6 +137,191 @@ struct ModeERecordVolta : ModeERecordRaw } }; +// Basic mode E userdata record in the native HW layout: eight 32-bit words, each overlaid with its individual sub-fields through an anonymous union +struct ModeERecordUserData +{ + union + { + uint32_t data0_3; + struct + { + uint8_t data0; + uint8_t data1; + uint8_t data2; + uint8_t data3; + }; + }; + + union + { + uint32_t meta; + struct + { + uint8_t cnt_d0_2; /* cnt[4:0] in bits 4:0, dropped[2:0] in bits 7:5 */ + uint8_t perfmon_id; /* PERFMONID[7:0] */ + uint16_t d3_11_pid_sd_tm; /* dropped[11:3] in bits 8:0, PI[10:8] in bits 11:9, SD at bit 12, TM at bit 14 */ + }; + }; + + union + { + uint32_t data4_7; + struct + { + uint8_t data4; + uint8_t data5; + uint8_t data6; + uint8_t data7; + }; + }; + + union + { + uint32_t data8_11; + struct + { + uint8_t data8; + uint8_t data9; + uint8_t data10; + uint8_t data11; + }; + }; + + union + { + uint32_t data12_15; + struct + { + uint8_t data12; + uint8_t data13; + uint8_t data14; + uint8_t data15; + }; + }; + + union + { + uint32_t data16_19; + struct + { + uint8_t data16; + uint8_t data17; + uint8_t data18; + uint8_t data19; + }; + }; + + union + { + uint32_t data20_23; + struct + { + uint8_t data20; + uint8_t data21; + uint8_t data22; + uint8_t data23; + }; + }; + + union + { + uint32_t data24_27; + struct + { + uint8_t data24; + uint8_t data25; + uint8_t data26; + uint8_t data27; + }; + }; + + /* Number of valid bytes populated in data0-27: cnt[4:0], the low 5 bits of cnt_d0_2. */ + uint32_t GetCount() const + { + uint32_t count = cnt_d0_2 & 0x1F; + return count; + } + + uint32_t GetPerfmonId() const + { + uint32_t perfmonId_lsb = perfmon_id; + uint32_t perfmonId_msb = (d3_11_pid_sd_tm & 0xe00) >> 1; /* PI[10:8] is held in bits 11:9; shifting right by 1 aligns it to bits 10:8 of the id */ + uint32_t perfmonId = perfmonId_msb | perfmonId_lsb; + return perfmonId; + } + + uint32_t GetDropped() const + { + uint32_t dropped_lsb = cnt_d0_2 >> 5U; /* dropped[2:0] */ + uint32_t dropped_msb = (d3_11_pid_sd_tm & 0x1FFU) << 3; /* dropped[11:3] */ + uint32_t dropped = dropped_msb | dropped_lsb; + return dropped; + } + + uint32_t GetSD() const + { + uint8_t sd = 
(d3_11_pid_sd_tm >> 12U) & 0x1U; /* SD flag: bit 12 */ + return sd; + } + + uint32_t GetTM() const + { + uint8_t sd = (d3_11_pid_sd_tm >> 14U) & 0x1U; /* TM flag: bit 14 (the local is named sd but carries TM) */ + return sd; + } +}; + +/* MODE E user data packet +Mode E while in USERDATA mode, see NV_PERF_PMM_CONTROL2_MODEE_USERDATA_ENABLED -- + +Detailed description : http://p4viewer.nvidia.com/get/hw/doc/gpu/maxwell/maxwell/design/IAS/Maxwell_HWPM_IAS.doc, section 3.4. + + 15 8 7 0 + .--+--+--+--+--+--+--+--+--+--+--+--+--+--+-----. +0x00 | data1[7:0] | data0[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x02 | data3[7:0] | data2[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x04 | PERFMONID[7:0] | d[2:0] | cnt[4:0] | d[2:0] is lower 3 bits of dropped[11:0] field + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x06 |0 |TM|0 |SD|PI[10:8] | dropped[11:3] | PI is upper bits of PERFMONID + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x08 | data5[7:0] | data4[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x0A | data7[7:0] | data6[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x0C | data9[7:0] | data8[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x0E | data11[7:0] | data10[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x10 | data13[7:0] | data12[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x12 | data15[7:0] | data14[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x14 | data17[7:0] | data16[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x16 | data19[7:0] | data18[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x18 | data21[7:0] | data20[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x1A | data23[7:0] | data22[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x1C | data25[7:0] | data24[7:0] | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +0x1E | data27[7:0] | data26[7:0] | + `--+--+--+--+--+--+--+--+--+--+--+--+--+--+-----' + +dropped[11:0] is the number of dropped bytes 
due to backpressure after this record. + There is no counting of bytes dropped before this record. The only time we drop + bytes before a record is in the interval between + - PM trigger start, AND + - First byte of packet (userdata_start) +cnt[4:0] is the number of valid bytes populated in this record. +PERFMONID(lsb) (PI= PERFMONID msb) DS has the same meaning as mode C/E records. +TM is a field specific only to UserData mode, indicating a missed perf + trigger. TM is not context switched and is reset to 0 +*/ + // ============================================================================= // Mode C Record // ============================================================================= diff --git a/libnvsochwpm/test/soc_mode_e_buffer.cpp b/libnvsochwpm/test/soc_mode_e_buffer.cpp index 452d22b..b35376f 100644 --- a/libnvsochwpm/test/soc_mode_e_buffer.cpp +++ b/libnvsochwpm/test/soc_mode_e_buffer.cpp @@ -191,7 +191,7 @@ void SocModeEBuffer::ParseRecords() if (!p_mode_c_record->GetTimestamp()) { m_zero_timestamp_detected = true; } - } else { + } else if (m_record_format == RecordFormatType::ModeE) { auto p_mode_e_record = (ModeERecordVolta*)p_record; uint16_t sample_count = p_mode_e_record->GetSampleCount(); bool delayed_sampled = p_mode_e_record->IsDelayedSampled(); @@ -254,6 +254,9 @@ void SocModeEBuffer::ParseRecords() if (!p_mode_e_record->GetTimestamp()) { m_zero_timestamp_detected = true; } + } else if (m_record_format == RecordFormatType::ModeE_userdata) { + m_num_valid_records++; + m_perfmon_id_trigger_count_map.emplace(record_perfmon_id, 0); } } else { // Reach the end of valid records @@ -378,10 +381,17 @@ uint64_t SocModeEBuffer::GetLastPmaTimestamp() return m_last_pma_timestamp; } -void SocModeEBuffer::PrintRecord(PmRecordSocCommonPrefix* record, bool is_pma_record, bool is_mode_c) +void SocModeEBuffer::PrintRecord( + PmRecordSocCommonPrefix* record, + bool is_pma_record, + enum RecordFormatType format_type) { char str_buffer[256]; + bool is_mode_c = 
format_type == RecordFormatType::ModeC; + bool is_mode_e = format_type == RecordFormatType::ModeE; + bool is_mode_e_userdata = format_type == RecordFormatType::ModeE_userdata; + if (is_pma_record) { auto p_pma_record = (PmaRecordSoc*)(record); @@ -418,7 +428,7 @@ void SocModeEBuffer::PrintRecord(PmRecordSocCommonPrefix* record, bool is_pma_re p_mode_c_record->counter[11] ); } - else + else if (is_mode_e) { auto p_mode_e_record = (ModeERecordVolta*)(record); sprintf(str_buffer, @@ -435,6 +445,25 @@ void SocModeEBuffer::PrintRecord(PmRecordSocCommonPrefix* record, bool is_pma_re p_mode_e_record->zero3 ); } + else if (is_mode_e_userdata) + { + auto p_mode_e_userdata_record = (ModeERecordUserData*)(record); + sprintf(str_buffer, + "[MODEE_UD] PERFMON %3x, COUNT %4u, DROPPED %d, SD %d, TM %d, DATA 0-3 0x%x, DATA 4-7 0x%x, DATA 8-11 0x%x, DATA 12-15 0x%x, DATA 16-19 0x%x, DATA 20-23 0x%x, DATA 24-27 0x%x\n", + p_mode_e_userdata_record->GetPerfmonId(), + p_mode_e_userdata_record->GetCount(), + p_mode_e_userdata_record->GetDropped(), + p_mode_e_userdata_record->GetSD(), + p_mode_e_userdata_record->GetTM(), + p_mode_e_userdata_record->data0_3, + p_mode_e_userdata_record->data4_7, + p_mode_e_userdata_record->data8_11, + p_mode_e_userdata_record->data12_15, + p_mode_e_userdata_record->data16_19, + p_mode_e_userdata_record->data20_23, + p_mode_e_userdata_record->data24_27 + ); + } std::cerr << str_buffer; } @@ -482,7 +511,10 @@ bool SocModeEBuffer::RealtimeParseFlush(SocRealtimeParseFlushData& stats, bool v if (verbose) { std::cerr << "Incomplete PMA record: ptimer == 0\n"; - PrintRecord(p_record_common_prefix, /*is_pma_record*/ true); + PrintRecord( + p_record_common_prefix, + /*is_pma_record*/ true, + RecordFormatType::ModeC); } break; } @@ -493,7 +525,10 @@ bool SocModeEBuffer::RealtimeParseFlush(SocRealtimeParseFlushData& stats, bool v if (verbose) { std::cerr << "Incomplete PMA record: totalTrigCnt == 0\n"; - PrintRecord(p_record_common_prefix, /*is_pma_record*/ true); + 
PrintRecord( + p_record_common_prefix, + /*is_pma_record*/ true, + RecordFormatType::ModeC); } break; } @@ -503,7 +538,10 @@ bool SocModeEBuffer::RealtimeParseFlush(SocRealtimeParseFlushData& stats, bool v if (verbose) { std::cerr << "Malformed PMA record: ptimer " << curr_ptimer << " <= lastPtimer " << last_pma_timestamp << "\n"; - PrintRecord(p_record_common_prefix, /*is_pma_record*/ true); + PrintRecord( + p_record_common_prefix, + /*is_pma_record*/ true, + RecordFormatType::ModeC); } break; } @@ -513,7 +551,7 @@ bool SocModeEBuffer::RealtimeParseFlush(SocRealtimeParseFlushData& stats, bool v if (verbose) { std::cerr << "Malformed PMA record: totalTrigCnt " << (int)total_trig_cnt << " <= lastTriggerCount " << (int)last_trigger_count << "\n"; - PrintRecord(p_record_common_prefix, /*is_pma_record*/ true); + PrintRecord(p_record_common_prefix, /*is_pma_record*/true, RecordFormatType::ModeC); } break; } @@ -536,7 +574,10 @@ bool SocModeEBuffer::RealtimeParseFlush(SocRealtimeParseFlushData& stats, bool v if (verbose) { std::cerr << "Incomplete ModeE record: timestamp == 0\n"; - PrintRecord(p_record_common_prefix, /*is_pma_record*/ false); + PrintRecord( + p_record_common_prefix, + /*is_pma_record*/ false, + m_record_format); } break; } @@ -547,7 +588,10 @@ bool SocModeEBuffer::RealtimeParseFlush(SocRealtimeParseFlushData& stats, bool v if (verbose) { std::cerr << "Incomplete ModeE record: totalTriggerCount == 0\n"; - PrintRecord(p_record_common_prefix, /*is_pma_record*/ false); + PrintRecord( + p_record_common_prefix, + /*is_pma_record*/ false, + m_record_format); } break; } @@ -560,7 +604,10 @@ bool SocModeEBuffer::RealtimeParseFlush(SocRealtimeParseFlushData& stats, bool v if (verbose) { std::cerr << "Malformed ModeE record: totalTriggerCount " << (int)total_trigger_count << " <= lastTriggerCount " << (int)map_entry->second << "\n"; - PrintRecord(p_record_common_prefix, /*is_pma_record*/ false); + PrintRecord( + p_record_common_prefix, + /*is_pma_record*/ false, + 
m_record_format); } break; } @@ -678,11 +725,16 @@ void SocModeEBuffer::PrintRecords(const size_t num_records_to_print) const printf("No. PerfmonID Elaps_cyc DS SmpCt C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 \n"); printf("---- --------- --------- -- ----- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----\n"); } - else - { + else if (m_record_format == RecordFormatType::ModeE) + { printf("No. PerfmonID Elaps_cyc DS SmpCt Count0 Count1 Count2 Count3 TrgB TrgA \n"); printf("---- --------- --------- -- ----- ------- ------- ------- ------- -------- --------\n"); } + else if (m_record_format == RecordFormatType::ModeE_userdata) + { + printf("No. PerfmonID Count Dropped SD TM DATA0-3 DATA4-7 DATA8-11 DATA12-15 DATA16-19 DATA20-23 DATA24-27\n"); + printf("---- --------- ----- ------- ---- -- ------- ------- -------- --------- --------- --------- ---------\n"); + } uint32_t record_idx_lo = m_unread_head; uint32_t record_idx_hi = m_max_records; @@ -727,7 +779,7 @@ void SocModeEBuffer::PrintRecords(const size_t num_records_to_print) const p_mode_c_record->counter[11] ); } - else + else if (m_record_format == RecordFormatType::ModeE) { auto p_mode_e_record = (ModeERecordVolta*)p_record; uint64_t timestamp = p_mode_e_record->GetTimestamp(); @@ -749,6 +801,26 @@ void SocModeEBuffer::PrintRecords(const size_t num_records_to_print) const p_mode_e_record->zero3 ); } + else if (m_record_format == RecordFormatType::ModeE_userdata) + { + auto p_mode_e_userdata_record = (ModeERecordUserData*)(p_record); + sprintf(str_buffer, + "%4d %9x %5d %7d %4d %2d %7x %7x %8x %9x %9x %9x %9x\n", + ii, + p_mode_e_userdata_record->GetPerfmonId(), + p_mode_e_userdata_record->GetCount(), + p_mode_e_userdata_record->GetDropped(), + p_mode_e_userdata_record->GetSD(), + p_mode_e_userdata_record->GetTM(), + p_mode_e_userdata_record->data0_3, + p_mode_e_userdata_record->data4_7, + p_mode_e_userdata_record->data8_11, + p_mode_e_userdata_record->data12_15, + p_mode_e_userdata_record->data16_19, + 
p_mode_e_userdata_record->data20_23, + p_mode_e_userdata_record->data24_27 + ); + } record_strings.emplace_back(std::string(str_buffer)); if (record_strings.size() > num_records_to_print) @@ -799,11 +871,13 @@ void SocModeEBuffer::DumpBuffer() // Print the first record if (perfmon_id == PMA_PerfmonId) { - PrintRecord(p_record_common_prefix, true /* isPmaRecord */); + PrintRecord( + p_record_common_prefix, true /* isPmaRecord */, m_record_format); } else { - PrintRecord(p_record_common_prefix, false /* isPmaRecord */, m_record_format == RecordFormatType::ModeC); + PrintRecord( + p_record_common_prefix, false /* isPmaRecord */, m_record_format); } // Iterate through remaining records @@ -833,11 +907,13 @@ void SocModeEBuffer::DumpBuffer() auto perfmon_id = p_record_common_prefix->GetPerfmonId(); if (perfmon_id == PMA_PerfmonId) { - PrintRecord(p_record_common_prefix, true /* isPmaRecord */); + PrintRecord( + p_record_common_prefix, true /* isPmaRecord */, m_record_format); } else { - PrintRecord(p_record_common_prefix, false /* isPmaRecord */, m_record_format == RecordFormatType::ModeC); + PrintRecord( + p_record_common_prefix, false /* isPmaRecord */, m_record_format); } } } diff --git a/libnvsochwpm/test/soc_mode_e_buffer.h b/libnvsochwpm/test/soc_mode_e_buffer.h index 6f467f0..9335682 100644 --- a/libnvsochwpm/test/soc_mode_e_buffer.h +++ b/libnvsochwpm/test/soc_mode_e_buffer.h @@ -26,6 +26,7 @@ enum RecordFormatType { ModeC, ModeE, + ModeE_userdata, }; // realtime parse-flush @@ -43,7 +44,7 @@ class SocModeEBuffer void ResetParsedData(); void ParseRecords(); void PrintRecord(PmRecordSocCommonPrefix* record, bool is_pma_record, - bool is_mode_c = false); + enum RecordFormatType format_type); nv_soc_hwpm_api_table m_api_table; nv_soc_hwpm_session m_session; diff --git a/libnvsochwpm/test/t410_test.cpp b/libnvsochwpm/test/t410_test.cpp index fb26326..f7792f1 100644 --- a/libnvsochwpm/test/t410_test.cpp +++ b/libnvsochwpm/test/t410_test.cpp @@ -1257,12 +1257,18 @@ 
void T410Tests::SetupPmm(nv_soc_hwpm_session session, const PmmConfigurationPara NV_PERF_PMMSYS_SYS0_SIGVAL_PMA_TRIGGER, 0xFFFFFFFF); - // Enable GCM, local triggering. - uint32_t control_b = + // Enable GCM, local triggering, user data mode. + uint32_t control_d = 0; + uint32_t control_b = 0; + if (params.mode != PmmConfigurationParams::Mode::MODE_E_USERDATA) { + control_b = REG32_WR( 0, NV_PERF_PMMSYS_CONTROLB_COUNTING_MODE, NV_PERF_PMMSYS_CONTROLB_COUNTING_MODE_GENERAL); + } + printf("counting mode control_b: %x\n", control_b); + if (params.enable_local_triggering) { control_b |= REG32_WR( @@ -1274,7 +1280,20 @@ void T410Tests::SetupPmm(nv_soc_hwpm_session session, const PmmConfigurationPara NV_PERF_PMMSYS_CONTROLB_PMLOCALTRIGB_EN, NV_PERF_PMMSYS_CONTROLB_PMLOCALTRIGB_EN_ENABLE); } + if (params.mode == PmmConfigurationParams::Mode::MODE_E_USERDATA) + { + control_b |= REG32_WR( + 0, + NV_PERF_PMMSYS_CONTROLB_MODEE_USERDATA, + NV_PERF_PMMSYS_CONTROLB_MODEE_USERDATA_ENABLED); + + control_d |= REG32_WR( + 0, + NV_PERF_PMMSYS_CONTROLD_MODEE_USERDATA_WINDOW_MODE, + NV_PERF_PMMSYS_CONTROLD_MODEE_USERDATA_WINDOW_MODE_DISABLED); + } RegOpWrite32(session, PM_ADDR(PMMSYS, CONTROLB, perfmon_base), control_b, 0xFFFFFFFF); + RegOpWrite32(session, PM_ADDR(PMMSYS, CONTROLD, perfmon_base), control_d, 0xFFFFFFFF); // Set perfmon id const uint32_t soc_perfmon_prefix = 0x700; // TODO: Temporary identifier @@ -1358,6 +1377,8 @@ void T410Tests::SetupPmm(nv_soc_hwpm_session session, const PmmConfigurationPara RegOpWrite32(session, PM_ADDR(PMMSYS, CNTR1_INC, perfmon_base), counter_inc, 0xFFFFFFFF); RegOpWrite32(session, PM_ADDR(PMMSYS, CNTR2_INC, perfmon_base), counter_inc, 0xFFFFFFFF); RegOpWrite32(session, PM_ADDR(PMMSYS, CNTR3_INC, perfmon_base), counter_inc, 0xFFFFFFFF); + } else if (params.mode == PmmConfigurationParams::Mode::MODE_E_USERDATA) { + mode = NV_PERF_PMMSYS_CONTROL_MODE_E; } // Finally, program CONTROL register @@ -1376,7 +1397,7 @@ void 
T410Tests::SetupPmm(nv_soc_hwpm_session session, const PmmConfigurationPara RegOpWrite32(session, PM_ADDR(PMMSYS, CONTROL, perfmon_base), pmm_control, 0xFFFFFFFF); } -void T410Tests::SetupWatchbusPma(nv_soc_hwpm_session session, const PmmConfigurationParams &params) +void T410Tests::SetupWatchbusPma(nv_soc_hwpm_session session, const PmmConfigurationParams& params) { const uint64_t perfmon_base = params.perfmon_base; @@ -1468,7 +1489,7 @@ TEST_F(T410Tests, SessionRegOpsNvtherm) } } -void T410Tests::SetupWatchbusNvtherm(nv_soc_hwpm_session session, const PmmConfigurationParams &params) +void T410Tests::SetupWatchbusNvtherm(nv_soc_hwpm_session session, const PmmConfigurationParams& params) { const uint64_t perfmon_base = params.perfmon_base; @@ -1508,6 +1529,88 @@ void T410Tests::SetupWatchbusNvtherm(nv_soc_hwpm_session session, const PmmConfi RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG1_SEL, perfmon_base), 0x0, 0xFFFFFFFF); RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x0, 0xFFFFFFFF); RegOpWrite32(session, PM_ADDR(PMMSYS, SAMPLE_SEL, perfmon_base), 0x0, 0xFFFFFFFF); + } else if (params.mode == PmmConfigurationParams::Mode::MODE_E_USERDATA) { + // PRIVATE SIGNAL (see mode B). + // ACTUAL SIGNAL. 
+ // SIGNAL(name/width/domain/instancetype):--/nvtherm0.user_data_group/11/nvtherm0/tjv/ + // ROUTE(index/registers):--/0/3/ + // DESTINATION(lsb_bitposition/watchbus_readback_index/watchbus_readback_lsb):--/22/0/0/ + // REGWRITE(field/addr/val/mask/chipletoffset/instanceoffset/instancecount/instancetype):--/NV_HWPM_GLOBAL_0_THERM_PM_CTRL_ENABLE/279275970299752/2147483648/2147483648/0/0/1/none/ + // REGWRITE(field/addr/val/mask/chipletoffset/instanceoffset/instancecount/instancetype):--/NV_HWPM_GLOBAL_0_THERM_PM_SELECT_FLEX_A/279275970299752/0/63/0/0/1/none/ + // REGWRITE(field/addr/val/mask/chipletoffset/instanceoffset/instancecount/instancetype):--/NV_HWPM_GLOBAL_0_THERM_PM_SELECT_FLEX_B/279275970299752/0/16128/0/0/1/none/ + const char *use_actual_signal = getenv("USE_ACTUAL_SIGNAL"); + printf("USE_ACTUAL_SIGNAL: %s\n", (use_actual_signal ? use_actual_signal : "0")); + bool use_static_signal = (!use_actual_signal || strcmp(use_actual_signal, "0") == 0); + + // Need to use the _SC (self clear data) according to Nathan/Alex + uint32_t mux_sel = (use_static_signal) ? 0x5 : 0x1; + const char *use_guide = getenv("USE_GUIDE"); + printf("USE_GUIDE: %s\n", (use_guide ? use_guide : "0")); + if (use_guide && strcmp(use_guide, "1") == 0) + mux_sel = (use_static_signal) ? 0x4 : 0x0; + const uint32_t channel_perfmux_sel = 0 + | REG32_WR( + 0, + NV_HWPM_GLOBAL_0_THERM_PM_SELECT_FLEX_A, + mux_sel) + | REG32_WR( + 0, + NV_HWPM_GLOBAL_0_THERM_PM_CTRL_ENABLE, + NV_HWPM_GLOBAL_0_THERM_PM_CTRL_ENABLE_ENABLE); + RegOpWrite32(session, NV_THERM_PERFMUX, channel_perfmux_sel, 0xFFFFFFFF); + + if (use_static_signal) { + printf("Setup userdata mode, capture '5' from 5555.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG0_SEL, perfmon_base), 0x19181716, 0xFFFFFFFF); // '5' from 5555 + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG1_SEL, perfmon_base), 0x1D1C1B1A, 0xFFFFFFFF); // '5' from 5555 + + // Force start and valid bit to true. 
+ printf("Force start and valid bit to true.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x00010001, 0xFFFFFFFF); + } else { + const char *capture_valid = getenv("CAPTURE_VALID"); + bool capture_valid_bit = (capture_valid && strcmp(capture_valid, "1") == 0); + if (!capture_valid_bit) { + printf("Capture nvtherm debug data [7:0]\n"); + // From Nathan: + // userdata_data_d (user_data_group[7:0]) //|> w + // ,.userdata_flush_d (user_data_group[10]) //|> w + // ,.userdata_start_d (user_data_group[8]) //|> w + // ,.userdata_valid_d (user_data_group[9]) //|> w + // ); + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG0_SEL, perfmon_base), 0x19181716, 0xFFFFFFFF); // user data signal[3:0] + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG1_SEL, perfmon_base), 0x1D1C1B1A, 0xFFFFFFFF); // user data signal[7:4] + } else { + printf("Capture nvtherm debug data [10:8]\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG0_SEL, perfmon_base), 0x201F1E, 0xFFFFFFFF); // user data signal[10:8] (start/valid/flush) + } + + const char *use_actual_start_valid = getenv("USE_ACTUAL_START_VALID"); + printf("USE_ACTUAL_START_VALID: %s\n", (use_actual_start_valid ? use_actual_start_valid : "0")); + if (!use_actual_start_valid || strcmp(use_actual_start_valid, "0") == 0) { + // Force start and valid bit to true. + printf("Force start and valid bit to true.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x00010001, 0xFFFFFFFF); + } else { + // Use actual start and valid bit. 
+ // Userdata comes in over watchbus -- + // userdata[7:4] = trig1_sel[3:0] + // userdata[3:0] = trig0_sel[3:0] + // userdata_start = event_sel[2] + // userdata_flush = event_sel[1] + // userdata_vld = event_sel[0] + + // From Nathan: + // userdata_data_d (user_data_group[7:0]) //|> w + // ,.userdata_flush_d (user_data_group[10]) //|> w + // ,.userdata_start_d (user_data_group[8]) //|> w + // ,.userdata_valid_d (user_data_group[9]) //|> w + // ); + printf("Use actual start and valid bit.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x001E201F, 0xFFFFFFFF); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_OP, perfmon_base), 0xFFFF, 0xFFFFFFFF); + } + } } } @@ -1565,7 +1668,7 @@ TEST_F(T410Tests, SessionRegOpsCsnMbn) } } -void T410Tests::SetupWatchbusCsnMbn(nv_soc_hwpm_session session, const PmmConfigurationParams &params) +void T410Tests::SetupWatchbusCsnMbn(nv_soc_hwpm_session session, const PmmConfigurationParams& params) { const uint64_t perfmon_base = params.perfmon_base; @@ -1663,7 +1766,7 @@ TEST_F(T410Tests, SessionRegOpsIpmu) } } -void T410Tests::SetupWatchbusIpmu(nv_soc_hwpm_session session, const PmmConfigurationParams &params) +void T410Tests::SetupWatchbusIpmu(nv_soc_hwpm_session session, const PmmConfigurationParams& params) { const uint64_t perfmon_base = params.perfmon_base; @@ -1673,12 +1776,12 @@ void T410Tests::SetupWatchbusIpmu(nv_soc_hwpm_session session, const PmmConfigur return; } else if (params.mode == PmmConfigurationParams::Mode::MODE_B) { // Core-0 IPMU perfmux. 
+ // Source: //hw/nvmobile_tb50x/ip/perf/hwpm_soc/2.2/dvlib/specs/src_tb500/pm_programming_guide.txt // SIGNAL(name/width/domain/instancetype):--/ucfcsnh0p0.ipmu02pm_static_pattern_a4a4_16/16/ucfcsnh0p0/tjv/ // ROUTE(index/registers):--/0/2/ // DESTINATION(lsb_bitposition/watchbus_readback_index/watchbus_readback_lsb):--/22/0/0/ // REGWRITE(field/addr/val/mask/chipletoffset/instanceoffset/instancecount/instancetype):--/NV_HWPM_GLOBAL_CORE0_IPMU_PERFMUX_CONTROL_PM_EN/4718640/256/256/0/0/1/none/ // REGWRITE(field/addr/val/mask/chipletoffset/instanceoffset/instancecount/instancetype):--/NV_HWPM_GLOBAL_CORE0_IPMU_PERFMUX_CONTROL_PM_SEL/4718640/1/255/0/0/1/none/ - // Source: //hw/nvmobile_tb50x/ip/perf/hwpm_soc/2.2/dvlib/specs/src_tb500/pm_programming_guide.txt const uint32_t mux_sel = 0x1; const uint32_t channel_perfmux_sel = 0 | REG32_WR( @@ -1703,6 +1806,74 @@ void T410Tests::SetupWatchbusIpmu(nv_soc_hwpm_session session, const PmmConfigur RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG1_SEL, perfmon_base), 0x0, 0xFFFFFFFF); RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x0, 0xFFFFFFFF); RegOpWrite32(session, PM_ADDR(PMMSYS, SAMPLE_SEL, perfmon_base), 0x0, 0xFFFFFFFF); + } else if (params.mode == PmmConfigurationParams::Mode::MODE_E_USERDATA) { + // PRIVATE SIGNAL (see mode B). + // ACTUAL SIGNAL. 
+ // SIGNAL(name/width/domain/instancetype):--/ucfcsnh0p0.ipmu02pm_debug_event/16/ucfcsnh0p0/tjv/ + // ROUTE(index/registers):--/0/2/ + // DESTINATION(lsb_bitposition/watchbus_readback_index/watchbus_readback_lsb):--/22/0/0/ + // REGWRITE(field/addr/val/mask/chipletoffset/instanceoffset/instancecount/instancetype):--/NV_HWPM_GLOBAL_CORE0_IPMU_PERFMUX_CONTROL_PM_EN/4718640/256/256/0/0/1/none/ + // REGWRITE(field/addr/val/mask/chipletoffset/instanceoffset/instancecount/instancetype):--/NV_HWPM_GLOBAL_CORE0_IPMU_PERFMUX_CONTROL_PM_SEL/4718640/2/255/0/0/1/none/ + const char *use_actual_signal = getenv("USE_ACTUAL_SIGNAL"); + printf("USE_ACTUAL_SIGNAL: %s\n", (use_actual_signal ? use_actual_signal : "0")); + bool use_static_signal = (!use_actual_signal || strcmp(use_actual_signal, "0") == 0); + const uint32_t mux_sel = (use_static_signal) ? 0x1 : 0x2; + const uint32_t channel_perfmux_sel = 0 + | REG32_WR( + 0, + NV_HWPM_CORE_0_IPMU_MUX_SEL, + mux_sel) + | REG32_WR( + 0, + NV_HWPM_CORE_0_IPMU_ENABLE, + NV_HWPM_CORE_0_IPMU_ENABLE_ENABLE); + RegOpWrite32(session, NV_HWPM_CORE_0_IPMU_PERFMUX, channel_perfmux_sel, 0xFFFFFFFF); + + if (use_static_signal) { + printf("4/29 Setup userdata mode, capture 'a' from a4a4.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG0_SEL, perfmon_base), 0x19181716, 0xFFFFFFFF); // '4' from a4a4 + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG1_SEL, perfmon_base), 0x1D1C1B1A, 0xFFFFFFFF); // 'a' from a4a4 + + // Force start and valid bit to true. 
+ printf("Force start and valid bit to true.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x00010001, 0xFFFFFFFF); + } else { + const char *capture_valid = getenv("CAPTURE_VALID"); + bool capture_valid_bit = (capture_valid && strcmp(capture_valid, "1") == 0); + if (!capture_valid_bit) { + printf("Capture ipmu debug data [10:3]\n"); + // Data layout: https://p4hw-swarm.nvidia.com/files/hw/doc/soc/tb50x/sysarch/iPMU/iPMU%20IAS.docx#view + // valid: bit 0 + // start: bit 1 + // flush: bit 2 + // data: bit 3 to 10 + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG0_SEL, perfmon_base), 0x1C1B1A19, 0xFFFFFFFF); // user data signal[3:0] + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG1_SEL, perfmon_base), 0x201F1E1D, 0xFFFFFFFF); // user data signal[7:4] + } else { + printf("Capture ipmu debug data [7:0]\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG0_SEL, perfmon_base), 0x19181716, 0xFFFFFFFF); // user data signal[3:0] + RegOpWrite32(session, PM_ADDR(PMMSYS, TRIG1_SEL, perfmon_base), 0x1D1C1B1A, 0xFFFFFFFF); // user data signal[7:4] + } + + const char *use_actual_start_valid = getenv("USE_ACTUAL_START_VALID"); + printf("USE_ACTUAL_START_VALID: %s\n", (use_actual_start_valid ? use_actual_start_valid : "0")); + if (!use_actual_start_valid || strcmp(use_actual_start_valid, "0") == 0) { + // Force start and valid bit to true. + printf("Force start and valid bit to true.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x00010001, 0xFFFFFFFF); + } else { + // Use actual start and valid bit. 
+ // Userdata comes in over watchbus -- + // userdata[7:4] = trig1_sel[3:0] + // userdata[3:0] = trig0_sel[3:0] + // userdata_start = event_sel[2] + // userdata_flush = event_sel[1] + // userdata_vld = event_sel[0] + printf("Use actual start and valid bit.\n"); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_SEL, perfmon_base), 0x00171816, 0xFFFFFFFF); + RegOpWrite32(session, PM_ADDR(PMMSYS, EVENT_OP, perfmon_base), 0xFFFF, 0xFFFFFFFF); + } + } } } @@ -1721,7 +1892,7 @@ void T410Tests::TeardownPma(nv_soc_hwpm_session session) } void T410Tests::TeardownPmm( - nv_soc_hwpm_session session, const PmmConfigurationParams &params) + nv_soc_hwpm_session session, const PmmConfigurationParams& params) { const uint64_t perfmon_base = params.perfmon_base; @@ -1737,8 +1908,17 @@ void T410Tests::TeardownPerfmux(nv_soc_hwpm_session session) RegOpWrite32(session, NV_PERF_PMASYS_PERFMUX_CONFIG_SECURE, 0, 0xFFFFFFFF); } -void T410Tests::IssuePmaTrigger(nv_soc_hwpm_session session) +void T410Tests::IssuePmaTrigger( + nv_soc_hwpm_session session, + bool halt_before_trigger, + bool halt_after_trigger, + uint32_t delay_after_trigger) { + if (halt_before_trigger) { + printf("Halting before trigger. Press enter to continue\n"); + getchar(); + } + // This will issue PMA trigger to the perfmon. // The perfmon then will snapshot the counter value into shadow regiters uint32_t pma_global_trigger = 0 @@ -1747,6 +1927,15 @@ void T410Tests::IssuePmaTrigger(nv_soc_hwpm_session session) NV_PERF_PMASYS_COMMAND_SLICE_TRIGGER_CONTROL_GLOBAL_MANUAL_START, NV_PERF_PMASYS_COMMAND_SLICE_TRIGGER_CONTROL_GLOBAL_MANUAL_START_PULSE); RegOpWrite32(session, NV_PERF_PMASYS_COMMAND_SLICE_TRIGGER_CONTROL(0), pma_global_trigger, 0xFFFFFFFF); + + if (halt_after_trigger) { + printf("Halting after trigger. 
Press enter to continue\n"); + getchar(); + } + + if (delay_after_trigger) { + usleep(delay_after_trigger); + } } void T410Tests::HarvestCounters( @@ -1957,9 +2146,26 @@ void T410Tests::ModeETest(nv_soc_hwpm_resource resource) nv_soc_hwpm_session session; nv_soc_hwpm_session_attribute session_attr; uint32_t i, num_mem_bytes, num_triggers, num_perfmons; + const char *var_halt_before_trigger, *var_halt_after_trigger, + *var_delay_after_trigger; + bool halt_before_trigger, halt_after_trigger; + uint32_t delay_after_trigger; num_perfmons = 1; + halt_before_trigger = false; + halt_after_trigger = false; + delay_after_trigger = 0; + var_halt_before_trigger = getenv("HALT_BEFORE_TRIGGER"); + if (var_halt_before_trigger && strcmp(var_halt_before_trigger, "1") == 0) + halt_before_trigger = true; + var_halt_after_trigger = getenv("HALT_AFTER_TRIGGER"); + if (var_halt_after_trigger && strcmp(var_halt_after_trigger, "1") == 0) + halt_after_trigger = true; + var_delay_after_trigger = getenv("DELAY_AFTER_TRIGGER"); + if (var_delay_after_trigger) + delay_after_trigger = atoi(var_delay_after_trigger); + GetDevices(); for (i = 0; i < t410_dev_count; i++) { @@ -2048,10 +2254,21 @@ void T410Tests::ModeETest(nv_soc_hwpm_resource resource) printf("Watchbus setup done\n"); - num_triggers = 5; + const char *halt_to_override = getenv("HALT_TO_OVERRIDE"); + if (halt_to_override && strcmp(halt_to_override, "1") == 0) { + printf("Halt to override programming before trigger\n"); + getchar(); + } + + const char *var_num_triggers = getenv("NUM_TRIGGERS"); + num_triggers = (var_num_triggers) ? 
atoi(var_num_triggers) : 5; usleep(1000000); // 1 second for (i = 0; i < num_triggers; i++) { - IssuePmaTrigger(session); + IssuePmaTrigger( + session, + halt_before_trigger, + halt_after_trigger, + delay_after_trigger); } usleep(100000); // 100 milisecond @@ -2117,3 +2334,150 @@ TEST_F(T410Tests, SessionStreamoutTestModeEBasicStreamingIpmu) { ModeETest(NV_SOC_HWPM_RESOURCE_CPU); } + +void T410Tests::ModeETestUserData(nv_soc_hwpm_resource resource) +{ + nv_soc_hwpm_device dev; + nv_soc_hwpm_device_attribute dev_attr; + tegra_soc_hwpm_platform platform; + nv_soc_hwpm_session session; + nv_soc_hwpm_session_attribute session_attr; + uint32_t i, num_mem_bytes; + + printf("ModeETestUserData\n"); + + GetDevices(); + + for (i = 0; i < t410_dev_count; i++) { + printf("Device %d:\n", i); + dev = t410_dev[i]; + + dev_attr = NV_SOC_HWPM_DEVICE_ATTRIBUTE_SOC_PLATFORM; + ASSERT_EQ(0, + api_table.nv_soc_hwpm_device_get_info_fn( + dev, dev_attr, sizeof(platform), &platform)); + + // Allocate session. + ASSERT_EQ(0, api_table.nv_soc_hwpm_session_alloc_fn(dev, &session)); + + // Reserve all resources. + ASSERT_EQ(0, api_table.nv_soc_hwpm_session_reserve_all_resources_fn(session)); + + // Allocate PMA buffers. + nv_soc_hwpm_pma_buffer_params record_buffer_params = {}; + record_buffer_params.size = + ((platform == TEGRA_SOC_HWPM_PLATFORM_SILICON) ? 100 : 32) * 1024 * 1024; + ASSERT_EQ(0, + api_table.nv_soc_hwpm_session_alloc_pma_fn( + session, &record_buffer_params)); + + session_attr = NV_SOC_HWPM_SESSION_ATTRIBUTE_PMA_RECORD_BUFFER_PMA_VA; + uint64_t pma_record_buffer_pma_va; + ASSERT_EQ(0, + api_table.nv_soc_hwpm_session_get_info_fn( + session, + session_attr, + sizeof(pma_record_buffer_pma_va), + &pma_record_buffer_pma_va)); + ASSERT_NE(0U, pma_record_buffer_pma_va); + + // Start session. 
+ ASSERT_EQ(0, api_table.nv_soc_hwpm_session_start_fn(session)); + + printf("Session started\n"); + + // Flush leftover records at the beginning of each subtest + nv_soc_hwpm_pma_channel_state_params set_get_state_param = {}; + set_get_state_param.in_mem_bump = 0; + set_get_state_param.in_stream_mem_bytes = 1; + set_get_state_param.in_check_overflow = 1; + set_get_state_param.in_read_mem_head = 1; + ASSERT_EQ(0, + api_table.nv_soc_hwpm_session_set_get_pma_state_fn( + session, &set_get_state_param)); + SocModeEBuffer soc_mode_e_buffer(api_table, session); + soc_mode_e_buffer.Initialize(); + soc_mode_e_buffer.SetRecordFormat(RecordFormatType::ModeE_userdata); + num_mem_bytes = soc_mode_e_buffer.GetMemBytes(); + soc_mode_e_buffer.FlushRecordsInBuffer(num_mem_bytes); + + PmaConfigurationParams pma_params; + pma_params.enable_streaming = true; + SetupPma(session, pma_params); + + printf("PMA setup done\n"); + + PmmConfigurationParams pmm_params; + InitPmmParams(resource, pmm_params); + pmm_params.mode = PmmConfigurationParams::Mode::MODE_E_USERDATA; + SetupPmm(session, pmm_params); + + printf("PMM setup done\n"); + switch (resource) { + case NV_SOC_HWPM_RESOURCE_NVTHERM: + SetupWatchbusNvtherm(session, pmm_params); + break; + case NV_SOC_HWPM_RESOURCE_CPU: + SetupWatchbusIpmu(session, pmm_params); + break; + default: + ASSERT_TRUE(false); + break; + } + + printf("Watchbus setup done\n"); + + const char *halt_to_override = getenv("HALT_TO_OVERRIDE"); + if (halt_to_override && strcmp(halt_to_override, "1") == 0) { + printf("Halt to override programming before trigger\n"); + getchar(); + } + + usleep(100000); // 100 milisecond + + TeardownPerfmux(session); + printf("Perfmux teardown done\n"); + + TeardownPmm(session, pmm_params); + printf("PMM teardown done\n"); + + TeardownPma(session); + printf("PMA teardown done\n"); + + printf("num_valid_records: %u\n", soc_mode_e_buffer.GetNumValidRecords()); + printf("num_unique_perfmon_id: %u\n", 
soc_mode_e_buffer.GetNumUniquePerfmonID()); + + // Stream & verify membytes + set_get_state_param.in_mem_bump = 0; + set_get_state_param.in_stream_mem_bytes = 1; + set_get_state_param.in_check_overflow = 1; + set_get_state_param.in_read_mem_head = 1; + ASSERT_EQ(0, + api_table.nv_soc_hwpm_session_set_get_pma_state_fn( + session, &set_get_state_param)); + num_mem_bytes = soc_mode_e_buffer.GetMemBytes(); + printf("num_mem_bytes: %u\n", num_mem_bytes); + EXPECT_GT(num_mem_bytes, 0U); + EXPECT_EQ(num_mem_bytes, + soc_mode_e_buffer.GetNumValidRecords() * sizeof(ModeERecordRaw)); + + soc_mode_e_buffer.PrintRecords(100); + + printf("================ BEGIN BUFFER DUMP ================\n"); + soc_mode_e_buffer.DumpBuffer(); + printf("================ END BUFFER DUMP ================\n"); + + // Free session. + ASSERT_EQ(0, api_table.nv_soc_hwpm_session_free_fn(session)); + } +} + +TEST_F(T410Tests, SessionStreamoutTestModeEUserDataNvtherm) +{ + ModeETestUserData(NV_SOC_HWPM_RESOURCE_NVTHERM); +} + +TEST_F(T410Tests, SessionStreamoutTestModeEUserDataIpmu) +{ + ModeETestUserData(NV_SOC_HWPM_RESOURCE_CPU); +} \ No newline at end of file diff --git a/libnvsochwpm/test/t410_test.h b/libnvsochwpm/test/t410_test.h index af716c8..20b34c1 100644 --- a/libnvsochwpm/test/t410_test.h +++ b/libnvsochwpm/test/t410_test.h @@ -48,7 +48,8 @@ protected: enum Mode { MODE_B, MODE_C, - MODE_E + MODE_E, + MODE_E_USERDATA }; PmmConfigurationParams() @@ -97,7 +98,11 @@ protected: void TeardownPma(nv_soc_hwpm_session session); void TeardownPmm(nv_soc_hwpm_session session, const PmmConfigurationParams &params); void TeardownPerfmux(nv_soc_hwpm_session session); - void IssuePmaTrigger(nv_soc_hwpm_session session); + void IssuePmaTrigger( + nv_soc_hwpm_session session, + bool halt_before_trigger = false, + bool halt_after_trigger = false, + uint32_t delay_after_trigger = 0); void HarvestCounters( nv_soc_hwpm_session session, const PmmConfigurationParams &params, @@ -106,9 +111,10 @@ protected: void
InitPmmParams(nv_soc_hwpm_resource resource, PmmConfigurationParams &params); void ModeBTest(nv_soc_hwpm_resource resource); void ModeETest(nv_soc_hwpm_resource resource); + void ModeETestUserData(nv_soc_hwpm_resource resource); nv_soc_hwpm_device t410_dev[T410_MAX_SOCKETS]; uint32_t t410_dev_count; }; -#endif // T410_TEST_H +#endif // T410_TEST_H \ No newline at end of file