perf arm_spe: Separate setting of memory levels for loads and stores

For a load hit, the lowest-level cache reflects the latency of fetching
a data. Otherwise, the highest-level cache involved in refilling
indicates the overhead caused by a load.

Store operations remain unchanged to keep the descending order when
iterating through cache levels.

Split into two functions: one is for setting memory levels for loads and
another for stores.

Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Leo Yan <leo.yan@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ali Saidi <alisaidi@amazon.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Leo Yan
2025-09-12 16:42:15 +01:00
committed by Arnaldo Carvalho de Melo
parent 98f993ae6f
commit 14d4ecb15e
+43 -2
View File
@@ -45,6 +45,9 @@
#define arm_spe_is_cache_level(type, lvl) \
((type) & ARM_SPE_CACHE_EVENT(lvl))
#define arm_spe_is_cache_hit(type, lvl) \
(((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS)
#define arm_spe_is_cache_miss(type, lvl) \
((type) & ARM_SPE_##lvl##_MISS)
@@ -819,9 +822,38 @@ static const struct data_source_handle data_source_handles[] = {
DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
/*
* To find a cache hit, search in ascending order from the lower level
* caches to the higher level caches. This reflects the best scenario
* for a cache hit.
*/
if (arm_spe_is_cache_hit(record->type, L1D)) {
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
} else if (arm_spe_is_cache_hit(record->type, LLC)) {
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
/*
* To find a cache miss, search in descending order from the higher
* level cache to the lower level cache. This represents the worst
* scenario for a cache miss.
*/
} else if (arm_spe_is_cache_miss(record->type, LLC)) {
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
} else if (arm_spe_is_cache_miss(record->type, L1D)) {
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
}
}
static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
/* Record the greatest level info for a store operation. */
if (arm_spe_is_cache_level(record->type, LLC)) {
data_src->mem_lvl = PERF_MEM_LVL_L3;
data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
@@ -833,6 +865,15 @@ static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
}
}
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
if (data_src->mem_op == PERF_MEM_OP_LOAD)
arm_spe__synth_ld_memory_level(record, data_src);
if (data_src->mem_op == PERF_MEM_OP_STORE)
arm_spe__synth_st_memory_level(record, data_src);
if (!data_src->mem_lvl) {
data_src->mem_lvl = PERF_MEM_LVL_NA;