Files oprofile-0.9.3/ID and oprofile-0.9.3-arch/ID differ diff -X oprofile-exclude -urpN oprofile-0.9.3/events/Makefile.am oprofile-0.9.3-arch/events/Makefile.am --- oprofile-0.9.3/events/Makefile.am 2007-07-16 20:22:17.000000000 +0200 +++ oprofile-0.9.3-arch/events/Makefile.am 2008-07-14 21:53:41.000000000 +0200 @@ -13,6 +13,7 @@ event_files = \ i386/ppro/events i386/ppro/unit_masks \ i386/p6_mobile/events i386/p6_mobile/unit_masks \ i386/core/events i386/core/unit_masks \ + i386/arch_perfmon/events i386/arch_perfmon/unit_masks \ ia64/ia64/events ia64/ia64/unit_masks \ ia64/itanium2/events ia64/itanium2/unit_masks \ ia64/itanium/events ia64/itanium/unit_masks \ diff -X oprofile-exclude -urpN oprofile-0.9.3/events/i386/arch_perfmon/events oprofile-0.9.3-arch/events/i386/arch_perfmon/events --- oprofile-0.9.3/events/i386/arch_perfmon/events 1970-01-01 01:00:00.000000000 +0100 +++ oprofile-0.9.3-arch/events/i386/arch_perfmon/events 2008-08-20 14:20:33.000000000 +0200 @@ -0,0 +1,10 @@ +# +# Intel Architectural events as of arch perfmon v2 +# +event:0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED : Clock cycles when not halted +event:0x3c counters:cpuid um:one minimum:6000 filter:2 name:UNHALTED_REFERENCE_CYCLES : Unhalted reference cycles +event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 name:INST_RETIRED.ANY_P : number of instructions retired +event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES : L2 cache demand requests from this core that missed the L2 +event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : L2 cache demand requests from this core +event:0xc4 counters:cpuid um:zero minimum:500 filter:6 name:BR_INST_RETIRED : number of branch instructions retired +event:0xc5 counters:cpuid um:zero minimum:500 filter:7 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise) diff -X oprofile-exclude -urpN oprofile-0.9.3/events/i386/arch_perfmon/unit_masks oprofile-0.9.3-arch/events/i386/arch_perfmon/unit_masks --- oprofile-0.9.3/events/i386/arch_perfmon/unit_masks 1970-01-01 01:00:00.000000000 +0100 +++ oprofile-0.9.3-arch/events/i386/arch_perfmon/unit_masks 2008-08-20 14:03:22.000000000 +0200 @@ -0,0 +1,11 @@ +# Intel architectural perfmon unit masks +# +name:zero type:mandatory default:0x0 + 0x0 No unit mask +name:one type:mandatory default:0x1 + 0x1 No unit mask +name:x41 type:mandatory default:0x41 + 0x41 No unit mask +name:x4f type:mandatory default:0x4f + 0x4f No unit mask +# \ No newline at end of file diff -X oprofile-exclude -urpN oprofile-0.9.3/libop/op_cpu_type.c oprofile-0.9.3-arch/libop/op_cpu_type.c --- oprofile-0.9.3/libop/op_cpu_type.c 2007-07-16 20:22:17.000000000 +0200 +++ oprofile-0.9.3-arch/libop/op_cpu_type.c 2008-08-20 16:30:11.000000000 +0200 @@ -14,6 +14,7 @@ #include #include "op_cpu_type.h" +#include "op_hw_specific.h" struct cpu_descr { char const * pretty; @@ -72,6 +73,7 @@ static struct cpu_descr const cpu_descrs { "ARM MPCore", "arm/mpcore", CPU_ARM_MPCORE, 2 }, { "ARM V6 PMU", "arm/armv6", CPU_ARM_V6, 3 }, { "ppc64 POWER5++", "ppc64/power5++", CPU_PPC64_POWER5pp, 6 }, + { "Intel Architectural Perfmon", "i386/arch_perfmon", CPU_ARCH_PERFMON, 0}, }; static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr); @@ -149,8 +151,14 @@ char const * op_get_cpu_name(op_cpu cpu_ int op_get_nr_counters(op_cpu cpu_type) { + int cnt; + if (cpu_type <= CPU_NO_GOOD || cpu_type >= MAX_CPU_TYPE) return 0; + cnt = arch_num_counters(cpu_type); + if (cnt >= 0) + return cnt; + return cpu_descrs[cpu_type].nr_counters; } diff -X oprofile-exclude -urpN oprofile-0.9.3/libop/op_cpu_type.h oprofile-0.9.3-arch/libop/op_cpu_type.h --- oprofile-0.9.3/libop/op_cpu_type.h 2007-07-16 20:22:17.000000000 +0200 +++ oprofile-0.9.3-arch/libop/op_cpu_type.h 2008-06-29 00:30:54.000000000 +0200 @@ -70,6 +70,7 @@ typedef enum { CPU_ARM_MPCORE, /**< ARM MPCore */ CPU_ARM_V6, /**< ARM V6 */ CPU_PPC64_POWER5pp, /**< ppc64 Power5++ family */ + CPU_ARCH_PERFMON, MAX_CPU_TYPE } op_cpu; diff -X oprofile-exclude -urpN oprofile-0.9.3/libop/op_events.c oprofile-0.9.3-arch/libop/op_events.c --- oprofile-0.9.3/libop/op_events.c 2008-08-20 14:01:24.000000000 +0200 +++ oprofile-0.9.3-arch/libop/op_events.c 2008-08-20 16:47:44.000000000 +0200 @@ -16,6 +16,7 @@ #include "op_fileio.h" #include "op_string.h" #include "op_cpufreq.h" +#include "op_hw_specific.h" #include #include @@ -27,6 +28,8 @@ static LIST_HEAD(um_list); static char const * filename; static unsigned int line_nr; +static void delete_event(struct op_event * event); + static void parse_error(char const * context) { fprintf(stderr, "oprofile: parse error in %s, line %u\n", @@ -326,7 +329,8 @@ static void read_events(char const * fil seen_um = 0; seen_minimum = 0; event = new_event(); - + event->filter = -1; + c = line; while (next_token(&c, &name, &value)) { if (strcmp(name, "name") == 0) { @@ -346,7 +350,10 @@ static void read_events(char const * fil if (seen_counters) parse_error("duplicate counters: tag"); seen_counters = 1; - event->counter_mask = parse_counter_mask(value); + if (!strcmp(value, "cpuid")) + event->counter_mask = arch_get_counter_mask(); + else + event->counter_mask = parse_counter_mask(value); free(value); } else if (strcmp(name, "um") == 0) { if (seen_um) @@ -363,6 +370,9 @@ static void read_events(char const * fil free(value); } else if (strcmp(name, "desc") == 0) { event->desc = value; + } else if (strcmp(name, "filter") == 0) { + event->filter = parse_int(value); + free(value); } else { parse_error("unknown tag"); } @@ -418,6 +428,18 @@ static void check_unit_mask(struct op_un } } +static void arch_filter_events(op_cpu cpu_type) +{ + struct list_head * pos, * pos2; + unsigned filter = arch_get_filter(cpu_type); + if (!filter) + return; + list_for_each_safe (pos, pos2, &events_list) { + struct op_event * event = list_entry(pos, struct op_event, event_next); + if (event->filter >= 0 && ((1U << event->filter) & filter)) + delete_event(event); + } +} static void load_events(op_cpu cpu_type) { @@ -454,6 +476,8 @@ static void load_events(op_cpu cpu_type) read_unit_masks(um_file); read_events(event_file); + arch_filter_events(cpu_type); + /* sanity check: all unit mask must be used */ list_for_each(pos, &um_list) { struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next); @@ -466,10 +490,10 @@ static void load_events(op_cpu cpu_type) free(event_dir); } - struct list_head * op_events(op_cpu cpu_type) { load_events(cpu_type); + arch_filter_events(cpu_type); return &events_list; } @@ -530,6 +554,23 @@ static struct op_event * find_event(u32 return NULL; } +static struct op_event * find_event_um(u32 nr, u32 um) +{ + struct list_head * pos; + int i; + + list_for_each(pos, &events_list) { + struct op_event * event = list_entry(pos, struct op_event, event_next); + if (event->val == nr) { + for (i = 0; i < event->unit->num; i++) { + if (event->unit->um[i].value == um) + return event; + } + } + } + + return NULL; +} static FILE * open_event_mapping_file(char const * cpu_name) { @@ -682,13 +723,13 @@ struct op_event * find_event_by_name(cha } -struct op_event * op_find_event(op_cpu cpu_type, u32 nr) +struct op_event * op_find_event(op_cpu cpu_type, u32 nr, u32 um) { struct op_event * event; load_events(cpu_type); - event = find_event(nr); + event = find_event_um(nr, um); return event; } @@ -696,38 +737,44 @@ struct op_event * op_find_event(op_cpu c int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type) { - int ret = OP_OK_EVENT; + int ret = OP_INVALID_EVENT; struct op_event * event; size_t i; u32 ctr_mask = 1 << ctr; + struct list_head * pos; load_events(cpu_type); - event = find_event(nr); + list_for_each(pos, &events_list) { + struct op_event * event = list_entry(pos, struct op_event, event_next); + if (event->val != nr) + continue; - if (!event) { - ret |= OP_INVALID_EVENT; - return ret; - } + ret = OP_OK_EVENT; - if ((event->counter_mask & ctr_mask) == 0) - ret |= OP_INVALID_COUNTER; + if ((event->counter_mask & ctr_mask) == 0) + ret |= OP_INVALID_COUNTER; - if (event->unit->unit_type_mask == utm_bitmask) { - for (i = 0; i < event->unit->num; ++i) - um &= ~(event->unit->um[i].value); - - if (um) - ret |= OP_INVALID_UM; + if (event->unit->unit_type_mask == utm_bitmask) { + for (i = 0; i < event->unit->num; ++i) + um &= ~(event->unit->um[i].value); + + if (um) + ret |= OP_INVALID_UM; + + } else { + for (i = 0; i < event->unit->num; ++i) { + if (event->unit->um[i].value == um) + break; + } + + if (i == event->unit->num) + ret |= OP_INVALID_UM; - } else { - for (i = 0; i < event->unit->num; ++i) { - if (event->unit->um[i].value == um) - break; } - if (i == event->unit->num) - ret |= OP_INVALID_UM; + if (ret == OP_OK_EVENT) + return ret; } return ret; @@ -754,6 +801,7 @@ void op_default_event(op_cpu cpu_type, s case CPU_ATHLON: case CPU_HAMMER: case CPU_FAMILY10: + case CPU_ARCH_PERFMON: descr->name = "CPU_CLK_UNHALTED"; break; diff -X oprofile-exclude -urpN oprofile-0.9.3/libop/op_events.h oprofile-0.9.3-arch/libop/op_events.h --- oprofile-0.9.3/libop/op_events.h 2007-07-16 20:22:17.000000000 +0200 +++ oprofile-0.9.3-arch/libop/op_events.h 2008-08-20 16:46:11.000000000 +0200 @@ -56,6 +56,7 @@ struct op_event { char * name; /**< the event name */ char * desc; /**< the event description */ int min_count; /**< minimum counter value allowed */ + int filter; /**< architecture specific filter or -1 */ struct list_head event_next; /**< next event in list */ }; @@ -63,7 +64,7 @@ struct op_event { struct list_head * op_events(op_cpu cpu_type); /** Find a given event, returns NULL on error */ -struct op_event * op_find_event(op_cpu cpu_type, u32 nr); +struct op_event * op_find_event(op_cpu cpu_type, u32 nr, u32 um); /** Find a given event by name */ struct op_event * find_event_by_name(char const * name); diff -X oprofile-exclude -urpN oprofile-0.9.3/libop/op_hw_specific.h oprofile-0.9.3-arch/libop/op_hw_specific.h --- oprofile-0.9.3/libop/op_hw_specific.h 1970-01-01 01:00:00.000000000 +0100 +++ oprofile-0.9.3-arch/libop/op_hw_specific.h 2008-08-20 14:14:31.000000000 +0200 @@ -0,0 +1,58 @@ +/* + * @file architecture specific interfaces + * @remark Copyright 2008 Intel Corporation + * @remark Read the file COPYING + * @author Andi Kleen + */ + +#if defined(__i386__) || defined(__x86_64__) + +/* Assume we run on the same host as the profilee */ + +#define num_to_mask(x) ((1U << (x)) - 1) + +static inline unsigned arch_get_filter(op_cpu cpu_type) +{ + if (cpu_type == CPU_ARCH_PERFMON) { + unsigned ebx, eax; + asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx"); + return ebx & num_to_mask(eax >> 24); + } + return 0; +} + +static inline int arch_num_counters(op_cpu cpu_type) +{ + if (cpu_type == CPU_ARCH_PERFMON) { + unsigned v; + asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx"); + return (v >> 8) & 0xff; + } + return -1; +} + +static inline unsigned arch_get_counter_mask(void) +{ + unsigned v; + asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx"); + return num_to_mask((v >> 8) & 0xff); +} + +#else + +static inline unsigned arch_get_filter(op_cpu cpu_type) +{ + return 0; +} + +static inline int arch_num_counters(op_cpu cpu_type) +{ + return -1; +} + +static inline u32 arch_get_counter_mask(void) +{ + return 0; +} + +#endif diff -X oprofile-exclude -urpN oprofile-0.9.3/libpp/op_header.cpp oprofile-0.9.3-arch/libpp/op_header.cpp --- oprofile-0.9.3/libpp/op_header.cpp 2007-07-16 20:22:17.000000000 +0200 +++ oprofile-0.9.3-arch/libpp/op_header.cpp 2008-08-20 16:48:41.000000000 +0200 @@ -126,7 +126,7 @@ string const op_print_event(op_cpu cpu_t return str; } - struct op_event * event = op_find_event(cpu_type, type); + struct op_event * event = op_find_event(cpu_type, type, um); if (!event) { cerr << "Could not locate event " << int(type) << endl; @@ -166,7 +166,7 @@ string const op_xml_print_event(op_cpu c if (cpu_type == CPU_TIMER_INT || cpu_type == CPU_RTC) return xml_utils::get_timer_setup((size_t)count); - struct op_event * event = op_find_event(cpu_type, type); + struct op_event * event = op_find_event(cpu_type, type, um); if (!event) { cerr << "Could not locate event " << int(type) << endl; return ""; diff -X oprofile-exclude -urpN oprofile-0.9.3/utils/ophelp.c oprofile-0.9.3-arch/utils/ophelp.c --- oprofile-0.9.3/utils/ophelp.c 2007-07-16 20:22:17.000000000 +0200 +++ oprofile-0.9.3-arch/utils/ophelp.c 2008-08-19 08:47:45.000000000 +0200 @@ -401,6 +401,13 @@ int main(int argc, char const * argv[]) printf("See Intel Architecture Developer's Manual Volume 3, Appendix A and\n" "Intel Architecture Optimization Reference Manual (730795-001)\n\n"); break; + + case CPU_ARCH_PERFMON: + printf("See Intel 64 and IA-32 Architectures Software Developer's Manual\n" + "Volume 3B (Document 253669) Chapter 18 for architectural perfmon events\n" + "This is a limited set of fallback events because oprofile doesn't know your CPU\n"); + break; + case CPU_IA64: case CPU_IA64_1: case CPU_IA64_2: