diff -urN rhas.e25/arch/ia64/kernel/ia64_ksyms.c linux.e25/arch/ia64/kernel/ia64_ksyms.c
--- rhas.e25/arch/ia64/kernel/ia64_ksyms.c	2003-02-06 16:32:10.000000000 -0800
+++ linux.e25/arch/ia64/kernel/ia64_ksyms.c	2003-02-06 16:39:01.000000000 -0800
@@ -168,3 +168,8 @@
 #endif
 EXPORT_SYMBOL(machvec_noop);
 
+#ifdef CONFIG_PERFMON
+#include <asm/perfmon.h>
+EXPORT_SYMBOL(pfm_install_alternate_syswide_subsystem);
+EXPORT_SYMBOL(pfm_remove_alternate_syswide_subsystem);
+#endif
diff -urN rhas.e25/arch/ia64/kernel/irq_ia64.c linux.e25/arch/ia64/kernel/irq_ia64.c
--- rhas.e25/arch/ia64/kernel/irq_ia64.c	2003-02-06 16:32:09.000000000 -0800
+++ linux.e25/arch/ia64/kernel/irq_ia64.c	2003-02-06 16:39:01.000000000 -0800
@@ -192,7 +192,7 @@
 	register_percpu_irq(IA64_TASK_MIGRATION, &task_migration_irqaction);
 #endif
 #ifdef CONFIG_PERFMON
-	perfmon_init_percpu();	
+	pfm_init_percpu();
 #endif	
 	platform_irq_init();
 }
diff -urN rhas.e25/arch/ia64/kernel/perfmon.c linux.e25/arch/ia64/kernel/perfmon.c
--- rhas.e25/arch/ia64/kernel/perfmon.c	2003-02-06 16:32:06.000000000 -0800
+++ linux.e25/arch/ia64/kernel/perfmon.c	2003-02-06 16:39:01.000000000 -0800
@@ -8,11 +8,10 @@
  * Modifications by Stephane Eranian, Hewlett-Packard Co.
  * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
  *
- * Copyright (C) 1999-2002  Hewlett Packard Co
+ * Copyright (C) 1999-2003  Hewlett Packard Co
  *               Stephane Eranian <eranian@hpl.hp.com>
  *               David Mosberger-Tang <davidm@hpl.hp.com>
  */
-
 #include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -28,7 +27,6 @@
 #include <asm/bitops.h>
 #include <asm/errno.h>
 #include <asm/page.h>
-#include <asm/pal.h>
 #include <asm/perfmon.h>
 #include <asm/processor.h>
 #include <asm/signal.h>
@@ -56,27 +54,53 @@
 /*
  * Reset register flags
  */
-#define PFM_RELOAD_LONG_RESET	1
-#define PFM_RELOAD_SHORT_RESET	2
+#define PFM_PMD_LONG_RESET	1
+#define PFM_PMD_SHORT_RESET	2
 
 /*
  * Misc macros and definitions
  */
 #define PMU_FIRST_COUNTER	4
+#define PMU_MAX_PMCS		256
+#define PMU_MAX_PMDS		256
+
+/*
+ * type of a PMU register (bitmask).
+ * bitmask structure:
+ * 	bit0   : register implemented
+ * 	bit1   : end marker 
+ * 	bit2-3 : reserved
+ * 	bit4-7 : register type
+ * 	bit8-31: reserved
+ */
+#define PFM_REG_IMPL		0x1 /* register implemented */
+#define PFM_REG_END		0x2 /* end marker */
+#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
+#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm AND pmc.oi, a PMD used as a counter */
+#define PFM_REG_CONTROL		(0x3<<4|PFM_REG_IMPL) /* PMU control register */
+#define	PFM_REG_CONFIG		(0x4<<4|PFM_REG_IMPL) /* refine configuration */
+#define PFM_REG_BUFFER	 	(0x5<<4|PFM_REG_IMPL) /* PMD used as buffer */
+
+#define PMC_IS_LAST(i)	(pmu_conf.pmc_desc[i].type & PFM_REG_END)
+#define PMD_IS_LAST(i)	(pmu_conf.pmd_desc[i].type & PFM_REG_END)
 
-#define PFM_IS_DISABLED() pmu_conf.pfm_is_disabled
+#define PFM_IS_DISABLED() pmu_conf.disabled
 
 #define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_soft_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
 #define PFM_FL_INHERIT_MASK	(PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)
 
 /* i assume unsigned */
-#define PMC_IS_IMPL(i)	  (i<pmu_conf.num_pmcs && pmu_conf.impl_regs[i>>6] & (1UL<< (i) %64))
-#define PMD_IS_IMPL(i)	  (i<pmu_conf.num_pmds &&  pmu_conf.impl_regs[4+(i>>6)] & (1UL<<(i) % 64))
+#define PMC_IS_IMPL(i)	  ((i) < PMU_MAX_PMCS && (pmu_conf.pmc_desc[i].type & PFM_REG_IMPL)) /* in range and PFM_REG_IMPL set */
+#define PMD_IS_IMPL(i)	  ((i) < PMU_MAX_PMDS && (pmu_conf.pmd_desc[i].type & PFM_REG_IMPL)) /* in range and PFM_REG_IMPL set */
 
 /* XXX: these three assume that register i is implemented */
 #define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING)
 #define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING)
-#define PMC_IS_MONITOR(c)  (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR)
+#define PMC_IS_MONITOR(i)  (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR)
+#define PMC_DFL_VAL(i)     pmu_conf.pmc_desc[i].default_value
+#define PMC_RSVD_MASK(i)   pmu_conf.pmc_desc[i].reserved_mask
+#define PMD_PMD_DEP(i)	   pmu_conf.pmd_desc[i].dep_pmd[0]
+#define PMC_PMD_DEP(i)	   pmu_conf.pmc_desc[i].dep_pmd[0]
 
 /* k assume unsigned */
 #define IBR_IS_IMPL(k)	  (k<pmu_conf.num_ibrs)
@@ -106,6 +130,9 @@
 
 #define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
 
+#define PFM_CPUINFO_CLEAR(v)	local_cpu_data->pfm_syst_info &= ~(v)
+#define PFM_CPUINFO_SET(v)	local_cpu_data->pfm_syst_info |= (v)
+
 #ifdef CONFIG_SMP
 #define cpu_is_online(i) (cpu_online_map & (1UL << i))
 #else
@@ -176,28 +203,17 @@
 #define UNLOCK_PSB(p)	spin_unlock(&(p)->psb_lock)
 
 /*
- * The possible type of a PMU register
- */
-typedef enum { 
-	PFM_REG_NOTIMPL, /* not implemented */
-	PFM_REG_NONE, 	 /* end marker */
-	PFM_REG_MONITOR, /* a PMC with a pmc.pm field only */
-	PFM_REG_COUNTING,/* a PMC with a pmc.pm AND pmc.oi, a PMD used as a counter */
-	PFM_REG_CONTROL, /* PMU control register */
-	PFM_REG_CONFIG,  /* refine configuration */
-	PFM_REG_BUFFER	 /* PMD used as buffer */
-} pfm_pmu_reg_type_t;
-
-/*
  * 64-bit software counter structure
  */
 typedef struct {
 	u64 val;	/* virtual 64bit counter value */
-	u64 ival;	/* initial value from user */
+	u64 lval;	/* last value */
 	u64 long_reset;	/* reset value on sampling overflow */
 	u64 short_reset;/* reset value on overflow */
 	u64 reset_pmds[4]; /* which other pmds to reset when this counter overflows */
-	int flags;	/* notify/do not notify */
+	u64 seed;	/* seed for random-number generator */
+	u64 mask;	/* mask for random-number generator */
+	unsigned int flags; /* notify/do not notify */
 } pfm_counter_t;
 
 /*
@@ -212,9 +228,16 @@
 	unsigned int frozen:1;		/* pmu must be kept frozen on ctxsw in */
 	unsigned int protected:1;	/* allow access to creator of context only */
 	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
-	unsigned int reserved:24;
+	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
+	unsigned int trap_reason:2;	/* reason for going into pfm_block_ovfl_reset() */
+	unsigned int reserved:21;
 } pfm_context_flags_t;
 
+#define PFM_TRAP_REASON_NONE		0x0	/* default value */
+#define PFM_TRAP_REASON_BLOCKSIG	0x1	/* we need to block on overflow and signal user */
+#define PFM_TRAP_REASON_SIG		0x2	/* we simply need to signal user */
+#define PFM_TRAP_REASON_RESET		0x3	/* we need to reset PMDs */
+
 /*
  * perfmon context: encapsulates all the state of a monitoring session
  * XXX: probably need to change layout
@@ -247,10 +270,8 @@
 
 	u64			ctx_saved_psr;		/* copy of psr used for lazy ctxsw */
 	unsigned long		ctx_saved_cpus_allowed;	/* copy of the task cpus_allowed (system wide) */
-	unsigned long		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
+	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
 
-	atomic_t		ctx_saving_in_progress;	/* flag indicating actual save in progress */
-	atomic_t		ctx_is_busy;		/* context accessed by overflow handler */
 	atomic_t		ctx_last_cpu;		/* CPU id of current or last CPU used */
 } pfm_context_t;
 
@@ -260,6 +281,8 @@
 #define ctx_fl_frozen		ctx_flags.frozen
 #define ctx_fl_protected	ctx_flags.protected
 #define ctx_fl_using_dbreg	ctx_flags.using_dbreg
+#define ctx_fl_excl_idle	ctx_flags.excl_idle
+#define ctx_fl_trap_reason	ctx_flags.trap_reason
 
 /*
  * global information about all sessions
@@ -268,10 +291,10 @@
 typedef struct {
 	spinlock_t		pfs_lock;		   /* lock the structure */
 
-	unsigned long		pfs_task_sessions;	   /* number of per task sessions */
-	unsigned long		pfs_sys_sessions;	   /* number of per system wide sessions */
-	unsigned long   	pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
-	unsigned long   	pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
+	unsigned int 		pfs_task_sessions;	   /* number of per task sessions */
+	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
+	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
+	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
 	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
 } pfm_session_t;
 
@@ -281,13 +304,16 @@
  * dep_pmc[]: a bitmask of dependent PMC registers
  */
 typedef struct {
-	pfm_pmu_reg_type_t	type;
+	unsigned int		type;
 	int			pm_pos;
+	unsigned long		default_value;	/* power-on default value */
+	unsigned long		reserved_mask;	/* bitmask of reserved bits */
 	int			(*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
 	int			(*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
 	unsigned long		dep_pmd[4];
 	unsigned long		dep_pmc[4];
 } pfm_reg_desc_t;
+
 /* assume cnum is a valid monitor */
 #define PMC_PM(cnum, val)	(((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1)
 #define PMC_WR_FUNC(cnum)	(pmu_conf.pmc_desc[cnum].write_check)
@@ -296,23 +322,22 @@
 
 /*
  * This structure is initialized at boot time and contains
- * a description of the PMU main characteristic as indicated
- * by PAL along with a list of inter-registers dependencies and configurations.
+ * a description of the PMU main characteristics.
  */
 typedef struct {
-	unsigned long pfm_is_disabled;	/* indicates if perfmon is working properly */
-	unsigned long perf_ovfl_val;	/* overflow value for generic counters   */
-	unsigned long max_counters;	/* upper limit on counter pair (PMC/PMD) */
-	unsigned long num_pmcs ;	/* highest PMC implemented (may have holes) */
-	unsigned long num_pmds;		/* highest PMD implemented (may have holes) */
-	unsigned long impl_regs[16];	/* buffer used to hold implememted PMC/PMD mask */
-	unsigned long num_ibrs;		/* number of instruction debug registers */
-	unsigned long num_dbrs;		/* number of data debug registers */
-	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register descriptions */
-	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register descriptions */
+	unsigned int  disabled;		/* indicates if perfmon is working properly */
+	unsigned long ovfl_val;		/* overflow value for generic counters   */
+	unsigned long impl_pmcs[4];	/* bitmask of implemented PMCS */
+	unsigned long impl_pmds[4];	/* bitmask of implemented PMDS */
+	unsigned int  num_pmcs;		/* number of implemented PMCS */
+	unsigned int  num_pmds;		/* number of implemented PMDS */
+	unsigned int  num_ibrs;		/* number of implemented IBRS */
+	unsigned int  num_dbrs;		/* number of implemented DBRS */
+	unsigned int  num_counters;	/* number of PMD/PMC counters */
+	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
+	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */
 } pmu_config_t;
 
-
 /*
  * structure used to pass argument to/from remote CPU 
  * using IPI to check and possibly save the PMU context on SMP systems.
@@ -336,7 +361,7 @@
 
 #define PFM_CMD_PID		0x1	/* command requires pid argument */
 #define PFM_CMD_ARG_READ	0x2	/* command must read argument(s) */
-#define PFM_CMD_ARG_WRITE	0x4	/* command must write argument(s) */
+#define PFM_CMD_ARG_RW		0x4	/* command must read/write argument(s) */
 #define PFM_CMD_CTX		0x8	/* command needs a perfmon context */
 #define PFM_CMD_NOCHK		0x10	/* command does not need to check task's state */
 
@@ -347,7 +372,7 @@
 
 #define PFM_CMD_USE_PID(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0)
 #define PFM_CMD_READ_ARG(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0)
-#define PFM_CMD_WRITE_ARG(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_WRITE) != 0)
+#define PFM_CMD_RW_ARG(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW) != 0)
 #define PFM_CMD_USE_CTX(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0)
 #define PFM_CMD_CHK(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0)
 
@@ -366,15 +391,16 @@
 	unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
 	unsigned long pfm_recorded_samples_count;
 	unsigned long pfm_full_smpl_buffer_count; /* how many times the sampling buffer was full */
+	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
 } pfm_stats_t;
 
 /*
  * perfmon internal variables
  */
-static pmu_config_t	pmu_conf; 	/* PMU configuration */
 static pfm_session_t	pfm_sessions;	/* global sessions information */
 static struct proc_dir_entry *perfmon_dir; /* for debug only */
-static pfm_stats_t	pfm_stats;
+static pfm_stats_t	pfm_stats[NR_CPUS];
+static pfm_intr_handler_desc_t	*pfm_alternate_intr_handler;
 
 /* sysctl() controls */
 static pfm_sysctl_t pfm_sysctl;
@@ -395,8 +421,6 @@
 };
 static struct ctl_table_header *pfm_sysctl_header;
 
-static unsigned long reset_pmcs[IA64_NUM_PMC_REGS];	/* contains PAL reset values for PMCS */
-
 static void pfm_vm_close(struct vm_area_struct * area);
 
 static struct vm_operations_struct pfm_vm_ops={
@@ -408,17 +432,13 @@
  */
 static struct {
 	struct task_struct *owner;
-} ____cacheline_aligned pmu_owners[NR_CPUS];
-
-
+	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
+} pmu_owners[NR_CPUS];
 
 /*
  * forward declarations
  */
-static void ia64_reset_pmu(struct task_struct *);
-#ifdef CONFIG_SMP
-static void pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx);
-#endif
+static void pfm_reset_pmu(struct task_struct *);
 static void pfm_lazy_save_regs (struct task_struct *ta);
 
 #if   defined(CONFIG_ITANIUM)
@@ -429,42 +449,78 @@
 #include "perfmon_generic.h"
 #endif
 
+static inline void
+pfm_clear_psr_pp(void)
+{
+	__asm__ __volatile__ ("rsm psr.pp;; srlz.i;;"::: "memory"); /* rsm: clear the psr.pp bit; srlz.i: serialize afterwards */
+}
+
+static inline void
+pfm_set_psr_pp(void)
+{
+	__asm__ __volatile__ ("ssm psr.pp;; srlz.i;;"::: "memory"); /* ssm: set the psr.pp bit; srlz.i: serialize afterwards */
+}
+
+static inline void
+pfm_clear_psr_up(void)
+{
+	__asm__ __volatile__ ("rum psr.up;; srlz.i;;"::: "memory"); /* rum: clear the psr.up bit; srlz.i: serialize afterwards */
+}
+
+static inline void
+pfm_set_psr_up(void)
+{
+	__asm__ __volatile__ ("sum psr.up;; srlz.i;;"::: "memory"); /* sum: set the psr.up bit; srlz.i: serialize afterwards */
+}
+
+static inline unsigned long
+pfm_get_psr(void)
+{
+	unsigned long tmp;
+	__asm__ __volatile__ ("mov %0=psr;;": "=r"(tmp) :: "memory"); /* copy the processor status register into tmp */
+	return tmp;
+}
+
+static inline void
+pfm_set_psr_l(unsigned long val)
+{
+	__asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(val): "memory"); /* write val to psr.l, then serialize */
+}
+
+
+
+static inline void
+pfm_freeze_pmu(void)
+{
+	ia64_set_pmc(0,1UL);	/* write 1 to pmc0 -- NOTE(review): presumably sets the freeze bit, confirm */
+	ia64_srlz_d();		/* serialize after the PMC write */
+}
+
+static inline void
+pfm_unfreeze_pmu(void)
+{
+	ia64_set_pmc(0,0UL);	/* write 0 to pmc0 -- NOTE(review): presumably clears the freeze bit, confirm */
+	ia64_srlz_d();		/* serialize after the PMC write */
+}
+
+
 static inline unsigned long
 pfm_read_soft_counter(pfm_context_t *ctx, int i)
 {
-	return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.perf_ovfl_val);
+	return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val);
 }
 
 static inline void
 pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
 {
-	ctx->ctx_soft_pmds[i].val = val  & ~pmu_conf.perf_ovfl_val;
+	ctx->ctx_soft_pmds[i].val = val  & ~pmu_conf.ovfl_val;
 	/*
 	 * writing to unimplemented part is ignore, so we do not need to
 	 * mask off top part
 	 */
-	ia64_set_pmd(i, val & pmu_conf.perf_ovfl_val);
-}
-
-/*
- * finds the number of PM(C|D) registers given
- * the bitvector returned by PAL
- */
-static unsigned long __init
-find_num_pm_regs(long *buffer)
-{
-	int i=3; /* 4 words/per bitvector */
-
-	/* start from the most significant word */
-	while (i>=0 && buffer[i] == 0 ) i--;
-	if (i< 0) {
-		printk(KERN_ERR "perfmon: No bit set in pm_buffer\n");
-		return 0;
-	}
-	return 1+ ia64_fls(buffer[i]) + 64 * i;
+	ia64_set_pmd(i, val & pmu_conf.ovfl_val);
 }
 
-
 /*
  * Generates a unique (per CPU) timestamp
  */
@@ -477,31 +533,6 @@
 	return ia64_get_itc();
 }
 
-/* Given PGD from the address space's page table, return the kernel
- * virtual mapping of the physical memory mapped at ADR.
- */
-static inline unsigned long
-uvirt_to_kva(pgd_t *pgd, unsigned long adr)
-{
-	unsigned long ret = 0UL;
-	pmd_t *pmd;
-	pte_t *ptep, pte;
-
-	if (!pgd_none(*pgd)) {
-		pmd = pmd_offset(pgd, adr);
-		if (!pmd_none(*pmd)) {
-			ptep = pte_offset(pmd, adr);
-			pte = *ptep;
-			if (pte_present(pte)) {
-				ret = (unsigned long) page_address(pte_page(pte));
-				ret |= (adr & (PAGE_SIZE - 1));
-			}
-		}
-	}
-	DBprintk(("[%d] uv2kva(%lx-->%lx)\n", current->pid, adr, ret));
-	return ret;
-}
-
 /* Here we want the physical address of the memory.
  * This is used when initializing the contents of the
  * area and marking the pages as reserved.
@@ -518,18 +549,18 @@
 pfm_rvmalloc(unsigned long size)
 {
 	void *mem;
-	unsigned long adr, page;
+	unsigned long adr;
 
+	size=PAGE_ALIGN(size);
 	mem=vmalloc(size);
 	if (mem) {
 		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
 		memset(mem, 0, size); /* Clear the ram out, no junk to the user */
 		adr=(unsigned long) mem;
 		while (size > 0) {
-			page = pfm_kvirt_to_pa(adr);
-			mem_map_reserve(virt_to_page(__va(page)));
-			adr  += PAGE_SIZE;
-			size -= PAGE_SIZE;
+			mem_map_reserve(vmalloc_to_page((void *)adr));
+			adr+=PAGE_SIZE;
+			size-=PAGE_SIZE;
 		}
 	}
 	return mem;
@@ -538,13 +569,12 @@
 static void
 pfm_rvfree(void *mem, unsigned long size)
 {
-	unsigned long adr, page = 0;
+	unsigned long adr;
 
 	if (mem) {
 		adr=(unsigned long) mem;
-		while (size > 0) {
-			page = pfm_kvirt_to_pa(adr);
-			mem_map_unreserve(virt_to_page(__va(page)));
+		while ((long) size > 0) {
+			mem_map_unreserve(vmalloc_to_page((void*)adr));
 			adr+=PAGE_SIZE;
 			size-=PAGE_SIZE;
 		}
@@ -767,15 +797,14 @@
 	psb = kmalloc(sizeof(*psb), GFP_KERNEL);
 	if (psb == NULL) {
 		DBprintk(("Can't allocate sampling buffer descriptor\n"));
-		pfm_rvfree(smpl_buf, size);
-		return -ENOMEM;
+		goto error_kmalloc;
 	}
 
 	/* allocate vma */
 	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!vma) {
 		DBprintk(("Cannot allocate vma\n"));
-		goto error;
+		goto error_kmem;
 	}
 	/*
 	 * partially initialize the vma for the sampling buffer
@@ -873,13 +902,126 @@
 	ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start;
 
 	return 0;
-
 error:
-	pfm_rvfree(smpl_buf, size);
+	kmem_cache_free(vm_area_cachep, vma);
+error_kmem:
 	kfree(psb);
+error_kmalloc:
+	pfm_rvfree(smpl_buf, size);
 	return -ENOMEM;
 }
 
+/*
+ * Reserve a perfmon session under the pfs lock: one pfs_sys_session[] slot per CPU
+ * bit set in cpu_mask for a system-wide session, or one per-task session count
+ * otherwise. Returns 0 on success, -EBUSY on conflict (nothing is left reserved).
+ */
+static int
+pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
+{
+	unsigned long m, undo_mask;
+	unsigned int n, i;
+
+	/* validity checks on cpu_mask have been done upstream */
+	LOCK_PFS();
+
+	if (is_syswide) {
+		/* cannot mix system wide and per-task sessions */
+		if (pfm_sessions.pfs_task_sessions > 0UL) {
+			DBprintk(("system wide not possible, %u conflicting task_sessions\n", 
+			  	pfm_sessions.pfs_task_sessions));
+			goto abort;
+		}
+
+		m = cpu_mask; undo_mask = 0UL; n = 0;
+		DBprintk(("cpu_mask=0x%lx\n", cpu_mask));
+		for(i=0; m; i++, m>>=1) {
+
+			if ((m & 0x1) == 0UL) continue;
+
+			if (pfm_sessions.pfs_sys_session[i]) goto undo;
+
+			DBprintk(("reserving CPU%d currently on CPU%d\n", i, smp_processor_id()));
+
+			pfm_sessions.pfs_sys_session[i] = task;
+			undo_mask |= 1UL << i;
+			n++;
+		}
+		pfm_sessions.pfs_sys_sessions += n;
+	} else {
+		if (pfm_sessions.pfs_sys_sessions) goto abort;
+		pfm_sessions.pfs_task_sessions++;
+	}
+	UNLOCK_PFS();
+	return 0;
+undo:
+	DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",
+  		pfm_sessions.pfs_sys_session[i]->pid, i));
+
+	/* only undo CPUs reserved above: slots for clear bits may belong to other sessions */
+	for(i=0; undo_mask; i++, undo_mask >>=1) {
+		if (undo_mask & 0x1) pfm_sessions.pfs_sys_session[i] = NULL;
+	}
+abort:
+	UNLOCK_PFS();
+	return -EBUSY;
+}
+
+/*
+ * Release what pfm_reserve_session() reserved: the pfs_sys_session[] slot of each CPU
+ * set in cpu_mask (system-wide) or one per-task session count. Always returns 0.
+ */
+static int
+pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
+{
+	pfm_context_t *ctx;
+	unsigned long m;
+	unsigned int n, i;
+	pid_t pid = task ? task->pid : -1;	/* task may be NULL (see ctx below): never deref it for messages */
+
+	ctx = task ? task->thread.pfm_context : NULL;
+
+	/* validity checks on cpu_mask have been done upstream */
+	LOCK_PFS();
+
+	DBprintk(("[%d] sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu_mask=0x%lx\n",
+		pid,
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		is_syswide,
+		cpu_mask));
+
+	if (is_syswide) {
+		m = cpu_mask; n = 0;
+		for(i=0; m; i++, m>>=1) {
+			if ((m & 0x1) == 0UL) continue;
+			pfm_sessions.pfs_sys_session[i] = NULL;
+			n++;
+		}
+		/* would not work with perfmon+more than one bit in cpu_mask */
+		if (ctx && ctx->ctx_fl_using_dbreg) {
+			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
+				printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", pid);
+			} else {
+				pfm_sessions.pfs_sys_use_dbregs--;
+			}
+		}
+		pfm_sessions.pfs_sys_sessions -= n;
+
+		DBprintk(("CPU%d sys_sessions=%u\n", 
+			smp_processor_id(), pfm_sessions.pfs_sys_sessions));
+	} else {
+		pfm_sessions.pfs_task_sessions--;
+		DBprintk(("[%d] task_sessions=%u\n", 
+			pid, pfm_sessions.pfs_task_sessions));
+	}
+
+	UNLOCK_PFS();
+
+	return 0;
+}
+
 /*
  * XXX: do something better here
  */
@@ -895,6 +1037,7 @@
 static int
 pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
 {
+	unsigned long smpl_pmds = pfx->ctx_smpl_regs[0];
 	int ctx_flags;
 	int cpu;
 
@@ -961,6 +1104,11 @@
 		}
 #endif
 	}
+	/* verify validity of smpl_regs */
+	if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) {
+		DBprintk(("invalid smpl_regs 0x%lx\n", smpl_pmds));
+		return -EINVAL;
+	}
 	/* probably more to add here */
 
 	return 0;
@@ -972,7 +1120,7 @@
 {
 	pfarg_context_t tmp;
 	void *uaddr = NULL;
-	int ret, cpu = 0;
+	int ret;
 	int ctx_flags;
 	pid_t notify_pid;
 
@@ -991,40 +1139,8 @@
 
 	ctx_flags = tmp.ctx_flags;
 
-	ret =  -EBUSY;
-
-	LOCK_PFS();
-
-	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
-
-		/* at this point, we know there is at least one bit set */
-		cpu = ffz(~tmp.ctx_cpu_mask);
-
-		DBprintk(("requesting CPU%d currently on CPU%d\n",cpu, smp_processor_id()));
-
-		if (pfm_sessions.pfs_task_sessions > 0) {
-			DBprintk(("system wide not possible, task_sessions=%ld\n", pfm_sessions.pfs_task_sessions));
-			goto abort;
-		}
-
-		if (pfm_sessions.pfs_sys_session[cpu]) {
-			DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",pfm_sessions.pfs_sys_session[cpu]->pid, cpu));
-			goto abort;
-		}
-		pfm_sessions.pfs_sys_session[cpu] = task;
-		/*
-		 * count the number of system wide sessions
-		 */
-		pfm_sessions.pfs_sys_sessions++;
-
-	} else if (pfm_sessions.pfs_sys_sessions == 0) {
-		pfm_sessions.pfs_task_sessions++;
-	} else {
-		/* no per-process monitoring while there is a system wide session */
-		goto abort;
-	}
-
-	UNLOCK_PFS();
+	ret = pfm_reserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE, tmp.ctx_cpu_mask);
+	if (ret) goto abort;
 
 	ret = -ENOMEM;
 
@@ -1057,7 +1173,10 @@
 			/*
 			 * check if we can send this task a signal
 			 */
-			if (pfm_bad_permissions(notify_task)) goto buffer_error;
+			if (pfm_bad_permissions(notify_task)) {
+				read_unlock(&tasklist_lock);
+				goto buffer_error;
+			}
 
 			/* 
 		 	 * make visible
@@ -1101,10 +1220,12 @@
 		tmp.ctx_smpl_vaddr = uaddr;
 	}
 	/* initialization of context's flags */
-	ctx->ctx_fl_inherit   = ctx_flags & PFM_FL_INHERIT_MASK;
-	ctx->ctx_fl_block     = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
-	ctx->ctx_fl_system    = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
-	ctx->ctx_fl_frozen    = 0;
+	ctx->ctx_fl_inherit     = ctx_flags & PFM_FL_INHERIT_MASK;
+	ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
+	ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
+	ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+	ctx->ctx_fl_frozen      = 0;
+	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
 	/*
 	 * setting this flag to 0 here means, that the creator or the task that the
 	 * context is being attached are granted access. Given that a context can only
@@ -1114,13 +1235,10 @@
 	ctx->ctx_fl_protected = 0;
 
 	/* for system wide mode only (only 1 bit set) */
-	ctx->ctx_cpu         = cpu;
-
-	atomic_set(&ctx->ctx_last_cpu,-1); /* SMP only, means no CPU */
+	ctx->ctx_cpu = ffz(~tmp.ctx_cpu_mask);
 
-	/* may be redudant with memset() but at least it's easier to remember */
-	atomic_set(&ctx->ctx_saving_in_progress, 0); 
-	atomic_set(&ctx->ctx_is_busy, 0); 
+	/* SMP only, -1 means no CPU */
+	atomic_set(&ctx->ctx_last_cpu,-1); 
 
 	sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */
 
@@ -1132,9 +1250,9 @@
 	DBprintk(("context=%p, pid=%d notify_task=%p\n",
 			(void *)ctx, task->pid, ctx->ctx_notify_task));
 
-	DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d\n", 
+	DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d\n", 
 			(void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit, 
-			ctx->ctx_fl_block, ctx->ctx_fl_system));
+			ctx->ctx_fl_block, ctx->ctx_fl_system, ctx->ctx_fl_excl_idle));
 
 	/*
 	 * when no notification is required, we can make this visible at the last moment
@@ -1147,9 +1265,8 @@
 	 */
 	if (ctx->ctx_fl_system) {
 		ctx->ctx_saved_cpus_allowed = task->cpus_allowed;
-		task->cpus_allowed = 1UL << cpu;
-		task->need_resched = 1;
-		DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid,task->cpus_allowed));
+		set_cpus_allowed(task, tmp.ctx_cpu_mask);
+		DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid, task->cpus_allowed));
 	}
 
 	return 0;
@@ -1157,32 +1274,40 @@
 buffer_error:
 	pfm_context_free(ctx);
 error:
-	/*
-	 * undo session reservation
-	 */
-	LOCK_PFS();
-
-	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
-		pfm_sessions.pfs_sys_session[cpu] = NULL;
-		pfm_sessions.pfs_sys_sessions--;
-	} else {
-		pfm_sessions.pfs_task_sessions--;
-	}
+	pfm_unreserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE , tmp.ctx_cpu_mask);
 abort:
-	UNLOCK_PFS();
+	/* make sure we don't leave anything behind */
+	task->thread.pfm_context = NULL;
 
 	return ret;
 }
 
+static inline unsigned long
+pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
+{
+	unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;	/* base reset value for this counter */
+	unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
+	extern unsigned long carta_random32 (unsigned long seed);	/* generator declared here; its definition is not in this file's hunks */
+
+	if (reg->flags & PFM_REGFL_RANDOM) {	/* randomization requested for this register */
+		new_seed = carta_random32(old_seed);
+		val -= (old_seed & mask);	/* counter values are negative numbers! */
+		if ((mask >> 32) != 0)	/* mask uses bits above 31 */
+			/* construct a full 64-bit random value: */
+			new_seed |= carta_random32(old_seed >> 32) << 32;
+		reg->seed = new_seed;	/* next call perturbs with the new seed */
+	}
+	reg->lval = val;	/* remember the value handed out (lval: "last value") */
+	return val;
+}
+
 static void
 pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
 {
 	unsigned long mask = ovfl_regs[0];
 	unsigned long reset_others = 0UL;
 	unsigned long val;
-	int i;
-
-	DBprintk(("masks=0x%lx\n", mask));
+	int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);
 
 	/*
 	 * now restore reset value on sampling overflowed counters
@@ -1190,15 +1315,11 @@
 	mask >>= PMU_FIRST_COUNTER;
 	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
 		if (mask & 0x1) {
-			val  = flag == PFM_RELOAD_LONG_RESET ? 
-					ctx->ctx_soft_pmds[i].long_reset:
-					ctx->ctx_soft_pmds[i].short_reset;
-
+			val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
 			reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0];
 
-			DBprintk(("[%d] %s reset soft_pmd[%d]=%lx\n", 
-			  	current->pid, 
-				flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val));
+			DBprintk_ovfl(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid,
+				  is_long_reset ? "long" : "short", i, val));
 
 			/* upper part is ignored on rval */
 			pfm_write_soft_counter(ctx, i, val);
@@ -1212,23 +1333,17 @@
 
 		if ((reset_others & 0x1) == 0) continue;
 
-		val  = flag == PFM_RELOAD_LONG_RESET ? 
-					ctx->ctx_soft_pmds[i].long_reset:
-					ctx->ctx_soft_pmds[i].short_reset;
+		val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
 
 		if (PMD_IS_COUNTING(i)) {
 			pfm_write_soft_counter(ctx, i, val);
 		} else {
 			ia64_set_pmd(i, val);
 		}
-
-		DBprintk(("[%d] %s reset_others pmd[%d]=%lx\n", 
-			  	current->pid, 
-				flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val));
+		DBprintk_ovfl(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid,
+			  is_long_reset ? "long" : "short", i, val));
 	}
 	ia64_srlz_d();
-	/* just in case ! */
-	ctx->ctx_ovfl_regs[0] = 0UL;
 }
 
 static int
@@ -1236,9 +1351,10 @@
 {
 	struct thread_struct *th = &task->thread;
 	pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
-	unsigned int cnum;
+	unsigned long value, reset_pmds;
+	unsigned int cnum, reg_flags, flags;
 	int i;
-	int ret = 0, reg_retval = 0;
+	int ret = -EINVAL;
 
 	/* we don't quite support this right now */
 	if (task != current) return -EINVAL;
@@ -1252,7 +1368,11 @@
 
 		if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
 
-		cnum = tmp.reg_num;
+		cnum       = tmp.reg_num;
+		reg_flags  = tmp.reg_flags;
+		value      = tmp.reg_value;
+		reset_pmds = tmp.reg_reset_pmds[0];
+		flags      = 0;
 
 		/* 
 		 * we reject all non implemented PMC as well
@@ -1261,8 +1381,7 @@
 		 */
 		if (!PMC_IS_IMPL(cnum) || cnum < 4) {
 			DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum));
-			ret = -EINVAL;
-			goto abort_mission;
+			goto error;
 		}
 		/*
 		 * A PMC used to configure monitors must be:
@@ -1271,51 +1390,58 @@
 		 * any other configuration is rejected.
 		 */
 		if (PMC_IS_MONITOR(cnum) || PMC_IS_COUNTING(cnum)) {
-			DBprintk(("pmc[%u].pm=%ld\n", cnum, PMC_PM(cnum, tmp.reg_value))); 
+			DBprintk(("pmc[%u].pm=%ld\n", cnum, PMC_PM(cnum, value))); 
 
-			if (ctx->ctx_fl_system ^ PMC_PM(cnum, tmp.reg_value)) {
-				DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, tmp.reg_value), ctx->ctx_fl_system));
-				ret = -EINVAL;
-				goto abort_mission;
+			if (ctx->ctx_fl_system ^ PMC_PM(cnum, value)) {
+				DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, value), ctx->ctx_fl_system));
+				goto error;
 			}
 		}
 
 		if (PMC_IS_COUNTING(cnum)) {
-			pfm_monitor_t *p = (pfm_monitor_t *)&tmp.reg_value;
+			pfm_monitor_t *p = (pfm_monitor_t *)&value;
 			/*
 		 	 * enforce generation of overflow interrupt. Necessary on all
 		 	 * CPUs.
 		 	 */
 			p->pmc_oi = 1;
 
-			if (tmp.reg_flags & PFM_REGFL_OVFL_NOTIFY) {
+			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
 				/*
-				 * must have a target for the signal
-				 */
+			 	 * must have a target for the signal
+			 	 */
 				if (ctx->ctx_notify_task == NULL) {
-					DBprintk(("no notify_task && PFM_REGFL_OVFL_NOTIFY\n"));
-					ret = -EINVAL;
-					goto abort_mission;
+					DBprintk(("cannot set ovfl_notify: no notify_task\n"));
+					goto error;
 				}
+				flags |= PFM_REGFL_OVFL_NOTIFY;
+			}
 
-				ctx->ctx_soft_pmds[cnum].flags |= PFM_REGFL_OVFL_NOTIFY;
+			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
+
+			/* verify validity of reset_pmds */
+			if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) {
+				DBprintk(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
+				goto error;
 			}
-			/*
-			 * copy reset vector
-			 */
-			ctx->ctx_soft_pmds[cnum].reset_pmds[0] = tmp.reg_reset_pmds[0];
-			ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1];
-			ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2];
-			ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3];
+		} else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
+				DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum));
+				goto error;
 		}
+
 		/*
 		 * execute write checker, if any
 		 */
-		if (PMC_WR_FUNC(cnum)) ret = PMC_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
-abort_mission:
-		if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
+		if (PMC_WR_FUNC(cnum)) {
+			ret = PMC_WR_FUNC(cnum)(task, cnum, &value, regs);
+			if (ret) goto error;
+			ret = -EINVAL;
+		}
 
-		PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval);
+		/*
+		 * no error on this register
+		 */
+		PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
 
 		/*
 		 * update register return value, abort all if problem during copy.
@@ -1323,21 +1449,20 @@
 		if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
 
 		/*
-		 * if there was something wrong on this register, don't touch
-		 * the hardware at all and abort write request for others.
-		 *
-		 * On error, the user mut sequentially scan the table and the first
-		 * entry which has a return flag set is the one that caused the error.
+		 * Now we commit the changes to the software state
 		 */
-		if (ret != 0) {
-			DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
-				  task->pid, cnum, tmp.reg_value, reg_retval));
-			break;
-		}
 
 		/* 
-		 * We can proceed with this register!
+		 * full flag update each time a register is programmed
 		 */
+		ctx->ctx_soft_pmds[cnum].flags = flags;
+
+		if (PMC_IS_COUNTING(cnum)) {
+			ctx->ctx_soft_pmds[cnum].reset_pmds[0] = reset_pmds;
+
+			/* mark all PMDS to be accessed as used */
+			CTX_USED_PMD(ctx, reset_pmds);
+		}
 
 		/*
 		 * Needed in case the user does not initialize the equivalent
@@ -1349,16 +1474,25 @@
 		/* 
 		 * keep copy the pmc, used for register reload
 		 */
-		th->pmc[cnum] = tmp.reg_value;
+		th->pmc[cnum] = value;
 
-		ia64_set_pmc(cnum, tmp.reg_value);
+		ia64_set_pmc(cnum, value);
 
 		DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n", 
-			  task->pid, cnum, tmp.reg_value, 
+			  task->pid, cnum, value, 
 			  ctx->ctx_soft_pmds[cnum].flags, 
 			  ctx->ctx_used_pmds[0]));
-
 	}
+
+	return 0;
+
+error:
+	PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
+
+	if (copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
+
+	DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", task->pid, cnum, value, ret));
+
 	return ret;
 }
 
@@ -1366,9 +1500,10 @@
 pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
 	pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
+	unsigned long value, hw_value;
 	unsigned int cnum;
 	int i;
-	int ret = 0, reg_retval = 0;
+	int ret = -EINVAL;
 
 	/* we don't quite support this right now */
 	if (task != current) return -EINVAL;
@@ -1385,66 +1520,92 @@
 
 		if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
 
-		cnum = tmp.reg_num;
+		cnum  = tmp.reg_num;
+		value = tmp.reg_value;
+
 		if (!PMD_IS_IMPL(cnum)) {
-			ret = -EINVAL;
+			DBprintk(("pmd[%u] is unimplemented or invalid\n", cnum));
 			goto abort_mission;
 		}
 
-		/* update virtualized (64bits) counter */
-		if (PMD_IS_COUNTING(cnum)) {
-			ctx->ctx_soft_pmds[cnum].ival = tmp.reg_value;
-			ctx->ctx_soft_pmds[cnum].val  = tmp.reg_value & ~pmu_conf.perf_ovfl_val;
-			ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset;
-			ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
-
-		}
 		/*
 		 * execute write checker, if any
 		 */
-		if (PMD_WR_FUNC(cnum)) ret = PMD_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
-abort_mission:
-		if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
-
-		PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval);
+		if (PMD_WR_FUNC(cnum)) {
+			unsigned long v = value;
+			ret = PMD_WR_FUNC(cnum)(task, cnum, &v, regs);
+			if (ret) goto abort_mission;
+			value = v;
+			ret = -EINVAL;
+		}
+		hw_value = value;
+		/*
+		 * no error on this register
+		 */
+		PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
 
-		if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
+		if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
 
 		/*
-		 * if there was something wrong on this register, don't touch
-		 * the hardware at all and abort write request for others.
-		 *
-		 * On error, the user mut sequentially scan the table and the first
-		 * entry which has a return flag set is the one that caused the error.
+		 * now commit changes to software state
 		 */
-		if (ret != 0) {
-			DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
-				  task->pid, cnum, tmp.reg_value, reg_retval));
-			break;
+
+		/* update virtualized (64bits) counter */
+		if (PMD_IS_COUNTING(cnum)) {
+			ctx->ctx_soft_pmds[cnum].lval = value;
+			ctx->ctx_soft_pmds[cnum].val  = value & ~pmu_conf.ovfl_val;
+
+			hw_value = value & pmu_conf.ovfl_val;
+
+			ctx->ctx_soft_pmds[cnum].long_reset  = tmp.reg_long_reset;
+			ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
+
+			ctx->ctx_soft_pmds[cnum].seed = tmp.reg_random_seed;
+			ctx->ctx_soft_pmds[cnum].mask = tmp.reg_random_mask;
 		}
 
 		/* keep track of what we use */
 		CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]);
+
 		/* mark this register as used as well */
 		CTX_USED_PMD(ctx, RDEP(cnum));
 
 		/* writes to unimplemented part is ignored, so this is safe */
-		ia64_set_pmd(cnum, tmp.reg_value & pmu_conf.perf_ovfl_val);
+		ia64_set_pmd(cnum, hw_value);
 
 		/* to go away */
 		ia64_srlz_d();
 
-		DBprintk(("[%d] pmd[%u]: soft_pmd=0x%lx  short_reset=0x%lx "
+		DBprintk(("[%d] pmd[%u]: value=0x%lx hw_value=0x%lx soft_pmd=0x%lx  short_reset=0x%lx "
 			  "long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx\n",
 				task->pid, cnum,
+				value, hw_value,
 				ctx->ctx_soft_pmds[cnum].val,
 				ctx->ctx_soft_pmds[cnum].short_reset,
 				ctx->ctx_soft_pmds[cnum].long_reset,
-				ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val,
+				ia64_get_pmd(cnum) & pmu_conf.ovfl_val,
 				PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
 				ctx->ctx_used_pmds[0],
 				ctx->ctx_soft_pmds[cnum].reset_pmds[0]));
 	}
+
+	return 0;
+
+abort_mission:
+	/*
+	 * for now, we have only one possibility for error
+	 */
+	PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
+
+	/*
+	 * we change the return value to EFAULT in case we cannot write register return code.
+	 * The caller first must correct this error, then a resubmission of the request will
+	 * eventually yield the EINVAL.
+	 */
+	if (copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
+
+	DBprintk(("[%d] pmc[%u]=0x%lx ret %d\n", task->pid, cnum, value, ret));
+
 	return ret;
 }
 
@@ -1452,10 +1613,10 @@
 pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
 	struct thread_struct *th = &task->thread;
-	unsigned long val=0;
+	unsigned long val, lval;
 	pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
-	unsigned int cnum;
-	int i, ret = 0;
+	unsigned int cnum, reg_flags = 0;
+	int i, ret = -EINVAL;
 
 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
 
@@ -1471,11 +1632,12 @@
 	DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), task->pid));
 
 	for (i = 0; i < count; i++, req++) {
-		unsigned long ctx_val = ~0UL;
 
 		if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
 
-		cnum = tmp.reg_num;
+		cnum      = tmp.reg_num;
+		reg_flags = tmp.reg_flags;
+		lval      = 0UL;
 
 		if (!PMD_IS_IMPL(cnum)) goto abort_mission;
 		/*
@@ -1498,46 +1660,30 @@
 			val = ia64_get_pmd(cnum);
 			DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val));
 		} else {
-#ifdef CONFIG_SMP
-			int cpu;
-			/*
-			 * for SMP system, the context may still be live on another
-			 * CPU so we need to fetch it before proceeding with the read
-			 * This call we only be made once for the whole loop because
-			 * of ctx_last_cpu becoming == -1.
-			 *
-			 * We cannot reuse ctx_last_cpu as it may change before we get to the
-			 * actual IPI call. In this case, we will do the call for nothing but
-			 * there is no way around it. The receiving side will simply do nothing.
-			 */
-			cpu = atomic_read(&ctx->ctx_last_cpu);
-			if (cpu != -1) {
-				DBprintk(("must fetch on CPU%d for [%d]\n", cpu, task->pid));
-				pfm_fetch_regs(cpu, task, ctx);
-			}
-#endif
-			/* context has been saved */
 			val = th->pmd[cnum];
 		}
 		if (PMD_IS_COUNTING(cnum)) {
 			/*
 			 * XXX: need to check for overflow
 			 */
+			val &= pmu_conf.ovfl_val;
+			val += ctx->ctx_soft_pmds[cnum].val;
 
-			val &= pmu_conf.perf_ovfl_val;
-			val += ctx_val = ctx->ctx_soft_pmds[cnum].val;
+			lval = ctx->ctx_soft_pmds[cnum].lval;
 		} 
 
-		tmp.reg_value = val;
-
 		/*
 		 * execute read checker, if any
 		 */
 		if (PMD_RD_FUNC(cnum)) {
-			ret = PMD_RD_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
+			unsigned long v = val;
+			ret = PMD_RD_FUNC(cnum)(task, cnum, &v, regs);
+			val = v;
 		}
 
-		PFM_REG_RETFLAG_SET(tmp.reg_flags, ret);
+		tmp.reg_value = val;
+
+		PFM_REG_RETFLAG_SET(reg_flags, ret);
 
 		DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n", 
 					cnum, ret, val, ia64_get_pmc(cnum)));
@@ -1547,12 +1693,10 @@
 	return 0;
 abort_mission:
 	PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
-	/* 
-	 * XXX: if this fails, we stick with the original failure, flag not updated!
-	 */
-	copy_to_user(req, &tmp, sizeof(tmp));
-	return -EINVAL;
 
+	if (copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
+
+	return ret;
 }
 
 #ifdef PFM_PMU_USES_DBR
@@ -1594,7 +1738,7 @@
 	else
 		pfm_sessions.pfs_ptrace_use_dbregs++;
 
-	DBprintk(("ptrace_use_dbregs=%lu  sys_use_dbregs=%lu by [%d] ret = %d\n", 
+	DBprintk(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n", 
 		  pfm_sessions.pfs_ptrace_use_dbregs, 
 		  pfm_sessions.pfs_sys_use_dbregs, 
 		  task->pid, ret));
@@ -1660,9 +1804,12 @@
 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
 
 	if (task == current) {
-		DBprintk(("restarting self %d frozen=%d \n", current->pid, ctx->ctx_fl_frozen));
+		DBprintk(("restarting self %d frozen=%d ovfl_regs=0x%lx\n", 
+			task->pid, 
+			ctx->ctx_fl_frozen,
+			ctx->ctx_ovfl_regs[0]));
 
-		pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_RELOAD_LONG_RESET);
+		pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
 
 		ctx->ctx_ovfl_regs[0] = 0UL;
 
@@ -1678,8 +1825,7 @@
 		}
 
 		/* simply unfreeze */
-		ia64_set_pmc(0, 0);
-		ia64_srlz_d();
+		pfm_unfreeze_pmu();
 
 		return 0;
 	} 
@@ -1701,6 +1847,7 @@
 		up(sem);
 	} else {
 		task->thread.pfm_ovfl_block_reset = 1;
+		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
 	}
 #if 0
 	/*
@@ -1721,44 +1868,6 @@
 	return 0;
 }
 
-#ifndef CONFIG_SMP
-/*
- * On UP kernels, we do not need to constantly set the psr.pp bit
- * when a task is scheduled. The psr.pp bit can only be changed in
- * the kernel because of a user request. Given we are on a UP non preeemptive 
- * kernel we know that no other task is running, so we cna simply update their
- * psr.pp from their saved state. There is this no impact on the context switch
- * code compared to the SMP case.
- */
-static void
-pfm_tasklist_toggle_pp(unsigned int val)
-{
-	struct task_struct *p;
-	struct pt_regs *regs;
-
-	DBprintk(("invoked by [%d] pp=%u\n", current->pid, val));
-
-	read_lock(&tasklist_lock);
-
-	for_each_task(p) {
-       		regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);
-
-		/*
-		 * position on pt_regs saved on stack on 1st entry into the kernel
-		 */
-		regs--;
-
-		/*
-		 * update psr.pp
-		 */
-		ia64_psr(regs)->pp = val;
-	}
-	read_unlock(&tasklist_lock);
-}
-#endif
-
-
-
 static int
 pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, 
 	 struct pt_regs *regs)
@@ -1783,22 +1892,17 @@
 		ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
 
 		/* stop monitoring */
-		__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
-
+		pfm_clear_psr_pp();
 		ia64_srlz_i();
 
-#ifdef CONFIG_SMP
-		local_cpu_data->pfm_dcr_pp  = 0;
-#else
-		pfm_tasklist_toggle_pp(0);
-#endif
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+
 		ia64_psr(regs)->pp = 0;
 
 	} else {
 
 		/* stop monitoring */
-		__asm__ __volatile__ ("rum psr.up;;"::: "memory");
-
+		pfm_clear_psr_up();
 		ia64_srlz_i();
 
 		/*
@@ -1960,14 +2064,9 @@
 	int i, ret = 0;
 
 	/*
-	 * for range restriction: psr.db must be cleared or the
-	 * the PMU will ignore the debug registers.
-	 *
-	 * XXX: may need more in system wide mode,
-	 * no task can have this bit set?
+	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
+	 * ensuring that no real breakpoint can be installed via this call.
 	 */
-	if (ia64_psr(regs)->db == 1) return -EINVAL;
-
 
 	first_time = ctx->ctx_fl_using_dbreg == 0;
 
@@ -2196,16 +2295,13 @@
 
 	if (ctx->ctx_fl_system) {
 		
-#ifdef CONFIG_SMP
-		local_cpu_data->pfm_dcr_pp  = 1;
-#else
-		pfm_tasklist_toggle_pp(1);
-#endif
+		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
+
 		/* set user level psr.pp */
 		ia64_psr(regs)->pp = 1;
 
 		/* start monitoring at kernel level */
-		__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
+		pfm_set_psr_pp();
 
 		/* enable dcr pp */
 		ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP);
@@ -2221,7 +2317,7 @@
 		ia64_psr(regs)->up = 1;
 
 		/* start monitoring at kernel level */
-		__asm__ __volatile__ ("sum psr.up;;"::: "memory");
+		pfm_set_psr_up();
 
 		ia64_srlz_i();
 	}
@@ -2240,7 +2336,7 @@
 		pfm_lazy_save_regs(PMU_OWNER());
 
 	/* reset all registers to stable quiet state */
-	ia64_reset_pmu(task);
+	pfm_reset_pmu(task);
 
 	/* make sure nothing starts */
 	if (ctx->ctx_fl_system) {
@@ -2248,13 +2344,12 @@
 		ia64_psr(regs)->up = 0; /* just to make sure! */
 
 		/* make sure monitoring is stopped */
-		__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
+		pfm_clear_psr_pp();
 		ia64_srlz_i();
 
-#ifdef CONFIG_SMP
-		local_cpu_data->pfm_syst_wide = 1;
-		local_cpu_data->pfm_dcr_pp    = 0;
-#endif
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
+		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
 	} else {
 		/*
 		 * needed in case the task was a passive task during
@@ -2265,7 +2360,7 @@
 		ia64_psr(regs)->up = 0;
 
 		/* make sure monitoring is stopped */
-		__asm__ __volatile__ ("rum psr.up;;"::: "memory");
+		pfm_clear_psr_up();
 		ia64_srlz_i();
 
 		DBprintk(("clearing psr.sp for [%d]\n", current->pid));
@@ -2283,8 +2378,7 @@
 	atomic_set(&ctx->ctx_last_cpu, smp_processor_id());
 
 	/* simply unfreeze */
-	ia64_set_pmc(0, 0);
-	ia64_srlz_d();
+	pfm_unfreeze_pmu();
 
 	return 0;
 }
@@ -2295,7 +2389,7 @@
 {
 	pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
 	unsigned int cnum;
-	int i;
+	int i, ret = -EINVAL;
 
 	for (i = 0; i < count; i++, req++) {
 
@@ -2305,7 +2399,7 @@
 
 		if (!PMC_IS_IMPL(cnum)) goto abort_mission;
 
-		tmp.reg_value = reset_pmcs[cnum];
+		tmp.reg_value = PMC_DFL_VAL(cnum);
 
 		PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
 
@@ -2316,11 +2410,9 @@
 	return 0;
 abort_mission:
 	PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
-	/* 
-	 * XXX: if this fails, we stick with the original failure, flag not updated!
-	 */
-	copy_to_user(req, &tmp, sizeof(tmp));
-	return -EINVAL;
+	if (copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
+
+	return ret;
 }
 
 /*
@@ -2328,21 +2420,21 @@
  */
 static pfm_cmd_desc_t pfm_cmd_tab[]={
 /* 0  */{ NULL, 0, 0, 0}, /* not used */
-/* 1  */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, 
-/* 2  */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
-/* 3  */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, 
+/* 1  */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, 
+/* 2  */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
+/* 3  */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, 
 /* 4  */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
 /* 5  */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
 /* 6  */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
 /* 7  */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
-/* 8  */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, 1, sizeof(pfarg_context_t)},
+/* 8  */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_RW, 1, sizeof(pfarg_context_t)},
 /* 9  */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
 /* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0},
 /* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
-/* 12 */{ pfm_get_features, PFM_CMD_ARG_WRITE, 0, 0},
+/* 12 */{ pfm_get_features, PFM_CMD_ARG_RW, 0, 0},
 /* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)},
 /* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
-/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
+/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
 /* 16 */{ NULL, 0, 0, 0}, /* not used */
 /* 17 */{ NULL, 0, 0, 0}, /* not used */
 /* 18 */{ NULL, 0, 0, 0}, /* not used */
@@ -2360,8 +2452,8 @@
 /* 30 */{ NULL, 0, 0, 0}, /* not used */
 /* 31 */{ NULL, 0, 0, 0}, /* not used */
 #ifdef PFM_PMU_USES_DBR
-/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
-/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
+/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
+/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
 #endif
 };
 #define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
@@ -2376,19 +2468,10 @@
 	 * after the task is marked as STOPPED but before pfm_save_regs()
 	 * is completed.
 	 */
-	for (;;) {
-
-		task_lock(task);
-		if (!task_has_cpu(task)) break;
-		task_unlock(task);
-
-		do {
-			if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) return -EBUSY;
-			barrier();
-			cpu_relax();
-		} while (task_has_cpu(task));
-	}
-	task_unlock(task);
+	if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) return -EBUSY;
+	DBprintk(("before wait_task_inactive [%d] state %ld\n", task->pid, task->state));
+	wait_task_inactive(task);
+	DBprintk(("after wait_task_inactive [%d] state %ld\n", task->pid, task->state));
 #else
 	if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
 		DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
@@ -2398,7 +2481,7 @@
 	return ret;
 }
 
-asmlinkage int
+asmlinkage long
 sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6, long arg7, 
 		long arg8, long stack)
 {
@@ -2406,7 +2489,8 @@
 	struct task_struct *task = current;
 	pfm_context_t *ctx;
 	size_t sz;
-	int ret, narg;
+	long ret;
+	int narg;
 
 	/* 
 	 * reject any call if perfmon was disabled at initialization time
@@ -2426,7 +2510,7 @@
 
 	if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT;
 
-	if (PFM_CMD_WRITE_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
+	if (PFM_CMD_RW_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
 
 	if (PFM_CMD_USE_PID(cmd))  {
 		/* 
@@ -2442,6 +2526,10 @@
 
 			task = find_task_by_pid(pid);
 
+			if (task) get_task_struct(task);
+
+			read_unlock(&tasklist_lock);
+
 			if (!task) goto abort_call;
 
 			ret = -EPERM;
@@ -2479,23 +2567,118 @@
 	ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(task, ctx, arg, count, regs);
 
 abort_call:
-	if (task != current) read_unlock(&tasklist_lock);
+	if (task && task != current) free_task_struct(task);
 
 	return ret;
 }
 
-#if __GNUC__ >= 3
-void asmlinkage
-pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, 
-		      u64 arg6, u64 arg7, long info)
-#else
+/*
+ * send SIGPROF to the registered notify task, must be invoked when it
+ * is safe to send a signal, e.g., not holding any runqueue
+ * related locks.
+ */
+static int
+pfm_notify_user(pfm_context_t *ctx)
+{
+	struct siginfo si;
+	int ret;
+
+	if (ctx->ctx_notify_task == NULL) {
+		DBprintk(("[%d] no notifier\n", current->pid));
+		return -EINVAL;
+	}
+
+	si.si_errno    = 0;
+	si.si_addr     = NULL;
+	si.si_pid      = current->pid; /* who is sending */
+	si.si_signo    = SIGPROF;
+	si.si_code     = PROF_OVFL;
+
+	si.si_pfm_ovfl[0] = ctx->ctx_ovfl_regs[0];
+
+	/*
+	 * when the target of the signal is not ourself, we have to be more
+	 * careful. The notify_task may be cleared by the target task itself
+	 * in release_thread(). We must ensure mutual exclusion here such that
+	 * the signal is delivered (even to a dying task) safely.
+	 */
+
+	if (ctx->ctx_notify_task != current) {
+		/*
+		 * grab the notification lock for this task
+		 * This guarantees that the sequence: test + send_signal
+		 * is atomic with regards to the ctx_notify_task field.
+		 *
+		 * We need a spinlock and not just an atomic variable for this.
+		 *
+		 */
+		spin_lock(&ctx->ctx_lock);
+
+		/*
+		 * now notify_task cannot be modified until we're done
+		 * if NULL, then it got modified while we were in the handler
+		 */
+		if (ctx->ctx_notify_task == NULL) {
+
+			spin_unlock(&ctx->ctx_lock);
+
+			/*
+			 * If we've lost the notified task, then we will run
+			 * to completion but keep the PMU frozen. Results
+			 * will be incorrect anyway. We do not kill task
+			 * to leave it possible to attach perfmon context
+			 * to already running task.
+			 */
+			printk("perfmon: pfm_notify_user() lost notify_task\n");
+			DBprintk_ovfl(("notification task has disappeared !\n"));
+
+			/* we cannot afford to block now */
+			ctx->ctx_fl_block = 0;
+
+			return  -EINVAL;
+		}
+
+		/*
+		 * required by send_sig_info() to make sure the target
+		 * task does not disappear on us.
+		 */
+		read_lock(&tasklist_lock);
+	}
+	/*
+ 	 * in this case, we don't stop the task, we let it go on. It will
+ 	 * necessarily go to the signal handler (if any) when it goes back to
+ 	 * user mode.
+ 	 */
+	DBprintk_ovfl(("[%d] sending notification to [%d]\n", 
+			current->pid, ctx->ctx_notify_task->pid));
+
+	/* 
+	 * this call is safe in an interrupt handler, and so is read_lock() on tasklist_lock
+	 */
+	ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task);
+	if (ret) {
+		printk("perfmon: send_sig_info(process %d, SIGPROF)=%d\n", 
+				ctx->ctx_notify_task->pid, ret);
+	}
+
+	/*
+	 * now undo the protections in order
+	 */
+	if (ctx->ctx_notify_task != current) {
+		read_unlock(&tasklist_lock);
+		spin_unlock(&ctx->ctx_lock);
+	}
+	return ret;
+}
+
+
 void asmlinkage
 pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, 
 		      u64 arg6, u64 arg7, long info)
-#endif
 {
 	struct thread_struct *th = &current->thread;
 	pfm_context_t *ctx = current->thread.pfm_context;
+	unsigned int reason;
 	int ret;
 
 	/*
@@ -2503,7 +2686,6 @@
 	 * again
 	 */
 	th->pfm_ovfl_block_reset = 0;
-
 	/*
 	 * do some sanity checks first
 	 */
@@ -2511,8 +2693,31 @@
 		printk("perfmon: [%d] has no PFM context\n", current->pid);
 		return;
 	}
+	/*
+	 * extract reason for being here and clear
+	 */
+	reason = ctx->ctx_fl_trap_reason;
+	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
+
+	DBprintk(("[%d] reason=%d\n", current->pid, reason));
+
+	/*
+	 * just here for a reset (non-blocking context only)
+	 */
+	if (reason == PFM_TRAP_REASON_RESET) goto non_blocking;
+
+	/*
+	 * first notify user. This can fail if notify_task has disappeared.
+	 */
+	if (reason == PFM_TRAP_REASON_SIG || reason == PFM_TRAP_REASON_BLOCKSIG) {
+		ret = pfm_notify_user(ctx);
+		if (ret) return;
+	}
 
-	if (CTX_OVFL_NOBLOCK(ctx)) goto non_blocking;
+	/*
+	 * came here just to signal (non-blocking)
+	 */
+	if (reason == PFM_TRAP_REASON_SIG) return;
 
 	DBprintk(("[%d] before sleeping\n", current->pid));
 
@@ -2537,7 +2742,7 @@
 		 * use the local reference
 		 */
 
-		pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_RELOAD_LONG_RESET);
+		pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
 
 		ctx->ctx_ovfl_regs[0] = 0UL;
 
@@ -2550,8 +2755,7 @@
 			ctx->ctx_psb->psb_index = 0;
 		}
 
-		ia64_set_pmc(0, 0);
-		ia64_srlz_d();
+		pfm_unfreeze_pmu();
 
 		/* state restored, can go back to work (user mode) */
 	}
@@ -2577,12 +2781,12 @@
 	DBprintk_ovfl(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries));
 
 	/*
-	* XXX: there is a small chance that we could run out on index before resetting
-	* but index is unsigned long, so it will take some time.....
-	* We use > instead of == because fetch_and_add() is off by one (see below)
-	*
-	* This case can happen in non-blocking mode or with multiple processes.
-	* For non-blocking, we need to reload and continue.
+	 * XXX: there is a small chance that we could run out on index before resetting
+	 * but index is unsigned long, so it will take some time.....
+	 * We use > instead of == because fetch_and_add() is off by one (see below)
+	 *
+	 * This case can happen in non-blocking mode or with multiple processes.
+	 * For non-blocking, we need to reload and continue.
 	 */
 	if (idx > psb->psb_entries) return 0;
 
@@ -2596,13 +2800,12 @@
 	 */
 	h->pid  = current->pid;
 	h->cpu  = smp_processor_id();
-	h->rate = 0; /* XXX: add the sampling rate used here */
-	h->ip   = regs ? regs->cr_iip : 0x0;	/* where did the fault happened */
+	h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL;
+	h->ip   = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL;
 	h->regs = ovfl_mask; 			/* which registers overflowed */
 
 	/* guaranteed to monotonically increase on each cpu */
 	h->stamp  = pfm_get_stamp();
-	h->period = 0UL; /* not yet used */
 
 	/* position for first pmd */
 	e = (unsigned long *)(h+1);
@@ -2617,15 +2820,13 @@
 
 		if (PMD_IS_COUNTING(j)) {
 			*e  =  pfm_read_soft_counter(ctx, j);
-			/* check if this pmd overflowed as well */
-			*e +=  ovfl_mask & (1UL<<j) ? 1 + pmu_conf.perf_ovfl_val : 0;
 		} else {
 			*e = ia64_get_pmd(j); /* slow */
 		}
 		DBprintk_ovfl(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
 		e++;
 	}
-	pfm_stats.pfm_recorded_samples_count++;
+	pfm_stats[smp_processor_id()].pfm_recorded_samples_count++;
 
 	/*
 	 * make the new entry visible to user, needs to be atomic
@@ -2642,7 +2843,7 @@
 		/*
 		 * XXX: must reset buffer in blocking mode and lost notified
 		 */
-		pfm_stats.pfm_full_smpl_buffer_count++;
+		pfm_stats[smp_processor_id()].pfm_full_smpl_buffer_count++;
 		return 1;
 	}
 	return 0;
@@ -2663,7 +2864,6 @@
 	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
 	int i;
 	int ret = 1;
-	struct siginfo si;
 	/*
 	 * It is never safe to access the task for which the overflow interrupt is destinated
 	 * using the current variable as the interrupt may occur in the middle of a context switch
@@ -2686,7 +2886,7 @@
 	if ((t->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) {
 		printk("perfmon: Spurious overflow interrupt: process %d not using perfmon\n", 
 			task->pid);
-		return 0x1;
+		return 0x1UL;
 	}
 	/*
 	 * sanity test. Should never happen
@@ -2694,7 +2894,7 @@
 	if ((pmc0 & 0x1) == 0) {
 		printk("perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n", 
 			task->pid, pmc0);
-		return 0x0;
+		return 0x0UL;
 	}
 
 	mask = pmc0 >> PMU_FIRST_COUNTER;
@@ -2719,23 +2919,13 @@
 			  i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val));
 
 		/*
-		 * Because we sometimes (EARS/BTB) reset to a specific value, we cannot simply use
-		 * val to count the number of times we overflowed. Otherwise we would loose the 
-		 * current value in the PMD (which can be >0). So to make sure we don't loose
-		 * the residual counts we set val to contain full 64bits value of the counter.
+		 * Note that the pmd is not necessarily 0 at this point as qualified events
+		 * may have happened before the PMU was frozen. The residual count is not
+		 * taken into consideration here but will be with any read of the pmd via
+		 * pfm_read_pmds().
 		 */
 		old_val = ctx->ctx_soft_pmds[i].val;
-		ctx->ctx_soft_pmds[i].val = 1 + pmu_conf.perf_ovfl_val + pfm_read_soft_counter(ctx, i);
-
-		DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx\n", 
-			  i, ctx->ctx_soft_pmds[i].val, old_val, 
-			  ia64_get_pmd(i) & pmu_conf.perf_ovfl_val));
-
-		/*
-		 * now that we have extracted the hardware counter, we can clear it to ensure
-		 * that a subsequent PFM_READ_PMDS will not include it again.
-		 */
-		ia64_set_pmd(i, 0UL);
+		ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
 
 		/*
 		 * check for overflow condition
@@ -2744,12 +2934,13 @@
 
 			ovfl_pmds |= 1UL << i;
 
-			DBprintk_ovfl(("soft_pmd[%d] overflowed flags=0x%x, ovfl=0x%lx\n", i, ctx->ctx_soft_pmds[i].flags, ovfl_pmds));
-
 			if (PMC_OVFL_NOTIFY(ctx, i)) {
 				ovfl_notify |= 1UL << i;
 			}
 		}
+		DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", 
+			  i, ctx->ctx_soft_pmds[i].val, old_val, 
+			  ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify));
 	}
 
 	/*
@@ -2763,7 +2954,7 @@
 		if (ret == 1) {
 			/*
 			 * Sampling buffer became full
-			 * If no notication was requested, then we reset buffer index
+			 * If no notification was requested, then we reset buffer index
 			 * and reset registers (done below) and resume.
 			 * If notification requested, then defer reset until pfm_restart()
 			 */
@@ -2784,8 +2975,8 @@
 	 */
 	if (ovfl_notify == 0UL) {
 		if (ovfl_pmds) 
-			pfm_reset_regs(ctx, &ovfl_pmds, PFM_RELOAD_SHORT_RESET);
-		return 0x0;
+			pfm_reset_regs(ctx, &ovfl_pmds, PFM_PMD_SHORT_RESET);
+		return 0x0UL;
 	}
 
 	/* 
@@ -2793,152 +2984,52 @@
 	 */
 	ctx->ctx_ovfl_regs[0]  = ovfl_pmds; 
 
-	/*
-	 * we have come to this point because there was an overflow and that notification
-	 * was requested. The notify_task may have disappeared, in which case notify_task
-	 * is NULL.
-	 */
-	if (ctx->ctx_notify_task) {
-
-		si.si_errno    = 0;
-		si.si_addr     = NULL;
-		si.si_pid      = task->pid; /* who is sending */
-
-		si.si_signo    = SIGPROF;
-		si.si_code     = PROF_OVFL; /* indicates a perfmon SIGPROF signal */
-		/*
-		 * Shift the bitvector such that the user sees bit 4 for PMD4 and so on.
-		 * We only use smpl_ovfl[0] for now. It should be fine for quite a while
-		 * until we have more than 61 PMD available.
-		 */
-		si.si_pfm_ovfl[0] = ovfl_notify;
-	
-		/*
-		 * when the target of the signal is not ourself, we have to be more
-		 * careful. The notify_task may being cleared by the target task itself
-		 * in release_thread(). We must ensure mutual exclusion here such that
-		 * the signal is delivered (even to a dying task) safely.
-		 */
-
-		if (ctx->ctx_notify_task != current) {
-			/*
-			 * grab the notification lock for this task
-			 * This guarantees that the sequence: test + send_signal
-			 * is atomic with regards to the ctx_notify_task field.
-			 *
-			 * We need a spinlock and not just an atomic variable for this.
-			 *
-			 */
-			spin_lock(&ctx->ctx_lock);
-
-			/*
-			 * now notify_task cannot be modified until we're done
-			 * if NULL, they it got modified while we were in the handler
-			 */
-			if (ctx->ctx_notify_task == NULL) {
-
-				spin_unlock(&ctx->ctx_lock);
-
-				/*
-				 * If we've lost the notified task, then we will run
-				 * to completion wbut keep the PMU frozen. Results
-				 * will be incorrect anyway. We do not kill task
-				 * to leave it possible to attach perfmon context
-				 * to already running task.
-				 */
-				goto lost_notify;
-			}
-			/*
-			 * required by send_sig_info() to make sure the target
-			 * task does not disappear on us.
-			 */
-			read_lock(&tasklist_lock);
-		}
-		/*
-	 	 * in this case, we don't stop the task, we let it go on. It will
-	 	 * necessarily go to the signal handler (if any) when it goes back to
-	 	 * user mode.
-	 	 */
-		DBprintk_ovfl(("[%d] sending notification to [%d]\n", 
-			  task->pid, ctx->ctx_notify_task->pid));
-
-
-		/* 
-		 * this call is safe in an interrupt handler, so does read_lock() on tasklist_lock
-		 */
-		ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task);
-		if (ret != 0) 
-			printk("send_sig_info(process %d, SIGPROF)=%d\n",  
-			       ctx->ctx_notify_task->pid, ret);
-		/*
-		 * now undo the protections in order
-		 */
-		if (ctx->ctx_notify_task != current) {
-			read_unlock(&tasklist_lock);
-			spin_unlock(&ctx->ctx_lock);
-		}
-
-		/*
-		 * if we block set the pfm_must_block bit
-		 * when in block mode, we can effectively block only when the notified
-		 * task is not self, otherwise we would deadlock. 
-		 * in this configuration, the notification is sent, the task will not 
-		 * block on the way back to user mode, but the PMU will be kept frozen
-		 * until PFM_RESTART.
-		 * Note that here there is still a race condition with notify_task
-		 * possibly being nullified behind our back, but this is fine because
-		 * it can only be changed to NULL which by construction, can only be
-		 * done when notify_task != current. So if it was already different
-		 * before, changing it to NULL will still maintain this invariant.
-		 * Of course, when it is equal to current it cannot change at this point.
-		 */
-		DBprintk_ovfl(("block=%d notify [%d] current [%d]\n", 
-			ctx->ctx_fl_block,
-			ctx->ctx_notify_task ? ctx->ctx_notify_task->pid: -1, 
-			current->pid ));
+	DBprintk_ovfl(("block=%d notify [%d] current [%d]\n", 
+		ctx->ctx_fl_block,
+		ctx->ctx_notify_task ? ctx->ctx_notify_task->pid: -1, 
+		current->pid ));
 
-		if (!CTX_OVFL_NOBLOCK(ctx) && ctx->ctx_notify_task != task) {
-			t->pfm_ovfl_block_reset = 1; /* will cause blocking */
-		}
+	/* 
+	 * ctx_notify_task could already be NULL, checked in pfm_notify_user() 
+	 */
+	if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_notify_task != task) {
+		t->pfm_ovfl_block_reset = 1; /* will cause blocking */
+		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCKSIG;
 	} else {
-lost_notify: /* XXX: more to do here, to convert to non-blocking (reset values) */
-
-		DBprintk_ovfl(("notification task has disappeared !\n"));
-		/*
-		 * for a non-blocking context, we make sure we do not fall into the 
-		 * pfm_overflow_notify() trap. Also in the case of a blocking context with lost 
-		 * notify process, then we do not want to block either (even though it is 
-		 * interruptible). In this case, the PMU will be kept frozen and the process will 
-		 * run to completion without monitoring enabled.
-		 *
-		 * Of course, we cannot loose notify process when self-monitoring.
-		 */
-		t->pfm_ovfl_block_reset = 0; 
-
+		t->pfm_ovfl_block_reset = 1; /* will cause blocking */
+		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_SIG;
 	}
+
 	/*
-	 * If notification was successful, then we rely on the pfm_restart()
-	 * call to unfreeze and reset (in both blocking or non-blocking mode).
-	 *
-	 * If notification failed, then we will keep the PMU frozen and run
-	 * the task to completion
+	 * keep the PMU frozen until either pfm_restart() or 
+	 * task completes (non-blocking or notify_task gone).
 	 */
 	ctx->ctx_fl_frozen = 1;
 
-	DBprintk_ovfl(("return pmc0=0x%x must_block=%ld\n",
-				ctx->ctx_fl_frozen ? 0x1 : 0x0, t->pfm_ovfl_block_reset));
+	DBprintk_ovfl(("return pmc0=0x%x must_block=%ld reason=%d\n",
+		ctx->ctx_fl_frozen ? 0x1 : 0x0, 
+		t->pfm_ovfl_block_reset,
+		ctx->ctx_fl_trap_reason));
 
-	return ctx->ctx_fl_frozen ? 0x1 : 0x0;
+	return 0x1UL;
 }
 
 static void
-perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
+pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 {
 	u64 pmc0;
 	struct task_struct *task;
 	pfm_context_t *ctx;
 
-	pfm_stats.pfm_ovfl_intr_count++;
+	pfm_stats[smp_processor_id()].pfm_ovfl_intr_count++;
+
+	/*
+	 * if an alternate handler is registered, just bypass the default one
+	 */
+	if (pfm_alternate_intr_handler) {
+		(*pfm_alternate_intr_handler->handler)(irq, arg, regs);
+		return;
+	}
 
 	/* 
 	 * srlz.d done before arriving here
@@ -2963,75 +3054,59 @@
 				task->pid);
 			return;
 		}
-#ifdef CONFIG_SMP
-		/*
-		 * Because an IPI has higher priority than the PMU overflow interrupt, it is 
-		 * possible that the handler be interrupted by a request from another CPU to fetch 
-		 * the PMU state of the currently active context. The task may have just been 
-		 * migrated to another CPU which is trying to restore the context. If there was
-		 * a pending overflow interrupt when the task left this CPU, it is possible for
-		 * the handler to get interrupt by the IPI. In which case, we fetch request
-		 * MUST be postponed until the interrupt handler is done. The ctx_is_busy
-		 * flag indicates such a condition. The other CPU must busy wait until it's cleared.
-		 */
-		atomic_set(&ctx->ctx_is_busy, 1);
-#endif
-
 		/* 
 		 * assume PMC[0].fr = 1 at this point 
 		 */
 		pmc0 = pfm_overflow_handler(task, ctx, pmc0, regs);
 
 		/*
-		 * We always clear the overflow status bits and either unfreeze
-		 * or keep the PMU frozen.
-		 */
-		ia64_set_pmc(0, pmc0);
-		ia64_srlz_d();
-
-#ifdef CONFIG_SMP
-		/*
-		 * announce that we are doing with the context
+		 * we can only update pmc0 when the overflow
+		 * is for the current context. In UP the current
+		 * task may not be the one owning the PMU
 		 */
-		atomic_set(&ctx->ctx_is_busy, 0);
-#endif
+		if (task == current) {
+			/*
+		 	* We always clear the overflow status bits and either unfreeze
+		 	* or keep the PMU frozen.
+		 	*/
+			ia64_set_pmc(0, pmc0);
+			ia64_srlz_d();
+		} else {
+			task->thread.pmc[0] = pmc0;
+		}
 	} else {
-		pfm_stats.pfm_spurious_ovfl_intr_count++;
-
-		printk("perfmon: Spurious PMU overflow interrupt on CPU%d: pmc0=0x%lx owner=%p\n", 
-			smp_processor_id(), pmc0, (void *)PMU_OWNER());
+		pfm_stats[smp_processor_id()].pfm_spurious_ovfl_intr_count++;
 	}
 }
 
 /* for debug only */
 static int
-perfmon_proc_info(char *page)
+pfm_proc_info(char *page)
 {
 	char *p = page;
 	int i;
 
-	p += sprintf(p, "enabled          : %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes");
-	p += sprintf(p, "fastctxsw        : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
-	p += sprintf(p, "ovfl_mask        : 0x%lx\n", pmu_conf.perf_ovfl_val);
-	p += sprintf(p, "overflow intrs   : %lu\n", pfm_stats.pfm_ovfl_intr_count);
-	p += sprintf(p, "spurious intrs   : %lu\n", pfm_stats.pfm_spurious_ovfl_intr_count);
-	p += sprintf(p, "recorded samples : %lu\n", pfm_stats.pfm_recorded_samples_count);
-	p += sprintf(p, "smpl buffer full : %lu\n", pfm_stats.pfm_full_smpl_buffer_count);
+	p += sprintf(p, "fastctxsw              : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
+	p += sprintf(p, "ovfl_mask              : 0x%lx\n", pmu_conf.ovfl_val);
 
-#ifdef CONFIG_SMP
-	p += sprintf(p, "CPU%d syst_wide   : %d\n"
-			"CPU%d dcr_pp      : %d\n", 
-			smp_processor_id(), 
-			local_cpu_data->pfm_syst_wide, 
-			smp_processor_id(), 
-			local_cpu_data->pfm_dcr_pp);
-#endif
+	for(i=0; i < NR_CPUS; i++) {
+		if (cpu_is_online(i) == 0) continue;
+		p += sprintf(p, "CPU%-2d overflow intrs   : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_count);
+		p += sprintf(p, "CPU%-2d spurious intrs   : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count);
+		p += sprintf(p, "CPU%-2d recorded samples : %lu\n", i, pfm_stats[i].pfm_recorded_samples_count);
+		p += sprintf(p, "CPU%-2d smpl buffer full : %lu\n", i, pfm_stats[i].pfm_full_smpl_buffer_count);
+		p += sprintf(p, "CPU%-2d syst_wide        : %d\n", i, cpu_data(i)->pfm_syst_info & PFM_CPUINFO_SYST_WIDE ? 1 : 0);
+		p += sprintf(p, "CPU%-2d dcr_pp           : %d\n", i, cpu_data(i)->pfm_syst_info & PFM_CPUINFO_DCR_PP ? 1 : 0);
+		p += sprintf(p, "CPU%-2d exclude idle     : %d\n", i, cpu_data(i)->pfm_syst_info & PFM_CPUINFO_EXCL_IDLE ? 1 : 0);
+		p += sprintf(p, "CPU%-2d owner            : %d\n", i, pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1);
+	}
 
 	LOCK_PFS();
-	p += sprintf(p, "proc_sessions    : %lu\n"
-			"sys_sessions     : %lu\n"
-			"sys_use_dbregs   : %lu\n"
-			"ptrace_use_dbregs: %lu\n", 
+
+	p += sprintf(p, "proc_sessions          : %u\n"
+			"sys_sessions           : %u\n"
+			"sys_use_dbregs         : %u\n"
+			"ptrace_use_dbregs      : %u\n", 
 			pfm_sessions.pfs_task_sessions, 
 			pfm_sessions.pfs_sys_sessions,
 			pfm_sessions.pfs_sys_use_dbregs,
@@ -3039,30 +3114,6 @@
 
 	UNLOCK_PFS();
 
-	for(i=0; i < NR_CPUS; i++) {
-		if (cpu_is_online(i)) {
-			p += sprintf(p, "CPU%d owner : %-6d\n",
-					i, 
-					pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1);
-		}
-	}
-
-	for(i=0; pmd_desc[i].type != PFM_REG_NONE; i++) {
-		p += sprintf(p, "PMD%-2d: %d 0x%lx 0x%lx\n", 
-				i,
-				pmd_desc[i].type, 
-				pmd_desc[i].dep_pmd[0], 
-				pmd_desc[i].dep_pmc[0]); 
-	}
-
-	for(i=0; pmc_desc[i].type != PFM_REG_NONE; i++) {
-		p += sprintf(p, "PMC%-2d: %d 0x%lx 0x%lx\n", 
-				i, 
-				pmc_desc[i].type, 
-				pmc_desc[i].dep_pmd[0], 
-				pmc_desc[i].dep_pmc[0]); 
-	}
-
 	return p - page;
 }
 
@@ -3070,7 +3121,7 @@
 static int
 perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
 {
-	int len = perfmon_proc_info(page);
+	int len = pfm_proc_info(page);
 
 	if (len <= off+count) *eof = 1;
 
@@ -3083,35 +3134,75 @@
 	return len;
 }
 
-#ifdef CONFIG_SMP
+/*
+ * we come here as soon as PFM_CPUINFO_SYST_WIDE is set in local_cpu_data->pfm_syst_info. this happens
+ * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
+ * is active or inactive based on mode. We must rely on the value in 
+ * local_cpu_data->pfm_syst_info
+ */
 void
-pfm_syst_wide_update_task(struct task_struct *task, int mode)
+pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
 {
-	struct pt_regs *regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET);
+	struct pt_regs *regs;
+	unsigned long dcr;
+	unsigned long dcr_pp;
 
-	regs--;
+	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
 
 	/*
-	 * propagate the value of the dcr_pp bit to the psr
+	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 
+	 * on every CPU, so we can rely on the pid to identify the idle task.
+	 */
+	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
+		regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET);
+		regs--;
+		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
+		return;
+	}
+	/*
+	 * we are the idle task  and there is exclusion.
+	 *
+	 * if monitoring has started
 	 */
-	ia64_psr(regs)->pp = mode ? local_cpu_data->pfm_dcr_pp : 0;
+	if (dcr_pp) {
+		dcr = ia64_get_dcr();
+		/* 
+		 * context switching in? 
+		 */
+		if (is_ctxswin) {
+			/* mask monitoring for the idle task */
+			ia64_set_dcr(dcr & ~IA64_DCR_PP);
+			pfm_clear_psr_pp();
+			ia64_srlz_i();
+			return;
+		}
+		/* 
+		 * context switching out
+		 * restore normal kernel level settings 
+		 *
+		 * Due to inlining this odd if-then-else construction generates 
+		 * better code.
+	         */
+		ia64_set_dcr(dcr |IA64_DCR_PP);
+		pfm_set_psr_pp();
+		ia64_srlz_i();
+	}
 }
-#endif
-
 
 void
 pfm_save_regs (struct task_struct *task)
 {
 	pfm_context_t *ctx;
+	unsigned long mask;
 	u64 psr;
+	int i;
 
 	ctx = task->thread.pfm_context;
 
-
 	/*
 	 * save current PSR: needed because we modify it
 	 */
-	__asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory");
+	psr = pfm_get_psr();
 
 	/*
 	 * stop monitoring:
@@ -3120,129 +3211,52 @@
 	 * We do not need to set psr.sp because, it is irrelevant in kernel.
 	 * It will be restored from ipsr when going back to user level
 	 */
-	__asm__ __volatile__ ("rum psr.up;;"::: "memory");
+	pfm_clear_psr_up();
 	ia64_srlz_i();
 
 	ctx->ctx_saved_psr = psr;
 
-	//ctx->ctx_last_cpu  = smp_processor_id();
-
-}
-
-static void
-pfm_lazy_save_regs (struct task_struct *task)
-{
-	pfm_context_t *ctx;
-	struct thread_struct *t;
-	unsigned long mask;
-	int i;
-
-	DBprintk(("on [%d] by [%d]\n", task->pid, current->pid));
-
-	t   = &task->thread;
-	ctx = task->thread.pfm_context;
-
 #ifdef CONFIG_SMP
-	/* 
-	 * announce we are saving this PMU state
-	 * This will cause other CPU, to wait until we're done
-	 * before using the context.h
-	 *
-	 * must be an atomic operation
-	 */
-	atomic_set(&ctx->ctx_saving_in_progress, 1);
-
-	 /*
-	  * if owner is NULL, it means that the other CPU won the race
-	  * and the IPI has caused the context to be saved in pfm_handle_fectch_regs()
-	  * instead of here. We have nothing to do
-	  *
-	  * note that this is safe, because the other CPU NEVER modifies saving_in_progress.
-	  */
-	if (PMU_OWNER() == NULL) goto do_nothing;
-#endif
-
 	/*
-	 * do not own the PMU
+	 * release ownership of this PMU.
+	 * must be done before we save the registers.
 	 */
 	SET_PMU_OWNER(NULL);
 
-	ia64_srlz_d();
-
 	/*
 	 * XXX needs further optimization.
 	 * Also must take holes into account
 	 */
+	ia64_srlz_d();
+
 	mask = ctx->ctx_used_pmds[0];
 	for (i=0; mask; i++, mask>>=1) {
-		if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i);
+		if (mask & 0x1) task->thread.pmd[i] = ia64_get_pmd(i);
 	}
 
-	/* save pmc0 */
-	t->pmc[0] = ia64_get_pmc(0);
+	/* save pmc0: ia64_srlz_d() already done above, before the pmds were saved */
+	task->thread.pmc[0] = ia64_get_pmc(0);
 
-	/* not owned by this CPU */
+	/* force a full reload */
 	atomic_set(&ctx->ctx_last_cpu, -1);
-
-#ifdef CONFIG_SMP
-do_nothing:
 #endif
-	/*
-	 * declare we are done saving this context
-	 *
-	 * must be an atomic operation
-	 */
-	atomic_set(&ctx->ctx_saving_in_progress,0);
-
 }
 
-#ifdef CONFIG_SMP
-/*
- * Handles request coming from other CPUs
- */
-static void 
-pfm_handle_fetch_regs(void *info)
+static void
+pfm_lazy_save_regs (struct task_struct *task)
 {
-	pfm_smp_ipi_arg_t *arg = info;
-	struct thread_struct *t;
 	pfm_context_t *ctx;
+	struct thread_struct *t;
 	unsigned long mask;
 	int i;
 
-	ctx = arg->task->thread.pfm_context;
-	t   = &arg->task->thread;
-
-	DBprintk(("task=%d owner=%d saving=%d\n", 
-		  arg->task->pid,
-		  PMU_OWNER() ? PMU_OWNER()->pid: -1,
-		  atomic_read(&ctx->ctx_saving_in_progress)));
-
-	/* must wait until not busy before retrying whole request */
-	if (atomic_read(&ctx->ctx_is_busy)) {
-		arg->retval = 2;
-		return;
-	}
-
-	/* must wait if saving was interrupted */
-	if (atomic_read(&ctx->ctx_saving_in_progress)) {
-		arg->retval = 1;
-		return;
-	}
-
-	/* can proceed, done with context */
-	if (PMU_OWNER() != arg->task) {
-		arg->retval = 0;
-		return;
-	}
+	DBprintk(("on [%d] by [%d]\n", task->pid, current->pid));
 
-	DBprintk(("saving state for [%d] used_pmcs=0x%lx reload_pmcs=0x%lx used_pmds=0x%lx\n", 
-		arg->task->pid,
-		ctx->ctx_used_pmcs[0],
-		ctx->ctx_reload_pmcs[0],
-		ctx->ctx_used_pmds[0]));
+	t   = &task->thread;
+	ctx = task->thread.pfm_context;
 
 	/*
-	 * XXX: will be replaced with pure assembly call
+	 * do not own the PMU
 	 */
 	SET_PMU_OWNER(NULL);
 
@@ -3250,10 +3264,11 @@
 
 	/*
 	 * XXX needs further optimization.
+	 * Also must take holes into account
 	 */
 	mask = ctx->ctx_used_pmds[0];
 	for (i=0; mask; i++, mask>>=1) {
-		if (mask & 0x1) t->pmd[i] = ia64_get_pmd(i);
+		if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i);
 	}
 
 	/* save pmc0 */
@@ -3261,66 +3276,7 @@
 
 	/* not owned by this CPU */
 	atomic_set(&ctx->ctx_last_cpu, -1);
-
-	/* can proceed */
-	arg->retval = 0;
-}
-
-/*
- * Function call to fetch PMU state from another CPU identified by 'cpu'.
- * If the context is being saved on the remote CPU, then we busy wait until
- * the saving is done and then we return. In this case, non IPI is sent.
- * Otherwise, we send an IPI to the remote CPU, potentially interrupting 
- * pfm_lazy_save_regs() over there.
- *
- * If the retval==1, then it means that we interrupted remote save and that we must
- * wait until the saving is over before proceeding.
- * Otherwise, we did the saving on the remote CPU, and it was done by the time we got there.
- * in either case, we can proceed.
- */
-static void
-pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx)
-{
-	pfm_smp_ipi_arg_t  arg;
-	int ret;
-
-	arg.task   = task;
-	arg.retval = -1;
-
-	if (atomic_read(&ctx->ctx_is_busy)) {
-must_wait_busy:
-		while (atomic_read(&ctx->ctx_is_busy));
-	}
-
-	if (atomic_read(&ctx->ctx_saving_in_progress)) {
-		DBprintk(("no IPI, must wait for [%d] to be saved on [%d]\n", task->pid, cpu));
-must_wait_saving:
-		/* busy wait */
-		while (atomic_read(&ctx->ctx_saving_in_progress));
-		DBprintk(("done saving for [%d] on [%d]\n", task->pid, cpu));
-		return;
-	}
-	DBprintk(("calling CPU %d from CPU %d\n", cpu, smp_processor_id()));
-
-	if (cpu == -1) {
-		printk("refusing to use -1 for [%d]\n", task->pid);
-		return;
-	}
-
-	/* will send IPI to other CPU and wait for completion of remote call */
-	if ((ret=smp_call_function_single(cpu, pfm_handle_fetch_regs, &arg, 0, 1))) {
-		printk("perfmon: remote CPU call from %d to %d error %d\n", smp_processor_id(), cpu, ret);
-		return;
-	}
-	/*
-	 * we must wait until saving is over on the other CPU
-	 * This is the case, where we interrupted the saving which started just at the time we sent the
-	 * IPI.
-	 */
-	if (arg.retval == 1) goto must_wait_saving;
-	if (arg.retval == 2) goto must_wait_busy;
 }
-#endif /* CONFIG_SMP */
 
 void
 pfm_load_regs (struct task_struct *task)
@@ -3331,14 +3287,16 @@
 	unsigned long mask;
 	u64 psr;
 	int i;
-#ifdef CONFIG_SMP
-	int cpu;
-#endif
 
 	owner = PMU_OWNER();
 	ctx   = task->thread.pfm_context;
 	t     = &task->thread;
 
+	if (ctx == NULL) {
+		printk("perfmon: pfm_load_regs: null ctx for [%d]\n", task->pid);
+		return;
+	}
+
 	/*
 	 * we restore ALL the debug registers to avoid picking up 
 	 * stale state.
@@ -3371,32 +3329,17 @@
 				atomic_read(&ctx->ctx_last_cpu), task->pid));
 
 		psr = ctx->ctx_saved_psr;
-		__asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(psr): "memory");
+		pfm_set_psr_l(psr);
 
 		return;
 	}
-	DBprintk(("load_regs: must reload for [%d] owner=%d\n", 
-		task->pid, owner ? owner->pid : -1 ));
+
 	/*
 	 * someone else is still using the PMU, first push it out and
 	 * then we'll be able to install our stuff !
 	 */
 	if (owner) pfm_lazy_save_regs(owner);
 
-#ifdef CONFIG_SMP
-	/* 
-	 * check if context on another CPU (-1 means saved)
-	 * We MUST use the variable, as last_cpu may change behind our 
-	 * back. If it changes to -1 (not on a CPU anymore), then in cpu
-	 * we have the last CPU the context was on. We may be sending the 
-	 * IPI for nothing, but we have no way of verifying this. 
-	 */
-	cpu = atomic_read(&ctx->ctx_last_cpu);
-	if (cpu != -1) {
-		pfm_fetch_regs(cpu, task, ctx);
-	}
-#endif
-
 	/*
 	 * To avoid leaking information to the user level when psr.sp=0,
 	 * we must reload ALL implemented pmds (even the ones we don't use).
@@ -3409,7 +3352,7 @@
 	 */
 	mask = pfm_sysctl.fastctxsw || ctx->ctx_fl_protected ?  ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
 	for (i=0; mask; i++, mask>>=1) {
-		if (mask & 0x1) ia64_set_pmd(i, t->pmd[i] & pmu_conf.perf_ovfl_val);
+		if (mask & 0x1) ia64_set_pmd(i, t->pmd[i] & pmu_conf.ovfl_val);
 	}
 
 	/* 
@@ -3425,17 +3368,20 @@
 		if (mask & 0x1) ia64_set_pmc(i, t->pmc[i]);
 	}
 
+	/*
+	 * manually invoke core interrupt handler
+	 * if the task had a pending overflow when it was ctxsw out.
+	 * Side effect on ctx_fl_frozen is possible.
+	 */
 	if (t->pmc[0] & ~0x1) {
-		pfm_overflow_handler(task, ctx, t->pmc[0], NULL);
+		t->pmc[0] = pfm_overflow_handler(task, ctx, t->pmc[0], NULL);
 	}
 
 	/*
-	 * fl_frozen==1 when we are in blocking mode waiting for restart
+	 * unfreeze PMU if possible
 	 */
-	if (ctx->ctx_fl_frozen == 0) {
-		ia64_set_pmc(0, 0);
-		ia64_srlz_d();
-	}
+	if (ctx->ctx_fl_frozen == 0) pfm_unfreeze_pmu();
+
 	atomic_set(&ctx->ctx_last_cpu, smp_processor_id());
 
 	SET_PMU_OWNER(task);
@@ -3444,55 +3390,51 @@
 	 * restore the psr we changed in pfm_save_regs()
 	 */
 	psr = ctx->ctx_saved_psr;
-	__asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(psr): "memory");
-
+	pfm_set_psr_l(psr);
 }
 
 /*
  * XXX: make this routine able to work with non current context
  */
 static void
-ia64_reset_pmu(struct task_struct *task)
+pfm_reset_pmu(struct task_struct *task)
 {
 	struct thread_struct *t = &task->thread;
 	pfm_context_t *ctx = t->pfm_context;
-	unsigned long mask;
 	int i;
 
 	if (task != current) {
-		printk("perfmon: invalid task in ia64_reset_pmu()\n");
+		printk("perfmon: invalid task in pfm_reset_pmu()\n");
 		return;
 	}
 
 	/* Let's make sure the PMU is frozen */
-	ia64_set_pmc(0,1);
+	pfm_freeze_pmu();
 
 	/*
 	 * install reset values for PMC. We skip PMC0 (done above)
 	 * XX: good up to 64 PMCS
 	 */
-	mask = pmu_conf.impl_regs[0] >> 1;
-	for(i=1; mask; mask>>=1, i++) {
-		if (mask & 0x1) {
-			ia64_set_pmc(i, reset_pmcs[i]);
-			/*
-			 * When restoring context, we must restore ALL pmcs, even the ones 
-			 * that the task does not use to avoid leaks and possibly corruption
-			 * of the sesion because of configuration conflicts. So here, we 
-			 * initialize the entire set used in the context switch restore routine.
-	 		 */
-			t->pmc[i] = reset_pmcs[i];
-			DBprintk((" pmc[%d]=0x%lx\n", i, reset_pmcs[i]));
-						 
-		}
+	for (i=1; (pmu_conf.pmc_desc[i].type & PFM_REG_END) == 0; i++) {
+		if ((pmu_conf.pmc_desc[i].type & PFM_REG_IMPL) == 0) continue;
+		ia64_set_pmc(i, PMC_DFL_VAL(i));
+		/*
+		 * When restoring context, we must restore ALL pmcs, even the ones 
+		 * that the task does not use to avoid leaks and possibly corruption
+		 * of the session because of configuration conflicts. So here, we 
+		 * initialize the entire set used in the context switch restore routine.
+	 	 */
+		t->pmc[i] = PMC_DFL_VAL(i);
+		DBprintk(("pmc[%d]=0x%lx\n", i, t->pmc[i]));
 	}
+
 	/*
 	 * clear reset values for PMD. 
-	 * XXX: good up to 64 PMDS. Suppose that zero is a valid value.
+	 * XXX: good up to 64 PMDS.
 	 */
-	mask = pmu_conf.impl_regs[4];
-	for(i=0; mask; mask>>=1, i++) {
-		if (mask & 0x1) ia64_set_pmd(i, 0UL);
+	for (i=0; (pmu_conf.pmd_desc[i].type & PFM_REG_END) == 0; i++) {
+		if ((pmu_conf.pmd_desc[i].type & PFM_REG_IMPL) == 0) continue;
+		ia64_set_pmd(i, 0UL);
 		t->pmd[i] = 0UL;
 	}
 
@@ -3521,13 +3463,13 @@
 	  *
 	  * We never directly restore PMC0 so we do not include it in the mask.
 	  */
-	ctx->ctx_reload_pmcs[0] = pmu_conf.impl_regs[0] & ~0x1;
+	ctx->ctx_reload_pmcs[0] = pmu_conf.impl_pmcs[0] & ~0x1;
 	/*
 	 * We must include all the PMD in this mask to avoid picking
 	 * up stale value and leak information, especially directly
 	 * at the user level when psr.sp=0
 	 */
-	ctx->ctx_reload_pmds[0] = pmu_conf.impl_regs[4];
+	ctx->ctx_reload_pmds[0] = pmu_conf.impl_pmds[0];
 
 	/* 
 	 * Keep track of the pmds we want to sample
@@ -3537,7 +3479,7 @@
 	 *
 	 * We ignore the unimplemented pmds specified by the user
 	 */
-	ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0] & pmu_conf.impl_regs[4];
+	ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0];
 	ctx->ctx_used_pmcs[0] = 1; /* always save/restore PMC[0] */
 
 	/*
@@ -3586,26 +3528,20 @@
 	 */
 	if (ctx->ctx_fl_system) {
 
-
 		/* disable dcr pp */
 		ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
 
 		/* stop monitoring */
-		__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
-
+		pfm_clear_psr_pp();
 		ia64_srlz_i();
 
-#ifdef CONFIG_SMP
-		local_cpu_data->pfm_syst_wide = 0;
-		local_cpu_data->pfm_dcr_pp    = 0;
-#else
-		pfm_tasklist_toggle_pp(0);
-#endif
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
 	} else  {
 
 		/* stop monitoring */
-		__asm__ __volatile__ ("rum psr.up;;"::: "memory");
-
+		pfm_clear_psr_up();
 		ia64_srlz_i();
 
 		/* no more save/restore on ctxsw */
@@ -3637,8 +3573,7 @@
 	 * This destroys the overflow information. This is required to make sure
 	 * next process does not start with monitoring on if not requested
 	 */
-	ia64_set_pmc(0, 1);
-	ia64_srlz_d();
+	pfm_freeze_pmu();
 
 	/*
 	 * We don't need to restore psr, because we are on our way out
@@ -3670,10 +3605,14 @@
 		val = ia64_get_pmd(i);
 
 		if (PMD_IS_COUNTING(i)) {
-			DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n", task->pid, i, ctx->ctx_soft_pmds[i].val, val & pmu_conf.perf_ovfl_val));
+			DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n", 
+				task->pid, 
+				i, 
+				ctx->ctx_soft_pmds[i].val, 
+				val & pmu_conf.ovfl_val));
 
 			/* collect latest results */
-			ctx->ctx_soft_pmds[i].val += val & pmu_conf.perf_ovfl_val;
+			ctx->ctx_soft_pmds[i].val += val & pmu_conf.ovfl_val;
 
 			/*
 			 * now everything is in ctx_soft_pmds[] and we need
@@ -3686,7 +3625,7 @@
 			 * take care of overflow inline
 			 */
 			if (pmc0 & (1UL << i)) {
-				ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.perf_ovfl_val;
+				ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
 				DBprintk(("[%d] pmd[%d] overflowed soft_pmd=0x%lx\n",
 					task->pid, i, ctx->ctx_soft_pmds[i].val));
 			}
@@ -3751,9 +3690,7 @@
 	 * clear cpu pinning restriction for child
 	 */
 	if (ctx->ctx_fl_system) {
-		task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
-		task->need_resched = 1;
-
+		set_cpus_allowed(task, ctx->ctx_saved_cpus_allowed);
 	 	DBprintk(("setting cpus_allowed for [%d] to 0x%lx from 0x%lx\n", 
 			task->pid,
 			ctx->ctx_saved_cpus_allowed, 
@@ -3820,21 +3757,17 @@
 	m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
 	for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) {
 		if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) {
-			nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].ival & ~pmu_conf.perf_ovfl_val;
-			thread->pmd[i]	      	   = nctx->ctx_soft_pmds[i].ival & pmu_conf.perf_ovfl_val;
+			nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.ovfl_val;
+			thread->pmd[i]	      	   = nctx->ctx_soft_pmds[i].lval & pmu_conf.ovfl_val;
+		} else {
+			thread->pmd[i]	      	   = 0UL; /* reset to initial state */
 		}
-		/* what about the other pmds? zero or keep as is */
-
 	}
-	/*
-	 * clear BTB index register
-	 * XXX: CPU-model specific knowledge!
-	 */
-	thread->pmd[16] = 0;
 
+	nctx->ctx_fl_frozen      = 0;
+	nctx->ctx_ovfl_regs[0]   = 0UL;
+	nctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
 
-	nctx->ctx_fl_frozen    = 0;
-	nctx->ctx_ovfl_regs[0] = 0UL;
 	atomic_set(&nctx->ctx_last_cpu, -1);
 
 	/*
@@ -3866,6 +3799,11 @@
 
 	sema_init(&nctx->ctx_restart_sem, 0); /* reset this semaphore to locked */
 
+	/*
+	 * propagate kernel psr in new context (used for first ctxsw in)
+	 */
+	nctx->ctx_saved_psr = pfm_get_psr();
+
 	/* link with new task */
 	thread->pfm_context = nctx;
 
@@ -3994,31 +3932,15 @@
 
 	UNLOCK_CTX(ctx);
 
-	LOCK_PFS();
+	pfm_unreserve_session(task, ctx->ctx_fl_system, 1UL << ctx->ctx_cpu);
 
 	if (ctx->ctx_fl_system) {
-
-		pfm_sessions.pfs_sys_session[ctx->ctx_cpu] = NULL;
-		pfm_sessions.pfs_sys_sessions--;
-		DBprintk(("freeing syswide session on CPU%ld\n", ctx->ctx_cpu));
-
-		/* update perfmon debug register usage counter */
-		if (ctx->ctx_fl_using_dbreg) {
-			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
-				printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", task->pid);
-			} else
-				pfm_sessions.pfs_sys_use_dbregs--;
-		}
-
 		/*
 	 	 * remove any CPU pinning
 	 	 */
 		task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
 		task->need_resched = 1;
-	} else {
-		pfm_sessions.pfs_task_sessions--;
-	}
-	UNLOCK_PFS();
+	} 
 
 	pfm_context_free(ctx);
 	/* 
@@ -4178,125 +4100,165 @@
 }
 
 static struct irqaction perfmon_irqaction = {
-	handler:	perfmon_interrupt,
-	flags:		SA_INTERRUPT,
-	name:		"perfmon"
+	.handler = pfm_interrupt_handler,
+	.flags   = SA_INTERRUPT,
+	.name    = "perfmon"
 };
 
-
-static void
-pfm_pmu_snapshot(void)
+int
+pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
 {
-	int i;
+	int ret;
+
+	/* some sanity checks */
+	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
 
-	for (i=0; i < IA64_NUM_PMC_REGS; i++) {
-		if (i >= pmu_conf.num_pmcs) break;
-		if (PMC_IS_IMPL(i)) reset_pmcs[i] = ia64_get_pmc(i);
+	/* do the easy test first */
+	if (pfm_alternate_intr_handler) return -EBUSY;
+
+	/* reserve our session */
+	ret = pfm_reserve_session(NULL, 1, cpu_online_map);
+	if (ret) return ret;
+
+	if (pfm_alternate_intr_handler) {
+		printk("perfmon: install_alternate, intr_handler not NULL after reserve\n");
+		pfm_unreserve_session(NULL, 1, cpu_online_map); /* undo the reservation made above */
+		return -EINVAL;
 	}
-#ifdef CONFIG_MCKINLEY
-	/*
-	 * set the 'stupid' enable bit to power the PMU!
+	pfm_alternate_intr_handler = hdl;
+
+	return 0;
+}
+
+int
+pfm_remove_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
+{
+	if (hdl == NULL) return -EINVAL;
+
+	/* cannot remove someone else's handler! */
+	if (pfm_alternate_intr_handler != hdl) return -EINVAL;
+
+	pfm_alternate_intr_handler = NULL;
+
+	/* 
+	 * XXX: assume cpu_online_map has not changed since reservation 
 	 */
-	reset_pmcs[4] |= 1UL << 23;
-#endif
+	pfm_unreserve_session(NULL, 1, cpu_online_map);
+
+	return 0;
 }
 
 /*
  * perfmon initialization routine, called from the initcall() table
  */
 int __init
-perfmon_init (void)
+pfm_init(void)
 {
-	pal_perf_mon_info_u_t pm_info;
-	s64 status;
+	unsigned int n, n_counters, i;
 
-	pmu_conf.pfm_is_disabled = 1;
+	pmu_conf.disabled = 1;
 
-	printk("perfmon: version %u.%u (sampling format v%u.%u) IRQ %u\n", 
+	printk("perfmon: version %u.%u IRQ %u\n", 
 		PFM_VERSION_MAJ, 
 		PFM_VERSION_MIN, 
-		PFM_SMPL_VERSION_MAJ, 
-		PFM_SMPL_VERSION_MIN, 
 		IA64_PERFMON_VECTOR);
 
-	if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) {
-		printk("perfmon: PAL call failed (%ld), perfmon disabled\n", status);
-		return -1;
-	}
-
-	pmu_conf.perf_ovfl_val = (1UL << pm_info.pal_perf_mon_info_s.width) - 1;
-	pmu_conf.max_counters  = pm_info.pal_perf_mon_info_s.generic;
-	pmu_conf.num_pmcs      = find_num_pm_regs(pmu_conf.impl_regs);
-	pmu_conf.num_pmds      = find_num_pm_regs(&pmu_conf.impl_regs[4]);
-
-	printk("perfmon: %u bits counters\n", pm_info.pal_perf_mon_info_s.width);
-
-	printk("perfmon: %lu PMC/PMD pairs, %lu PMCs, %lu PMDs\n", 
-	       pmu_conf.max_counters, pmu_conf.num_pmcs, pmu_conf.num_pmds);
+	/*
+	 * compute the number of implemented PMD/PMC from the
+	 * description tables
+	 */
+	n = 0;
+	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
+		if (PMC_IS_IMPL(i) == 0) continue;
+		pmu_conf.impl_pmcs[i>>6] |= 1UL << (i&63);
+		n++;
+	}
+	pmu_conf.num_pmcs = n;
+
+	n = 0; n_counters = 0;
+	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
+		if (PMD_IS_IMPL(i) == 0) continue;
+		pmu_conf.impl_pmds[i>>6] |= 1UL << (i&63);
+		n++;
+		if (PMD_IS_COUNTING(i)) n_counters++;
+	}
+	pmu_conf.num_pmds      = n;
+	pmu_conf.num_counters  = n_counters;
+
+	printk("perfmon: %u PMCs, %u PMDs, %u counters (%lu bits)\n", 
+	       pmu_conf.num_pmcs, 
+	       pmu_conf.num_pmds,
+	       pmu_conf.num_counters,
+	       ffz(pmu_conf.ovfl_val));
 
 	/* sanity check */
 	if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) {
-		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon is DISABLED\n");
-		return -1; /* no need to continue anyway */
-	}
-
-	if (ia64_pal_debug_info(&pmu_conf.num_ibrs, &pmu_conf.num_dbrs)) {
-		printk(KERN_WARNING "perfmon: unable to get number of debug registers\n");
-		pmu_conf.num_ibrs = pmu_conf.num_dbrs = 0;
+		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
+		return -1;
 	}
-	/* PAL reports the number of pairs */
-	pmu_conf.num_ibrs <<=1;
-	pmu_conf.num_dbrs <<=1;
 
 	/*
-	 * take a snapshot of all PMU registers. PAL is supposed
-	 * to configure them with stable/safe values, i.e., not
-	 * capturing anything.
-	 * We take a snapshot now, before we make any modifications. This
-	 * will become our master copy. Then we will reuse the snapshot
-	 * to reset the PMU in pfm_enable(). Using this technique, perfmon
-	 * does NOT have to know about the specific values to program for
-	 * the PMC/PMD. The safe values may be different from one CPU model to
-	 * the other.
+	 * for now here for debug purposes
 	 */
-	pfm_pmu_snapshot();
+	perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL);
+	if (perfmon_dir == NULL) {
+		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
+		return -1; 
+	}
 
 	/*
-	 * setup the register configuration descriptions for the CPU
+	 * create /proc/sys/kernel/perfmon
 	 */
-	pmu_conf.pmc_desc = pmc_desc;
-	pmu_conf.pmd_desc = pmd_desc;
-
-	/* we are all set */
-	pmu_conf.pfm_is_disabled = 0;
+	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
 
 	/*
-	 * for now here for debug purposes
+	 * initialize all our spinlocks
 	 */
-	perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL);
-
-	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
-
 	spin_lock_init(&pfm_sessions.pfs_lock);
 
+	/* we are all set */
+	pmu_conf.disabled = 0;
+
 	return 0;
 }
 
-__initcall(perfmon_init);
+__initcall(pfm_init);
 
 void
-perfmon_init_percpu (void)
+pfm_init_percpu(void)
 {
+	int i;
+
 	if (smp_processor_id() == 0)
 		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
 
 	ia64_set_pmv(IA64_PERFMON_VECTOR);
 	ia64_srlz_d();
+
+	/*
+	 * we first initialize the PMU to a stable state.
+	 * the values may have been changed from their power-up
+	 * values by software executed before the kernel took over.
+	 *
+	 * At this point, pmu_conf has not yet been initialized
+	 *
+	 * On McKinley, this code is ineffective until PMC4 is initialized.
+	 */
+	for (i=1; PMC_IS_LAST(i) == 0;  i++) {
+		if (PMC_IS_IMPL(i) == 0) continue;
+		ia64_set_pmc(i, PMC_DFL_VAL(i));
+	}
+
+	for (i=0; PMD_IS_LAST(i) == 0; i++) {
+		if (PMD_IS_IMPL(i) == 0) continue;
+		ia64_set_pmd(i, 0UL);
+	}
+	pfm_freeze_pmu();
 }
 
 #else /* !CONFIG_PERFMON */
 
-asmlinkage int
+asmlinkage long
 sys_perfmonctl (int pid, int cmd, void *req, int count, long arg5, long arg6, 
 		long arg7, long arg8, long stack)
 {
diff -urN rhas.e25/arch/ia64/kernel/perfmon_generic.h linux.e25/arch/ia64/kernel/perfmon_generic.h
--- rhas.e25/arch/ia64/kernel/perfmon_generic.h	2003-02-06 16:32:06.000000000 -0800
+++ linux.e25/arch/ia64/kernel/perfmon_generic.h	2003-02-06 18:00:28.000000000 -0800
@@ -1,29 +1,48 @@
+/*
+ * This file contains the architected PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
 #define RDEP(x)	(1UL<<(x))
 
-#ifdef CONFIG_ITANIUM
-#error "This file should not be used when CONFIG_ITANIUM is defined"
+#if defined(CONFIG_ITANIUM) || defined (CONFIG_MCKINLEY)
+#error "This file should not be used when CONFIG_ITANIUM or CONFIG_MCKINLEY is defined"
 #endif
 
-static pfm_reg_desc_t pmc_desc[256]={
-/* pmc0  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc1  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc2  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc3  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc4  */ { PFM_REG_COUNTING, 0, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc5  */ { PFM_REG_COUNTING, 0, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc6  */ { PFM_REG_COUNTING, 0, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc7  */ { PFM_REG_COUNTING, 0, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-	    { PFM_REG_NONE, 0, NULL, NULL, {0,}, {0,}}, /* end marker */
+static pfm_reg_desc_t pmc_gen_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
 };
 
-static pfm_reg_desc_t pmd_desc[256]={
-/* pmd0  */ { PFM_REG_NOTIMPL, 0, NULL, NULL, {0,}, {0,}},
-/* pmd1  */ { PFM_REG_NOTIMPL, 0, NULL, NULL, {0,}, {0,}},
-/* pmd2  */ { PFM_REG_NOTIMPL, 0, NULL, NULL, {0,}, {0,}},
-/* pmd3  */ { PFM_REG_NOTIMPL, 0, NULL, NULL, {0,}, {0,}},
-/* pmd4  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
-/* pmd5  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
-/* pmd6  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
-/* pmd7  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
-	    { PFM_REG_NONE, 0, NULL, NULL, {0,}, {0,}}, /* end marker */
+static pfm_reg_desc_t pmd_gen_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd1  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd2  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd3  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf={
+	.disabled = 1,
+	.ovfl_val = (1UL << 32) - 1,
+	.num_ibrs = 8,
+	.num_dbrs = 8,
+	.pmd_desc = pmd_gen_desc,	/* must name the tables defined above in this header */
+	.pmc_desc = pmc_gen_desc
 };
diff -urN rhas.e25/arch/ia64/kernel/perfmon_itanium.h linux.e25/arch/ia64/kernel/perfmon_itanium.h
--- rhas.e25/arch/ia64/kernel/perfmon_itanium.h	2003-02-06 16:32:06.000000000 -0800
+++ linux.e25/arch/ia64/kernel/perfmon_itanium.h	2003-02-06 18:00:28.000000000 -0800
@@ -15,46 +15,59 @@
 static int pfm_ita_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
 static int pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs);
 
-static pfm_reg_desc_t pmc_desc[256]={
-/* pmc0  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc1  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc2  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc3  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc4  */ { PFM_REG_COUNTING, 6, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc5  */ { PFM_REG_COUNTING, 6, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc6  */ { PFM_REG_COUNTING, 6, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc7  */ { PFM_REG_COUNTING, 6, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc8  */ { PFM_REG_CONFIG, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc9  */ { PFM_REG_CONFIG, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc10 */ { PFM_REG_MONITOR, 6, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc11 */ { PFM_REG_MONITOR, 6, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc12 */ { PFM_REG_MONITOR, 6, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc13 */ { PFM_REG_CONFIG, 0, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-	    { PFM_REG_NONE, 0, NULL, NULL, {0,}, {0,}}, /* end marker */
+static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
 };
 
-static pfm_reg_desc_t pmd_desc[256]={
-/* pmd0  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
-/* pmd1  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
-/* pmd2  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
-/* pmd3  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
-/* pmd4  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
-/* pmd5  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
-/* pmd6  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
-/* pmd7  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
-/* pmd8  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd9  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd10 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd11 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd12 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd13 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd14 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd15 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd16 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd17 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
-	    { PFM_REG_NONE, 0, NULL, NULL, {0,}, {0,}}, /* end marker */
+static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
 };
 
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf={
+	.disabled = 1,
+	.ovfl_val = (1UL << 32) - 1,
+	.num_ibrs = 8,
+	.num_dbrs = 8,
+	.pmd_desc = pfm_ita_pmd_desc,
+	.pmc_desc = pfm_ita_pmc_desc
+};
+
+
 static int
 pfm_ita_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
 {
diff -urN rhas.e25/arch/ia64/kernel/perfmon_mckinley.h linux.e25/arch/ia64/kernel/perfmon_mckinley.h
--- rhas.e25/arch/ia64/kernel/perfmon_mckinley.h	2003-02-06 16:32:06.000000000 -0800
+++ linux.e25/arch/ia64/kernel/perfmon_mckinley.h	2003-02-06 18:00:28.000000000 -0800
@@ -12,51 +12,86 @@
 #error "This file is only valid when CONFIG_MCKINLEY is defined"
 #endif
 
+static int pfm_mck_reserved(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
 static int pfm_mck_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
 static int pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs);
 
-static pfm_reg_desc_t pmc_desc[256]={
-/* pmc0  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc1  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc2  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc3  */ { PFM_REG_CONTROL, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc4  */ { PFM_REG_COUNTING, 6, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc5  */ { PFM_REG_COUNTING, 6, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc6  */ { PFM_REG_COUNTING, 6, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc7  */ { PFM_REG_COUNTING, 6, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc8  */ { PFM_REG_CONFIG, 0, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc9  */ { PFM_REG_CONFIG, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc10 */ { PFM_REG_MONITOR, 4, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc11 */ { PFM_REG_MONITOR, 6, NULL, NULL, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc12 */ { PFM_REG_MONITOR, 6, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc13 */ { PFM_REG_CONFIG, 0, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc14 */ { PFM_REG_CONFIG, 0, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc15 */ { PFM_REG_CONFIG, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-	    { PFM_REG_NONE, 0, NULL, NULL, {0,}, {0,}}, /* end marker */
+static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_reserved, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_reserved, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_reserved, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xffffffff3fffffffUL, 0xffffffff9fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xffffffff3ffffffcUL, 0xffffffff9fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_reserved, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cfUL, NULL,  pfm_mck_reserved, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL,  pfm_mck_reserved, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc14 */ { PFM_REG_CONFIG  , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc15 */ { PFM_REG_CONFIG  , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_reserved, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
 };
 
-static pfm_reg_desc_t pmd_desc[256]={
-/* pmd0  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
-/* pmd1  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
-/* pmd2  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
-/* pmd3  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
-/* pmd4  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
-/* pmd5  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
-/* pmd6  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
-/* pmd7  */ { PFM_REG_COUNTING, 0, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
-/* pmd8  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd9  */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd10 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd11 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd12 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd13 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd14 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd15 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd16 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
-/* pmd17 */ { PFM_REG_BUFFER, 0, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
-	    { PFM_REG_NONE, 0, NULL, NULL, {0,}, {0,}}, /* end marker */
+static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
 };
 
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf={
+	.disabled = 1,
+	.ovfl_val = (1UL << 47) - 1,
+	.num_ibrs = 8,
+	.num_dbrs = 8,
+	.pmd_desc = pfm_mck_pmd_desc,
+	.pmc_desc = pfm_mck_pmc_desc
+};
+
+
+/*
+ * PMC reserved fields must have their power-up values preserved: rewrites *val in place and always returns 0
+ */
+static int
+pfm_mck_reserved(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	unsigned long tmp1, tmp2, ival = *val;
+
+	/* keep only the bits of the user value that are selected by PMC_RSVD_MASK(cnum) */
+	tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+	/* take every bit outside that mask from the default value PMC_DFL_VAL(cnum) */
+	tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+	*val = tmp1 | tmp2;
+
+	DBprintk(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", 
+		  cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); 
+	return 0;
+}
+
 static int
 pfm_mck_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
 {
@@ -65,6 +100,9 @@
 	int ret = 0, check_case1 = 0;
 	unsigned long val8 = 0, val14 = 0, val13 = 0;
 
+	/* first preserve the reserved fields */
+	pfm_mck_reserved(task, cnum, val, regs);
+
 	/*
 	 * we must clear the debug registers if any pmc13.ena_dbrpX bit is enabled 
 	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information. 
@@ -105,8 +143,11 @@
 		case  8: val8 = *val;
 			 val13 = th->pmc[13];
 			 val14 = th->pmc[14];
+			 *val |= 1UL << 2; /* bit 2 must always be 1 */
 			 check_case1 = 1;
 			 break;
+		case  9: *val |= 1UL << 2; /* bit 2 must always be 1 */
+			 break;
 		case 13: val8  = th->pmc[8];
 			 val13 = *val;
 			 val14 = th->pmc[14];
diff -urN rhas.e25/arch/ia64/kernel/process.c linux.e25/arch/ia64/kernel/process.c
--- rhas.e25/arch/ia64/kernel/process.c	2003-02-06 16:32:06.000000000 -0800
+++ linux.e25/arch/ia64/kernel/process.c	2003-02-06 16:39:01.000000000 -0800
@@ -168,6 +168,9 @@
 void
 ia64_save_extra (struct task_struct *task)
 {
+#ifdef CONFIG_PERFMON
+	unsigned long info;
+#endif
 	if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
 		ia64_save_debug_regs(&task->thread.dbr[0]);
 
@@ -175,10 +178,9 @@
 	if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
 		pfm_save_regs(task);
 
-# ifdef CONFIG_SMP
-	if (local_cpu_data->pfm_syst_wide)
-		pfm_syst_wide_update_task(task, 0);
-# endif
+	info = local_cpu_data->pfm_syst_info;
+	if (info & PFM_CPUINFO_SYST_WIDE)
+		pfm_syst_wide_update_task(task, info, 0);
 #endif
 
 #ifdef CONFIG_IA32_SUPPORT
@@ -190,6 +192,10 @@
 void
 ia64_load_extra (struct task_struct *task)
 {
+#ifdef CONFIG_PERFMON
+	unsigned long info;
+#endif
+
 	if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
 		ia64_load_debug_regs(&task->thread.dbr[0]);
 
@@ -197,10 +203,9 @@
 	if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
 		pfm_load_regs(task);
 
-# ifdef CONFIG_SMP
-	if (local_cpu_data->pfm_syst_wide)
-		pfm_syst_wide_update_task(task, 1);
-# endif
+	info = local_cpu_data->pfm_syst_info;
+	if (info & PFM_CPUINFO_SYST_WIDE)
+		pfm_syst_wide_update_task(task, info, 1);
 #endif
 
 #ifdef CONFIG_IA32_SUPPORT
@@ -347,8 +352,8 @@
 	 */
 	atomic_set(&p->thread.pfm_notifiers_check, 0);
 	atomic_set(&p->thread.pfm_owners_check, 0);
-        /* clear list of sampling buffer to free for new task */
-        p->thread.pfm_smpl_buf_list = NULL;
+	/* clear list of sampling buffer to free for new task */
+	p->thread.pfm_smpl_buf_list = NULL;
 
 	if (current->thread.pfm_context) 
 		retval = pfm_inherit(p, child_ptregs);
diff -urN rhas.e25/arch/ia64/kernel/smpboot.c linux.e25/arch/ia64/kernel/smpboot.c
--- rhas.e25/arch/ia64/kernel/smpboot.c	2003-02-06 16:32:09.000000000 -0800
+++ linux.e25/arch/ia64/kernel/smpboot.c	2003-02-06 16:39:01.000000000 -0800
@@ -321,7 +321,7 @@
 	extern void ia64_init_itm(void);
 
 #ifdef CONFIG_PERFMON
-	extern void perfmon_init_percpu(void);
+	extern void pfm_init_percpu(void);
 #endif
 
 	cpuid = smp_processor_id();
@@ -356,7 +356,7 @@
 #endif
 
 #ifdef CONFIG_PERFMON
-	perfmon_init_percpu();
+	pfm_init_percpu();
 #endif
 
 	local_irq_enable();
diff -urN rhas.e25/arch/ia64/lib/Makefile linux.e25/arch/ia64/lib/Makefile
--- rhas.e25/arch/ia64/lib/Makefile	2003-02-06 16:32:11.000000000 -0800
+++ linux.e25/arch/ia64/lib/Makefile	2003-02-06 16:39:01.000000000 -0800
@@ -11,7 +11,7 @@
 
 obj-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o					\
 	__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o					\
-	checksum.o clear_page.o csum_partial_copy.o					\
+	carta_random.o checksum.o clear_page.o csum_partial_copy.o					\
 	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o			\
 	flush.o ip_fast_csum.o io.o do_csum.o						\
 	memset.o strlen.o swiotlb.o fancy.o page_valid.o
diff -urN rhas.e25/arch/ia64/lib/carta_random.S linux.e25/arch/ia64/lib/carta_random.S
--- rhas.e25/arch/ia64/lib/carta_random.S	1969-12-31 16:00:00.000000000 -0800
+++ linux.e25/arch/ia64/lib/carta_random.S	2003-02-06 16:39:01.000000000 -0800
@@ -0,0 +1,54 @@
+/*
+ * Fast, simple, yet decent quality random number generator based on
+ * a paper by David G. Carta ("Two Fast Implementations of the
+ * `Minimal Standard' Random Number Generator," Communications of the
+ * ACM, January, 1990).
+ *
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+#define a	r2
+#define m	r3
+#define lo	r8
+#define hi	r9
+#define t0	r16
+#define t1	r17
+#define	seed	r32
+
+GLOBAL_ENTRY(carta_random32)
+	movl	a = (16807 << 16) | 16807	// multiplier 16807 replicated in two 16-bit fields
+	;;
+	pmpyshr2.u t0 = a, seed, 0
+	pmpyshr2.u t1 = a, seed, 16
+	;;
+	unpack2.l t0 = t1, t0
+	dep	m = -1, r0, 0, 31		// m = 0x7fffffff (2^31 - 1)
+	;;
+	zxt4	lo = t0				// lo = low 32 bits of t0
+	shr.u	hi = t0, 32			// hi = upper 32 bits of t0
+	;;
+	dep	t0 = 0, hi, 15, 49	// t0 = (hi & 0x7fff)
+	;;
+	shl	t0 = t0, 16		// t0 = (hi & 0x7fff) << 16
+	shr	t1 = hi, 15		// t1 = (hi >> 15)
+	;;
+	add	lo = lo, t0
+	;;
+	cmp.gtu	p6, p0 = lo, m			// p6 set when lo exceeds m (unsigned compare)
+	;;
+(p6)	and	lo = lo, m			// when p6 is set: keep the low 31 bits ...
+	;;
+(p6)	add	lo = 1, lo			// ... and add 1
+	;;
+	add	lo = lo, t1
+	;;
+	cmp.gtu p6, p0 = lo, m			// same reduction again after adding t1
+	;;
+(p6)	and	lo = lo, m
+	;;
+(p6)	add	lo = 1, lo
+	br.ret.sptk.many rp			// return with the final value in lo (r8)
+END(carta_random32)
diff -urN rhas.e25/include/asm-ia64/perfmon.h linux.e25/include/asm-ia64/perfmon.h
--- rhas.e25/include/asm-ia64/perfmon.h	2003-02-06 16:32:06.000000000 -0800
+++ linux.e25/include/asm-ia64/perfmon.h	2003-02-06 16:39:01.000000000 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001-2002 Hewlett-Packard Co
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
  *               Stephane Eranian <eranian@hpl.hp.com>
  */
 
@@ -40,11 +40,13 @@
 #define PFM_FL_INHERIT_ALL	 0x02	/* always clone pfm_context across fork() */
 #define PFM_FL_NOTIFY_BLOCK    	 0x04	/* block task on user level notifications */
 #define PFM_FL_SYSTEM_WIDE	 0x08	/* create a system wide context */
+#define PFM_FL_EXCL_IDLE         0x20   /* exclude idle task from system wide session */
 
 /*
  * PMC flags
  */
 #define PFM_REGFL_OVFL_NOTIFY	0x1	/* send notification on overflow */
+#define PFM_REGFL_RANDOM	0x2	/* randomize sampling interval */
 
 /*
  * PMD/PMC/IBR/DBR return flags (ignored on input)
@@ -85,9 +87,12 @@
 	unsigned long	reg_long_reset;	/* reset after sampling buffer overflow (large) */
 	unsigned long	reg_short_reset;/* reset after counter overflow (small) */
 
-	unsigned long	reg_reset_pmds[4]; /* which other counters to reset on overflow */
+	unsigned long	reg_reset_pmds[4];   /* which other counters to reset on overflow */
+	unsigned long	reg_random_seed;     /* seed value when randomization is used */
+	unsigned long	reg_random_mask;     /* bitmask used to limit random value */
+	unsigned long	reg_last_reset_value;/* last value used to reset the PMD (PFM_READ_PMDS) */
 
-	unsigned long   reserved[16];	/* for future use */
+	unsigned long   reserved[13];	/* for future use */
 } pfarg_reg_t;
 
 typedef struct {
@@ -104,6 +109,31 @@
 } pfarg_features_t;
 
 /*
+ * Entry header in the sampling buffer.
+ * The header is directly followed by the PMDS saved in increasing index
+ * order: PMD4, PMD5, .... How many PMDs are present is determined by the 
+ * user program during context creation.
+ *
+ * XXX: in this version of the entry, only up to 64 registers can be recorded
+ * This should be enough for quite some time. Always check sampling format
+ * before parsing entries!
+ *
+ * In the case where multiple counters overflow at the same time, the
+ * last_reset_value member indicates the initial value of the PMD with
+ * the smallest index.  For instance, if PMD2 and PMD5 have overflowed,
+ * the last_reset_value member contains the initial value of PMD2.
+ */
+typedef struct {
+	int		pid;		 /* identification of process */
+	int		cpu;		 /* which cpu was used */
+	unsigned long	last_reset_value;/* initial value of overflowed counter */
+	unsigned long	stamp;		 /* timestamp (unique per CPU) */
+	unsigned long	ip;		 /* where the overflow interrupt happened */
+	unsigned long	regs;		 /* bitmask of which registers overflowed */
+	unsigned long   period;		 /* unused */
+} perfmon_smpl_entry_t;
+
+/*
  * This header is at the beginning of the sampling buffer returned to the user.
  * It is exported as Read-Only at this point. It is directly followed by the
  * first record.
@@ -120,7 +150,7 @@
  * Define the version numbers for both perfmon as a whole and the sampling buffer format.
  */
 #define PFM_VERSION_MAJ		1U
-#define PFM_VERSION_MIN		0U
+#define PFM_VERSION_MIN		3U
 #define PFM_VERSION		(((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
 
 #define PFM_SMPL_VERSION_MAJ	1U
@@ -131,35 +161,15 @@
 #define PFM_VERSION_MAJOR(x)	(((x)>>16) & 0xffff)
 #define PFM_VERSION_MINOR(x)	((x) & 0xffff)
 
-/*
- * Entry header in the sampling buffer.
- * The header is directly followed with the PMDS saved in increasing index 
- * order: PMD4, PMD5, .... How many PMDs are present is determined by the 
- * user program during context creation.
- *
- * XXX: in this version of the entry, only up to 64 registers can be recorded
- * This should be enough for quite some time. Always check sampling format
- * before parsing entries!
- *
- * Inn the case where multiple counters have overflowed at the same time, the 
- * rate field indicate the initial value of the first PMD, based on the index.
- * For instance, if PMD2 and PMD5 have ovewrflowed for this entry, the rate field
- * will show the initial value of PMD2.
- */
-typedef struct {
-	int		pid;		/* identification of process */
-	int		cpu;		/* which cpu was used */
-	unsigned long	rate;		/* initial value of overflowed counter */
-	unsigned long	stamp;		/* timestamp */
-	unsigned long	ip;		/* where did the overflow interrupt happened */
-	unsigned long	regs;		/* bitmask of which registers overflowed */
-	unsigned long   period;		/* sampling period used by overflowed counter (smallest pmd index) */
-} perfmon_smpl_entry_t;
-
-extern int perfmonctl(pid_t pid, int cmd, void *arg, int narg);
 
 #ifdef __KERNEL__
 
+extern long perfmonctl(pid_t pid, int cmd, void *arg, int narg);
+
+typedef struct {
+	void (*handler)(int irq, void *arg, struct pt_regs *regs); /* interrupt handler supplied by the alternate subsystem */
+} pfm_intr_handler_desc_t;
+
 extern void pfm_save_regs (struct task_struct *);
 extern void pfm_load_regs (struct task_struct *);
 
@@ -171,8 +181,22 @@
 extern int  pfm_use_debug_registers(struct task_struct *);
 extern int  pfm_release_debug_registers(struct task_struct *);
 extern int  pfm_cleanup_smpl_buf(struct task_struct *);
-extern void pfm_syst_wide_update_task(struct task_struct *, int);
-extern void perfmon_init_percpu(void);
+extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
+extern void pfm_init_percpu(void);
+
+/* 
+ * hooks to allow VTune/Prospect to cooperate with perfmon.
+ * (reserved for system wide monitoring modules only)
+ */
+extern int pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *h);
+extern int pfm_remove_alternate_syswide_subsystem(pfm_intr_handler_desc_t *h);
+
+/*
+ * describes the content of the local_cpu_data->pfm_syst_info field
+ */
+#define PFM_CPUINFO_SYST_WIDE	0x1	/* if set, a system wide session exists on the CPU */
+#define PFM_CPUINFO_DCR_PP	0x2	/* if set, a system wide session has started on the CPU */
+#define PFM_CPUINFO_EXCL_IDLE	0x4	/* if set, the system wide session excludes the idle task */
 
 #endif /* __KERNEL__ */
 
diff -urN rhas.e25/include/asm-ia64/processor.h linux.e25/include/asm-ia64/processor.h
--- rhas.e25/include/asm-ia64/processor.h	2003-02-06 16:32:11.000000000 -0800
+++ linux.e25/include/asm-ia64/processor.h	2003-02-06 18:00:29.000000000 -0800
@@ -169,16 +169,15 @@
 	__u32 ptce_count[2];
 	__u32 ptce_stride[2];
 	struct task_struct *ksoftirqd;	/* kernel softirq daemon for this CPU */
+# ifdef CONFIG_PERFMON
+	unsigned long pfm_syst_info;	/* bitmask of PFM_CPUINFO_* flags */
+# endif
 #ifdef CONFIG_SMP
 	int processor;
 	__u64 loops_per_jiffy;
 	__u64 ipi_count;
 	__u64 prof_counter;
 	__u64 prof_multiplier;
-# ifdef CONFIG_PERFMON
-       __u32 pfm_syst_wide;
-       __u32 pfm_dcr_pp;
-# endif
        union {
                /*
                 *  This is written to by *other* CPUs,
diff -urN rhas.e25/include/asm-ia64/system.h linux.e25/include/asm-ia64/system.h
--- rhas.e25/include/asm-ia64/system.h	2003-02-06 16:32:11.000000000 -0800
+++ linux.e25/include/asm-ia64/system.h	2003-02-06 18:00:28.000000000 -0800
@@ -393,8 +393,8 @@
 extern void ia64_save_extra (struct task_struct *task);
 extern void ia64_load_extra (struct task_struct *task);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_PERFMON)
-# define PERFMON_IS_SYSWIDE() (local_cpu_data->pfm_syst_wide != 0)
+#ifdef CONFIG_PERFMON
+# define PERFMON_IS_SYSWIDE() (local_cpu_data->pfm_syst_info & 0x1)	/* 0x1 is PFM_CPUINFO_SYST_WIDE */
 #else
 # define PERFMON_IS_SYSWIDE() (0)
 #endif
