x86:

---
 arch/x86_64/mm/ioremap.c |   42 +++--------
 arch/x86_64/mm/pat.c     |  175 +++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/pat.h   |    1 
 include/asm-x86_64/pat.h |   12 +++
 4 files changed, 200 insertions(+), 30 deletions(-)

Index: linux/arch/x86_64/mm/pat.c
===================================================================
--- linux.orig/arch/x86_64/mm/pat.c
+++ linux/arch/x86_64/mm/pat.c
@@ -2,9 +2,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 static u64 boot_pat_state;
 
@@ -51,3 +55,174 @@ void pat_shutdown(void)
 	wrmsrl(MSR_IA32_CR_PAT, boot_pat_state);
 }
 
+static char *cattr_name(unsigned long flags)
+{
+	switch (flags & _PAGE_CACHE_MASK) {
+	case _PAGE_WC:	return "write combining";
+	case _PAGE_PCD:	return "uncached";
+	case 0:		return "default";
+	default:	return "broken";
+	}
+}
+
+/* The global memattr list keeps track of caching attributes for specific
+   physical memory areas. Conflicting caching attributes in different
+   mappings can cause CPU cache corruption. To avoid this we keep track
+   of them and reject illegal mappings.
+
+   Normal write-back mappings are only tracked when they are first
+   allocated through one of the "usual suspect" interfaces like
+   /dev/mem or ioremap. This means an ioremap of a memory page that
+   is also mapped elsewhere can lead to inconsistencies; the caller has
+   to ensure this doesn't happen.
+
+   The list is sorted and can contain multiple entries for each address
+   (this allows reference counting of overlapping areas). All the aliases
+   have the same cache attributes, of course.
+
+   Currently the data structure is a list because the number of mappings
+   is expected to be relatively small. If this should become a problem
+   it could be changed to an rbtree or similar.
+
+   init_mm.mmap_sem protects the list. */
+
+struct memattr {
+	struct list_head nd;
+	u64 start;
+	u64 end;
+	unsigned long attr;
+};
+
+static LIST_HEAD(mattr_list);
+
+static void
+wrong_attr(char *on, u64 start, u64 end, unsigned long attr, struct memattr *ma)
+{
+	printk(KERN_ERR
+		"%s:%d conflicting cache attribute on %s for %Lx-%Lx %s<->%s\n",
+		current->comm, current->pid, on,
+		start, end,
+		cattr_name(attr), cattr_name(ma->attr));
+}
+
+int reserve_mattr(u64 start, u64 end, unsigned long attr, unsigned long *fattr)
+{
+	struct memattr *ma = NULL, *ml;
+	int err = 0;
+	int flush = 0;
+	unsigned long pfn;
+
+	ma = kmalloc(sizeof(struct memattr), GFP_KERNEL);
+	if (!ma)
+		return -ENOMEM;
+	ma->start = start;
+	ma->end = end;
+	ma->attr = attr;
+	if (fattr)
+		*fattr = attr;
+	down_write(&init_mm.mmap_sem);
+	list_for_each_entry(ml, &mattr_list, nd) {
+		if (ml->start <= start && ml->end >= end) {
+			/* Covering entry found. With fattr the caller
+			   accepts whatever attribute is already in force. */
+			if (fattr) {
+				attr = ml->attr;
+				*fattr = attr;
+				ma->attr = attr;
+			}
+			if (attr != ml->attr) {
+				wrong_attr("reservation", start, end, attr, ml);
+				err = -EBUSY;
+				break;
+			}
+		} else if (ml->start >= end) {
+			/* Keep the list sorted by start address */
+			list_add(&ma->nd, ml->nd.prev);
+			ma = NULL;
+			break;
+		}
+	}
+	if (err) {
+		/* Don't record or apply a conflicting reservation */
+		kfree(ma);
+		up_write(&init_mm.mmap_sem);
+		return err;
+	}
+	if (ma)
+		list_add_tail(&ma->nd, &mattr_list);
+	/* Fix up the direct mapping */
+	pfn = start >> PAGE_SHIFT;
+	if (pfn < max_low_pfn) {
+		err = __change_page_attr_pfn(pfn,
+			ALIGN(end - start, PAGE_SIZE) >> PAGE_SHIFT,
+			PAGE_KERNEL_ATTR(attr));
+		flush = 1;
+	}
+	up_write(&init_mm.mmap_sem);
+	if (flush)
+		global_flush_tlb();
+	return err;
+}
+
+static void
+mark_hole(unsigned long *holemap, struct memattr *ml, u64 start, u64 end)
+{
+	unsigned long s = ml->start;
+	unsigned long e = ml->end;
+	unsigned long i;
+
+	if (s < start)
+		s = start;
+	if (e > end)
+		e = end;
+	/* One bit per page still covered by another entry */
+	for (i = s; i < e; i += PAGE_SIZE)
+		__set_bit((i - start) >> PAGE_SHIFT, holemap);
+}
+
+/* Revert any direct mapping pages not covered anymore by an mattr entry */
+static int revert_hole(unsigned long *holemap, u64 start, u64 end, int *flush)
+{
+	unsigned long i = -1;
+	int err = 0;
+	/* The hole map is indexed by page, not by byte */
+	u64 len = (end - start) >> PAGE_SHIFT;
+
+	while ((i = find_next_zero_bit(holemap, len, i + 1)) < len) {
+		*flush = 1;
+		err = __change_page_attr_pfn((start >> PAGE_SHIFT) + i, 1,
+					     PAGE_KERNEL);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+int free_mattr(u64 start, u64 end, unsigned long attr)
+{
+	unsigned long *holemap = NULL;
+	struct memattr *ml, *tml;
+	int err = attr ? -EBUSY : 0;
+	int flush = 0;
+
+	down_write(&init_mm.mmap_sem);
+	if (start < (max_low_pfn << PAGE_SHIFT)) {
+		/* Again one bit per page in the freed range */
+		holemap = kzalloc(BITS_TO_LONGS((end - start) >> PAGE_SHIFT) *
+					sizeof(long), GFP_KERNEL);
+		if (!holemap) {
+			up_write(&init_mm.mmap_sem);
+			return -ENOMEM;
+		}
+	}
+	list_for_each_entry_safe(ml, tml, &mattr_list, nd) {
+		if (ml->end <= start)
+			continue;
+		if (ml->start >= end)
+			break;
+		if (ml->start == start && ml->end == end) {
+			if (attr == -1L || ml->attr == attr)
+				err = 0;
+			else
+				wrong_attr("free", start, end, attr, ml);
+			list_del(&ml->nd);
+			kfree(ml);
+		} else if (holemap)
+			mark_hole(holemap, ml, start, end);
+	}
+	if (holemap) {
+		if (!err)
+			err = revert_hole(holemap, start, end, &flush);
+		kfree(holemap);
+	}
+	up_write(&init_mm.mmap_sem);
+	if (flush)
+		global_flush_tlb();
+	return err;
+}
+
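A note on the interface above: reserve_mattr() behaves differently depending
on fattr. A short hypothetical sketch of both modes; the physical range and
the demo_reserve() name are invented for illustration, only reserve_mattr(),
_PAGE_PCD and _PAGE_WC come from this patch set:

	/* Illustration only; the address range is made up. */
	static int demo_reserve(void)
	{
		unsigned long attr;
		int err;

		/* Strict mode: with a NULL fattr the call fails with -EBUSY
		   if the range is already reserved with a different caching
		   attribute. */
		err = reserve_mattr(0xfd000000ULL, 0xfd001000ULL,
				    _PAGE_PCD, NULL);
		if (err)
			return err;

		/* Permissive mode: pass &attr to accept whatever attribute
		   is already in force for the range. */
		err = reserve_mattr(0xfd000000ULL, 0xfd001000ULL,
				    _PAGE_WC, &attr);
		if (err)
			return err;
		/* attr now reports the attribute actually in force (here
		   still _PAGE_PCD from the first reservation); the caller
		   must use it, not _PAGE_WC, for its own mapping. */
		return 0;
	}
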
Index: linux/include/asm-x86_64/pat.h
===================================================================
--- /dev/null
+++ linux/include/asm-x86_64/pat.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_PAT_H
+#define _ASM_PAT_H 1
+
+#include <linux/types.h>
+
+/* Handle the page attribute table (PAT) of the CPU */
+
+int reserve_mattr(u64 start, u64 end, unsigned long attr, unsigned long *fattr);
+int free_mattr(u64 start, u64 end, unsigned long attr);
+
+#endif

Index: linux/arch/x86_64/mm/ioremap.c
===================================================================
--- linux.orig/arch/x86_64/mm/ioremap.c
+++ linux/arch/x86_64/mm/ioremap.c
@@ -18,35 +18,12 @@
 #include 
 #include 
 #include 
+#include <asm/pat.h>
 
 #define ISA_START_ADDRESS	0xa0000
 #define ISA_END_ADDRESS		0x100000
 
 /*
- * Fix up the linear direct mapping of the kernel to avoid cache attribute
- * conflicts.
- */
-static int
-ioremap_change_attr(unsigned long phys_addr, unsigned long size,
-					unsigned long flags)
-{
-	int err = 0;
-	if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
-		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		unsigned long vaddr = (unsigned long) __va(phys_addr);
-
-		/*
-		 * Must use a address here and not struct page because the phys addr
-		 * can be a in hole between nodes and not have an memmap entry.
-		 */
-		err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
-		if (!err)
-			global_flush_tlb();
-	}
-	return err;
-}
-
-/*
  * Generic mapping function
  */
@@ -116,12 +93,16 @@ void __iomem * __ioremap(unsigned long p
 		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
 		return NULL;
 	}
-	if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
-		area->flags &= 0xffffff;
-		vunmap(addr);
-		return NULL;
+	if (flags) {
+		if (reserve_mattr(phys_addr, phys_addr + size, flags, NULL) < 0)
+			goto out;
 	}
 	return (__force void __iomem *) (offset + (char *)addr);
+
+out:
+	area->flags &= 0xffffff;
+	vunmap(addr);
+	return NULL;
 }
 EXPORT_SYMBOL(__ioremap);
@@ -189,8 +170,9 @@ void iounmap(volatile void __iomem *addr
 	}
 
 	/* Reset the direct mapping. Can block */
-	if (p->flags >> 20)
-		ioremap_change_attr(p->phys_addr, p->size, 0);
+	if (p->flags >> 20) {
+		free_mattr(p->phys_addr, p->phys_addr + p->size, p->flags >> 20);
+	}
 
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);

Index: linux/include/asm-i386/pat.h
===================================================================
--- /dev/null
+++ linux/include/asm-i386/pat.h
@@ -0,0 +1 @@
+#include <asm-x86_64/pat.h>
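For context, a minimal hypothetical sketch of how a driver that manages its
own mapping might pair the two calls; DEMO_START/DEMO_END and the demo_*
function names are invented, only reserve_mattr()/free_mattr() come from
this patch:

	#include <asm/pat.h>

	#define DEMO_START	0xfd000000ULL		/* made-up range */
	#define DEMO_END	(DEMO_START + 0x1000ULL)

	static int demo_claim_uncached(void)
	{
		/* Fails with -EBUSY if someone else already holds the
		   range with a conflicting caching attribute. */
		return reserve_mattr(DEMO_START, DEMO_END, _PAGE_PCD, NULL);
	}

	static void demo_release_uncached(void)
	{
		/* Pass the same range and attribute as the reservation */
		free_mattr(DEMO_START, DEMO_END, _PAGE_PCD);
	}

Note that with this patch __ioremap() and iounmap() already do this pairing
internally for any mapping created with nonzero flags, so ordinary ioremap
users get the conflict checking without any changes.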