Reorganize ioremap to be able to handle attribute checking properly. Also fix up change_page_attr a bit and make it consistent. This prepares for the PAT conflict checking patch. Signed-off-by: Andi Kleen Index: linux/arch/x86_64/mm/pageattr.c =================================================================== --- linux.orig/arch/x86_64/mm/pageattr.c +++ linux/arch/x86_64/mm/pageattr.c @@ -151,7 +151,7 @@ __change_page_attr(unsigned long address set_pte(kpte, pfn_pte(pfn, ref_prot)); __put_page(kpte_page); } else - BUG(); + return 0; /* on x86-64 the direct mapping set at boot is not using 4k pages */ BUG_ON(PageReserved(kpte_page)); @@ -180,8 +180,9 @@ __change_page_attr(unsigned long address * * Caller must call global_flush_tlb() after this. */ -int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) +int change_page_attr_pfn(unsigned long pfn, int numpages, pgprot_t prot) { + unsigned long address = pfn << PAGE_SHIFT; int err = 0; int i; @@ -210,7 +211,7 @@ int change_page_attr_addr(unsigned long int change_page_attr(struct page *page, int numpages, pgprot_t prot) { unsigned long addr = (unsigned long)page_address(page); - return change_page_attr_addr(addr, numpages, prot); + return change_page_attr_pfn(addr >> PAGE_SHIFT, numpages, prot); } void global_flush_tlb(void) Index: linux/include/asm-i386/cacheflush.h =================================================================== --- linux.orig/include/asm-i386/cacheflush.h +++ linux/include/asm-i386/cacheflush.h @@ -25,6 +25,7 @@ void global_flush_tlb(void); int change_page_attr(struct page *page, int numpages, pgprot_t prot); +int change_page_attr_pfn(unsigned long pfn, int numpages, pgprot_t prot); #ifdef CONFIG_DEBUG_PAGEALLOC /* internal debugging function */ Index: linux/arch/i386/mm/pageattr.c =================================================================== --- linux.orig/arch/i386/mm/pageattr.c +++ linux/arch/i386/mm/pageattr.c @@ -105,14 +105,17 @@ static inline void 
revert_page(struct pa PAGE_KERNEL_LARGE)); } -static int -__change_page_attr(struct page *page, pgprot_t prot) +static int __change_page_attr(unsigned long pfn, pgprot_t prot) { + struct page *page; pte_t *kpte; unsigned long address; struct page *kpte_page; - BUG_ON(PageHighMem(page)); + if (!pfn_valid(pfn)) + return 0; + page = pfn_to_page(pfn); + address = (unsigned long)page_address(page); kpte = lookup_address(address); @@ -134,7 +137,7 @@ __change_page_attr(struct page *page, pg set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); __put_page(kpte_page); } else - BUG(); + return 0; /* * If the pte was reserved, it means it was created at boot @@ -171,15 +174,15 @@ static inline void flush_map(void) * * Caller must call global_flush_tlb() after this. */ -int change_page_attr(struct page *page, int numpages, pgprot_t prot) +int change_page_attr_pfn(unsigned long pfn, int numpages, pgprot_t prot) { int err = 0; int i; unsigned long flags; spin_lock_irqsave(&cpa_lock, flags); - for (i = 0; i < numpages; i++, page++) { - err = __change_page_attr(page, prot); + for (i = 0; i < numpages; i++, pfn++) { + err = __change_page_attr(pfn, prot); if (err) break; } @@ -187,6 +190,11 @@ int change_page_attr(struct page *page, return err; } +int change_page_attr(struct page *page, int numpages, pgprot_t prot) +{ + return change_page_attr_pfn(page_to_pfn(page), numpages, prot); +} + void global_flush_tlb(void) { LIST_HEAD(l); Index: linux/include/asm-x86_64/cacheflush.h =================================================================== --- linux.orig/include/asm-x86_64/cacheflush.h +++ linux/include/asm-x86_64/cacheflush.h @@ -25,7 +25,7 @@ void global_flush_tlb(void); int change_page_attr(struct page *page, int numpages, pgprot_t prot); -int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); +int change_page_attr_pfn(unsigned long pfn, int numpages, pgprot_t prot); #define ARCH_HAS_CPA 1 Index: linux/arch/i386/mm/ioremap.c 
=================================================================== --- linux.orig/arch/i386/mm/ioremap.c +++ linux/arch/i386/mm/ioremap.c @@ -22,7 +22,7 @@ #define ISA_END_ADDRESS 0x100000 static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) + unsigned long end, unsigned long phys_addr, pgprot_t prot) { pte_t *pte; unsigned long pfn; @@ -33,15 +33,14 @@ static int ioremap_pte_range(pmd_t *pmd, return -ENOMEM; do { BUG_ON(!pte_none(*pte)); - set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_DIRTY | _PAGE_ACCESSED | flags))); + set_pte(pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); return 0; } static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) + unsigned long end, unsigned long phys_addr, pgprot_t prot) { pmd_t *pmd; unsigned long next; @@ -52,14 +51,14 @@ static inline int ioremap_pmd_range(pud_ return -ENOMEM; do { next = pmd_addr_end(addr, end); - if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags)) + if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) return -ENOMEM; } while (pmd++, addr = next, addr != end); return 0; } static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) + unsigned long end, unsigned long phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -70,14 +69,14 @@ static inline int ioremap_pud_range(pgd_ return -ENOMEM; do { next = pud_addr_end(addr, end); - if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags)) + if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) return -ENOMEM; } while (pud++, addr = next, addr != end); return 0; } static int ioremap_page_range(unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) + unsigned long end, unsigned long phys_addr, pgprot_t 
prot) { pgd_t *pgd; unsigned long next; @@ -90,7 +89,7 @@ static int ioremap_page_range(unsigned l spin_lock(&init_mm.page_table_lock); do { next = pgd_addr_end(addr, end); - err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); + err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); if (err) break; } while (pgd++, addr = next, addr != end); @@ -112,11 +111,13 @@ static int ioremap_page_range(unsigned l * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +void __iomem * +__ioremap(unsigned long phys_addr, unsigned long size, pgprot_t attr) { void __iomem * addr; struct vm_struct * area; unsigned long offset, last_addr; + int err; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -124,24 +125,19 @@ void __iomem * __ioremap(unsigned long p return NULL; /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) - return (void __iomem *) phys_to_virt(phys_addr); - - /* * Don't allow anybody to remap normal RAM that we're using.. */ if (phys_addr <= virt_to_phys(high_memory - 1)) { - char *t_addr, *t_end; - struct page *page; + unsigned long pfn; - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) + for (pfn = phys_addr >> PAGE_SHIFT; + pfn < (phys_addr + size - 1) >> PAGE_SHIFT; + pfn++) { + if (!pfn_valid(pfn)) + continue; + if (!PageReserved(pfn_to_page(pfn))) return NULL; + } } /* @@ -151,21 +147,29 @@ void __iomem * __ioremap(unsigned long p phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + err = change_page_attr_pfn(phys_addr >> PAGE_SHIFT, + size >> PAGE_SHIFT, attr); + if (err) + return NULL; + /* * Ok, go for it.. 
*/ - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); + area = get_vm_area(size, VM_IOREMAP); if (!area) - return NULL; + goto error; area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; - if (ioremap_page_range((unsigned long) addr, - (unsigned long) addr + size, phys_addr, flags)) { + if (ioremap_page_range((unsigned long) addr, phys_addr, size, attr)) { vunmap((void __force *) addr); +error: + change_page_attr_pfn(phys_addr >> PAGE_SHIFT, + size >> PAGE_SHIFT, PAGE_KERNEL); return NULL; } return (void __iomem *) (offset + (char __iomem *)addr); } + EXPORT_SYMBOL(__ioremap); /** @@ -192,70 +196,42 @@ EXPORT_SYMBOL(__ioremap); void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) { - unsigned long last_addr; - void __iomem *p = __ioremap(phys_addr, size, _PAGE_MA_UC); - if (!p) - return p; - - /* Guaranteed to be > phys_addr, as per __ioremap() */ - last_addr = phys_addr + size - 1; - - if (last_addr < virt_to_phys(high_memory) - 1) { - struct page *ppage = virt_to_page(__va(phys_addr)); - unsigned long npages; - - phys_addr &= PAGE_MASK; - - /* This might overflow and become zero.. */ - last_addr = PAGE_ALIGN(last_addr); - - /* .. but that's ok, because modulo-2**n arithmetic will make - * the page-aligned "last - first" come out right. - */ - npages = (last_addr - phys_addr) >> PAGE_SHIFT; - - if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { - iounmap(p); - p = NULL; - } - global_flush_tlb(); - } - - return p; + return __ioremap(phys_addr, size, PAGE_KERNEL_NOCACHE); } EXPORT_SYMBOL(ioremap_nocache); +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. 
+ */ +void __iomem *ioremap(unsigned long phys_addr, unsigned long size) +{ + return __ioremap(phys_addr, size, PAGE_KERNEL); +} +EXPORT_SYMBOL(ioremap); + void iounmap(volatile void __iomem *addr) { struct vm_struct *p; - if ((void __force *)addr <= high_memory) - return; - - /* - * __ioremap special-cases the PCI/ISA range by not instantiating a - * vm_area and by simply returning an address into the kernel mapping - * of ISA space. So handle that here. - */ - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) - return; - write_lock(&vmlist_lock); p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); if (!p) { printk(KERN_WARNING "iounmap: bad address %p\n", addr); dump_stack(); - goto out_unlock; - } - - if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) { - change_page_attr(virt_to_page(__va(p->phys_addr)), - p->size >> PAGE_SHIFT, - PAGE_KERNEL); + } else { + change_page_attr_pfn(p->phys_addr >> PAGE_SHIFT, + p->size >> PAGE_SHIFT, + PAGE_KERNEL); global_flush_tlb(); - } -out_unlock: + } write_unlock(&vmlist_lock); kfree(p); } Index: linux/include/asm-i386/io.h =================================================================== --- linux.orig/include/asm-i386/io.h +++ linux/include/asm-i386/io.h @@ -46,7 +46,6 @@ #ifdef __KERNEL__ #include - #include /* @@ -101,25 +100,9 @@ static inline void * phys_to_virt(unsign */ #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) -extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -/** - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. 
The returned - * address is not guaranteed to be usable directly as a virtual - * address. - */ - -static inline void __iomem * ioremap(unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} +extern void __iomem * __ioremap(unsigned long offset, unsigned long size, pgprot_t attr); +extern void __iomem * ioremap(unsigned long offset, unsigned long size); extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size); extern void iounmap(volatile void __iomem *addr); Index: linux/arch/x86_64/mm/ioremap.c =================================================================== --- linux.orig/arch/x86_64/mm/ioremap.c +++ linux/arch/x86_64/mm/ioremap.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,7 @@ #define ISA_END_ADDRESS 0x100000 static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) + unsigned long phys_addr, pgprot_t prot) { unsigned long end; unsigned long pfn; @@ -39,8 +40,7 @@ static inline void remap_area_pte(pte_t printk("remap_area_pte: page already exists\n"); BUG(); } - set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_GLOBAL | _PAGE_DIRTY | _PAGE_ACCESSED | flags))); + set_pte(pte, pfn_pte(pfn, prot)); address += PAGE_SIZE; pfn++; pte++; @@ -48,7 +48,7 @@ static inline void remap_area_pte(pte_t } static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) + unsigned long phys_addr, pgprot_t prot) { unsigned long end; @@ -63,7 +63,8 @@ static inline int remap_area_pmd(pmd_t * pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); if (!pte) return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); + remap_area_pte(pte, address, end - address, address + phys_addr, + prot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < 
end)); @@ -71,7 +72,7 @@ static inline int remap_area_pmd(pmd_t * } static inline int remap_area_pud(pud_t * pud, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) + unsigned long phys_addr, pgprot_t prot) { unsigned long end; @@ -86,7 +87,8 @@ static inline int remap_area_pud(pud_t * pmd_t * pmd = pmd_alloc(&init_mm, pud, address); if (!pmd) return -ENOMEM; - remap_area_pmd(pmd, address, end - address, address + phys_addr, flags); + remap_area_pmd(pmd, address, end - address, address + phys_addr, + prot); address = (address + PUD_SIZE) & PUD_MASK; pud++; } while (address && (address < end)); @@ -94,7 +96,7 @@ static inline int remap_area_pud(pud_t * } static int remap_area_pages(unsigned long address, unsigned long phys_addr, - unsigned long size, unsigned long flags) + unsigned long size, pgprot_t prot) { int error; pgd_t *pgd; @@ -113,7 +115,7 @@ static int remap_area_pages(unsigned lon if (!pud) break; if (remap_area_pud(pud, address, end - address, - phys_addr + address, flags)) + phys_addr + address, prot)) break; error = 0; address = (address + PGDIR_SIZE) & PGDIR_MASK; @@ -125,34 +127,6 @@ static int remap_area_pages(unsigned lon } /* - * Fix up the linear direct mapping of the kernel to avoid cache attribute - * conflicts. - */ -static int -ioremap_change_attr(unsigned long phys_addr, unsigned long size, - unsigned long flags) -{ - int err = 0; - if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) { - unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long vaddr = (unsigned long) __va(phys_addr); - - /* - * Must use a address here and not struct page because the phys addr - * can be a in hole between nodes and not have an memmap entry. 
- */ - err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags)); - if (!err) - global_flush_tlb(); - } - return err; -} - -/* - * Generic mapping function - */ - -/* * Remap an arbitrary physical address space into the kernel virtual * address space. Needed when the kernel wants to access high addresses * directly. @@ -161,11 +135,13 @@ ioremap_change_attr(unsigned long phys_a * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +void __iomem * +__ioremap(unsigned long phys_addr, unsigned long size, pgprot_t attr) { - void * addr; + void __iomem * addr; struct vm_struct * area; unsigned long offset, last_addr; + int err; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -173,27 +149,20 @@ void __iomem * __ioremap(unsigned long p return NULL; /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) - return (__force void __iomem *)phys_to_virt(phys_addr); - -#ifdef CONFIG_FLATMEM - /* * Don't allow anybody to remap normal RAM that we're using.. 
*/ - if (last_addr < virt_to_phys(high_memory)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) + if (phys_addr <= virt_to_phys(high_memory - 1)) { + unsigned long pfn; + + for (pfn = phys_addr >> PAGE_SHIFT; + pfn < (phys_addr + size - 1) >> PAGE_SHIFT; + pfn++) { + if (!pfn_valid(pfn)) + continue; + if (!PageReserved(pfn_to_page(pfn))) return NULL; + } } -#endif /* * Mappings have to be page-aligned @@ -202,25 +171,29 @@ void __iomem * __ioremap(unsigned long p phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + err = change_page_attr_pfn(phys_addr >> PAGE_SHIFT, size >> PAGE_SHIFT, + attr); + if (err) + return NULL; + /* * Ok, go for it.. */ - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); + area = get_vm_area(size, VM_IOREMAP); if (!area) - return NULL; + goto error; area->phys_addr = phys_addr; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); + addr = (void __iomem *) area->addr; + if (remap_area_pages((unsigned long) addr, phys_addr, size, attr)) { + vunmap((void __force *) addr); +error: + change_page_attr_pfn(phys_addr >> PAGE_SHIFT, + size >> PAGE_SHIFT, PAGE_KERNEL); return NULL; } - if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) { - area->flags &= 0xffffff; - vunmap(addr); - return NULL; - } - return (__force void __iomem *) (offset + (char *)addr); + return (void __iomem *) (offset + (char __iomem *)addr); } +EXPORT_SYMBOL(__ioremap); /** * ioremap_nocache - map bus memory into CPU space @@ -246,25 +219,44 @@ void __iomem * __ioremap(unsigned long p void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) { - return __ioremap(phys_addr, size, _PAGE_MA_UC); + return __ioremap(phys_addr, size, PAGE_KERNEL_NOCACHE); } 
+EXPORT_SYMBOL(ioremap_nocache); + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + */ +void __iomem *ioremap(unsigned long phys_addr, unsigned long size) +{ + return __ioremap(phys_addr, size, PAGE_KERNEL); +} +EXPORT_SYMBOL(ioremap); void iounmap(volatile void __iomem *addr) { struct vm_struct *p; - if (addr <= high_memory) - return; - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) - return; - write_lock(&vmlist_lock); - p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK)); - if (!p) - printk("iounmap: bad address %p\n", addr); - else if (p->flags >> 20) - ioremap_change_attr(p->phys_addr, p->size, 0); + p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); + if (!p) { + printk(KERN_WARNING "iounmap: bad address %p\n", addr); + dump_stack(); + } else { + change_page_attr_pfn(p->phys_addr >> PAGE_SHIFT, + p->size >> PAGE_SHIFT, + PAGE_KERNEL); + global_flush_tlb(); + } write_unlock(&vmlist_lock); kfree(p); } +EXPORT_SYMBOL(iounmap); + Index: linux/include/asm-x86_64/io.h =================================================================== --- linux.orig/include/asm-x86_64/io.h +++ linux/include/asm-x86_64/io.h @@ -128,12 +128,9 @@ static inline void * phys_to_virt(unsign #include -extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -static inline void __iomem * ioremap (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} +extern void __iomem *__ioremap(unsigned long offset, unsigned long size, + pgprot_t prot); +extern void __iomem * ioremap (unsigned long 
offset, unsigned long size); /* * This one maps high address device memory and turns off caching for that area. Index: linux/include/asm-i386/pgtable.h =================================================================== --- linux.orig/include/asm-i386/pgtable.h +++ linux/include/asm-i386/pgtable.h @@ -461,9 +461,6 @@ extern void noexec_setup(const char *str #define kern_addr_valid(addr) (1) #endif /* CONFIG_FLATMEM */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #define MK_IOSPACE_PFN(space, pfn) (pfn) #define GET_IOSPACE(pfn) 0 #define GET_PFN(pfn) (pfn)