diff -prauN pgcl-2.5.68-1E/include/asm-generic/rmap.h pgcl-2.5.68-1F/include/asm-generic/rmap.h
--- pgcl-2.5.68-1E/include/asm-generic/rmap.h	2003-04-27 16:09:07.000000000 -0700
+++ pgcl-2.5.68-1F/include/asm-generic/rmap.h	2003-04-27 21:28:42.000000000 -0700
@@ -43,6 +43,17 @@ static inline void pgtable_add_rmap(stru
 	if (!mem_init_done)
 		return;
 #endif
+
+	/* rmap's accounting is already set up */
+	if (page->mapping) {
+		/*
+		 * address is presumably large. if smaller, overflow traps
+		 * the error; if larger, check the distance
+		 */
+		WARN_ON(address - page->index >= VIRT_AREA_MAPPED_PER_PTE_PAGE);
+		return;
+	}
+
 	page->mapping = (void *)mm;
 	page->index = address & ~(VIRT_AREA_MAPPED_PER_PTE_PAGE - 1);
 	inc_page_state(nr_page_table_pages);
@@ -50,6 +61,13 @@ static inline void pgtable_add_rmap(stru
 
 static inline void pgtable_remove_rmap(struct page *page)
 {
+	/* rmap's accounting is already torn down */
+	if (!page->mapping) {
+		/* this can't catch them all */
+		WARN_ON(page->index);
+		return;
+	}
+
 	page->mapping = NULL;
 	page->index = 0;
 	dec_page_state(nr_page_table_pages);
diff -prauN pgcl-2.5.68-1E/include/asm-i386/pgalloc.h pgcl-2.5.68-1F/include/asm-i386/pgalloc.h
--- pgcl-2.5.68-1E/include/asm-i386/pgalloc.h	2003-04-27 20:17:17.000000000 -0700
+++ pgcl-2.5.68-1F/include/asm-i386/pgalloc.h	2003-04-27 23:25:05.000000000 -0700
@@ -30,13 +30,15 @@ static inline void pmd_populate(struct m
 	}
 
 	for (k = 0; k < PAGE_MMUCOUNT; ++k) {
-		unsigned long long pteval;
-		pteval = (unsigned long long)(pfn + k) << MMUPAGE_SHIFT;
-		if (unlikely(!pmd_none(pmd[k])) && pmd_present(pmd[k])) {
+		unsigned long long pmdval;
+		pmdval = (unsigned long long)(pfn + k) << MMUPAGE_SHIFT;
+		if (likely(pmd_none(pmd[k]) || !pmd_present(pmd[k])))
+			set_pmd(&pmd[k], __pmd(_PAGE_TABLE + pmdval));
+		else {
 			WARN_ON(1);
 			printk(KERN_DEBUG "pmdval=%Lx\n", (u64)pmd_val(pmd[k]));
-		} else
-			set_pmd(&pmd[k], __pmd(_PAGE_TABLE + pteval));
+			put_page(page);	/* a reference will be omitted */
+		}
 	}
 }
 /*
@@ -67,12 +69,10 @@ struct mmu_gather;
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page);
 static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page)
 {
-	if (PAGE_MMUCOUNT == 1)
-		tlb_remove_page(tlb, page);
-	else if (atomic_dec_and_test(&page->count)) {
+	/* restore the reference count so later frees don't BUG() */
+	if (PAGE_MMUCOUNT > 1 && atomic_dec_and_test(&page->count))
 		atomic_set(&page->count, 1);
-		tlb_remove_page(tlb, page);
-	}
+	tlb_remove_page(tlb, page);
 }
 
 /*
diff -prauN pgcl-2.5.68-1E/mm/memory.c pgcl-2.5.68-1F/mm/memory.c
--- pgcl-2.5.68-1E/mm/memory.c	2003-04-27 17:51:27.000000000 -0700
+++ pgcl-2.5.68-1F/mm/memory.c	2003-04-28 04:11:43.000000000 -0700
@@ -149,11 +149,32 @@ pte_t * pte_alloc_map(struct mm_struct *
 		/*
 		 * Because we dropped the lock, we should re-check the
 		 * entry, as somebody else could have populated it..
+		 * If we raced, we also need to drop all the reference
+		 * counts originally taken with the intent of conferring
+		 * them to all the pte entries spanned by the pte page.
 		 */
 		if (pmd_present(*pmd)) {
+			if (PAGE_MMUCOUNT > 1)
+				atomic_sub(PAGE_MMUCOUNT-1, &new->count);
 			pte_free(new);
 			goto out;
 		}
+#if 1
+		{
+			int k;
+			pmd_t *base;
+			unsigned long addr, __pmd = (unsigned long)pmd;
+			addr = address & ~(PAGE_MMUCOUNT*PMD_SIZE - 1);
+			base = pmd - ((__pmd/sizeof(pmd_t)) % PAGE_MMUCOUNT);
+			for (k = 0; k < PAGE_MMUCOUNT; ++k)
+				if (!pmd_none(base[k]) || pmd_present(base[k]))
+					printk(KERN_DEBUG
+						"redundant pmd instantiation "
+						"at vaddr 0x%lx pmd = 0x%p\n",
+						addr + PMD_SIZE*k,
+						&base[k]);
+		}
+#endif
 		pgtable_add_rmap(new, mm, address);
 		pmd_populate(mm, pmd, new);
 	}
@@ -413,8 +434,14 @@ zap_pte_range(struct mmu_gather *tlb, pm
 				}
 			}
 		} else {
-			if (!pte_file(pte))
-				free_swap_and_cache(pte_to_swp_entry(pte));
+			if (!pte_file(pte)) {
+				if (pte_to_swp_entry(pte).val == 0x8073756dUL)
+					printk(KERN_DEBUG "detected fsckup "
+						"early, leaking stuff to "
+						"work around it\n");
+				else
+					free_swap_and_cache(pte_to_swp_entry(pte));
+			}
 			pte_clear(ptep);
 		}
 	}
@@ -1388,8 +1415,9 @@ do_anonymous_page(struct mm_struct *mm,
 		clear_user_highpage(page, addr);
 	}
 
-	lo_vaddr = max(addr & PMD_MASK, vma->vm_start);
-	hi_vaddr = min((addr + PMD_SIZE - 1) & PMD_MASK, vma->vm_end);
+	lo_vaddr = max(addr & ~(PAGE_MMUCOUNT*PMD_SIZE - 1), vma->vm_start);
+	hi_vaddr = min(vma->vm_end, (addr + PAGE_MMUCOUNT*PMD_SIZE - 1)
+			& ~(PAGE_MMUCOUNT*PMD_SIZE - 1));
 	dn_subpfn = 0;
 	up_subpfn = PAGE_MMUCOUNT - 1;
 	dn_vaddr = addr & MMUPAGE_MASK;
@@ -1475,6 +1503,8 @@ do_anonymous_page(struct mm_struct *mm,
 		/*
 		 * this computes the offset from the
 		 * PAGE_SIZE-aligned kmap_atomic() aperture
+		 * the PAGE_SIZE-sized pte pages end up
+		 * mapping PAGE_MMUCOUNT*PMD_SIZE; hence...
 		 */
 		vaddr &= PAGE_MASK;
 
@@ -1484,7 +1514,8 @@ do_anonymous_page(struct mm_struct *mm,
 
 		pr_debug("vaddr offset in ptes = 0x%lx\n", vaddr);
 
-		vaddr = (lo_vaddr & PMD_MASK) + vaddr * MMUPAGE_SIZE;
+		vaddr = (lo_vaddr & ~(PAGE_MMUCOUNT*PMD_SIZE-1))
+				+ vaddr * MMUPAGE_SIZE;
 
 		pr_debug("vaddr = 0x%lx\n", vaddr);
 
diff -prauN pgcl-2.5.68-1E/mm/swapfile.c pgcl-2.5.68-1F/mm/swapfile.c
--- pgcl-2.5.68-1E/mm/swapfile.c	2003-04-27 11:37:43.000000000 -0700
+++ pgcl-2.5.68-1F/mm/swapfile.c	2003-04-28 03:09:35.000000000 -0700
@@ -165,8 +165,11 @@ static struct swap_info_struct * swap_in
 	if (!entry.val)
 		goto out;
 	type = swp_type(entry);
-	if (type >= nr_swapfiles)
+	if (type >= nr_swapfiles) {
+		printk(KERN_ERR "bad type %lu beyond nr_swapfiles %u "
+			"in swap_info_get()\n", type, nr_swapfiles);
 		goto bad_nofile;
+	}
 	p = & swap_info[type];
 	if (!(p->flags & SWP_USED))
 		goto bad_device;
@@ -188,22 +191,73 @@ static struct swap_info_struct * swap_in
 
 bad_free:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
-	BUG();
+	WARN_ON(1);
 	goto out;
 bad_offset:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
-	BUG();
+	WARN_ON(1);
 	goto out;
 bad_device:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
-	BUG();
+	WARN_ON(1);
 	goto out;
 bad_nofile:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
-	BUG();
+	WARN_ON(1);
+
+/* dump pagetables */
+#if 1
+	{
+		struct mm_struct *mm = current->mm;
+		pgd_t *pgd;
+		pmd_t *pmd;
+		pte_t *pte;
+		unsigned long vaddr;
+
+		if (!mm) {
+			/* we're dead here anyway, but... */
+			printk(KERN_ERR "bug in free_swap_and_cache() "
+				"with no mm!\n");
+			goto out_noscan;
+		}
+
+		for (vaddr = 0; vaddr < TASK_SIZE; vaddr += PGDIR_SIZE) {
+			pgd = pgd_offset(mm, vaddr);
+			printk(KERN_DEBUG "pgd for 0x%lx = 0x%Lx\n",
+				vaddr, (u64)pgd_val(*pgd));
+		}
+
+		if (PTRS_PER_PMD > 1) {
+			for (vaddr = 0; vaddr < TASK_SIZE; vaddr += PMD_SIZE) {
+				pgd = pgd_offset(mm, vaddr);
+				if (pgd_none(*pgd) || !pgd_present(*pgd))
+					continue;
+				pmd = pmd_offset(pgd, vaddr);
+				printk(KERN_DEBUG "pmd for 0x%lx = 0x%Lx\n",
+					vaddr, (u64)pmd_val(*pmd));
+			}
+		}
+
+		for (vaddr = 0; vaddr < TASK_SIZE; vaddr += MMUPAGE_SIZE) {
+			pgd = pgd_offset(mm, vaddr);
+			if (pgd_none(*pgd) || !pgd_present(*pgd))
+				continue;
+			pmd = pmd_offset(pgd, vaddr);
+			if (pmd_none(*pmd) || !pmd_present(*pmd))
+				continue;
+			pte = pte_offset_map_nested(pmd, vaddr);
+			if (!pte_none(*pte) && pte_present(*pte))
+				printk(KERN_DEBUG "pte for 0x%lx = 0x%Lx\n",
+					vaddr, (u64)pte_val(*pte));
+			pte_unmap_nested(pte);
+		}
+out_noscan:
+		;
+	}
+#endif
 out:
 	return NULL;
-}
+}
 
 static void swap_info_put(struct swap_info_struct * p)
 {
@@ -1547,7 +1601,7 @@ int valid_swaphandles(swp_entry_t entry,
 
 	if (!page_cluster)	/* no readahead */
 		return 0;
-	toff = (swp_offset(entry) >> page_cluster) << page_cluster;
+	toff = (swp_offset(entry)/PAGE_MMUCOUNT) & ~((1UL << page_cluster)-1);
 	if (!toff)		/* first page is swap header */
 		toff++, i--;
 	*offset = toff;
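
Not part of the patch: a standalone userspace sketch of the valid_swaphandles() rounding change in the last hunk. The old expression rounded the raw swap offset down to a 1 << page_cluster boundary; the new one divides by PAGE_MMUCOUNT first and then rounds. The PAGE_MMUCOUNT and page_cluster values below are made up for illustration, not taken from the tree.

#include <stdio.h>

#define PAGE_MMUCOUNT	8UL	/* assumed: MMU pages per PAGE_SIZE page */

int main(void)
{
	unsigned long page_cluster = 3;	/* assumed readahead order */
	unsigned long offsets[] = { 7, 64, 100, 1000 };
	unsigned long i;

	for (i = 0; i < sizeof(offsets)/sizeof(offsets[0]); i++) {
		unsigned long off = offsets[i];
		/* old: round the raw offset down to a 2^page_cluster boundary */
		unsigned long old_toff = (off >> page_cluster) << page_cluster;
		/* new: scale by PAGE_MMUCOUNT first, then round down */
		unsigned long new_toff = (off/PAGE_MMUCOUNT) &
					 ~((1UL << page_cluster) - 1);
		printf("offset %4lu: old toff %4lu, new toff %4lu\n",
			off, old_toff, new_toff);
	}
	return 0;
}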
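
Also not part of the patch: a rough userspace model of the pte-page reference accounting that the pgalloc.h and pte_alloc_map() hunks appear to be keeping consistent, assuming the pte page is handed out holding PAGE_MMUCOUNT references (one per pmd slot it will back). The helpers and values are illustrative only, not kernel API.

#include <assert.h>
#include <stdio.h>

#define PAGE_MMUCOUNT	8	/* assumed MMU pages per pte page */

static int count;		/* stands in for page->count */

/* pte page comes back from the allocator with one reference per pmd slot */
static void pte_page_alloc(void)
{
	count = PAGE_MMUCOUNT;
}

/* pte_alloc_map() lost the race: shed the extra references before pte_free() */
static void pte_alloc_lost_race(void)
{
	if (PAGE_MMUCOUNT > 1)
		count -= PAGE_MMUCOUNT - 1;
	assert(count == 1);
}

/*
 * __pte_free_tlb(): each teardown drops a reference; the drop that reaches
 * zero puts the count back to 1 so the final tlb_remove_page() free does
 * not trip a zero-count BUG().
 */
static void pte_free_tlb(void)
{
	if (PAGE_MMUCOUNT > 1 && --count == 0)
		count = 1;
}

int main(void)
{
	int k;

	pte_page_alloc();
	pte_alloc_lost_race();		/* race path ends with a single holder */

	pte_page_alloc();
	for (k = 0; k < PAGE_MMUCOUNT; k++)
		pte_free_tlb();		/* normal teardown path */
	assert(count == 1);
	printf("both paths end with count == %d\n", count);
	return 0;
}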