Take a few stabs at bugfixing and cleaning up the intelligent COW fault handling. Things are still oopsing a bit.

diff -prauN pgcl-2.5.70-bk13-5/include/asm-generic/rmap.h pgcl-2.5.70-bk13-6/include/asm-generic/rmap.h
--- pgcl-2.5.70-bk13-5/include/asm-generic/rmap.h	2003-06-08 06:20:34.000000000 -0700
+++ pgcl-2.5.70-bk13-6/include/asm-generic/rmap.h	2003-06-08 16:01:05.000000000 -0700
@@ -88,8 +88,13 @@ static inline struct mm_struct * ptep_to
 static inline unsigned long ptep_to_address(pte_t * ptep)
 {
-        struct page *page = kmap_atomic_to_page(ptep);
-        unsigned long swpage_voff = ((unsigned long)ptep)/sizeof(pte_t);
+        struct page *page;
+        unsigned long kvaddr = (unsigned long)ptep;
+        unsigned long swpage_voff = kvaddr/sizeof(pte_t);
+
+        WARN_ON(kvaddr > (unsigned long)(-PAGE_SIZE));
+
+        page = kmap_atomic_to_page(ptep);
         swpage_voff %= MMUPAGES_MAPPED_PER_PTE_PAGE;
         return page->index + MMUPAGE_SIZE*swpage_voff;
 }
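A note on the WARN_ON() above, since the same check recurs as BUG_ON() throughout the rest of this patch: (unsigned long)(-PAGE_SIZE) is the base address of the topmost virtual page, so any pte "pointer" above it is almost certainly a small negative offset off a NULL or poisoned pointer rather than a real kmap_atomic() address. A minimal userspace sketch of the idiom; the PAGE_SIZE value and the check_ptep() helper are illustrative, not from the patch:

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL        /* illustrative software page size */

/*
 * Mirror of the patch's check: valid pte pointers never point into
 * the topmost page of the address space, but NULL-minus-a-small-offset
 * and ~0UL-style poison values land exactly there, so trip loudly.
 */
static void check_ptep(const void *ptep)
{
        uintptr_t kvaddr = (uintptr_t)ptep;

        assert(!(kvaddr > (uintptr_t)(-PAGE_SIZE)));
}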
diff -prauN pgcl-2.5.70-bk13-5/mm/memory.c pgcl-2.5.70-bk13-6/mm/memory.c
--- pgcl-2.5.70-bk13-5/mm/memory.c	2003-06-08 13:28:01.000000000 -0700
+++ pgcl-2.5.70-bk13-6/mm/memory.c	2003-06-08 16:35:33.000000000 -0700
@@ -96,13 +96,6 @@ static inline void copy_cow_page(unsigne
 		}
 	}
 	kunmap_atomic(dstaddr, KM_USER0);
-/*
-	if (from == ZERO_PAGE(address)) {
-		clear_user_highpage(to, address);
-		return;
-	}
-	copy_user_highpage(to, from, address);
-*/
 }
@@ -1026,6 +1019,7 @@ int remap_page_range(struct vm_area_stru
  */
 static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
 {
+	BUG_ON((unsigned long)page_table > (unsigned long)(-PAGE_SIZE));
 	set_pte(page_table, entry);
 	flush_tlb_page(vma, address);
 	update_mmu_cache(vma, address, entry);
@@ -1037,6 +1031,7 @@ static inline void establish_pte(struct
 static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, pte_t *page_table, unsigned long subpfn)
 {
 	pte_t pte = pfn_pte(page_to_pfn(new_page) + subpfn, vma->vm_page_prot);
+	BUG_ON((unsigned long)page_table > (unsigned long)(-PAGE_SIZE));
 	invalidate_vcache(address, vma->vm_mm, new_page);
 	flush_cache_page(vma, address);
 	establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(pte)));
@@ -1048,7 +1043,7 @@ static pte_t *fill_anonymizable_ptevec(s
 					int map_pte, int cow)
 {
 	unsigned long lo_vaddr, hi_vaddr, dn_vaddr, up_vaddr,
-			dn_subpfn, up_subpfn, rss = 0;
+			dn_subpfn, up_subpfn, rss = 0, loop;
 	pte_t *up_pte, *dn_pte;
 
 	pr_debug("fill_anonymizable_ptevec() for addr 0x%lx\n", addr);
@@ -1075,7 +1070,26 @@ static pte_t *fill_anonymizable_ptevec(s
 	up_pte = dn_pte + 1;
 
 	do {
+		loop = 0;
+
+		if (dn_vaddr >= lo_vaddr && dn_subpfn <= up_subpfn) {
+			BUG_ON((unsigned long)dn_pte > (unsigned long)(-PAGE_SIZE));
+			if (pte_none(*dn_pte) || (cow && pte_present(*dn_pte) && !pte_write(*dn_pte))) {
+				ptes[dn_subpfn] = dn_pte;
+				dn_subpfn++;
+				++rss;
+				pr_debug("dn side grabbing pte\n");
+			} else if (ptep_to_address(dn_pte)==(addr&MMUPAGE_MASK))
+				pr_debug("dn badness, skipping needed pte\n");
+			else
+				pr_debug("dn side legitimately skipping\n");
+			dn_vaddr -= MMUPAGE_SIZE;
+			dn_pte--;
+			loop = 1;
+		}
+
 		if (up_vaddr < hi_vaddr && up_subpfn > dn_subpfn) {
+			BUG_ON((unsigned long)up_pte > (unsigned long)(-PAGE_SIZE));
 			if (pte_none(*up_pte) || (cow && pte_present(*up_pte) && !pte_write(*up_pte))) {
 				ptes[up_subpfn] = up_pte;
 				up_subpfn--;
@@ -1087,27 +1101,15 @@ static pte_t *fill_anonymizable_ptevec(s
 				pr_debug("up side legitimately skipping\n");
 			up_vaddr += MMUPAGE_SIZE;
 			up_pte++;
+			loop = 1;
 		}
-		if (dn_vaddr >= lo_vaddr && dn_subpfn < up_subpfn) {
-			if (pte_none(*dn_pte) || (cow && pte_present(*dn_pte) && !pte_write(*dn_pte))) {
-				ptes[dn_subpfn] = dn_pte;
-				dn_subpfn++;
-				++rss;
-				pr_debug("dn side grabbing pte\n");
-			} else if (ptep_to_address(dn_pte)==(addr&MMUPAGE_MASK))
-				pr_debug("dn badness, skipping needed pte\n");
-			else
-				pr_debug("dn side legitimately skipping\n");
-			dn_vaddr -= MMUPAGE_SIZE;
-			dn_pte--;
-		}
 		pr_debug("dn_vaddr = 0x%lx, up_vaddr = 0x%lx\n",
 				dn_vaddr, up_vaddr);
 		pr_debug("dn_subpfn = 0x%lx, up_subpfn = 0x%lx\n",
 				dn_subpfn, up_subpfn);
 	} while ((up_vaddr < hi_vaddr || dn_vaddr >= lo_vaddr) &&
-			up_subpfn > dn_subpfn);
+			loop && up_subpfn >= dn_subpfn);
 
 	pr_debug("finishing PTE search loop\n");
 	pr_debug("starting PTE instantiation loop\n");
@@ -1118,7 +1120,7 @@ static pte_t *fill_anonymizable_ptevec(s
 
 static int set_anon_ptes(struct vm_area_struct *vma, struct page *page,
 			unsigned long addr, pte_t *ptes[],
-			struct pte_chain *pte_chain, int write_access, int cow)
+			struct pte_chain **pte_chain, int write_access)
 {
 	unsigned long pfn, subpfn;
 	int rss;
@@ -1144,24 +1146,18 @@ static int set_anon_ptes(struct vm_area_
 			set_pte(ptes[subpfn], pte_wrprotect(pte));
 		} else {
 			pr_debug("setting pte to newly zeroed anonymous page\n");
-			BUG_ON(!pte_chain);
+			BUG_ON(!*pte_chain);
 			pr_debug("setting pte for anonymous zeroed page\n");
 			pr_debug("ptep = 0x%p, pte = 0x%Lx\n",
 					ptes[subpfn], (u64)pte_val(pte));
 			set_pte(ptes[subpfn], pte_mkwrite(pte_mkdirty(pte)));
 			pr_debug("about to page_add_rmap()\n");
-			pte_chain = page_add_rmap_chained(page, ptes[subpfn],
-							pte_chain);
-			if (cow) {
-				invalidate_vcache(addr, vma->vm_mm, page);
-				flush_cache_page(vma, addr);
-				flush_tlb_page(vma, addr);
-			}
+			*pte_chain = page_add_rmap_chained(page, ptes[subpfn],
+							*pte_chain);
 			pr_debug("about to update_mmu_cache()\n");
 			update_mmu_cache(vma, addr, pte);
 			rss++;
 			pr_debug("about to page_cache_get()\n");
-			page_cache_get(page);
 		}
 		pr_debug("falling through to next subpfn\n");
 	}
@@ -1175,8 +1171,10 @@ static void get_cow_pages(struct vm_area
 
 	pr_debug("get_cow_pages()\n");
 	for (k = 0; k < PAGE_MMUCOUNT; ++k) {
+		BUG_ON((unsigned long)ptes[k] > (unsigned long)(-PAGE_SIZE));
+
 		if (!ptes[k])
-			continue;
+			pfns[k] = 0;
 		else if (pte_present(*ptes[k])) {
 			if (pte_write(*ptes[k]))
 				pr_debug("writable pte in get_cow_pages()!\n");
@@ -1190,6 +1188,7 @@ static void get_cow_pages(struct vm_area
 			if (!vma->vm_file) {
 				pr_debug("saw no vma->vm_file, wiping pte\n");
 				ptes[k] = NULL;
+				pfns[k] = 0;
 			} else {
 				struct page *page;
 				unsigned long offset, subpfn;
@@ -1206,6 +1205,7 @@ static void get_cow_pages(struct vm_area
 				page = find_get_page(mapping, offset);
 				if (!page) {
 					ptes[k] = NULL;
+					pfns[k] = 0;
 					pr_debug("find_get_page() failed, wiping pte\n");
 				} else {
 					pfns[k] = page_to_pfn(page) + subpfn;
@@ -1222,12 +1222,14 @@ static void save_ptes(pte_t *ptes[], pte
 {
 	int k, rss = 0;
 	pr_debug("save_ptes()\n");
-	for (k = 0; k < PAGE_MMUCOUNT; ++k)
+	for (k = 0; k < PAGE_MMUCOUNT; ++k) {
+		BUG_ON((unsigned long)ptes[k] > (unsigned long)(-PAGE_SIZE));
 		if (ptes[k]) {
 			ptevals[k] = *ptes[k];
 			ptes[k] = (pte_t *)ptep_to_address(ptes[k]);
 			++rss;
 		}
+	}
 	pr_debug("return from save_ptes()\n");
 	pr_debug("erm, save_ptes() saw %d ptes set!\n", rss);
 }
@@ -1251,11 +1253,14 @@ static pte_t *reconstitute_ptes(pmd_t *p
 		return NULL;
 	addr = (unsigned long)ptes[j];
 	ptes[j] = pte_offset_map(pmd, addr);
+	BUG_ON((unsigned long)ptes[j] > (unsigned long)(-PAGE_SIZE));
 
-	for (k = j + 1; k < PAGE_MMUCOUNT; ++k)
+	for (k = j + 1; k < PAGE_MMUCOUNT; ++k) {
+		BUG_ON((unsigned long)ptes[k] > (unsigned long)(-PAGE_SIZE));
 		if (ptes[k])
 			ptes[k] = ptes[j] +
 				((unsigned long)ptes[k] - addr)/MMUPAGE_SIZE;
+	}
 	pr_debug("return 0x%p from reconstitute_ptes()\n", ptes[j]);
 	return ptes[j];
 }
@@ -1266,11 +1271,13 @@ static int recheck_ptes(pte_t *ptes[], u
 
 	pr_debug("recheck_ptes()\n");
 	for (k = 0; k < PAGE_MMUCOUNT; ++k) {
+		BUG_ON((unsigned long)ptes[k] > (unsigned long)(-PAGE_SIZE));
 		if (!ptes[k] || !pfns[k])
 			continue;
 		else if (pte_same(*ptes[k], ptevals[k]))
 			++rss;
 		else {
+			pr_debug("recheck_ptes() dropped racy pfn\n");
 			if (pfn_valid(pfns[k]))
 				page_cache_release(pfn_to_page(pfns[k]));
 			ptes[k] = NULL;
@@ -1300,13 +1307,26 @@ static struct pte_chain *move_mappings(s
 
 	pr_debug("move_mappings()\n");
 	for (k = 0; k < PAGE_MMUCOUNT; ++k) {
+		struct page *page;
+		int release;
+		BUG_ON((unsigned long)ptes[k] > (unsigned long)(-PAGE_SIZE));
+
 		if (!ptes[k] || !pfns[k])
 			continue;
-		if (PageReserved(pfn_to_page(pfns[k])))
-			ptep_to_mm(ptes[k])->rss++;
-		page_remove_rmap(pfn_to_page(pfns[k]), ptes[k]);
+
+		release = pte_present(*ptes[k]);
+		page = pfn_valid(pfns[k]) ? pfn_to_page(pfns[k]) : NULL;
+		if (page) {
+			if (PageReserved(page))
+				ptep_to_mm(ptes[k])->rss++;
+			else
+				page_remove_rmap(page, ptes[k]);
+		}
 		break_cow(vma, new_page, address, ptes[k], k);
-		pte_chain = page_add_rmap_chained(new_page, ptes[k], pte_chain);
+		pte_chain = page_add_rmap_chained(new_page,ptes[k],pte_chain);
+		/* nuke the pte's reference since we retargeted the pte */
+		if (page && release)
+			page_cache_release(page);
 	}
 	pr_debug("return 0x%p from move_mappings()\n", pte_chain);
 	return pte_chain;
@@ -1354,7 +1374,8 @@ static int do_wp_page(struct mm_struct *
 		pte_unmap(page_table);
 		printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
 				address);
-		goto oom;
+		ret = VM_FAULT_OOM;
+		goto out;
 	}
 	old_page = pfn_to_page(pfn);
 
@@ -1387,21 +1408,22 @@ static int do_wp_page(struct mm_struct *
 	save_ptes(ptes, ptevals);
 	pte_unmap(page_table);
 
-	/*
-	 * Ok, we need to copy. Oh, well..
-	 * XXX: This needs to sweep the pagetables in an analogous
-	 * manner to do_anonymous_page().
-	 */
-	/* page_cache_get(old_page); */
+	/* Ok, we need to copy. Oh, well.. */
 	spin_unlock(&mm->page_table_lock);
 
 	pr_debug("calling pte_chain_alloc_chained()\n");
 	pte_chain = pte_chain_alloc_chained(GFP_KERNEL);
-	if (!pte_chain)
-		goto no_mem;
+	if (!pte_chain) {
+		release_pfns(pfns);
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
 	new_page = alloc_page(GFP_HIGHUSER);
-	if (!new_page)
-		goto no_mem;
+	if (!new_page) {
+		release_pfns(pfns);
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
 
 	/* copy all the pieces of pages */
 	pr_debug("calling copy_cow_page()\n");
 	copy_cow_page(pfns, new_page, ptes);
@@ -1433,24 +1455,22 @@ static int do_wp_page(struct mm_struct *
 		goto out;
 	}
 
 	pr_debug("calling move_mappings()\n");
-	pte_chain = move_mappings(vma, new_page, address, pfns, ptes, pte_chain);
-	pr_debug("calling release_pfns()\n");
-	release_pfns(pfns);
+	pte_chain = move_mappings(vma,new_page,address,pfns,ptes,pte_chain);
 	atomic_add(rss, &new_page->count);
-	pr_debug("calling lru_cache_add_active()\n");
-	lru_cache_add_active(new_page);
 	pr_debug("do_wp_page() returning VM_FAULT_MINOR\n");
 	pte_unmap(page_table);
+
+	pr_debug("calling lru_cache_add_active(0x%p)\n", new_page);
+	lru_cache_add_active(new_page);
+
+	/* release reference we acquired as well as pagetable references */
+	pr_debug("calling release_pfns()\n");
+	release_pfns(pfns);
 	page_cache_release(new_page);
-	/* page_cache_release(old_page); */
 	ret = VM_FAULT_MINOR;
 	goto out;
-no_mem:
-	page_cache_release(old_page);
-oom:
-	ret = VM_FAULT_OOM;
 
 out:
 	spin_unlock(&mm->page_table_lock);
 	pte_chain_free_chained(pte_chain);
@@ -1765,7 +1785,7 @@ do_anonymous_page(struct mm_struct *mm,
 	spin_lock(&mm->page_table_lock);
 	page_table = fill_anonymizable_ptevec(vma, pmd, page_table, ptes,
 						addr, write_access, 0);
-	rss = set_anon_ptes(vma, page, addr, ptes, pte_chain, write_access, 0);
+	rss = set_anon_ptes(vma, page, addr, ptes, &pte_chain, write_access);
 	pr_debug("doing pte_unmap(0x%p)\n", page_table);
 	pte_unmap(page_table);
 	pr_debug("adding %d to rss\n", rss);
@@ -1774,12 +1794,15 @@ do_anonymous_page(struct mm_struct *mm,
 	pr_debug("broke out of PTE instantiation loop\n");
 	if (write_access) {
 		if (rss) {
-			pr_debug("adding page to LRU\n");
+			if (rss > 1)
+				atomic_add(rss - 1, &page->count);
+			pr_debug("adding page 0x%p to LRU\n", page);
 			lru_cache_add_active(page);
 			mark_page_accessed(page);
+		} else {
+			pr_debug("releasing page\n");
+			page_cache_release(page);
 		}
-		pr_debug("releasing page\n");
-		page_cache_release(page);
 	}
 	pr_debug("doing pte_chain_free()\n");
 	pte_chain_free_chained(pte_chain);
@@ -1836,6 +1859,7 @@ do_no_page(struct mm_struct *mm, struct
 		}
 		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
+		pr_debug("adding page 0x%p to LRU\n", page);
 		lru_cache_add_active(page);
 		new_page = page;
 	}
@@ -1951,6 +1975,8 @@ static inline int handle_pte_fault(struc
 {
 	pte_t entry;
 
+	BUG_ON((unsigned long)pte > (unsigned long)(-PAGE_SIZE));
+
 	entry = *pte;
 	if (!pte_present(entry)) {
 		/*
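The do_wp_page() rework above leans on a snapshot-and-revalidate pattern: save_ptes() records the pte values while page_table_lock is held, the lock is dropped across the blocking allocations, reconstitute_ptes() recovers the kernel pointers, and recheck_ptes() discards any pte that changed in the interim. A minimal sketch of that pattern, with illustrative names and types (not the kernel code):

#define NPTES	8

typedef unsigned long pte_val_t;

struct ptevec {
	pte_val_t *slot[NPTES];	/* where each pte lives; NULL if unused */
	pte_val_t snap[NPTES];	/* value observed while the lock was held */
};

/* Called with the lock held: remember what each live slot contained. */
static void save(struct ptevec *v)
{
	int k;

	for (k = 0; k < NPTES; ++k)
		if (v->slot[k])
			v->snap[k] = *v->slot[k];
}

/*
 * Called after retaking the lock: keep entries whose contents are
 * unchanged and back out any that raced, the way recheck_ptes()
 * NULLs ptes[k] when an entry no longer pte_same()s its saved value.
 */
static int recheck(struct ptevec *v)
{
	int k, live = 0;

	for (k = 0; k < NPTES; ++k) {
		if (!v->slot[k])
			continue;
		if (*v->slot[k] == v->snap[k])
			++live;
		else
			v->slot[k] = NULL;
	}
	return live;
}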
diff -prauN pgcl-2.5.70-bk13-5/mm/page_alloc.c pgcl-2.5.70-bk13-6/mm/page_alloc.c
--- pgcl-2.5.70-bk13-5/mm/page_alloc.c	2003-06-08 06:20:34.000000000 -0700
+++ pgcl-2.5.70-bk13-6/mm/page_alloc.c	2003-06-08 14:40:45.000000000 -0700
@@ -67,7 +67,7 @@ static int bad_range(struct zone *zone,
 
 static void bad_page(const char *function, struct page *page)
 {
-	printk("Bad page state at %s\n", function);
+	printk("Bad page state for 0x%p at %s\n", page, function);
 	printk("flags:0x%08lx mapping:%p mapped:%d count:%d\n",
 		page->flags, page->mapping, page_mapped(page),
 		page_count(page));
diff -prauN pgcl-2.5.70-bk13-5/mm/rmap.c pgcl-2.5.70-bk13-6/mm/rmap.c
--- pgcl-2.5.70-bk13-5/mm/rmap.c	2003-06-08 08:48:37.000000000 -0700
+++ pgcl-2.5.70-bk13-6/mm/rmap.c	2003-06-08 16:30:31.000000000 -0700
@@ -170,6 +170,9 @@ page_add_rmap(struct page *page, pte_t *
 	pte_addr_t pte_paddr = ptep_to_paddr(ptep);
 	struct pte_chain *cur_pte_chain;
 
+	BUG_ON(!pte_chain);
+	BUG_ON(pte_chain->next_and_idx);
+
 	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
 		return pte_chain;
 
@@ -207,6 +210,7 @@ page_add_rmap(struct page *page, pte_t *
 		cur_pte_chain->next_and_idx--;
 out:
 	pte_chain_unlock(page);
+	BUG_ON(pte_chain && pte_chain->next_and_idx);
 	return pte_chain;
 }
 
@@ -235,12 +239,17 @@ page_add_rmap_chained(struct page *page,
  */
 void page_remove_rmap(struct page *page, pte_t *ptep)
 {
-	pte_addr_t pte_paddr = ptep_to_paddr(ptep);
+	pte_addr_t pte_paddr;
 	struct pte_chain *pc;
+	unsigned long kvaddr = (unsigned long)ptep;
+
+	BUG_ON(kvaddr > (unsigned long)(-PAGE_SIZE));
 
 	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
 		return;
 
+	pte_paddr = ptep_to_paddr(ptep);
+
 	pte_chain_lock(page);
 
 	if (!page_mapped(page))
@@ -254,12 +263,17 @@ void page_remove_rmap(struct page *page,
 		}
 	} else {
 		struct pte_chain *start = page->pte.chain;
-		struct pte_chain *next;
-		int victim_i = -1;
+		struct pte_chain *next, *orig_start;
+		int loops, victim_i = -1;
 
-		for (pc = start; pc; pc = next) {
+		orig_start = start;
+		for (loops = 0, pc = start; pc; ++loops, pc = next) {
 			int i;
 
+			/* crude cycle detection */
+			BUG_ON(loops && pc == orig_start);
+			BUG_ON(loops > 65536);
+
 			next = pte_chain_next(pc);
 			if (next)
 				prefetch(next);
@@ -497,8 +511,14 @@ void __pte_chain_free(struct pte_chain *
 	if (pte_chain->next_and_idx)
 		pte_chain->next_and_idx = 0;
 	pte_chainp = &per_cpu(local_pte_chain, cpu);
-	if (*pte_chainp)
+	if (*pte_chainp) {
+		int k;
+		char *s = (char *)*pte_chainp;
+
+		for (k = 0; k < sizeof(**pte_chainp); ++k)
+			BUG_ON(s[k]);
 		kmem_cache_free(pte_chain_cache, *pte_chainp);
+	}
 	*pte_chainp = pte_chain;
 	put_cpu();
 }
@@ -532,6 +552,13 @@ struct pte_chain *pte_chain_alloc(int gf
 		put_cpu();
 		ret = kmem_cache_alloc(pte_chain_cache, gfp_flags);
 	}
+	if (ret) {
+		char *s = (char *)ret;
+		int k;
+
+		for (k = 0; k < sizeof(*ret); ++k)
+			BUG_ON(s[k]);
+	}
 	return ret;
 }
 
@@ -558,7 +585,7 @@ void pte_chain_free_chained(struct pte_c
 	while (pc) {
 		struct pte_chain *next = pte_chain_next(pc);
 		pc->next_and_idx = 0;
-		pte_chain_free(pc);
+		__pte_chain_free(pc);
 		pc = next;
 	}
 }
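The "crude cycle detection" added to page_remove_rmap() is there to catch a pte_chain corrupted into a loop: remember where the walk started and BUG if it ever comes back around, or if it runs longer than any legitimate chain could be. The same idea as a standalone sketch; the node type and the 65536 bound are illustrative:

struct node {
	struct node *next;
};

/* Return 1 if the walk terminates sanely, 0 on a cycle or runaway. */
static int walk_is_sane(struct node *head)
{
	struct node *pc;
	unsigned long loops = 0;

	for (pc = head; pc; pc = pc->next) {
		if (loops && pc == head)
			return 0;	/* wrapped back to the start */
		if (++loops > 65536)
			return 0;	/* longer than any real chain */
	}
	return 1;
}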
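Similarly, the byte scans added to __pte_chain_free() and pte_chain_alloc() enforce the invariant that a pte_chain parked in the allocator is fully zeroed, so anything scribbling on a freed chain trips a BUG_ON() at the next allocation or free instead of silently corrupting rmap state. A sketch of the check itself, with an illustrative signature:

/*
 * Return nonzero if the object is still all zero bytes, the state the
 * slab ctor leaves it in; a nonzero byte means something wrote to the
 * object while it sat free, and the caller BUG()s.
 */
static int object_is_clean(const void *obj, unsigned long size)
{
	const char *s = obj;
	unsigned long k;

	for (k = 0; k < size; ++k)
		if (s[k])
			return 0;
	return 1;
}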