Implement gang swapin faulting, comment some TODO items, and clean up
some cruft that shouldn't stay long.

-- wli

 memory.c   |  128 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 swapfile.c |    6 ++
 2 files changed, 112 insertions(+), 22 deletions(-)

diff -urpN pgcl-2.5.64-3/mm/memory.c pgcl-2.5.64-4/mm/memory.c
--- pgcl-2.5.64-3/mm/memory.c	2003-03-05 21:10:36.000000000 -0800
+++ pgcl-2.5.64-4/mm/memory.c	2003-03-06 21:38:41.000000000 -0800
@@ -1004,6 +1004,10 @@ static int do_wp_page(struct mm_struct *
 		int reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
 		if (reuse) {
+			/*
+			 * XXX: this should sweep the pagetables to
+			 * prefault all the pte's. This is free, take it.
+			 */
 			flush_cache_page(vma, address);
 			establish_pte(vma, address, page_table,
 				pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
@@ -1016,6 +1020,8 @@ static int do_wp_page(struct mm_struct *
 
 	/*
 	 * Ok, we need to copy. Oh, well..
+	 * XXX: This needs to sweep the pagetables in an analogous
+	 * manner to do_anonymous_page().
 	 */
 	page_cache_get(old_page);
 	spin_unlock(&mm->page_table_lock);
@@ -1180,9 +1186,13 @@ static int do_swap_page(struct mm_struct
 	struct page *page;
 	swp_entry_t entry = pte_to_swp_entry(orig_pte);
 	pte_t pte;
-	int ret = VM_FAULT_MINOR;
+	int rss, ret = VM_FAULT_MINOR;
 	struct pte_chain *pte_chain = NULL;
-	unsigned long subpfn = swp_offset(entry) % PAGE_MMUCOUNT;
+	unsigned long subpfn, flt_subpfn = swp_offset(entry) % PAGE_MMUCOUNT;
+	unsigned long pfn, lo_vaddr, hi_vaddr, vaddr;
+
+	lo_vaddr = max(address & PAGE_MASK, vma->vm_start);
+	hi_vaddr = min(PAGE_ALIGN(address), vma->vm_end);
 
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
@@ -1214,7 +1224,7 @@ static int do_swap_page(struct mm_struct
 	mark_page_accessed(page);
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
 	if (!pte_chain) {
-		ret = -ENOMEM;
+		ret = VM_FAULT_OOM;
 		goto out;
 	}
 	lock_page(page);
@@ -1236,28 +1246,91 @@ static int do_swap_page(struct mm_struct
 
 	/* The page isn't present yet, go ahead with the fault. */
 
+	/*
 	swap_free(entry);
 	if (vm_swap_full())
 		remove_exclusive_swap_page(page);
+	*/
 
-	mm->rss++;
-	/*
-	 * This is obviously wrong. How to fix?
-	 */
-	pte = pfn_pte(page_to_pfn(page) + subpfn, vma->vm_page_prot);
-	if (write_access && can_share_swap_page(page))
-		pte = pte_mkdirty(pte_mkwrite(pte));
-	unlock_page(page);
+	rss = 0;
+	vaddr = lo_vaddr;
+	page_table -= (address - lo_vaddr)/MMUPAGE_SIZE;
 
 	flush_page_to_ram(page);
 	flush_icache_page(vma, page);
-	set_pte(page_table, pte);
-	pte_chain = page_add_rmap(page, page_table, pte_chain);
 
-	/* No need to invalidate - it was non-present before */
+	pfn = page_to_pfn(page);
+
+	do {
+		/* already faulted in? less work for me */
+		if (pte_present(*page_table))
+			goto next;
+
+		entry = pte_to_swp_entry(*page_table);
+
+		if (!pte_none(*page_table) &&
+		    swp_offset(entry)/PAGE_MMUCOUNT == page->index) {
+
+			swap_free(entry);
+			if (vm_swap_full())
+				remove_exclusive_swap_page(page);
+			subpfn = swp_offset(entry) % PAGE_MMUCOUNT;
+			pte = pfn_pte(pfn + subpfn, vma->vm_page_prot);
+
+		} else if (pte_none(*page_table)) {
+
+			subpfn = flt_subpfn + (vaddr - address)/MMUPAGE_SIZE;
+
+			/* it'd fall outside the page */
+			if (subpfn >= PAGE_MMUCOUNT)
+				goto next;
+
+			pte = pfn_pte(pfn + subpfn, vma->vm_page_prot);
+
+		/* !pte_none() && swp_offset()/PAGE_MMUCOUNT != page->index */
+		} else
+			goto next;
+
+		if (write_access && can_share_swap_page(page))
+			pte = pte_mkdirty(pte_mkwrite(pte));
+
+		if (!pte_chain)
+			pte_chain = pte_chain_alloc(GFP_ATOMIC);
+		if (!pte_chain) {
+			pte_unmap(page_table);
+			spin_unlock(&mm->page_table_lock);
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				ret = VM_FAULT_OOM;
+				spin_lock(&mm->page_table_lock);
+				mm->rss += rss;
+				spin_unlock(&mm->page_table_lock);
+				goto no_mem;
+			}
+			spin_lock(&mm->page_table_lock);
+			page_table = pte_offset_map(pmd, vaddr);
+		}
+
+		set_pte(page_table, pte);
+		++rss;
+		pte_chain = page_add_rmap(page, page_table, pte_chain);
+next:
+		vaddr += MMUPAGE_SIZE;
+		page_table++;
+	} while (vaddr < hi_vaddr);
+
+	unlock_page(page);
 	update_mmu_cache(vma, address, pte);
-	pte_unmap(page_table);
+	mm->rss += rss;
+	pte_unmap(page_table-1);
 	spin_unlock(&mm->page_table_lock);
+no_mem:
+	if (!page)
+		goto out;
+	if (!rss)
+		page_cache_release(page);
+	else if (rss > 1)
+		atomic_add(rss - 1, &page->count);
 out:
 	pte_chain_free(pte_chain);
 	return ret;
@@ -1273,17 +1346,15 @@ do_anonymous_page(struct mm_struct *mm,
 		pte_t *page_table, pmd_t *pmd, int write_access,
 		unsigned long addr)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct pte_chain *pte_chain = NULL;
 	unsigned long vaddr, lo_vaddr, hi_vaddr;
 	unsigned long pfn;
-	int rss, ret = VM_FAULT_MINOR;
+	int rss = 0, ret = VM_FAULT_MINOR;
 
 	lo_vaddr = max(addr & PAGE_MASK, vma->vm_start);
 	hi_vaddr = min(PAGE_ALIGN(addr), vma->vm_end);
 
-	BUG_ON(lo_vaddr > hi_vaddr);
-
 	if (!write_access) {
 		page = ZERO_PAGE(addr);
 		page_table -= (addr - lo_vaddr)/MMUPAGE_SIZE;
@@ -1294,18 +1365,27 @@ do_anonymous_page(struct mm_struct *mm,
 		spin_unlock(&mm->page_table_lock);
 		if (!pte_chain) {
 			pte_chain = pte_chain_alloc(GFP_KERNEL);
-			if (!pte_chain)
+			if (!pte_chain) {
+				ret = VM_FAULT_OOM;
 				goto no_mem;
+			}
 		}
 		page = alloc_page(GFP_HIGHUSER);
-		if (!page)
+		if (!page) {
+			ret = VM_FAULT_OOM;
 			goto no_mem;
+		}
 		clear_user_highpage(page, addr);
 		spin_lock(&mm->page_table_lock);
 		page_table = pte_offset_map(pmd, lo_vaddr);
 	}
 
 	flush_page_to_ram(page);
+	/*
+	 * XXX: locks are dropped in the interior of the loop;
+	 * is an elevated reference count required to pin the page
+	 * while it's being operated on?
+	 */
 	pfn = page_to_pfn(page) + ((lo_vaddr/MMUPAGE_SIZE) % PAGE_MMUCOUNT);
 	vaddr = lo_vaddr;
 	rss = 0;
@@ -1322,6 +1402,9 @@ do_anonymous_page(struct mm_struct *mm,
 			pte_chain = pte_chain_alloc(GFP_KERNEL);
 			if (!pte_chain) {
 				ret = VM_FAULT_OOM;
+				spin_lock(&mm->page_table_lock);
+				mm->rss += rss;
+				spin_unlock(&mm->page_table_lock);
 				goto no_mem;
 			}
 			spin_lock(&mm->page_table_lock);
@@ -1341,6 +1424,7 @@ do_anonymous_page(struct mm_struct *mm,
 
 	pte_unmap(page_table-1);
 	update_mmu_cache(vma, addr, entry);
+	mm->rss += rss;
 	spin_unlock(&mm->page_table_lock);
 
 no_mem:
@@ -1425,6 +1509,8 @@ do_no_page(struct mm_struct *mm, struct
 	 * an exclusive copy of the page, or this is a shared mapping,
 	 * so we can make it writable and dirty to avoid having to
 	 * handle that later.
+	 *
+	 * XXX: this should sweep pagetables and prefault
 	 */
 	/* Only go through if we didn't race with anybody else... */
 	if (pte_none(*page_table)) {
diff -urpN pgcl-2.5.64-3/mm/swapfile.c pgcl-2.5.64-4/mm/swapfile.c
--- pgcl-2.5.64-3/mm/swapfile.c	2003-03-05 07:08:37.000000000 -0800
+++ pgcl-2.5.64-4/mm/swapfile.c	2003-03-06 21:36:56.000000000 -0800
@@ -188,16 +188,19 @@ static struct swap_info_struct * swap_in
 
 bad_free:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
-	dump_stack();
+	BUG();
 	goto out;
 bad_offset:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
+	BUG();
 	goto out;
 bad_device:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
+	BUG();
 	goto out;
 bad_nofile:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
+	BUG();
 out:
 	return NULL;
 }
@@ -1495,6 +1498,7 @@ out:
 
 bad_file:
 	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
+	BUG();
 	goto out;
 }
 
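For illustration, here is a minimal userspace sketch of the sweep
arithmetic do_swap_page() performs above: clamp the fault window to the
vma, then walk it one MMU page at a time, mapping each subpfn that lands
inside the faulting PAGE_SIZE cluster and skipping the rest. The harness
is hypothetical (PAGE_SHIFT=14 giving 4 MMU pages per PAGE, and the vma
bounds, fault address, and flt_subpfn are made up), and it computes an
explicitly signed delta where the patch computes
flt_subpfn + (vaddr - address)/MMUPAGE_SIZE in unsigned arithmetic and
relies on the subpfn >= PAGE_MMUCOUNT check to reject anything outside
the page.

#include <stdio.h>

#define MMUPAGE_SHIFT	12
#define MMUPAGE_SIZE	(1UL << MMUPAGE_SHIFT)
#define PAGE_SHIFT	14	/* hypothetical: 4 MMU pages per PAGE */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_MMUCOUNT	(PAGE_SIZE / MMUPAGE_SIZE)
#define PAGE_ALIGN(a)	(((a) + PAGE_SIZE - 1) & PAGE_MASK)

static unsigned long max(unsigned long a, unsigned long b) { return a > b ? a : b; }
static unsigned long min(unsigned long a, unsigned long b) { return a < b ? a : b; }

int main(void)
{
	/* hypothetical fault: vma covers [0x10000, 0x18000), fault at 0x15000 */
	unsigned long vm_start = 0x10000, vm_end = 0x18000;
	unsigned long address = 0x15000;
	unsigned long flt_subpfn = 2;	/* swp_offset(entry) % PAGE_MMUCOUNT */

	/* clamp the sweep window to the vma, as do_swap_page() does */
	unsigned long lo_vaddr = max(address & PAGE_MASK, vm_start);
	unsigned long hi_vaddr = min(PAGE_ALIGN(address), vm_end);
	unsigned long vaddr;

	for (vaddr = lo_vaddr; vaddr < hi_vaddr; vaddr += MMUPAGE_SIZE) {
		/* signed distance, in MMU pages, from the faulting pte */
		long delta = (long)(vaddr - address) / (long)MMUPAGE_SIZE;
		long subpfn = (long)flt_subpfn + delta;

		if (subpfn < 0 || subpfn >= PAGE_MMUCOUNT)
			printf("vaddr %#lx: skip, outside the page\n", vaddr);
		else
			printf("vaddr %#lx: map pfn+%ld\n", vaddr, subpfn);
	}
	return 0;
}

With these numbers the sweep maps pfn+1, pfn+2, and pfn+3 at 0x14000,
0x15000, and 0x16000, and skips 0x17000 because subpfn 4 falls outside
the 4-subpage cluster; only the 0x15000 entry would have been faulted
before this patch.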