This gets up far enough to see INIT: version 2.85 booting but dies later on after handling a number of pagefaults on distinct addresses. That's relatively far along wrt. stabilizing this sort of affair and so snapshot-worthy. So here's the snapshot. diff -prauN pgcl-2.6.0-test5-bk3-2/arch/i386/kernel/process.c pgcl-2.6.0-test5-bk3-3/arch/i386/kernel/process.c --- pgcl-2.6.0-test5-bk3-2/arch/i386/kernel/process.c 2003-09-14 23:26:24.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/arch/i386/kernel/process.c 2003-09-17 18:15:21.000000000 -0700 @@ -606,6 +606,8 @@ asmlinkage int sys_execve(struct pt_regs int error; char * filename; + printk("sys_execve()\n"); + filename = getname((char __user *) regs.ebx); error = PTR_ERR(filename); if (IS_ERR(filename)) @@ -621,6 +623,7 @@ asmlinkage int sys_execve(struct pt_regs } putname(filename); out: + printk("return from sys_execve()\n"); return error; } diff -prauN pgcl-2.6.0-test5-bk3-2/arch/i386/mm/fault.c pgcl-2.6.0-test5-bk3-3/arch/i386/mm/fault.c --- pgcl-2.6.0-test5-bk3-2/arch/i386/mm/fault.c 2003-09-14 23:49:19.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/arch/i386/mm/fault.c 2003-09-17 17:44:35.000000000 -0700 @@ -369,6 +369,8 @@ no_context: out_of_memory: up_read(&mm->mmap_sem); if (tsk->pid == 1) { + printk("/sbin/init is OOM?\n"); + dump_stack(); yield(); down_read(&mm->mmap_sem); goto survive; diff -prauN pgcl-2.6.0-test5-bk3-2/arch/i386/mm/highmem.c pgcl-2.6.0-test5-bk3-3/arch/i386/mm/highmem.c --- pgcl-2.6.0-test5-bk3-2/arch/i386/mm/highmem.c 2003-09-16 20:50:50.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/arch/i386/mm/highmem.c 2003-09-18 15:49:08.000000000 -0700 @@ -152,6 +152,24 @@ void kmap_atomic_sg(pte_t *ptes[], pte_a __flush_tlb_one(vaddr); } } + + if (type == KM_FOLIO) { + if (!paddrs[PAGE_MMUCOUNT]) + ptes[PAGE_MMUCOUNT] = NULL; + else { + unsigned long pfn = paddrs[PAGE_MMUCOUNT]/MMUPAGE_SIZE; + idx = KM_FOLIO2 + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_END) + PAGE_SIZE*idx; + pgd = pgd_offset_k(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + ptes[PAGE_MMUCOUNT] = (pte_t *)(vaddr + ((u32)paddrs[PAGE_MMUCOUNT] & ~MMUPAGE_MASK)); + if (pte_pfn(pte[PAGE_MMUCOUNT]) != pfn) { + set_pte(&pte[PAGE_MMUCOUNT], pfn_pte(pfn, kmap_prot)); + __flush_tlb_one(vaddr); + } + } + } } void kunmap_atomic_sg(pte_t *ptes[], enum km_type type) diff -prauN pgcl-2.6.0-test5-bk3-2/arch/i386/pci/numa.c pgcl-2.6.0-test5-bk3-3/arch/i386/pci/numa.c --- pgcl-2.6.0-test5-bk3-2/arch/i386/pci/numa.c 2003-09-08 12:49:57.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/arch/i386/pci/numa.c 2003-09-17 16:44:15.000000000 -0700 @@ -115,7 +115,7 @@ static int __init pci_numa_init(void) return 0; pci_root_bus = pcibios_scan_root(0); - if (numnodes > 1) { + if (0 && numnodes > 1) { for (quad = 1; quad < numnodes; ++quad) { printk("Scanning PCI bus %d for quad %d\n", QUADLOCAL2BUS(quad,0), quad); diff -prauN pgcl-2.6.0-test5-bk3-2/fs/exec.c pgcl-2.6.0-test5-bk3-3/fs/exec.c --- pgcl-2.6.0-test5-bk3-2/fs/exec.c 2003-09-14 23:58:45.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/fs/exec.c 2003-09-17 18:19:39.000000000 -0700 @@ -1068,13 +1068,17 @@ int do_execve(char * filename, struct file *file; int retval; + printk("do_execve(%p, %p, %p, %p)\n", filename, argv, envp, regs); + sched_balance_exec(); file = open_exec(filename); retval = PTR_ERR(file); - if (IS_ERR(file)) + if (IS_ERR(file)) { + printk("return 1 from do_execve()\n"); return retval; + } bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); @@ -1130,6 +1134,7 @@ int do_execve(char * filename, /* execve success */ security_bprm_free(&bprm); + printk("return 2 from do_execve()\n"); return retval; } @@ -1150,6 +1155,7 @@ out_file: fput(bprm.file); } + printk("return 3 from do_execve()\n"); return retval; } diff -prauN pgcl-2.6.0-test5-bk3-2/include/asm-i386/kmap_types.h pgcl-2.6.0-test5-bk3-3/include/asm-i386/kmap_types.h --- pgcl-2.6.0-test5-bk3-2/include/asm-i386/kmap_types.h 2003-09-16 18:57:25.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/include/asm-i386/kmap_types.h 2003-09-18 13:49:06.000000000 -0700 @@ -25,7 +25,8 @@ D(11) KM_IRQ1, D(12) KM_SOFTIRQ0, D(13) KM_SOFTIRQ1, D(14) KM_FOLIO, -D(15) KM_TYPE_NR +D(15) KM_FOLIO2, +D(16) KM_TYPE_NR }; #undef D diff -prauN pgcl-2.6.0-test5-bk3-2/include/linux/folio.h pgcl-2.6.0-test5-bk3-3/include/linux/folio.h --- pgcl-2.6.0-test5-bk3-2/include/linux/folio.h 2003-09-16 20:06:52.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/include/linux/folio.h 2003-09-18 17:26:05.000000000 -0700 @@ -49,7 +49,7 @@ static inline int vma_neighbourly(struct return 1; } -#define NOPTE (~((pte_addr_t)0)) +#define NOPTE (((pte_addr_t)0)) /* * Prepare folio of page table pointers for the do_ fault handlers. @@ -179,7 +179,7 @@ static int prepare_folio(pte_addr_t foli */ static struct page *private_folio_page(pte_addr_t paddrs[], struct page *swap_page) { - pte_t *folio[PAGE_MMUCOUNT]; + pte_t *folio[PAGE_MMUCOUNT+1]; unsigned long pfn; struct page *page; swp_entry_t entry; @@ -293,7 +293,7 @@ static inline struct page *private_folio static void restrict_folio(pte_addr_t paddrs[], struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - pte_t *folio[PAGE_MMUCOUNT]; + pte_t *folio[PAGE_MMUCOUNT+1]; unsigned long addr; int j; @@ -316,7 +316,7 @@ static void restrict_folio(pte_addr_t pa static void copy_folio(pte_addr_t paddrs[], struct page *dst_page, struct page *src_page, unsigned long address) { - pte_t *folio[PAGE_MMUCOUNT]; + pte_t *folio[PAGE_MMUCOUNT+1]; char *src, *dst; unsigned int size; unsigned int offset = 0; @@ -363,7 +363,7 @@ static inline unsigned long set_folio_page(pte_addr_t paddrs[], struct page *page, pgprot_t prot, unsigned long flags) { - pte_t *folio[PAGE_MMUCOUNT]; + pte_t *folio[PAGE_MMUCOUNT+1]; unsigned long offset = 0, rss = 0, pfn = page_to_pfn(page); int j; @@ -373,7 +373,7 @@ set_folio_page(pte_addr_t paddrs[], stru if (!folio[j]) continue; set_pte(folio[j], - pte_modify(pfn_pte(pfn + j, prot), __pgprot(flags))); + pfn_pte(pfn + j, __pgprot(prot.pgprot | flags))); rss++; } kunmap_atomic_sg(folio, KM_FOLIO); diff -prauN pgcl-2.6.0-test5-bk3-2/init/main.c pgcl-2.6.0-test5-bk3-3/init/main.c --- pgcl-2.6.0-test5-bk3-2/init/main.c 2003-09-14 23:49:20.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/init/main.c 2003-09-17 18:12:12.000000000 -0700 @@ -568,14 +568,18 @@ static int init(void * unused) smp_init(); do_basic_setup(); + printk("about to prepare_namespace()\n"); prepare_namespace(); + printk("return from prepare_namespace()\n"); /* * Ok, we have completed the initial bootup, and * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ + printk("about to free_initmem()\n"); free_initmem(); + printk("return from free_initmem()\n"); unlock_kernel(); system_running = 1; @@ -592,6 +596,7 @@ static int init(void * unused) * trying to recover a really broken machine. */ + printk("about to execve(\"/sbin/init\")\n"); if (execute_command) execve(execute_command,argv_init,envp_init); execve("/sbin/init",argv_init,envp_init); diff -prauN pgcl-2.6.0-test5-bk3-2/mm/memory.c pgcl-2.6.0-test5-bk3-3/mm/memory.c --- pgcl-2.6.0-test5-bk3-2/mm/memory.c 2003-09-16 21:11:01.000000000 -0700 +++ pgcl-2.6.0-test5-bk3-3/mm/memory.c 2003-09-18 16:53:52.000000000 -0700 @@ -36,6 +36,8 @@ * (Gerhard.Wichert@pdb.siemens.de) */ +#define DEBUG + #include #include #include @@ -47,6 +49,7 @@ #include #include #include + #include #include #include @@ -54,8 +57,6 @@ #include #include -#include - #ifndef CONFIG_DISCONTIGMEM /* use the per-pgdat data instead for discontigmem - mbligh */ unsigned long max_mapnr; @@ -1023,9 +1024,12 @@ static int do_wp_page(struct mm_struct * struct page *old_page, *new_page; unsigned long pfn = pte_pfn(pte); struct pte_chain *pte_chain = NULL; - pte_addr_t folio[PAGE_MMUCOUNT]; + pte_addr_t folio[PAGE_MMUCOUNT+1]; int reprep, rss, ret; + printk("do_wp_page(%p, %p, 0x%lx, %p, %p, %Lx\n", + mm, vma, address, page_table, pmd, pte_val(pte)); + if (unlikely(!pfn_valid(pfn))) { /* * This should really halt the system so it can be debugged or @@ -1093,6 +1097,7 @@ oom: out: spin_unlock(&mm->page_table_lock); pte_chain_free(pte_chain); + printk("return from do_wp_page()\n"); return ret; } @@ -1249,6 +1254,9 @@ static int do_swap_page(struct mm_struct int rss, ret = VM_FAULT_MINOR; struct pte_chain *pte_chain = NULL; + printk("do_swap_page(%p, %p, %lx, %p, %p, %Lx, %d)\n", + mm, vma, address, page_table, pmd, pte_val(orig_pte), write_access); + pte_unmap(page_table); spin_unlock(&mm->page_table_lock); swap_page = lookup_swap_cache(entry); @@ -1336,6 +1344,7 @@ static int do_swap_page(struct mm_struct spin_unlock(&mm->page_table_lock); out: pte_chain_free(pte_chain); + printk("return from do_swap_page()\n"); return ret; } @@ -1349,11 +1358,14 @@ do_anonymous_page(struct mm_struct *mm, pte_t *page_table, pmd_t *pmd, int write_access, unsigned long addr) { - pte_addr_t folio[PAGE_MMUCOUNT]; + pte_addr_t folio[PAGE_MMUCOUNT+1]; struct page *new_page, *page = ZERO_PAGE(addr); struct pte_chain *pte_chain; int ret; + printk("do_anonymous_page(%p, %p, %p, %p, %d, %lx)\n", + mm, vma, page_table, pmd, write_access, addr); + pte_chain = pte_chain_alloc(GFP_ATOMIC); if (!pte_chain) { pte_unmap(page_table); @@ -1418,6 +1430,7 @@ no_mem: ret = VM_FAULT_OOM; out: pte_chain_free(pte_chain); + printk("return from do_anonymous_page()\n"); return ret; } @@ -1438,11 +1451,14 @@ do_no_page(struct mm_struct *mm, struct unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd) { struct page *page, *new_page = NULL; - pte_addr_t folio[PAGE_MMUCOUNT]; + pte_addr_t folio[PAGE_MMUCOUNT+1]; struct address_space *mapping = NULL; struct pte_chain *pte_chain; int ret, rss, sequence = 0; + printk("do_no_page(%p, %p, %lx, %d, %p, %p)\n", + mm, vma, address, write_access, page_table, pmd); + pte_unmap(page_table); spin_unlock(&mm->page_table_lock); @@ -1455,10 +1471,13 @@ retry: page = vma->vm_ops->nopage(vma, address, 0); /* no page was available -- either SIGBUS or OOM */ - if (new_page == NOPAGE_SIGBUS) + if (page == NOPAGE_SIGBUS) { + printk("return VM_FAULT_SIGBUS from do_no_page()\n"); return VM_FAULT_SIGBUS; - if (new_page == NOPAGE_OOM) + } else if (page == NOPAGE_OOM) { + printk("return VM_FAULT_OOM from do_no_page()\n"); return VM_FAULT_OOM; + } pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) @@ -1474,10 +1493,10 @@ retry: page_cache_release(page); goto oom; } + lru_cache_add_active(new_page); } - } - lru_cache_add_active(page); - new_page = page; + } else + new_page = page; spin_lock(&mm->page_table_lock); /* @@ -1521,6 +1540,7 @@ retry: } /* No need to invalidate - it was non-present before */ + pte_unmap(page_table); update_mmu_cache(vma, address, pte); spin_unlock(&mm->page_table_lock); ret = VM_FAULT_MAJOR; @@ -1529,6 +1549,7 @@ oom: ret = VM_FAULT_OOM; out: pte_chain_free(pte_chain); + printk("return %d from do_no_page()\n", ret); return ret; } @@ -1543,6 +1564,9 @@ static int do_file_page(struct mm_struct unsigned long pgoff; int err; + printk("do_file_page(%p, %p, %lx, %d, %p, %p)\n", + mm, vma, address, write_access, pte, pmd); + BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage); /* * Fall back to the linear mapping if the fs does not support @@ -1560,6 +1584,7 @@ static int do_file_page(struct mm_struct spin_unlock(&mm->page_table_lock); err = vma->vm_ops->populate(vma, address & MMUPAGE_MASK, MMUPAGE_SIZE, vma->vm_page_prot, pgoff, 0); + printk("return from do_file_page()\n"); if (err == -ENOMEM) return VM_FAULT_OOM; if (err) @@ -1633,6 +1658,9 @@ int handle_mm_fault(struct mm_struct *mm pgd_t *pgd; pmd_t *pmd; + printk("handle_mm_fault(%p, %p, %lx, %d)\n", + mm, vma, address, write_access); + __set_current_state(TASK_RUNNING); pgd = pgd_offset(mm, address);