Name: copy_device_tree to use Open Firmware claim call Status: Untested Version: ppc64 copy_device_tree (called in prom.c before relocation is on) currently just uses memory by incrementing klimit. Unfortunately, the device tree can be huge, which runs over the initrd. Moving the initrd in general would require yaboot changes, and just delay the problem. The solution is to use the Open Firmware "claim" method to get the memory we need, then mark it reserved in the lmb code so future lmb_alloc calls know about it. We must ensure that Open Firmware doesn't return an address where we're going to move the kernel (ie. between zero and klimit), by making sure all that memory is already claimed. diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .22762-linux-ppc64-2.5/arch/ppc64/kernel/lmb.c .22762-linux-ppc64-2.5.updated/arch/ppc64/kernel/lmb.c --- .22762-linux-ppc64-2.5/arch/ppc64/kernel/lmb.c 2004-03-01 15:13:25.000000000 +1100 +++ .22762-linux-ppc64-2.5.updated/arch/ppc64/kernel/lmb.c 2004-04-16 18:59:18.000000000 +1000 @@ -200,6 +200,33 @@ lmb_reserve(unsigned long base, unsigned return lmb_add_region(_rgn, base, size); } +void __init +lmb_trim(unsigned long base, unsigned long size, unsigned long oldsize) +{ + unsigned long extra, offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *rgn = &(_lmb->reserved); + unsigned int i; + + for (i = 0; i < rgn->cnt; i++) { + if (base >= rgn->region[i].base + && base < rgn->region[i].base + rgn->region[i].size) + break; + } + /* Doesn't work since it's too early, but you get the idea. */ + BUG_ON(i == rgn->cnt); + BUG_ON(rgn->region[i].base + rgn->region[i].size < base + oldsize); + + /* We might have added to the end of this reservation. 
*/ + extra = (rgn->region[i].base + rgn->region[i].size) - (base + oldsize); + + /* Trim this region to size */ + rgn->region[i].size = (base + size) - rgn->region[i].base; + + if (extra) + lmb_add_region(rgn, base + oldsize, extra); /* tail starts where the old allocation ended */ +} + long __init lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, unsigned long size) { @@ -222,6 +249,7 @@ lmb_alloc(unsigned long size, unsigned l return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); } +/* FIXME: We assume this allocates from the top: see lmb_and_of_alloc_topmem */ unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) { diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .22762-linux-ppc64-2.5/arch/ppc64/kernel/prom.c .22762-linux-ppc64-2.5.updated/arch/ppc64/kernel/prom.c --- .22762-linux-ppc64-2.5/arch/ppc64/kernel/prom.c 2004-04-15 18:49:42.000000000 +1000 +++ .22762-linux-ppc64-2.5.updated/arch/ppc64/kernel/prom.c 2004-04-17 16:48:39.000000000 +1000 @@ -67,6 +67,7 @@ extern const struct linux_logo logo_linu * on a PCI-PCI bridge. */ #define MAX_PROPERTY_LENGTH (1UL * 1024 * 1024) +#define GOOD_DEVTREE_ALLOC_SIZE (4 * MAX_PROPERTY_LENGTH) /* * prom_init() is called very early on, before the kernel text @@ -624,6 +625,148 @@ prom_initialize_lmb(unsigned long mem) return mem; } +/* Open Firmware has memory marked as used, including its own crap + * which we won't need after prom_init. The lmb system has memory + * marked as used which will continue to be used after prom_init + * (ie. the moved kernel text, etc). So we keep OF informed of lmb + * allocations so it's a superset, and we can use its allocator to get + * memory we want. 
*/ +static unsigned long __init +lmb_and_of_alloc(unsigned long size, unsigned long align) +{ + unsigned long mem; + unsigned long offset = reloc_offset(); + + mem = call_prom(RELOC("claim"), 3, 1, 0, size, align ?: 1); + if ((prom_arg_t)mem == (prom_arg_t)-1) + prom_panic(RELOC("OF allocation failed")); + + lmb_reserve(mem, size); + return mem; +} + +static phandle of_memory_node(unsigned int *naddr, unsigned int *nsize) +{ + phandle node = 0; + unsigned long offset = reloc_offset(); + + while (prom_next_node(&node)) { + char type[64] = { 0 }; + call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"), + type, sizeof(type)); + if (strcmp(RELOC("memory"), type) == 0) { + phandle parent; + + /* We establish format from parent. */ + parent = call_prom(RELOC("parent"), 1, 1, node); + call_prom(RELOC("getprop"), 4, 1, parent, + RELOC("#address-cells"), naddr, + sizeof(*naddr)); + call_prom(RELOC("getprop"), 4, 1, parent, + RELOC("#size-cells"), nsize, + sizeof(*nsize)); + if ((*naddr != 1 && *naddr != 2) + || (*nsize != 1 && *nsize != 2)) + prom_panic(RELOC("Bad address or size cells")); /* strings need RELOC this early */ + return node; + } + } + prom_panic(RELOC("OF memory node not found")); +} + +/* Return true if they overlap: return intersection in start and len. */ +static int overlap(unsigned long *start, unsigned long *len, + unsigned long begin, unsigned long end) +{ + if (begin >= *start + *len || end <= *start) + return 0; + + if (begin > *start) { + *len -= (begin - *start); + *start = begin; + } + if (end < *start + *len) + *len = end - *start; + return 1; +} + +static void __init of_reserve_mem(unsigned long base, unsigned long size) +{ + unsigned int naddr = 1, nsize = 1, propsize; + phandle memnode; + u32 *memmap; + unsigned long offset = reloc_offset(); + static __initdata unsigned long scratchmem, scratchsize = 0; + + /* We only worry about the bottom 32 bits, because claim can't + * return anything higher anyway. 
*/ + if (base >= 0x100000000UL) + return; + if (base + size > 0x100000000UL) + size = 0x100000000UL - base; + + /* Make claims to cover area, by looking at memory node's + * available property. */ + memnode = of_memory_node(&naddr, &nsize); + propsize = call_prom(RELOC("getproplen"), 2, 1, memnode, + RELOC("available")); + + /* We need memory to put the property in! But if we free it + * up, then we might accidentally unreserve something. So we + * don't free it (we don't reserve it in lmb though). */ + if (propsize > scratchsize) { + scratchsize = propsize * 2; + scratchmem = call_prom(RELOC("claim"), 3, 1, 0, scratchsize, 4); /* claim what we record */ + if ((prom_arg_t)scratchmem == (prom_arg_t)-1) + prom_panic(RELOC("OF reserve allocation failed")); + } + call_prom(RELOC("getprop"), 4, 1, memnode, RELOC("available"), + scratchmem, propsize); + memmap = (u32 *)scratchmem; + + while (memmap < (u32 *)(scratchmem + propsize)) { /* empty property: nothing to claim */ + unsigned long start, len; + if (naddr == 1) { + memcpy(&start, memmap, 4); + start >>= 32; + } else + memcpy(&start, memmap, 8); + memmap += naddr; + if (nsize == 1) { + memcpy(&len, memmap, 4); + len >>= 32; + } else + memcpy(&len, memmap, 8); + memmap += nsize; + + if (overlap(&start, &len, base, base + size) + && call_prom(RELOC("claim"), 3, 1, start, len, 0) != start) + prom_panic(RELOC("OF reserve claim failed")); + } +} + +static unsigned long __init +lmb_and_of_alloc_topmem(unsigned long size, unsigned long align) +{ + unsigned long base; + unsigned long offset = reloc_offset(); + + /* Hack: LMB allocates from the top, and we're the first ones + * to ask it for memory, so we get top of memory. 
*/ + base = lmb_alloc(size, align); + if (!base) + prom_panic(RELOC("ERROR, cannot find space for TCE table.\n")); + + of_reserve_mem(base, size); + return base; +} + +static void __init lmb_and_of_reserve(unsigned long base, unsigned long size) +{ + lmb_reserve(base, size); + of_reserve_mem(base, size); +} + static char hypertas_funcs[1024]; static void __init @@ -659,17 +802,10 @@ prom_instantiate_rtas(void) _rtas->size = getprop_rval; prom_print(RELOC("instantiating rtas")); if (_rtas->size != 0) { - unsigned long rtas_region = RTAS_INSTANTIATE_MAX; - - /* Grab some space within the first RTAS_INSTANTIATE_MAX bytes - * of physical memory (or within the RMO region) because RTAS - * runs in 32-bit mode and relocate off. - */ - if ( _systemcfg->platform == PLATFORM_PSERIES_LPAR ) { - struct lmb *_lmb = PTRRELOC(&lmb); - rtas_region = min(_lmb->rmo_size, RTAS_INSTANTIATE_MAX); - } - _rtas->base = lmb_alloc_base(_rtas->size, PAGE_SIZE, rtas_region); + /* Assume that OF returns memory within sane + region, ie. less than RTAS_INSTANTIATE_MAX + and within RMO region. */ + _rtas->base = lmb_and_of_alloc(_rtas->size, PAGE_SIZE); prom_print(RELOC(" at 0x")); prom_print_hex(_rtas->base); @@ -685,8 +821,7 @@ prom_instantiate_rtas(void) _rtas->entry = (long)_prom->args.rets[1]; } RELOC(rtas_rmo_buf) - = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, - rtas_region); + = lmb_and_of_alloc(RTAS_RMOBUF_MAX, PAGE_SIZE); } if (_rtas->entry <= 0) { @@ -797,7 +932,7 @@ void prom_initialize_dart_table(void) * will blow up an entire large page anyway in the kernel mapping */ RELOC(dart_tablebase) = - abs_to_virt(lmb_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); + abs_to_virt(lmb_and_of_alloc(1UL<<24, 1UL<<24)); prom_print(RELOC("Dart at: ")); prom_print_hex(RELOC(dart_tablebase)); @@ -892,13 +1027,8 @@ prom_initialize_tce_table(void) /* Align to the greater of the align or size */ align = max(minalign, minsize); - /* Carve out storage for the TCE table. 
*/ - base = lmb_alloc(minsize, align); - - if ( !base ) { - prom_panic(RELOC("ERROR, cannot find space for TCE table.\n")); - } - + /* Storage for the TCE table: must be top of memory */ + base = lmb_and_of_alloc_topmem(minsize, align); vbase = (unsigned long)abs_to_virt(base); /* Save away the TCE table attributes for later use. */ @@ -1440,24 +1570,26 @@ static int __init prom_find_machine_type /* * Make a copy of the device tree from the PROM. */ -static unsigned long __init -copy_device_tree(unsigned long mem_start) +static void __init +copy_device_tree(void) { phandle root; - unsigned long new_start; struct device_node **allnextp; unsigned long offset = reloc_offset(); - unsigned long mem_end = mem_start + (8<<20); + unsigned long mem_start, mem_end; root = call_prom(RELOC("peer"), 1, 1, (phandle)0); - if (root == (phandle)0) { + if (root == (phandle)0) prom_panic(RELOC("couldn't get device tree root\n")); - } + + mem_start = mem_end = 0; allnextp = &RELOC(allnodes); - mem_start = DOUBLEWORD_ALIGN(mem_start); - new_start = inspect_node(root, 0, mem_start, mem_end, &allnextp); + inspect_node(root, 0, &mem_start, &mem_end, &allnextp); *allnextp = 0; - return new_start; + + /* Don't bother trimming OF claim, but trim lmb allocation to + * exact length. */ + lmb_trim(mem_start, mem_end - mem_start, GOOD_DEVTREE_ALLOC_SIZE); } /* Verify bi_recs are good */ @@ -1605,7 +1776,7 @@ prom_bi_rec_reserve(unsigned long mem) switch (rec->tag) { #ifdef CONFIG_BLK_DEV_INITRD case BI_INITRD: - lmb_reserve(rec->data[0], rec->data[1]); + lmb_and_of_reserve(rec->data[0], rec->data[1]); break; #endif /* CONFIG_BLK_DEV_INITRD */ } @@ -1870,6 +2041,10 @@ prom_init(unsigned long r3, unsigned lon mem = prom_initialize_lmb(mem); + /* We need very top of physical memory, so do this first. 
*/ + if (_systemcfg->platform == PLATFORM_PSERIES) + prom_initialize_tce_table(); + mem = prom_bi_rec_reserve(mem); mem = check_display(mem); @@ -1888,17 +2063,14 @@ prom_init(unsigned long r3, unsigned lon */ prom_hold_cpus(mem); + RELOC(klimit) = mem + offset; + /* Tell lmb subsys all this space is reserved. */ + lmb_and_of_reserve(0, __pa(RELOC(klimit))); + #ifdef DEBUG_PROM prom_print(RELOC("copying OF device tree...\n")); #endif - mem = copy_device_tree(mem); - - RELOC(klimit) = mem + offset; - - lmb_reserve(0, __pa(RELOC(klimit))); - - if (_systemcfg->platform == PLATFORM_PSERIES) - prom_initialize_tce_table(); + copy_device_tree(); #ifdef CONFIG_PMAC_DART if (_systemcfg->platform == PLATFORM_POWERMAC) diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .22762-linux-ppc64-2.5/include/asm-ppc64/lmb.h .22762-linux-ppc64-2.5.updated/include/asm-ppc64/lmb.h --- .22762-linux-ppc64-2.5/include/asm-ppc64/lmb.h 2004-03-21 00:04:51.000000000 +1100 +++ .22762-linux-ppc64-2.5.updated/include/asm-ppc64/lmb.h 2004-04-17 16:43:27.000000000 +1000 @@ -56,6 +56,9 @@ extern long __init lmb_reserve(unsigned extern unsigned long __init lmb_alloc(unsigned long, unsigned long); extern unsigned long __init lmb_alloc_base(unsigned long, unsigned long, unsigned long); +extern void __init lmb_trim(unsigned long base, + unsigned long size, + unsigned long oldsize); extern unsigned long __init lmb_phys_mem_size(void); extern unsigned long __init lmb_end_of_DRAM(void); extern unsigned long __init lmb_abs_to_phys(unsigned long);