Inline various things to cope with identified regressions and for utterly
trivial functions that can be inlined with the non-private structure
declaration.

-- wli

 arch/i386/mm/tlb.c         |  222 ---------------------------------------------
 include/asm-i386/pgalloc.h |   53 ++++++++++
 include/asm-i386/tlb.h     |  178 +++++++++++++++++++++++++++++++++---
 3 files changed, 221 insertions(+), 232 deletions(-)

diff -prauN pgcl-2.5.70-bk9-2/arch/i386/mm/tlb.c pgcl-2.5.70-bk9-3/arch/i386/mm/tlb.c
--- pgcl-2.5.70-bk9-2/arch/i386/mm/tlb.c	2003-06-04 21:28:28.000000000 -0700
+++ pgcl-2.5.70-bk9-3/arch/i386/mm/tlb.c	2003-06-04 22:10:49.000000000 -0700
@@ -9,49 +9,7 @@
 #include
 #include
 
-#define __GFP_PTE	(GFP_KERNEL|__GFP_REPEAT)
-#ifdef CONFIG_HIGHMEM
-#define GFP_PTE		(__GFP_PTE|__GFP_HIGHMEM)
-#else
-#define GFP_PTE		__GFP_PTE
-#endif
-
-#define PG_PTE		PG_arch_1
-#define NR_PTE		128
-#define NR_NONPTE	512
-#define MAX_ZONE_ID	(MAX_NUMNODES * MAX_NR_ZONES)
-
-#define PagePTE(page)		test_bit(PG_PTE, &(page)->flags)
-#define SetPagePTE(page)	set_bit(PG_PTE, &(page)->flags)
-#define ClearPagePTE(page)	clear_bit(PG_PTE, &(page)->flags)
-#define PageZoneID(page)	((page)->flags >> ZONE_SHIFT)
-
-struct mmu_gather {
-	struct mm_struct *mm;
-
-	/* number of active ptes needing a TLB flush before reuse */
-	int nr_pte_active;
-
-	/* whether some ptes were unmapped */
-	unsigned int need_flush;
-
-	/* non-zero means full mm flush */
-	unsigned int fullmm;
-
-	/* number freed for RSS adjustment */
-	unsigned long freed;
-
-	/* number of ready ptes */
-	int nr_pte_ready;
-
-	struct list_head active_list[MAX_ZONE_ID], ready_list[MAX_ZONE_ID];
-	int active_count[MAX_ZONE_ID], ready_count[MAX_ZONE_ID];
-
-	int nr_nonpte;
-	struct page *nonpte[NR_NONPTE];
-};
-
-static DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 void tlb_init(void)
 {
@@ -67,41 +25,12 @@ void tlb_init(void)
 	}
 }
 
-struct mm_struct *tlb_mm(struct mmu_gather *tlb)
-{
-	return tlb->mm;
-}
-
-void tlb_inc_freed(struct mmu_gather *tlb)
-{
-	tlb->freed++;
-}
-
 /*
  * When an mmu_gather fills, we must flush the entire mm, in no
  * small part because whole-mm flushes are the sole bulk TLB
  * invalidation primitive on i386.
  */
-void tlb_flush(struct mmu_gather *tlb)
-{
-	flush_tlb_mm(tlb->mm);
-}
-
-struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int flush)
-{
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, get_cpu());
-	tlb->mm = mm;
-	tlb->fullmm = flush;
-	put_cpu();
-	return tlb;
-}
-
-void tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *pte, unsigned long addr)
-{
-	tlb->need_flush = 1;
-}
-
-static void tlb_flush_ready(struct mmu_gather *tlb)
+void tlb_flush_ready(struct mmu_gather *tlb)
 {
 	int count, zone = 0;
 	while (tlb->nr_pte_ready >= NR_PTE) {
@@ -126,153 +55,6 @@ static void tlb_flush_ready(struct mmu_g
 		BUG_ON(count != tlb->nr_pte_ready);
 }
 
-void tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start,
-			unsigned long end)
-{
-	int zone;
-
-	if (!tlb->need_flush && tlb->nr_nonpte < NR_NONPTE) {
-		BUG_ON(tlb->nr_nonpte < 0);
-		BUG_ON(tlb->nr_pte_active < 0);
-		BUG_ON(tlb->nr_pte_ready < 0);
-		return;
-	}
-
-	tlb->need_flush = 0;
-	tlb_flush(tlb);
-	BUG_ON(tlb->nr_nonpte < 0);
-	if (tlb->nr_nonpte) {
-		free_pages_and_swap_cache(tlb->nonpte, tlb->nr_nonpte);
-		tlb->nr_nonpte = 0;
-	}
-
-	for (zone = 0; zone < MAX_ZONE_ID; ++zone) {
-		if (list_empty(&tlb->active_list[zone])) {
-			BUG_ON(tlb->active_count[zone]);
-			continue;
-		}
-
-		list_splice_init(&tlb->active_list[zone],
-					&tlb->ready_list[zone]);
-		BUG_ON(tlb->active_count[zone] < 0);
-		BUG_ON(tlb->ready_count[zone] < 0);
-		tlb->ready_count[zone] += tlb->active_count[zone];
-		tlb->active_count[zone] = 0;
-	}
-	tlb->nr_pte_ready += tlb->nr_pte_active;
-	tlb->nr_pte_active = 0;
-	if (tlb->nr_pte_ready >= NR_PTE)
-		tlb_flush_ready(tlb);
-}
-
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
-			unsigned long end)
-{
-	if (tlb->mm->rss >= tlb->freed)
-		tlb->mm->rss -= tlb->freed;
-	else
-		tlb->mm->rss = 0;
-	tlb_flush_mmu(tlb, start, end);
-}
-
-static void tlb_remove_nonpte_page(struct mmu_gather *tlb, struct page *page)
-{
-	BUG_ON(tlb->nr_nonpte >= NR_NONPTE);
-	tlb->nonpte[tlb->nr_nonpte] = page;
-	tlb->nr_nonpte++;
-	if (tlb->nr_nonpte == NR_NONPTE)
-		tlb_flush_mmu(tlb, 0, 0);
-}
-
-static void tlb_remove_pte_page(struct mmu_gather *tlb, struct page *page)
-{
-	int zone;
-
-	if (!atomic_dec_and_test(&page->count))
-		return;
-
-	zone = PageZoneID(page);
-	ClearPagePTE(page);
-	BUG_ON(tlb->nr_pte_active < 0);
-	BUG_ON(tlb->active_count[zone] < 0);
-	tlb->nr_pte_active++;
-	tlb->active_count[zone]++;
-	list_add(&page->list, &tlb->active_list[zone]);
-}
-
-void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
-	tlb->need_flush = 1;
-	if (PagePTE(page))
-		tlb_remove_pte_page(tlb, page);
-	else
-		tlb_remove_nonpte_page(tlb, page);
-}
-
-static struct page *pte_alloc_fresh(void)
-{
-	struct page *page = alloc_page(GFP_PTE);
-	if (page) {
-		clear_highpage(page);
-		BUG_ON(PagePTE(page));
-		SetPagePTE(page);
-	}
-	return page;
-}
-
-/*
- * This needs to be restructured to discourage fallback to lowmem when
- * nodes > 0 have lowmem.
- */
-static struct page *pte_alloc_ready(void)
-{
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, get_cpu());
-	struct page *page;
-
-	BUG_ON(tlb->nr_pte_ready < 0);
-	if (!tlb->nr_pte_ready) {
-		BUG_ON(tlb->nr_pte_active < 0);
-		BUG_ON(tlb->nr_nonpte < 0);
-		page = NULL;
-	} else {
-		int zone;
-		for (zone = MAX_ZONE_ID - 1; zone >= 0; --zone) {
-			if (!list_empty(&tlb->ready_list[zone]))
-				break;
-		}
-
-		BUG_ON(zone < 0);
-		BUG_ON(list_empty(&tlb->ready_list[zone]));
-
-		page = list_entry(tlb->ready_list[zone].next, struct page, list);
-		BUG_ON(PagePTE(page));
-		SetPagePTE(page);
-		list_del(&page->list);
-		atomic_set(&page->count, 1);
-		tlb->ready_count[zone]--;
-		tlb->nr_pte_ready--;
-		BUG_ON(tlb->ready_count[zone] < 0);
-		BUG_ON(tlb->nr_pte_ready < 0);
-	}
-	put_cpu();
-	return page;
-}
-
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	struct page *page = pte_alloc_ready();
-	return page ? page : pte_alloc_fresh();
-}
-
-/*
- * pmd freeing occurs as part of freeing the pgd on PAE, and is not
- * meaningful for non-PAE.
- */
-void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
-{
-	tlb->need_flush = 1;
-}
-
 /*
  * oddly declared in pgalloc.h; in general these are TLB-related pmd
  * and pte twiddlings.
diff -prauN pgcl-2.5.70-bk9-2/include/asm-i386/pgalloc.h pgcl-2.5.70-bk9-3/include/asm-i386/pgalloc.h
--- pgcl-2.5.70-bk9-2/include/asm-i386/pgalloc.h	2003-06-04 18:46:26.000000000 -0700
+++ pgcl-2.5.70-bk9-3/include/asm-i386/pgalloc.h	2003-06-04 22:00:31.000000000 -0700
@@ -34,7 +34,6 @@ void pmd_populate(struct mm_struct *mm,
 pgd_t *pgd_alloc(struct mm_struct *);
 void pgd_free(pgd_t *pgd);
 pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-struct page *pte_alloc_one(struct mm_struct *, unsigned long);
 
 static inline void pte_free_kernel(pte_t *pte)
 {
@@ -48,4 +47,56 @@ static inline void pte_free(struct page
 
 #include
 
+static inline struct page *pte_alloc_fresh(void)
+{
+	struct page *page = alloc_page(GFP_PTE);
+	if (page) {
+		clear_highpage(page);
+		BUG_ON(PagePTE(page));
+		SetPagePTE(page);
+	}
+	return page;
+}
+
+static inline struct page *pte_alloc_ready(void)
+{
+	struct mmu_gather *tlb = &per_cpu(mmu_gathers, get_cpu());
+	struct page *page;
+
+	BUG_ON(tlb->nr_pte_ready < 0);
+	if (!tlb->nr_pte_ready) {
+		BUG_ON(tlb->nr_pte_active < 0);
+		BUG_ON(tlb->nr_nonpte < 0);
+		page = NULL;
+	} else {
+		int zone;
+		for (zone = MAX_ZONE_ID - 1; zone >= 0; --zone) {
+			if (!list_empty(&tlb->ready_list[zone]))
+				break;
+		}
+
+		BUG_ON(zone < 0);
+		BUG_ON(list_empty(&tlb->ready_list[zone]));
+
+		page = list_entry(tlb->ready_list[zone].next, struct page, list);
+		BUG_ON(PagePTE(page));
+		SetPagePTE(page);
+		list_del(&page->list);
+		atomic_set(&page->count, 1);
+		tlb->ready_count[zone]--;
+		tlb->nr_pte_ready--;
+		BUG_ON(tlb->ready_count[zone] < 0);
+		BUG_ON(tlb->nr_pte_ready < 0);
+	}
+	put_cpu();
+	return page;
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+					unsigned long address)
+{
+	struct page *page = pte_alloc_ready();
+	return page ? page : pte_alloc_fresh();
+}
+
 #endif /* _I386_PGALLOC_H */
diff -prauN pgcl-2.5.70-bk9-2/include/asm-i386/tlb.h pgcl-2.5.70-bk9-3/include/asm-i386/tlb.h
--- pgcl-2.5.70-bk9-2/include/asm-i386/tlb.h	2003-06-04 18:48:24.000000000 -0700
+++ pgcl-2.5.70-bk9-3/include/asm-i386/tlb.h	2003-06-04 22:17:30.000000000 -0700
@@ -5,26 +5,61 @@
 #include
 #include
 #include
+#include
 #include
 #include
-#include
+#include
+
+#define __GFP_PTE	(GFP_KERNEL|__GFP_REPEAT)
+#ifdef CONFIG_HIGHMEM
+#define GFP_PTE		(__GFP_PTE|__GFP_HIGHMEM)
+#else
+#define GFP_PTE		__GFP_PTE
+#endif
+
+#define PG_PTE		PG_arch_1
+#define NR_PTE		128
+#define NR_NONPTE	512
+#define MAX_ZONE_ID	(MAX_NUMNODES * MAX_NR_ZONES)
+
+#define PagePTE(page)		test_bit(PG_PTE, &(page)->flags)
+#define SetPagePTE(page)	set_bit(PG_PTE, &(page)->flags)
+#define ClearPagePTE(page)	clear_bit(PG_PTE, &(page)->flags)
+#define PageZoneID(page)	((page)->flags >> ZONE_SHIFT)
 
 /*
  * x86 doesn't need any special per-pte or
  * per-vma handling..
  */
 struct vm_area_struct;
-struct mmu_gather;
+struct mmu_gather {
+	struct mm_struct *mm;
+
+	/* number of active ptes needing a TLB flush before reuse */
+	int nr_pte_active;
+
+	/* whether some ptes were unmapped */
+	unsigned int need_flush;
+
+	/* non-zero means full mm flush */
+	unsigned int fullmm;
+
+	/* number freed for RSS adjustment */
+	unsigned long freed;
+
+	/* number of ready ptes */
+	int nr_pte_ready;
 
-void tlb_flush(struct mmu_gather *);
-struct mm_struct *tlb_mm(struct mmu_gather *tlb);
-void tlb_inc_freed(struct mmu_gather *tlb);
-struct mmu_gather *tlb_gather_mmu(struct mm_struct *, unsigned int flush);
-void tlb_flush_mmu(struct mmu_gather *, unsigned long, unsigned long);
-void tlb_finish_mmu(struct mmu_gather *, unsigned long, unsigned long);
-void tlb_remove_page(struct mmu_gather *, struct page *);
-void tlb_remove_tlb_entry(struct mmu_gather *, pte_t *, unsigned long addr);
-void pmd_free_tlb(struct mmu_gather *, pmd_t *);
+	struct list_head active_list[MAX_ZONE_ID], ready_list[MAX_ZONE_ID];
+	int active_count[MAX_ZONE_ID], ready_count[MAX_ZONE_ID];
+
+	int nr_nonpte;
+	struct page *nonpte[NR_NONPTE];
+};
+
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+void tlb_flush_ready(struct mmu_gather *tlb);
 void tlb_init(void);
 
 static inline void tlb_start_vma(struct mmu_gather *tlb,
@@ -37,9 +72,130 @@ static inline void tlb_end_vma(struct mm
 {
 }
 
+static inline void tlb_inc_freed(struct mmu_gather *tlb)
+{
+	tlb->freed++;
+}
+
+static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+	tlb->need_flush = 1;
+}
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	flush_tlb_mm(tlb->mm);
+}
+
+static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
+						unsigned int flush)
+{
+	struct mmu_gather *tlb = &per_cpu(mmu_gathers, get_cpu());
+	tlb->mm = mm;
+	tlb->fullmm = flush;
+	put_cpu();
+	return tlb;
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start,
+					unsigned long end)
+{
+	int zone;
+
+	if (!tlb->need_flush && tlb->nr_nonpte < NR_NONPTE) {
+		BUG_ON(tlb->nr_nonpte < 0);
+		BUG_ON(tlb->nr_pte_active < 0);
+		BUG_ON(tlb->nr_pte_ready < 0);
+		return;
+	}
+
+	tlb->need_flush = 0;
+	tlb_flush(tlb);
+	BUG_ON(tlb->nr_nonpte < 0);
+	if (tlb->nr_nonpte) {
+		free_pages_and_swap_cache(tlb->nonpte, tlb->nr_nonpte);
+		tlb->nr_nonpte = 0;
+	}
+
+	for (zone = 0; zone < MAX_ZONE_ID; ++zone) {
+		if (list_empty(&tlb->active_list[zone])) {
+			BUG_ON(tlb->active_count[zone]);
+			continue;
+		}
+
+		list_splice_init(&tlb->active_list[zone],
+					&tlb->ready_list[zone]);
+		BUG_ON(tlb->active_count[zone] < 0);
+		BUG_ON(tlb->ready_count[zone] < 0);
+		tlb->ready_count[zone] += tlb->active_count[zone];
+		tlb->active_count[zone] = 0;
+	}
+	tlb->nr_pte_ready += tlb->nr_pte_active;
+	tlb->nr_pte_active = 0;
+	if (tlb->nr_pte_ready >= NR_PTE)
+		tlb_flush_ready(tlb);
+}
+
+static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
+					unsigned long end)
+{
+	if (tlb->mm->rss >= tlb->freed)
+		tlb->mm->rss -= tlb->freed;
+	else
+		tlb->mm->rss = 0;
+	tlb_flush_mmu(tlb, start, end);
+}
+
+static inline void tlb_remove_nonpte_page(struct mmu_gather *tlb,
+						struct page *page)
+{
+	BUG_ON(tlb->nr_nonpte >= NR_NONPTE);
+	tlb->nonpte[tlb->nr_nonpte] = page;
+	tlb->nr_nonpte++;
+	if (tlb->nr_nonpte == NR_NONPTE)
+		tlb_flush_mmu(tlb, 0, 0);
+}
+
+static inline void tlb_remove_pte_page(struct mmu_gather *tlb,
+					struct page *page)
+{
+	int zone;
+
+	if (!atomic_dec_and_test(&page->count))
+		return;
+
+	zone = PageZoneID(page);
+	ClearPagePTE(page);
+	BUG_ON(tlb->nr_pte_active < 0);
+	BUG_ON(tlb->active_count[zone] < 0);
+	tlb->nr_pte_active++;
+	tlb->active_count[zone]++;
+	list_add(&page->list, &tlb->active_list[zone]);
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	tlb->need_flush = 1;
+	if (PagePTE(page))
+		tlb_remove_pte_page(tlb, page);
+	else
+		tlb_remove_nonpte_page(tlb, page);
+}
+
 static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
 {
 	tlb_remove_page(tlb, page);
 }
 
+static inline void tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *pte,
+					unsigned long addr)
+{
+	tlb->need_flush = 1;
+}
+
+static inline struct mm_struct *tlb_mm(struct mmu_gather *tlb)
+{
+	return tlb->mm;
+}
+
 #endif /* _I386_TLB_H */
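
For anyone who wants to see the recycling scheme in isolation: the sketch below
is a minimal userspace model (plain C, not kernel code) of what the inlined
helpers do -- freed PTE pages are parked on per-zone active lists until the
next TLB flush, promoted to ready lists by tlb_flush_mmu()/tlb_flush_ready(),
and pte_alloc_one() reuses a ready page before falling back to a fresh
allocation. The fake_page/fake_gather types, the push/pop helpers and the
gather_* function names are invented stand-ins for struct page, struct
mmu_gather and the kernel list primitives; nothing here is the kernel API.

/*
 * Minimal userspace model of the pte recycling scheme above -- NOT kernel
 * code.  Everything named fake_* or gather_* is a simplified stand-in.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_ZONE_ID	3	/* stand-in for MAX_NUMNODES * MAX_NR_ZONES */

struct fake_page {
	int zone;
	struct fake_page *next;
};

struct fake_gather {
	struct fake_page *active[MAX_ZONE_ID], *ready[MAX_ZONE_ID];
	int nr_active, nr_ready;
};

static void push(struct fake_page **list, struct fake_page *page)
{
	page->next = *list;
	*list = page;
}

static struct fake_page *pop(struct fake_page **list)
{
	struct fake_page *page = *list;
	if (page)
		*list = page->next;
	return page;
}

/* like tlb_remove_pte_page(): park the page until the TLB has been flushed */
static void gather_remove_pte_page(struct fake_gather *tlb,
					struct fake_page *page)
{
	push(&tlb->active[page->zone], page);
	tlb->nr_active++;
}

/* like tlb_flush_mmu(): after the (simulated) flush, active becomes ready */
static void gather_flush(struct fake_gather *tlb)
{
	int zone;

	/* flush_tlb_mm() would happen here */
	for (zone = 0; zone < MAX_ZONE_ID; zone++) {
		while (tlb->active[zone]) {
			push(&tlb->ready[zone], pop(&tlb->active[zone]));
			tlb->nr_active--;
			tlb->nr_ready++;
		}
	}
}

/* like pte_alloc_one(): prefer a recycled ready page, highest zone first */
static struct fake_page *gather_alloc_pte(struct fake_gather *tlb)
{
	struct fake_page *page;
	int zone;

	for (zone = MAX_ZONE_ID - 1; zone >= 0; zone--) {
		if (tlb->ready[zone]) {
			tlb->nr_ready--;
			return pop(&tlb->ready[zone]);
		}
	}
	/* nothing ready: fall back to a "fresh" allocation */
	page = calloc(1, sizeof(*page));
	if (!page)
		abort();
	return page;
}

int main(void)
{
	struct fake_gather tlb = { { NULL }, { NULL }, 0, 0 };
	struct fake_page *pte = gather_alloc_pte(&tlb);	/* fresh */

	gather_remove_pte_page(&tlb, pte);	/* "unmap" it again */
	printf("before flush: active=%d ready=%d\n", tlb.nr_active, tlb.nr_ready);
	gather_flush(&tlb);
	printf("after flush:  active=%d ready=%d\n", tlb.nr_active, tlb.nr_ready);

	pte = gather_alloc_pte(&tlb);	/* recycled from a ready list */
	printf("recycled page from zone %d\n", pte->zone);
	free(pte);
	return 0;
}

The model deliberately ignores the page counts, the NR_NONPTE batch and the
PG_PTE flag; it only shows why reuse of a PTE page is safe after the flush and
why the headers need the full structure definition for the inlined fast paths.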