diff -u --recursive --new-file v2.4.10/linux/fs/block_dev.c linux/fs/block_dev.c
--- v2.4.10/linux/fs/block_dev.c	Sun Sep 23 11:41:00 2001
+++ linux/fs/block_dev.c	Mon Sep 24 22:21:18 2001
@@ -22,63 +22,85 @@
 #include
 
-static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
-{
-	int err;
+#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
 
-	err = -EIO;
-	if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
-		goto out;
+static inline unsigned int blksize_bits(unsigned int size)
+{
+	unsigned int bits = 8;
+	do {
+		bits++;
+		size >>= 1;
+	} while (size > 256);
+	return bits;
+}
 
-	bh_result->b_blocknr = iblock;
-	bh_result->b_state |= 1UL << BH_Mapped;
-	err = 0;
+static inline unsigned int block_size(kdev_t dev)
+{
+	int retval = BLOCK_SIZE;
+	int major = MAJOR(dev);
 
- out:
-	return err;
+	if (blksize_size[major]) {
+		int minor = MINOR(dev);
+		if (blksize_size[major][minor])
+			retval = blksize_size[major][minor];
+	}
+	return retval;
 }
 
-static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
+static unsigned int max_block(kdev_t dev)
 {
-	int i, nr_blocks, retval, dev = inode->i_rdev;
-	unsigned long * blocks = iobuf->blocks;
+	unsigned int retval = ~0U;
+	int major = MAJOR(dev);
 
-	if (blocksize != BUFFERED_BLOCKSIZE)
-		BUG();
+	if (blk_size[major]) {
+		int minor = MINOR(dev);
+		unsigned int blocks = blk_size[major][minor];
+		if (blocks) {
+			unsigned int size = block_size(dev);
+			unsigned int sizebits = blksize_bits(size);
+			blocks += (size-1) >> BLOCK_SIZE_BITS;
+			retval = blocks << (BLOCK_SIZE_BITS - sizebits);
+			if (sizebits > BLOCK_SIZE_BITS)
+				retval = blocks >> (sizebits - BLOCK_SIZE_BITS);
+		}
+	}
+	return retval;
+}
 
-	nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
-	/* build the blocklist */
-	for (i = 0; i < nr_blocks; i++, blocknr++) {
-		struct buffer_head bh;
-		retval = blkdev_get_block(inode, blocknr, &bh);
-		if (retval)
-			goto out;
+static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
+{
+	int err;
 
-		blocks[i] = bh.b_blocknr;
-	}
+	err = -EIO;
+	if (iblock >= max_block(inode->i_rdev))
+		goto out;
 
-	retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
+	bh_result->b_blocknr = iblock;
+	bh_result->b_state |= 1UL << BH_Mapped;
+	err = 0;
 
  out:
-	return retval;
+	return err;
 }
 
 static int blkdev_writepage(struct page * page)
 {
 	int err, i;
+	unsigned int blocksize;
 	unsigned long block;
 	struct buffer_head *bh, *head;
 
 	struct inode *inode = page->mapping->host;
 
 	if (!PageLocked(page))
 		BUG();
 
+	blocksize = block_size(inode->i_rdev);
 	if (!page->buffers)
-		create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
+		create_empty_buffers(page, inode->i_rdev, blocksize);
 	head = page->buffers;
 
-	block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+	block = page->index << (PAGE_CACHE_SHIFT - blksize_bits(blocksize));
 
 	bh = head;
 	i = 0;
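For reference, the new blksize_bits()/max_block() pair above replaces the hardwired BUFFERED_BLOCKSIZE arithmetic: blksize_bits() is a log2 for the 512..PAGE_SIZE range, and max_block() converts a blk_size[] entry (kept in 1K units) into the device's block count at its soft blocksize. A minimal userspace sketch of the same arithmetic, checkable in isolation; the kdev_t table lookups are dropped and the device size in main() is an illustrative assumption:

#include <assert.h>
#include <stdio.h>

#define BLOCK_SIZE_BITS 10	/* blk_size[] entries are in 1K units */

/* log2 of the soft blocksize: 512 -> 9, 1024 -> 10, 4096 -> 12 */
static unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits = 8;
	do {
		bits++;
		size >>= 1;
	} while (size > 256);
	return bits;
}

/* Device size in 1K units -> block count at the given blocksize,
 * rounding the 1K total up to a whole number of blocks. */
static unsigned int max_block(unsigned int blocks_1k, unsigned int size)
{
	unsigned int sizebits = blksize_bits(size);
	unsigned int blocks = blocks_1k + ((size - 1) >> BLOCK_SIZE_BITS);

	if (sizebits > BLOCK_SIZE_BITS)
		return blocks >> (sizebits - BLOCK_SIZE_BITS);
	return blocks << (BLOCK_SIZE_BITS - sizebits);
}

int main(void)
{
	assert(blksize_bits(512) == 9);
	assert(blksize_bits(4096) == 12);
	/* a 10MB device (10240 x 1K) holds 2560 4K blocks or 20480 512-byte blocks */
	printf("%u %u\n", max_block(10240, 4096), max_block(10240, 512));
	return 0;
}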
@@ -132,19 +154,21 @@
 	struct inode *inode = page->mapping->host;
 	kdev_t dev = inode->i_rdev;
 	unsigned long iblock, lblock;
-	struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
-	unsigned int blocks;
+	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	unsigned int blocks, blocksize, blocksize_bits;
 	int nr, i;
 
 	if (!PageLocked(page))
 		PAGE_BUG(page);
+	blocksize = block_size(dev);
+	blocksize_bits = blksize_bits(blocksize);
 	if (!page->buffers)
-		create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+		create_empty_buffers(page, dev, blocksize);
 	head = page->buffers;
 
-	blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
-	iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
-	lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
+	blocks = PAGE_CACHE_SIZE >> blocksize_bits;
+	iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+	lblock = max_block(dev);
 	bh = head;
 	nr = 0;
 	i = 0;
@@ -159,7 +183,7 @@
 			continue;
 		}
 		if (!buffer_mapped(bh)) {
-			memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
+			memset(kmap(page) + i * blocksize, 0, blocksize);
 			flush_dcache_page(page);
 			kunmap(page);
 			set_bit(BH_Uptodate, &bh->b_state);
@@ -206,19 +230,21 @@
 	unsigned long block;
 	int err = 0;
 	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
 
-	kmap(page);
+	unsigned int blocksize, blocksize_bits;
 
+	blocksize = block_size(dev);
+	blocksize_bits = blksize_bits(blocksize);
 	if (!page->buffers)
-		create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+		create_empty_buffers(page, dev, blocksize);
 	head = page->buffers;
 
-	block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+	block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
 	    block++, block_start=block_end, bh = bh->b_this_page) {
 		if (!bh)
 			BUG();
-		block_end = block_start + BUFFERED_BLOCKSIZE;
+		block_end = block_start + blocksize;
 		if (block_end <= from)
 			continue;
 		if (block_start >= to)
@@ -258,7 +284,6 @@
 	int err = __blkdev_prepare_write(inode, page, from, to);
 	if (err) {
 		ClearPageUptodate(page);
-		kunmap(page);
 	}
 	return err;
 }
@@ -269,11 +294,13 @@
 	unsigned block_start, block_end;
 	int partial = 0, need_balance_dirty = 0;
 	struct buffer_head *bh, *head;
+	unsigned int blocksize;
 
+	blocksize = block_size(inode->i_rdev);
 	for(bh = head = page->buffers, block_start = 0;
 	    bh != head || !block_start;
 	    block_start=block_end, bh = bh->b_this_page) {
-		block_end = block_start + BUFFERED_BLOCKSIZE;
+		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
 				partial = 1;
@@ -305,7 +332,6 @@
 {
 	struct inode *inode = page->mapping->host;
 	__blkdev_commit_write(inode,page,from,to);
-	kunmap(page);
 	return 0;
 }
 
@@ -797,8 +823,6 @@
 			invalidate_buffers(bd_inode->i_rdev);
 		}
 		lock_super(sb);
-		if (sb->s_flags & MS_RDONLY)
-			update_buffers(bd_inode->i_rdev);
 		unlock_super(sb);
 		drop_super(sb);
 	}
@@ -837,7 +861,6 @@
 	sync_page: block_sync_page,
 	prepare_write: blkdev_prepare_write,
 	commit_write: blkdev_commit_write,
-	direct_IO: blkdev_direct_IO,
 };
 
 struct file_operations def_blk_fops = {
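Throughout the block_dev.c changes above, the per-page geometry is derived from the run-time blocksize rather than the old BUFFERED_BLOCKSIZE constant: a page carries PAGE_CACHE_SIZE >> blocksize_bits buffers, never more than MAX_BUF_PER_PAGE (the 512-byte worst case), and page index N covers blocks starting at N << (PAGE_CACHE_SHIFT - blocksize_bits). A small sketch of that geometry, assuming 4K pages:

#include <assert.h>
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12			/* assumed: 4K pages */
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)
#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)

int main(void)
{
	unsigned int bits;

	for (bits = 9; bits <= PAGE_CACHE_SHIFT; bits++) {
		unsigned long per_page = PAGE_CACHE_SIZE >> bits;
		/* arr[MAX_BUF_PER_PAGE] in blkdev_readpage() is always big enough */
		assert(per_page <= MAX_BUF_PER_PAGE);
		/* first block covered by page index 3 at this blocksize */
		printf("%4u-byte blocks: %lu per page, page 3 starts at block %lu\n",
		       1u << bits, per_page, 3UL << (PAGE_CACHE_SHIFT - bits));
	}
	return 0;
}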
diff -u --recursive --new-file v2.4.10/linux/fs/buffer.c linux/fs/buffer.c
--- v2.4.10/linux/fs/buffer.c	Sun Sep 23 11:41:00 2001
+++ linux/fs/buffer.c	Mon Sep 24 22:33:32 2001
@@ -96,7 +96,8 @@
 };
 static struct bh_free_head free_list[NR_SIZES];
 
-static int grow_buffers(int size);
+static void truncate_buffers(kdev_t dev);
+static int grow_buffers(kdev_t dev, int block, int size);
 static void __refile_buffer(struct buffer_head *);
 
 /* This is used by some architectures to estimate available memory. */
@@ -559,59 +560,28 @@
 	__insert_into_lru_list(bh, bh->b_list);
 }
 
-/* This function must only run if there are no other
- * references _anywhere_ to this buffer head.
- */
-static void put_last_free(struct buffer_head * bh)
+struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 {
-	struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
-	struct buffer_head **bhp = &head->list;
+	struct buffer_head *bh, **p = &hash(dev, block);
 
-	bh->b_state = 0;
+	read_lock(&hash_table_lock);
 
-	spin_lock(&head->lock);
-	bh->b_dev = B_FREE;
-	if(!*bhp) {
-		*bhp = bh;
-		bh->b_prev_free = bh;
-	}
-	bh->b_next_free = *bhp;
-	bh->b_prev_free = (*bhp)->b_prev_free;
-	(*bhp)->b_prev_free->b_next_free = bh;
-	(*bhp)->b_prev_free = bh;
-	spin_unlock(&head->lock);
-}
-
-/*
- * Why like this, I hear you say... The reason is race-conditions.
- * As we don't lock buffers (unless we are reading them, that is),
- * something might happen to it while we sleep (ie a read-error
- * will force it bad). This shouldn't really happen currently, but
- * the code is ready.
- */
-static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size)
-{
-	struct buffer_head *bh = hash(dev, block);
+	for (;;) {
+		bh = *p;
+		if (!bh)
+			break;
+		p = &bh->b_next;
+		if (bh->b_blocknr != block)
+			continue;
+		if (bh->b_size != size)
+			continue;
+		if (bh->b_dev != dev)
+			continue;
+		get_bh(bh);
+		break;
+	}
 
-	for (; bh; bh = bh->b_next)
-		if (bh->b_blocknr == block &&
-		    bh->b_size == size &&
-		    bh->b_dev == dev)
-			break;
-	if (bh)
-		get_bh(bh);
-
-	return bh;
-}
-
-struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
-{
-	struct buffer_head *bh;
-
-	read_lock(&hash_table_lock);
-	bh = __get_hash_table(dev, block, size);
 	read_unlock(&hash_table_lock);
 	return bh;
 }
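The rewritten get_hash_table() above folds the old __get_hash_table() helper into a single walk that takes the reference under the read lock before returning. The walk itself, isolated in a userspace sketch; the struct and the sample chain are illustrative stand-ins, not the kernel's types:

#include <stddef.h>
#include <stdio.h>

/* Minimal stand-in for a buffer hash chain; names are illustrative. */
struct buf {
	struct buf *next;
	int dev, block, size;
	int count;
};

static struct buf *lookup(struct buf **p, int dev, int block, int size)
{
	struct buf *bh;

	for (;;) {
		bh = *p;
		if (!bh)
			break;
		p = &bh->next;
		if (bh->block != block)
			continue;
		if (bh->size != size)
			continue;
		if (bh->dev != dev)
			continue;
		bh->count++;	/* get_bh() analogue: take a reference */
		break;
	}
	return bh;
}

int main(void)
{
	struct buf b2 = { NULL, 1, 20, 512, 0 };
	struct buf b1 = { &b2, 1, 10, 512, 0 };
	struct buf *head = &b1;

	/* both pointers printed should be equal: the chain walk found b2 */
	printf("%p %p\n", (void *)lookup(&head, 1, 20, 512), (void *)&b2);
	return 0;
}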
@@ -688,7 +658,7 @@
    we think the disk contains more recent information than the
    buffercache. The update == 1 pass marks the buffers we need to
    update, the update == 2 pass does the actual I/O. */
-void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update)
+void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
 {
 	int i, nlist, slept;
 	struct buffer_head * bh, * bh_next;
@@ -722,33 +692,18 @@
 			/* All buffers in the lru lists are mapped */
 			if (!buffer_mapped(bh))
 				BUG();
+			if (buffer_dirty(bh))
+				printk("invalidate: dirty buffer\n");
 			if (!atomic_read(&bh->b_count)) {
 				if (destroy_dirty_buffers || !buffer_dirty(bh)) {
 					remove_inode_queue(bh);
+#if 0
 					__remove_from_queues(bh);
 					put_last_free(bh);
+#endif
 				}
-			} else if (update) {
-				if ((update == 2) ^ buffer_uptodate(bh) &&
-				    (update == 2) ^ buffer_req(bh)) {
-					write_unlock(&hash_table_lock);
-					atomic_inc(&bh->b_count);
-					spin_unlock(&lru_list_lock);
-
-					if (update == 2) {
-						ll_rw_block(READ, 1, &bh);
-						wait_on_buffer(bh);
-					} else {
-						lock_buffer(bh);
-						clear_bit(BH_Uptodate, &bh->b_state);
-						clear_bit(BH_Req, &bh->b_state);
-						unlock_buffer(bh);
-					}
-
-					atomic_dec(&bh->b_count);
-					goto retry;
-				}
-			}
+			} else
+				printk("invalidate: busy buffer\n");
 
 			write_unlock(&hash_table_lock);
 			if (slept)
@@ -759,13 +714,14 @@
 	spin_unlock(&lru_list_lock);
 	if (slept)
 		goto retry;
+
+	/* Get rid of the page cache */
+	truncate_buffers(dev);
 }
 
 void set_blocksize(kdev_t dev, int size)
 {
 	extern int *blksize_size[];
-	int i, nlist, slept;
-	struct buffer_head * bh, * bh_next;
 
 	if (!blksize_size[MAJOR(dev)])
 		return;
@@ -780,60 +736,10 @@
 	}
 	if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 		return;
+	sync_buffers(dev, 2);
 	blksize_size[MAJOR(dev)][MINOR(dev)] = size;
-
- retry:
-	slept = 0;
-	spin_lock(&lru_list_lock);
-	for(nlist = 0; nlist < NR_LIST; nlist++) {
-		bh = lru_list[nlist];
-		if (!bh)
-			continue;
-		for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
-			bh_next = bh->b_next_free;
-			if (bh->b_dev != dev || bh->b_size == size)
-				continue;
-			/* Unhashed? */
-			if (!bh->b_pprev)
-				continue;
-			if (buffer_locked(bh)) {
-				get_bh(bh);
-				spin_unlock(&lru_list_lock);
-				wait_on_buffer(bh);
-				slept = 1;
-				spin_lock(&lru_list_lock);
-				put_bh(bh);
-			}
-
-			write_lock(&hash_table_lock);
-			if (!atomic_read(&bh->b_count)) {
-				if (buffer_dirty(bh))
-					printk(KERN_WARNING
-					       "set_blocksize: dev %s buffer_dirty %lu size %hu\n",
-					       kdevname(dev), bh->b_blocknr, bh->b_size);
-				remove_inode_queue(bh);
-				__remove_from_queues(bh);
-				put_last_free(bh);
-			} else {
-				if (atomic_set_buffer_clean(bh))
-					__refile_buffer(bh);
-				clear_bit(BH_Uptodate, &bh->b_state);
-				printk(KERN_WARNING
-				       "set_blocksize: "
-				       "b_count %d, dev %s, block %lu, from %p\n",
-				       atomic_read(&bh->b_count), bdevname(bh->b_dev),
-				       bh->b_blocknr, __builtin_return_address(0));
-			}
-			write_unlock(&hash_table_lock);
-			if (slept)
-				goto out;
-		}
-	}
- out:
-	spin_unlock(&lru_list_lock);
-	if (slept)
-		goto retry;
+	invalidate_buffers(dev);
 }
 
 static void free_more_memory(void)
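set_blocksize() now reduces to: flush what is dirty (sync_buffers(dev, 2)), publish the new size, then throw away every cached buffer and the page cache behind it via invalidate_buffers()/truncate_buffers(), instead of laundering mismatched buffers in place. A toy model of why the invalidate step matters; the cache key includes the size, so entries made under the old size must never be found again. All names here are illustrative:

#include <stdio.h>

/* One-slot "buffer cache" keyed by (block, size), illustrative only. */
static struct { int valid, block, size; char data[4096]; } slot;

static void toy_set_blocksize(int *cur_size, int new_size)
{
	if (*cur_size == new_size)
		return;
	/* a sync_buffers() analogue would write slot.data back here */
	*cur_size = new_size;
	slot.valid = 0;		/* invalidate_buffers() analogue */
}

static int toy_lookup(int block, int size)
{
	return slot.valid && slot.block == block && slot.size == size;
}

int main(void)
{
	int blocksize = 1024;

	slot.valid = 1; slot.block = 7; slot.size = blocksize;
	toy_set_blocksize(&blocksize, 4096);
	/* prints 0: the stale 1K buffer is gone after the size change */
	printf("old-size buffer still visible: %d\n", toy_lookup(7, 1024));
	return 0;
}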
@@ -1137,57 +1043,16 @@
  */
 struct buffer_head * getblk(kdev_t dev, int block, int size)
 {
-	struct buffer_head * bh;
-	int isize;
-
-repeat:
-	spin_lock(&lru_list_lock);
-	write_lock(&hash_table_lock);
-	bh = __get_hash_table(dev, block, size);
-	if (bh)
-		goto out;
+	for (;;) {
+		struct buffer_head * bh;
 
-	isize = BUFSIZE_INDEX(size);
-	spin_lock(&free_list[isize].lock);
-	bh = free_list[isize].list;
-	if (bh) {
-		__remove_from_free_list(bh, isize);
-		atomic_set(&bh->b_count, 1);
-	}
-	spin_unlock(&free_list[isize].lock);
-
-	/*
-	 * OK, FINALLY we know that this buffer is the only one of
-	 * its kind, we hold a reference (b_count>0), it is unlocked,
-	 * and it is clean.
-	 */
-	if (bh) {
-		init_buffer(bh, NULL, NULL);
-		bh->b_dev = dev;
-		bh->b_blocknr = block;
-		bh->b_state = 1 << BH_Mapped;
+		bh = get_hash_table(dev, block, size);
+		if (bh)
+			return bh;
 
-		/* Insert the buffer into the regular lists */
-		__insert_into_queues(bh);
-	out:
-		write_unlock(&hash_table_lock);
-		spin_unlock(&lru_list_lock);
-		touch_buffer(bh);
-		return bh;
+		if (!grow_buffers(dev, block, size))
+			free_more_memory();
 	}
-
-	/*
-	 * If we block while refilling the free list, somebody may
-	 * create the buffer first ... search the hashes again.
-	 */
-	write_unlock(&hash_table_lock);
-	spin_unlock(&lru_list_lock);
-
-	if (!grow_buffers(size))
-		free_more_memory();
-
-	/* FIXME: getblk should fail if there's no enough memory */
-	goto repeat;
 }
 
 /* -1 -> no need to flush
@@ -1313,22 +1178,7 @@
  */
 void __bforget(struct buffer_head * buf)
 {
-	/* grab the lru lock here to block bdflush. */
-	spin_lock(&lru_list_lock);
-	write_lock(&hash_table_lock);
-	if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf))
-		goto in_use;
-	__hash_unlink(buf);
-	write_unlock(&hash_table_lock);
-	remove_inode_queue(buf);
-	__remove_from_lru_list(buf, buf->b_list);
-	spin_unlock(&lru_list_lock);
-	put_last_free(buf);
-	return;
-
- in_use:
-	write_unlock(&hash_table_lock);
-	spin_unlock(&lru_list_lock);
+	__brelse(buf);
 }
 
 /**
@@ -1524,17 +1374,17 @@
 	goto try_again;
 }
 
-static void unmap_buffer(struct buffer_head * bh)
+/*
+ * Called when truncating a buffer on a page completely.
+ *
+ * We can avoid IO by marking it clean.
+ * FIXME!! FIXME!! FIXME!! We need to unmap it too,
+ * so that the filesystem won't write to it. There's
+ * some bug somewhere..
+ */
+static void discard_buffer(struct buffer_head * bh)
 {
-	if (buffer_mapped(bh)) {
-		mark_buffer_clean(bh);
-		lock_buffer(bh);
-		clear_bit(BH_Uptodate, &bh->b_state);
-		clear_bit(BH_Mapped, &bh->b_state);
-		clear_bit(BH_Req, &bh->b_state);
-		clear_bit(BH_New, &bh->b_state);
-		unlock_buffer(bh);
-	}
+	mark_buffer_clean(bh);
 }
 
 /*
@@ -1564,7 +1414,7 @@
 		 * is this block fully flushed?
 		 */
 		if (offset <= curr_off)
-			unmap_buffer(bh);
+			discard_buffer(bh);
 		curr_off = next_off;
 		bh = next;
 	} while (bh != head);
@@ -2141,47 +1991,6 @@
 	return tmp.b_blocknr;
 }
 
-int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
-{
-	int i, nr_blocks, retval;
-	unsigned long * blocks = iobuf->blocks;
-
-	nr_blocks = iobuf->length / blocksize;
-	/* build the blocklist */
-	for (i = 0; i < nr_blocks; i++, blocknr++) {
-		struct buffer_head bh;
-
-		bh.b_state = 0;
-		bh.b_dev = inode->i_dev;
-		bh.b_size = blocksize;
-
-		retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
-		if (retval)
-			goto out;
-
-		if (rw == READ) {
-			if (buffer_new(&bh))
-				BUG();
-			if (!buffer_mapped(&bh)) {
-				/* there was an hole in the filesystem */
-				blocks[i] = -1UL;
-				continue;
-			}
-		} else {
-			if (buffer_new(&bh))
-				unmap_underlying_metadata(&bh);
-			if (!buffer_mapped(&bh))
-				BUG();
-		}
-		blocks[i] = bh.b_blocknr;
-	}
-
-	retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
-
- out:
-	return retval;
-}
-
 /*
  * IO completion routine for a buffer_head being used for kiobuf IO: we
  * can't dispatch the kiobuf callback until io_count reaches 0.
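getblk() above becomes a bare lookup-or-create loop: get_hash_table() for the hit, grow_buffers() to instantiate the backing page on a miss, free_more_memory() when allocation fails, then retry. The loop shape in a self-contained sketch; the one-slot cache and the simulated failure are assumptions for illustration:

#include <stdio.h>

static int cached = -1;			/* the one block we "cache" */

static int lookup(int block)		/* get_hash_table() analogue */
{
	return cached == block;
}

static int grow(int block)		/* grow_buffers() analogue */
{
	static int fail_once = 1;
	if (fail_once--)		/* simulate one allocation failure */
		return 0;
	cached = block;
	return 1;
}

static int toy_getblk(int block)
{
	for (;;) {
		if (lookup(block))
			return block;
		if (!grow(block))
			/* free_more_memory() analogue: reclaim, then retry */
			continue;
	}
}

int main(void)
{
	printf("got block %d\n", toy_getblk(42));
	return 0;
}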
@@ -2448,66 +2257,124 @@
 }
 
 /*
+ * Create the page-cache page that contains the requested block
+ */
+static struct page * grow_dev_page(struct block_device *bdev, unsigned long index, int size)
+{
+	struct page * page;
+
+	page = find_or_create_page(bdev->bd_inode->i_mapping, index, GFP_NOFS);
+	if (IS_ERR(page))
+		return NULL;
+
+	if (!PageLocked(page))
+		BUG();
+
+	if (!page->buffers) {
+		struct buffer_head *bh, *tail;
+		struct buffer_head *head = create_buffers(page, size, 0);
+		if (!head)
+			goto failed;
+
+		bh = head;
+		do {
+			tail = bh;
+			bh = bh->b_this_page;
+		} while (bh);
+		tail->b_this_page = head;
+		page->buffers = head;
+		page_cache_get(page);
+		atomic_inc(&buffermem_pages);
+	}
+	return page;
+
+failed:
+	UnlockPage(page);
+	page_cache_release(page);
+	return NULL;
+}
+
+static void hash_page_buffers(struct page *page, kdev_t dev, int block, int size)
+{
+	struct buffer_head *head = page->buffers;
+	struct buffer_head *bh = head;
+	unsigned int uptodate;
+
+	uptodate = 1 << BH_Mapped;
+	if (Page_Uptodate(page))
+		uptodate |= 1 << BH_Uptodate;
+
+	spin_lock(&lru_list_lock);
+	write_lock(&hash_table_lock);
+	do {
+		if (!(bh->b_state & (1 << BH_Mapped))) {
+			init_buffer(bh, NULL, NULL);
+			bh->b_dev = dev;
+			bh->b_blocknr = block;
+			bh->b_state = uptodate;
+		}
+
+		/* Insert the buffer into the regular lists */
+		if (!bh->b_pprev) {
+			__insert_into_queues(bh);
+		}
+
+		block++;
+		bh = bh->b_this_page;
+	} while (bh != head);
+	write_unlock(&hash_table_lock);
+	spin_unlock(&lru_list_lock);
+}
+
+/*
  * Try to increase the number of buffers available: the size argument
  * is used to determine what kind of buffers we want.
  */
-static int grow_buffers(int size)
+static int grow_buffers(kdev_t dev, int block, int size)
 {
 	struct page * page;
-	struct buffer_head *bh, *tmp;
-	struct buffer_head * insert_point;
-	int isize;
+	struct block_device *bdev;
+	unsigned long index;
+	int sizebits;
 
 	if ((size & 511) || (size > PAGE_SIZE)) {
 		printk(KERN_ERR "VFS: grow_buffers: size = %d\n",size);
 		return 0;
 	}
+	sizebits = -1;
+	do {
+		sizebits++;
+	} while ((size << sizebits) < PAGE_SIZE);
 
-	page = alloc_page(GFP_NOFS);
-	if (!page)
-		goto out;
-	LockPage(page);
-	bh = create_buffers(page, size, 0);
-	if (!bh)
-		goto no_buffer_head;
-
-	isize = BUFSIZE_INDEX(size);
-
-	spin_lock(&free_list[isize].lock);
-	insert_point = free_list[isize].list;
-	tmp = bh;
-	while (1) {
-		if (insert_point) {
-			tmp->b_next_free = insert_point->b_next_free;
-			tmp->b_prev_free = insert_point;
-			insert_point->b_next_free->b_prev_free = tmp;
-			insert_point->b_next_free = tmp;
-		} else {
-			tmp->b_prev_free = tmp;
-			tmp->b_next_free = tmp;
-		}
-		insert_point = tmp;
-		if (tmp->b_this_page)
-			tmp = tmp->b_this_page;
-		else
-			break;
+	index = block >> sizebits;
+	block = index << sizebits;
+
+	bdev = bdget(kdev_t_to_nr(dev));
+	if (!bdev) {
+		printk("No block device for %s\n", kdevname(dev));
+		BUG();
 	}
-	tmp->b_this_page = bh;
-	free_list[isize].list = bh;
-	spin_unlock(&free_list[isize].lock);
-
-	page->buffers = bh;
-	page->flags &= ~(1 << PG_referenced);
-	lru_cache_add(page);
-	UnlockPage(page);
-	atomic_inc(&buffermem_pages);
-	return 1;
 
-no_buffer_head:
+	/* Create a page with the proper size buffers.. */
+	page = grow_dev_page(bdev, index, size);
+
+	/* This is "wrong" - talk to Al Viro */
+	atomic_dec(&bdev->bd_count);
+	if (!page)
+		return 0;
+
+	/* Hash in the buffers on the hash list */
+	hash_page_buffers(page, dev, block, size);
 	UnlockPage(page);
 	page_cache_release(page);
-out:
-	return 0;
+	return 1;
+}
+
+static void truncate_buffers(kdev_t dev)
+{
+	struct block_device *bdev = bdget(kdev_t_to_nr(dev));
+	truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+	atomic_dec(&bdev->bd_count);
 }
 
 static int sync_page_buffers(struct buffer_head *bh, unsigned int gfp_mask)
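grow_buffers() now has to map a block number onto a page: sizebits is the per-page block count in bits (the smallest shift with (size << sizebits) >= PAGE_SIZE), index = block >> sizebits selects the page, and block = index << sizebits realigns to the first block of that page, which is where hash_page_buffers() starts numbering. The rounding in isolation, assuming 4K pages:

#include <stdio.h>

#define PAGE_SIZE 4096		/* assumed page size */

int main(void)
{
	int size, block = 2001;

	for (size = 512; size <= PAGE_SIZE; size <<= 1) {
		int sizebits = -1;
		unsigned long index, first;

		do {
			sizebits++;
		} while ((size << sizebits) < PAGE_SIZE);

		index = block >> sizebits;	/* page that holds the block */
		first = index << sizebits;	/* first block in that page */
		printf("size %4d: %d blocks/page, block %d -> page %lu, blocks %lu..%lu\n",
		       size, 1 << sizebits, block, index, first,
		       first + (1UL << sizebits) - 1);
	}
	return 0;
}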
diff -u --recursive --new-file v2.4.10/linux/fs/ext2/inode.c linux/fs/ext2/inode.c
--- v2.4.10/linux/fs/ext2/inode.c	Sun Sep 23 11:41:00 2001
+++ linux/fs/ext2/inode.c	Mon Sep 24 22:25:20 2001
@@ -586,10 +586,6 @@
 {
 	return generic_block_bmap(mapping,block,ext2_get_block);
 }
-static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
-{
-	return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
-}
 struct address_space_operations ext2_aops = {
 	readpage: ext2_readpage,
 	writepage: ext2_writepage,
@@ -597,7 +593,6 @@
 	prepare_write: ext2_prepare_write,
 	commit_write: generic_commit_write,
 	bmap: ext2_bmap,
-	direct_IO: ext2_direct_IO,
 };
 
 /*
diff -u --recursive --new-file v2.4.10/linux/include/linux/blkdev.h linux/include/linux/blkdev.h
--- v2.4.10/linux/include/linux/blkdev.h	Sun Sep 23 11:41:01 2001
+++ linux/include/linux/blkdev.h	Mon Sep 24 22:30:24 2001
@@ -203,15 +203,4 @@
 #define blk_finished_io(nsects)	do { } while (0)
 #define blk_started_io(nsects)	do { } while (0)
 
-static inline int buffered_blk_size(kdev_t dev)
-{
-	int ret = INT_MAX;
-	int major = MAJOR(dev);
-
-	if (blk_size[major])
-		ret = blk_size[major][MINOR(dev)] + ((BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS);
-
-	return ret;
-}
-
 #endif
diff -u --recursive --new-file v2.4.10/linux/include/linux/fs.h linux/include/linux/fs.h
--- v2.4.10/linux/include/linux/fs.h	Sun Sep 23 11:41:01 2001
+++ linux/include/linux/fs.h	Mon Sep 24 22:28:23 2001
@@ -46,10 +46,6 @@
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
diff -u --recursive --new-file v2.4.10/linux/mm/filemap.c linux/mm/filemap.c
--- v2.4.10/linux/mm/filemap.c
+++ linux/mm/filemap.c
 #include
 #include
+#include
 #include
 #include
 
@@ -56,6 +57,7 @@
 #define CLUSTER_PAGES		(1 << page_cluster)
 #define CLUSTER_OFFSET(x)	(((x) >> page_cluster) << page_cluster)
 
+static void FASTCALL(add_page_to_hash_queue(struct page * page, struct page **p));
 static void add_page_to_hash_queue(struct page * page, struct page **p)
 {
 	struct page *next = *p;
@@ -792,11 +794,13 @@
 }
 
 /*
- * Same as the above, but lock the page too, verifying that
- * it's still valid once we own it.
- */
-struct page * __find_lock_page (struct address_space *mapping,
-				unsigned long offset, struct page **hash)
+ * Must be called with the pagecache lock held,
+ * will return with it held (but it may be dropped
+ * during blocking operations..
+ */
+static struct page * FASTCALL(__find_lock_page_helper(struct address_space *, unsigned long, struct page *));
+static struct page * __find_lock_page_helper(struct address_space *mapping,
+					unsigned long offset, struct page *hash)
 {
 	struct page *page;
 
@@ -805,27 +809,72 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
-	spin_lock(&pagecache_lock);
-	page = __find_page_nolock(mapping, offset, *hash);
+	page = __find_page_nolock(mapping, offset, hash);
 	if (page) {
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		if (TryLockPage(page)) {
+			spin_unlock(&pagecache_lock);
+			lock_page(page);
+			spin_lock(&pagecache_lock);
 
-		lock_page(page);
+			/* Has the page been re-allocated while we slept? */
+			if (page->mapping != mapping || page->index != offset) {
+				UnlockPage(page);
+				page_cache_release(page);
+				goto repeat;
+			}
+		}
+	}
+	return page;
+}
 
-		/* Is the page still hashed? Ok, good.. */
-		if (page->mapping == mapping && page->index == offset)
-			return page;
+/*
+ * Same as the above, but lock the page too, verifying that
+ * it's still valid once we own it.
+ */
+struct page * __find_lock_page (struct address_space *mapping,
+				unsigned long offset, struct page **hash)
+{
+	struct page *page;
 
-		/* Nope: we raced. Release and try again.. */
-		UnlockPage(page);
-		page_cache_release(page);
-		goto repeat;
-	}
+	spin_lock(&pagecache_lock);
+	page = __find_lock_page_helper(mapping, offset, *hash);
 	spin_unlock(&pagecache_lock);
-	return NULL;
+	return page;
 }
 
+/*
+ * Same as above, but create the page if required..
+ */
+struct page * find_or_create_page(struct address_space *mapping, unsigned long index, unsigned int gfp_mask)
+{
+	struct page *page;
+	struct page **hash = page_hash(mapping, index);
+
+	spin_lock(&pagecache_lock);
+	page = __find_lock_page_helper(mapping, index, *hash);
+	spin_unlock(&pagecache_lock);
+	if (!page) {
+		struct page *newpage = alloc_page(gfp_mask);
+		page = ERR_PTR(-ENOMEM);
+		if (newpage) {
+			spin_lock(&pagecache_lock);
+			page = __find_lock_page_helper(mapping, index, *hash);
+			if (likely(!page)) {
+				page = newpage;
+				__add_to_page_cache(page, mapping, index, hash);
+				newpage = NULL;
+			}
+			spin_unlock(&pagecache_lock);
+			if (unlikely(newpage != NULL))
+				page_cache_release(newpage);
+		}
+	}
+	return page;
+}
+
+
+
 #if 0
 #define PROFILE_READAHEAD
 #define DEBUG_READAHEAD
@@ -960,10 +1009,7 @@
 {
 	unsigned long end_index;
 
-	if (!S_ISBLK(inode->i_mode))
-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-	else
-		end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);
+	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 
 	return end_index;
 }
@@ -972,10 +1018,7 @@
 {
 	loff_t rsize;
 
-	if (!S_ISBLK(inode->i_mode))
-		rsize = inode->i_size;
-	else
-		rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS;
+	rsize = inode->i_size;
 
 	return rsize;
 }
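find_or_create_page() above is the classic optimistic-insert pattern: look up under the lock, allocate with the lock dropped (alloc_page() can block), re-check under the lock before inserting, and release the fresh page if another thread won the race. A compact pthreads rendition of the same shape; the table, key, and names are illustrative assumptions:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define TABLE_SIZE 64

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static void *table[TABLE_SIZE];		/* toy page "cache" */

static void *find_or_create(unsigned long index)
{
	void *page, *newpage;

	pthread_mutex_lock(&cache_lock);
	page = table[index % TABLE_SIZE];
	pthread_mutex_unlock(&cache_lock);
	if (page)
		return page;

	newpage = malloc(4096);		/* alloc_page(): may block, so no lock held */
	if (!newpage)
		return NULL;

	pthread_mutex_lock(&cache_lock);
	page = table[index % TABLE_SIZE];	/* re-check: did we race? */
	if (!page) {
		page = newpage;
		table[index % TABLE_SIZE] = page;
		newpage = NULL;
	}
	pthread_mutex_unlock(&cache_lock);

	free(newpage);			/* lost the race: drop our copy */
	return page;
}

int main(void)
{
	/* prints 1: the second lookup returns the page the first one made */
	printf("%d\n", find_or_create(5) == find_or_create(5));
	return 0;
}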
@@ -1316,92 +1359,6 @@
 	UPDATE_ATIME(inode);
 }
 
-static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
-{
-	ssize_t retval;
-	int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
-	struct kiobuf * iobuf;
-	struct inode * inode = filp->f_dentry->d_inode;
-	struct address_space * mapping = inode->i_mapping;
-
-	new_iobuf = 0;
-	iobuf = filp->f_iobuf;
-	if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
-		/*
-		 * A parallel read/write is using the preallocated iobuf
-		 * so just run slow and allocate a new one.
-		 */
-		retval = alloc_kiovec(1, &iobuf);
-		if (retval)
-			goto out;
-		new_iobuf = 1;
-	}
-
-	if (!S_ISBLK(inode->i_mode)) {
-		blocksize = inode->i_sb->s_blocksize;
-		blocksize_bits = inode->i_sb->s_blocksize_bits;
-	} else {
-		blocksize = BUFFERED_BLOCKSIZE;
-		blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
-	}
-	blocksize_mask = blocksize - 1;
-	chunk_size = KIO_MAX_ATOMIC_IO << 10;
-
-	retval = -EINVAL;
-	if ((offset & blocksize_mask) || (count & blocksize_mask))
-		goto out_free;
-	if (!mapping->a_ops->direct_IO)
-		goto out_free;
-
-	/*
-	 * Flush to disk exlusively the _data_, metadata must remains
-	 * completly asynchronous or performance will go to /dev/null.
-	 */
-	filemap_fdatasync(mapping);
-	retval = fsync_inode_data_buffers(inode);
-	filemap_fdatawait(mapping);
-	if (retval < 0)
-		goto out_free;
-
-	progress = retval = 0;
-	while (count > 0) {
-		iosize = count;
-		if (iosize > chunk_size)
-			iosize = chunk_size;
-
-		retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
-		if (retval)
-			break;
-
-		retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
-
-		if (rw == READ && retval > 0)
-			mark_dirty_kiobuf(iobuf, retval);
-
-		if (retval >= 0) {
-			count -= retval;
-			buf += retval;
-			progress += retval;
-		}
-
-		unmap_kiobuf(iobuf);
-
-		if (retval != iosize)
-			break;
-	}
-
-	if (progress)
-		retval = progress;
-
- out_free:
-	if (!new_iobuf)
-		clear_bit(0, &filp->f_iobuf_lock);
-	else
-		free_kiovec(1, &iobuf);
- out:
-	return retval;
-}
-
 int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
 	char *kaddr;
@@ -1435,9 +1392,6 @@
 	if ((ssize_t) count < 0)
 		return -EINVAL;
 
-	if (filp->f_flags & O_DIRECT)
-		goto o_direct;
-
 	retval = -EFAULT;
 	if (access_ok(VERIFY_WRITE, buf, count)) {
 		retval = 0;
@@ -1456,28 +1410,7 @@
 			retval = desc.error;
 		}
 	}
- out:
 	return retval;
-
- o_direct:
-	{
-		loff_t pos = *ppos, size;
-		struct inode * inode = filp->f_dentry->d_inode;
-
-		retval = 0;
-		if (!count)
-			goto out; /* skip atime */
-		size = calc_rsize(inode);
-		if (pos < size) {
-			if (pos + count > size)
-				count = size - pos;
-			retval = generic_file_direct_IO(READ, filp, buf, count, pos);
-			if (retval > 0)
-				*ppos = pos + retval;
-		}
-		UPDATE_ATIME(filp->f_dentry->d_inode);
-		goto out;
-	}
 }
 
 static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
@@ -2778,9 +2711,6 @@
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	mark_inode_dirty_sync(inode);
 
-	if (file->f_flags & O_DIRECT)
-		goto o_direct;
-
 	do {
 		unsigned long index, offset;
 		long page_fault;
@@ -2855,7 +2785,6 @@
 	if ((status >= 0) && (file->f_flags & O_SYNC))
 		status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
 
-out_status:
 	err = written ? written : status;
 out:
@@ -2864,25 +2793,6 @@
 fail_write:
 	status = -EFAULT;
 	goto unlock;
-
-o_direct:
-	written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
-	if (written > 0) {
-		loff_t end = pos + written;
-		if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
-			inode->i_size = end;
-			mark_inode_dirty(inode);
-		}
-		*ppos = end;
-		invalidate_inode_pages2(mapping);
-	}
-	/*
-	 * Sync the fs metadata but not the minor inode changes and
-	 * of course not the data as we did direct DMA for the IO.
-	 */
-	if (written >= 0 && file->f_flags & O_SYNC)
-		status = generic_osync_inode(inode, OSYNC_METADATA);
-	goto out_status;
 }
 
 void __init page_cache_init(unsigned long mempages)
diff -u --recursive --new-file v2.4.10/linux/mm/memory.c linux/mm/memory.c
--- v2.4.10/linux/mm/memory.c	Sun Sep 23 11:41:01 2001
+++ linux/mm/memory.c	Mon Sep 24 10:44:18 2001
@@ -1101,6 +1101,10 @@
 	return;
 }
 
+/* Swap 80% full? Release the pages as they are paged in.. */
+#define vm_swap_full() \
+	(swapper_space.nrpages*5 > total_swap_pages*4)
+
 /*
  * We hold the mm semaphore and the page_table_lock on entry and exit.
  */
@@ -1158,10 +1162,12 @@
 	swap_free(entry);
 	mark_page_accessed(page);
 	if (exclusive_swap_page(page)) {
-		if (vma->vm_flags & VM_WRITE)
-			pte = pte_mkwrite(pte);
-		pte = pte_mkdirty(pte);
-		delete_from_swap_cache(page);
+		if (write_access || vm_swap_full()) {
+			pte = pte_mkdirty(pte);
+			if (vma->vm_flags & VM_WRITE)
+				pte = pte_mkwrite(pte);
+			delete_from_swap_cache(page);
+		}
 	}
 	UnlockPage(page);
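The new vm_swap_full() heuristic drives the do_swap_page() change above: once the swap cache holds more than 80% as many pages as there is swap in total, exclusive pages are reclaimed from swap even on read faults, freeing slots early. The threshold is pure integer arithmetic; a check with illustrative numbers:

#include <stdio.h>

/* vm_swap_full() with its two inputs made explicit: "full" means the
 * swap cache exceeds 80% of total swap (nrpages*5 > total*4). */
static int vm_swap_full(unsigned long swapcache_pages, unsigned long total_swap_pages)
{
	return swapcache_pages * 5 > total_swap_pages * 4;
}

int main(void)
{
	printf("%d %d\n",
	       vm_swap_full(79, 100),	/* 79% of swap used -> 0 */
	       vm_swap_full(81, 100));	/* 81% of swap used -> 1 */
	return 0;
}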