diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/CREDITS linux/CREDITS --- /opt/kernel/linux-2.4.10-pre4/CREDITS Mon Aug 13 02:36:24 2001 +++ linux/CREDITS Wed Sep 5 11:13:19 2001 @@ -140,9 +140,11 @@ D: VIA MVP-3/TX Pro III chipset IDE N: Jens Axboe -E: axboe@image.dk -D: Linux CD-ROM maintainer -D: jiffies wrap fixes + schedule timeouts depending on HZ == 100 +E: axboe@suse.de +D: Linux CD-ROM maintainer, DVD support +D: elevator + block layer rewrites +D: highmem I/O support +D: misc hacking on IDE, SCSI, block drivers, etc S: Peter Bangs Vej 258, 2TH S: 2500 Valby S: Denmark diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/Documentation/Configure.help linux/Documentation/Configure.help --- /opt/kernel/linux-2.4.10-pre4/Documentation/Configure.help Wed Sep 5 12:41:17 2001 +++ linux/Documentation/Configure.help Wed Sep 5 11:13:20 2001 @@ -5563,17 +5563,6 @@ Documentation/scsi.txt. The module will be called sg.o. If unsure, say N. -Debug new queueing code for SCSI -CONFIG_SCSI_DEBUG_QUEUES - This option turns on a lot of additional consistency checking for - the new queueing code. This will adversely affect performance, but - it is likely that bugs will be caught sooner if this is turned on. - This will typically cause the kernel to panic if an error is - detected, but it would have probably crashed if the panic weren't - there. Comments/questions/problems to linux-scsi mailing list - please. See http://www.andante.org/scsi_queue.html for more - up-to-date information. - Probe all LUNs on each SCSI device CONFIG_SCSI_MULTI_LUN If you have a SCSI device that supports more than one LUN (Logical diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/Documentation/DMA-mapping.txt linux/Documentation/DMA-mapping.txt --- /opt/kernel/linux-2.4.10-pre4/Documentation/DMA-mapping.txt Wed Aug 15 10:54:44 2001 +++ linux/Documentation/DMA-mapping.txt Wed Sep 5 12:42:08 2001 @@ -6,14 +6,15 @@ Jakub Jelinek Most of the 64bit platforms have special hardware that translates bus -addresses (DMA addresses) to physical addresses similarly to how page -tables and/or TLB translate virtual addresses to physical addresses. -This is needed so that e.g. PCI devices can access with a Single Address -Cycle (32bit DMA address) any page in the 64bit physical address space. -Previously in Linux those 64bit platforms had to set artificial limits on -the maximum RAM size in the system, so that the virt_to_bus() static scheme -works (the DMA address translation tables were simply filled on bootup -to map each bus address to the physical page __pa(bus_to_virt())). +addresses (DMA addresses) into physical addresses. This is similar to +how page tables and/or a TLB translates virtual addresses to physical +addresses on a cpu. This is needed so that e.g. PCI devices can +access with a Single Address Cycle (32bit DMA address) any page in the +64bit physical address space. Previously in Linux those 64bit +platforms had to set artificial limits on the maximum RAM size in the +system, so that the virt_to_bus() static scheme works (the DMA address +translation tables were simply filled on bootup to map each bus +address to the physical page __pa(bus_to_virt())). So that Linux can use the dynamic DMA mapping, it needs some help from the drivers, namely it has to take into account that DMA addresses should be @@ -28,9 +29,10 @@ #include -is in your driver. 
This file will obtain for you the definition of -the dma_addr_t type which should be used everywhere you hold a DMA -(bus) address returned from the DMA mapping functions. +is in your driver. This file will obtain for you the definition of the +dma_addr_t (which can hold any valid DMA address for the platform) +type which should be used everywhere you hold a DMA (bus) address +returned from the DMA mapping functions. What memory is DMA'able? @@ -49,7 +51,8 @@ _underlying_ memory mapped into a vmalloc() area, but this requires walking page tables to get the physical addresses, and then translating each of those pages back to a kernel address using -something like __va(). +something like __va(). [ EDIT: Update this when we integrate +Gerd Knorr's generic code which does this. ] This rule also means that you may not use kernel image addresses (ie. items in the kernel's data/text/bss segment, or your driver's) @@ -65,60 +68,96 @@ Does your device have any DMA addressing limitations? For example, is your device only capable of driving the low order 24-bits of address -on the PCI bus for DMA transfers? If your device can handle any PCI -dma address fully, then please skip to the next section, the rest of -this section does not concern your device. +on the PCI bus for SAC DMA transfers? If so, you need to inform the +PCI layer of this fact. + +By default, the kernel assumes that your device can address the full +32-bits in a SAC cycle. For a 64-bit DAC capable device, this needs +to be increased. And for a device with limitations, as discussed in +the previous paragraph, it needs to be decreased. For correct operation, you must interrogate the PCI layer in your device probe routine to see if the PCI controller on the machine can -properly support the DMA addressing limitation your device has. This -query is performed via a call to pci_dma_supported(): +properly support the DMA addressing limitation your device has. It is +good style to do this even if your device holds the default setting, +because this shows that you did think about these issues wrt. your +device. + +The query is performed via a call to pci_set_dma_mask(): - int pci_dma_supported(struct pci_dev *pdev, dma_addr_t device_mask) + int pci_set_dma_mask(struct pci_dev *pdev, u64 device_mask); Here, pdev is a pointer to the PCI device struct of your device, and device_mask is a bit mask describing which bits of a PCI address your -device supports. It returns non-zero if your card can perform DMA -properly on the machine. If it returns zero, your device can not -perform DMA properly on this platform, and attempting to do so will -result in undefined behavior. +device supports. It returns zero if your card can perform DMA +properly on the machine given the address mask you provided. -In the failure case, you have two options: - -1) Use some non-DMA mode for data transfer, if possible. -2) Ignore this device and do not initialize it. +If it returns non-zero, your device can not perform DMA properly on +this platform, and attempting to do so will result in undefined +behavior. You must either use a different mask, or not use DMA. + +This means that in the failure case, you have three options: + +1) Use another DMA mask, if possible (see below). +2) Use some non-DMA mode for data transfer, if possible. +3) Ignore this device and do not initialize it. It is recommended that your driver print a kernel KERN_WARNING message -when you do one of these two things. 
In this manner, if a user of -your driver reports that performance is bad or that the device is not -even detected, you can ask him for the kernel messages to find out +when you end up performing either #2 or #2. In this manner, if a user +of your driver reports that performance is bad or that the device is not +even detected, you can ask them for the kernel messages to find out exactly why. -So if, for example, you device can only drive the low 24-bits of -address during PCI bus mastering you might do something like: +The standard 32-bit addressing PCI device would do something like +this: - if (! pci_dma_supported(pdev, 0x00ffffff)) + if (pci_set_dma_mask(pdev, 0xffffffff)) { + printk(KERN_WARNING + "mydev: No suitable DMA available.\n"); goto ignore_this_device; + } -When DMA is possible for a given mask, the PCI layer must be informed of the -mask for later allocation operations on the device. This is achieved by -setting the dma_mask member of the pci_dev structure, like so: - -#define MY_HW_DMA_MASK 0x00ffffff - - if (! pci_dma_supported(pdev, MY_HW_DMA_MASK)) +Another common scenario is a 64-bit capable device. The approach +here is to try for 64-bit DAC addressing, but back down to a +32-bit mask should that fail. The PCI platform code may fail the +64-bit mask not because the platform is not capable of 64-bit +addressing. Rather, it may fail in this case simply because +32-bit SAC addressing is done more efficiently than DAC addressing. +Sparc64 is one platform which behaves in this way. + +Here is how you would handle a 64-bit capable device which can drive +all 64-bits during a DAC cycle: + + int using_dac; + + if (!pci_set_dma_mask(pdev, 0xffffffffffffffff)) { + using_dac = 1; + } else if (!pci_set_dma_mask(pdev, 0xffffffff)) { + using_dac = 0; + } else { + printk(KERN_WARNING + "mydev: No suitable DMA available.\n"); goto ignore_this_device; + } - pdev->dma_mask = MY_HW_DMA_MASK; +If your 64-bit device is going to be an enormous consumer of DMA +mappings, this can be problematic since the DMA mappings are a +finite resource on many platforms. Please see the "DAC Addressing +for Address Space Hungry Devices" setion near the end of this +document for how to handle this case. -A helper function is provided which performs this common code sequence: +Finally, if your device can only drive the low 24-bits of +address during PCI bus mastering you might do something like: - int pci_set_dma_mask(struct pci_dev *pdev, dma_addr_t device_mask) + if (pci_set_dma_mask(pdev, 0x00ffffff)) { + printk(KERN_WARNING + "mydev: 24-bit DMA addressing not available.\n"); + goto ignore_this_device; + } -Unlike pci_dma_supported(), this returns -EIO when the PCI layer will not be -able to DMA with addresses restricted by that mask, and returns 0 when DMA -transfers are possible. If the call succeeds, the dma_mask will have been -updated so that your driver need not worry about it. +When pci_set_dma_mask() is successful, and returns zero, the PCI layer +saves away this mask you have provided. The PCI layer will use this +information later when you make DMA mappings. There is a case which we are aware of at this time, which is worth mentioning in this documentation. If your device supports multiple @@ -169,6 +208,10 @@ Think of "consistent" as "synchronous" or "coherent". + Consistent DMA mappings are always SAC addressable. That is + to say, consistent DMA addresses given to the driver will always + be in the low 32-bits of the PCI bus space. 
+ Good examples of what to use consistent mappings for are: - Network card DMA ring descriptors. @@ -230,15 +273,26 @@ specific (and often is private to the bus which the device is attached to). -Size is the length of the region you want to allocate. +Size is the length of the region you want to allocate, in bytes. This routine will allocate RAM for that region, so it acts similarly to __get_free_pages (but takes size instead of a page order). If your driver needs regions sized smaller than a page, you may prefer using the pci_pool interface, described below. -It returns two values: the virtual address which you can use to access -it from the CPU and dma_handle which you pass to the card. +The consistent DMA mapping interfaces, for non-NULL dev, will always +return a DMA address which is SAC (Single Address Cycle) addressible. +Even if the device indicates (via PCI dma mask) that it may address +the upper 32-bits and thus perform DAC cycles, consistent allocation +will still only return 32-bit PCI addresses for DMA. This is true +of the pci_pool interface as well. + +In fact, as mentioned above, all consistent memory provided by the +kernel DMA APIs are always SAC addressable. + +pci_alloc_consistent returns two values: the virtual address which you +can use to access it from the CPU and dma_handle which you pass to the +card. The cpu return address and the DMA bus master address are both guaranteed to be aligned to the smallest PAGE_SIZE order which @@ -270,14 +324,15 @@ The "name" is for diagnostics (like a kmem_cache name); dev and size are as above. The device's hardware alignment requirement for this -type of data is "align" (a power of two). The flags are SLAB_ flags -as you'd pass to kmem_cache_create. Not all flags are understood, but -SLAB_POISON may help you find driver bugs. If you call this in a non- -sleeping context (f.e. in_interrupt is true or while holding SMP -locks), pass SLAB_ATOMIC. If your device has no boundary crossing -restrictions, pass 0 for alloc; passing 4096 says memory allocated -from this pool must not cross 4KByte boundaries (but at that time it -may be better to go for pci_alloc_consistent directly instead). +type of data is "align" (which is expressed in bytes, and must be a +power of two). The flags are SLAB_ flags as you'd pass to +kmem_cache_create. Not all flags are understood, but SLAB_POISON may +help you find driver bugs. If you call this in a non- sleeping +context (f.e. in_interrupt is true or while holding SMP locks), pass +SLAB_ATOMIC. If your device has no boundary crossing restrictions, +pass 0 for alloc; passing 4096 says memory allocated from this pool +must not cross 4KByte boundaries (but at that time it may be better to +go for pci_alloc_consistent directly instead). Allocate memory from a pci pool like this: @@ -318,6 +373,8 @@ PCI_DMA_TODEVICE means "from main memory to the PCI device" PCI_DMA_FROMDEVICE means "from the PCI device to main memory" +It is the direction in which the data moves during the DMA +transfer. You are _strongly_ encouraged to specify this as precisely as you possibly can. @@ -333,13 +390,13 @@ precise direction, and this will help catch cases where your direction tracking logic has failed to set things up properly. -Another advantage of specifying this value precisely (outside -of potential platform-specific optimizations of such) is for -debugging. Some platforms actually have a write permission -boolean which DMA mappings can be marked with, much like page -protections in a user program can have. 
Such platforms can -and do report errors in the kernel logs when the PCI controller -hardware detects violation of the permission setting. +Another advantage of specifying this value precisely (outside of +potential platform-specific optimizations of such) is for debugging. +Some platforms actually have a write permission boolean which DMA +mappings can be marked with, much like page protections in the user +program address space. Such platforms can and do report errors in the +kernel logs when the PCI controller hardware detects violation of the +permission setting. Only streaming mappings specify a direction, consistent mappings implicitly have a direction attribute setting of @@ -362,13 +419,17 @@ Using Streaming DMA mappings -The streaming DMA mapping routines can be called from interrupt context. -There are two versions of each map/unmap, one which map/unmap a single -memory region, one which map/unmap a scatterlist. +The streaming DMA mapping routines can be called from interrupt +context. There are two versions of each map/unmap, one which will +map/unmap a single memory region, and one which will map/unmap a +scatterlist. To map a single region, you do: + struct pci_dev *pdev = mydev->pdev; dma_addr_t dma_handle; + void *addr = buffer->ptr; + size_t size = buffer->len; dma_handle = pci_map_single(dev, addr, size, direction); @@ -377,9 +438,29 @@ pci_unmap_single(dev, dma_handle, size, direction); You should call pci_unmap_single when the DMA activity is finished, e.g. -from interrupt which told you the DMA transfer is done. +from the interrupt which told you that the DMA transfer is done. -Similarly with scatterlists, you map a region gathered from several regions by: +Using cpu pointers like this for single mappings has a disadvantage, +you cannot reference HIGHMEM memory in this way. Thus, there is a +map/unmap interface pair akin to pci_{map,unmap}_single. These +interfaces deal with page/offset pairs instead of cpu pointers. +Specifically: + + struct pci_dev *pdev = mydev->pdev; + dma_addr_t dma_handle; + struct page *page = buffer->page; + unsigned long offset = buffer->offset; + size_t size = buffer->len; + + dma_handle = pci_map_page(dev, page, offset, size, direction); + + ... + + pci_unmap_page(dev, dma_handle, size, direction); + +Here, "offset" means byte offset within the given page. + +With scatterlists, you map a region gathered from several regions by: int i, count = pci_map_sg(dev, sglist, nents, direction); struct scatterlist *sg; @@ -407,7 +488,7 @@ pci_unmap_sg(dev, sglist, nents, direction); -Again, make sure DMA activity finished. +Again, make sure DMA activity has already finished. PLEASE NOTE: The 'nents' argument to the pci_unmap_sg call must be the _same_ one you passed into the pci_map_sg call, @@ -421,8 +502,8 @@ all bus addresses. If you need to use the same streaming DMA region multiple times and touch -the data in between the DMA transfers, just map it -with pci_map_{single,sg}, after each DMA transfer call either: +the data in between the DMA transfers, just map it with +pci_map_{single,sg}, and after each DMA transfer call either: pci_dma_sync_single(dev, dma_handle, size, direction); @@ -430,9 +511,11 @@ pci_dma_sync_sg(dev, sglist, nents, direction); -and after the last DMA transfer call one of the DMA unmap routines +as appropriate. + +After the last DMA transfer call one of the DMA unmap routines pci_unmap_{single,sg}. 
If you don't touch the data from the first pci_map_* -call till pci_unmap_*, then you don't have to call the pci_sync_* +call till pci_unmap_*, then you don't have to call the pci_dma_sync_* routines at all. Here is pseudo code which shows a situation in which you would need @@ -492,6 +575,119 @@ supports dynamic DMA mapping in hardware) in your driver structures and/or in the card registers. +All PCI drivers should be using these interfaces with no exceptions. +It is planned to completely remove virt_to_bus() and bus_to_virt() as +they are entirely deprecated. Some ports already do not provide these +as it is impossible to correctly support them. + + 64-bit DMA and DAC cycle support + +Do you understand all of the text above? Great, then you already +know how to use 64-bit DMA addressing under Linux. Simply make +the appropriate pci_set_dma_mask() calls based upon your cards +capabilities, then use the mapping APIs above. + +It is that simple. + +Well, not for some odd devices. See the next section for information +about that. + + DAC Addressing for Address Space Hungry Devices + +There exists a class of devices which do not mesh well with the PCI +DMA mapping API. By definition these "mappings" are a finite +resource. The number of total available mappings per bus is platform +specific, but there will always be a reasonable amount. + +What is "reasonable"? Reasonable means that networking and block I/O +devices need not worry about using too many mappings. + +As an example of a problematic device, consider compute cluster cards. +They can potentially need to access gigabytes of memory at once via +DMA. Dynamic mappings are unsuitable for this kind of access pattern. + +To this end we've provided a small API by which a device driver +may use DAC cycles to directly address all of physical memory. +Not all platforms support this, but most do. It is easy to determine +whether the platform will work properly at probe time. + +First, understand that there may be a SEVERE performance penalty for +using these interfaces on some platforms. Therefore, you MUST only +use these interfaces if it is absolutely required. %99 of devices can +use the normal APIs without any problems. + +Note that for streaming type mappings you must either use these +interfaces, or the dynamic mapping interfaces above. You may not mix +usage of both for the same device. Such an act is illegal and is +guarenteed to put a banana in your tailpipe. + +However, consistent mappings may in fact be used in conjunction with +these interfaces. Remember that, as defined, consistent mappings are +always going to be SAC addressable. + +The first thing your driver needs to do is query the PCI platform +layer with your devices DAC addressing capabilities: + + int pci_dac_set_dma_mask(struct pci_dev *pdev, u64 mask); + +This routine behaves identically to pci_set_dma_mask. You may not +use the following interfaces if this routine fails. + +Next, DMA addresses using this API are kept track of using the +dma64_addr_t type. It is guarenteed to be big enough to hold any +DAC address the platform layer will give to you from the following +routines. If you have consistent mappings as well, you still +use plain dma_addr_t to keep track of those. + +All mappings obtained here will be direct. The mappings are not +translated, and this is the purpose of this dialect of the DMA API. + +All routines work with page/offset pairs. This is the _ONLY_ way to +portably refer to any piece of memory. 
If you have a cpu pointer +(which may be validly DMA'd too) you may easily obtain the page +and offset using something like this: + + struct page *page = virt_to_page(ptr); + unsigned long offset = ((unsigned long)ptr & ~PAGE_MASK); + +Here are the interfaces: + + dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, + struct page *page, + unsigned long offset, + int direction); + +The DAC address for the tuple PAGE/OFFSET are returned. The direction +argument is the same as for pci_{map,unmap}_single(). The same rules +for cpu/device access apply here as for the streaming mapping +interfaces. To reiterate: + + The cpu may touch the buffer before pci_dac_page_to_dma. + The device may touch the buffer after pci_dac_page_to_dma + is made, but the cpu may NOT. + +When the DMA transfer is complete, invoke: + + void pci_dac_dma_sync_single(struct pci_dev *pdev, + dma64_addr_t dma_addr, + size_t len, int direction); + +This must be done before the CPU looks at the buffer again. +This interface behaves identically to pci_dma_sync_{single,sg}(). + +If you need to get back to the PAGE/OFFSET tuple from a dma64_addr_t +the following interfaces are provided: + + struct page *pci_dac_dma_to_page(struct pci_dev *pdev, + dma64_addr_t dma_addr); + unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, + dma64_addr_t dma_addr); + +This is possible with the DAC interfaces purely because they are +not translated in any way. + + Closing + This document, and the API itself, would not be in it's current form without the feedback and suggestions from numerous individuals. We would like to specifically mention, in no particular order, the @@ -503,3 +699,6 @@ Grant Grundler Jay Estabrook Thomas Sailer + Andrea Arcangeli + Jens Axboe + David Mosberger-Tang diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/Documentation/DocBook/Makefile linux/Documentation/DocBook/Makefile --- /opt/kernel/linux-2.4.10-pre4/Documentation/DocBook/Makefile Sun Jul 8 22:13:47 2001 +++ linux/Documentation/DocBook/Makefile Fri Aug 3 12:04:41 2001 @@ -107,6 +107,7 @@ $(TOPDIR)/drivers/video/modedb.c \ $(TOPDIR)/fs/devfs/base.c \ $(TOPDIR)/fs/locks.c \ + $(TOPDIR)/fs/bio.c \ $(TOPDIR)/include/asm-i386/bitops.h \ $(TOPDIR)/kernel/pm.c \ $(TOPDIR)/kernel/ksyms.c \ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/alpha_ksyms.c linux/arch/alpha/kernel/alpha_ksyms.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/alpha_ksyms.c Wed Sep 5 12:41:17 2001 +++ linux/arch/alpha/kernel/alpha_ksyms.c Wed Sep 5 12:42:08 2001 @@ -127,10 +127,16 @@ EXPORT_SYMBOL(pci_alloc_consistent); EXPORT_SYMBOL(pci_free_consistent); EXPORT_SYMBOL(pci_map_single); +EXPORT_SYMBOL(pci_map_page); EXPORT_SYMBOL(pci_unmap_single); +EXPORT_SYMBOL(pci_unmap_page); EXPORT_SYMBOL(pci_map_sg); EXPORT_SYMBOL(pci_unmap_sg); EXPORT_SYMBOL(pci_dma_supported); +EXPORT_SYMBOL(pci_dac_dma_supported); +EXPORT_SYMBOL(pci_dac_page_to_dma); +EXPORT_SYMBOL(pci_dac_dma_to_page); +EXPORT_SYMBOL(pci_dac_dma_to_offset); #endif EXPORT_SYMBOL(dump_thread); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_cia.c linux/arch/alpha/kernel/core_cia.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_cia.c Wed May 23 01:29:57 2001 +++ linux/arch/alpha/kernel/core_cia.c Wed Sep 5 12:42:08 2001 @@ -321,7 +321,7 @@ * be purged to make room for the new entries coming in for the garbage page. 
*/ -#define CIA_BROKEN_TBIA_BASE 0xE0000000 +#define CIA_BROKEN_TBIA_BASE 0x30000000 #define CIA_BROKEN_TBIA_SIZE 1024 /* Always called with interrupts disabled */ @@ -380,10 +380,10 @@ for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i) ppte[i] = pte; - *(vip)CIA_IOC_PCI_W3_BASE = CIA_BROKEN_TBIA_BASE | 3; - *(vip)CIA_IOC_PCI_W3_MASK = (CIA_BROKEN_TBIA_SIZE*1024 - 1) + *(vip)CIA_IOC_PCI_W1_BASE = CIA_BROKEN_TBIA_BASE | 3; + *(vip)CIA_IOC_PCI_W1_MASK = (CIA_BROKEN_TBIA_SIZE*1024 - 1) & 0xfff00000; - *(vip)CIA_IOC_PCI_T3_BASE = virt_to_phys(ppte) >> 2; + *(vip)CIA_IOC_PCI_T1_BASE = virt_to_phys(ppte) >> 2; } static void __init @@ -586,6 +586,8 @@ failed: printk("pci: disabling sg translation window\n"); *(vip)CIA_IOC_PCI_W0_BASE = 0; + *(vip)CIA_IOC_PCI_W1_BASE = 0; + pci_isa_hose->sg_isa = NULL; alpha_mv.mv_pci_tbi = NULL; goto exit; } @@ -673,13 +675,9 @@ * Set up the PCI to main memory translation windows. * * Window 0 is scatter-gather 8MB at 8MB (for isa) - * Window 1 is direct access 1GB at 1GB - * Window 2 is direct access 1GB at 2GB - * - * We must actually use 2 windows to direct-map the 2GB space, - * because of an idiot-syncrasy of the CYPRESS chip used on - * many PYXIS systems. It may respond to a PCI bus address in - * the last 1MB of the 4GB address range. + * Window 1 is scatter-gather 1MB at 768MB (for tbia) + * Window 2 is direct access 2GB at 2GB + * Window 3 is DAC access 4GB at 8GB * * ??? NetBSD hints that page tables must be aligned to 32K, * possibly due to a hardware bug. This is over-aligned @@ -689,20 +687,35 @@ hose->sg_pci = NULL; hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 32768); - __direct_map_base = 0x40000000; + __direct_map_base = 0x80000000; __direct_map_size = 0x80000000; *(vip)CIA_IOC_PCI_W0_BASE = hose->sg_isa->dma_base | 3; *(vip)CIA_IOC_PCI_W0_MASK = (hose->sg_isa->size - 1) & 0xfff00000; *(vip)CIA_IOC_PCI_T0_BASE = virt_to_phys(hose->sg_isa->ptes) >> 2; - *(vip)CIA_IOC_PCI_W1_BASE = 0x40000000 | 1; - *(vip)CIA_IOC_PCI_W1_MASK = (0x40000000 - 1) & 0xfff00000; - *(vip)CIA_IOC_PCI_T1_BASE = 0 >> 2; - - *(vip)CIA_IOC_PCI_W2_BASE = 0x80000000 | 1; - *(vip)CIA_IOC_PCI_W2_MASK = (0x40000000 - 1) & 0xfff00000; - *(vip)CIA_IOC_PCI_T2_BASE = 0x40000000 >> 2; + *(vip)CIA_IOC_PCI_W2_BASE = __direct_map_base | 1; + *(vip)CIA_IOC_PCI_W2_MASK = (__direct_map_size - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T2_BASE = 0 >> 2; + + /* On PYXIS we have the monster window, selected by bit 40, so + there is no need for window3 to be enabled. + + On CIA, we don't have true arbitrary addressing -- bits <39:32> + are compared against W_DAC. We can, however, directly map 4GB, + which is better than before. However, due to assumptions made + elsewhere, we should not claim that we support DAC unless that + 4GB covers all of physical memory. */ + if (is_pyxis || max_low_pfn > (0x100000000 >> PAGE_SHIFT)) { + *(vip)CIA_IOC_PCI_W3_BASE = 0; + } else { + *(vip)CIA_IOC_PCI_W3_BASE = 0x00000000 | 1 | 8; + *(vip)CIA_IOC_PCI_W3_MASK = 0xfff00000; + *(vip)CIA_IOC_PCI_T3_BASE = 0 >> 2; + + alpha_mv.pci_dac_offset = 0x200000000; + *(vip)CIA_IOC_PCI_W_DAC = alpha_mv.pci_dac_offset >> 32; + } /* Prepare workaround for apparently broken tbia. 
*/ cia_prepare_tbia_workaround(); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_mcpcia.c linux/arch/alpha/kernel/core_mcpcia.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_mcpcia.c Fri Mar 2 20:12:07 2001 +++ linux/arch/alpha/kernel/core_mcpcia.c Wed Sep 5 12:42:08 2001 @@ -406,12 +406,12 @@ * Set up the PCI->physical memory translation windows. * * Window 0 is scatter-gather 8MB at 8MB (for isa) - * Window 1 is scatter-gather 128MB at 1GB + * Window 1 is scatter-gather (up to) 1GB at 1GB (for pci) * Window 2 is direct access 2GB at 2GB - * ??? We ought to scale window 1 with memory. */ hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); - hose->sg_pci = iommu_arena_new(hose, 0x40000000, 0x08000000, 0); + hose->sg_pci = iommu_arena_new(hose, 0x40000000, + size_for_memory(0x40000000), 0); __direct_map_base = 0x80000000; __direct_map_size = 0x80000000; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_titan.c linux/arch/alpha/kernel/core_titan.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_titan.c Fri Mar 2 20:12:07 2001 +++ linux/arch/alpha/kernel/core_titan.c Wed Sep 5 12:42:08 2001 @@ -20,6 +20,8 @@ #include #undef __EXTERN_INLINE +#include + #include "proto.h" #include "pci_impl.h" @@ -277,6 +279,7 @@ titan_init_one_pachip_port(titan_pachip_port *port, int index) { struct pci_controller *hose; + unsigned long sg_size; hose = alloc_pci_controller(); if (index == 0) @@ -342,40 +345,35 @@ * Note: Window 3 on Titan is Scatter-Gather ONLY * * Window 0 is scatter-gather 8MB at 8MB (for isa) - * Window 1 is direct access 1GB at 1GB - * Window 2 is direct access 1GB at 2GB - * Window 3 is scatter-gather 128MB at 3GB - * ??? We ought to scale window 3 memory. - * - * We must actually use 2 windows to direct-map the 2GB space, - * because of an idiot-syncrasy of the CYPRESS chip. It may - * respond to a PCI bus address in the last 1MB of the 4GB - * address range. + * Window 1 is scatter-gather (up to) 1GB at 1GB + * Window 2 is direct access 2GB at 2GB */ hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); hose->sg_isa->align_entry = 8; /* 64KB for ISA */ - hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0); + hose->sg_pci = iommu_arena_new(hose, 0x40000000, + size_for_memory(0x40000000), 0); hose->sg_pci->align_entry = 4; /* Titan caches 4 PTEs at a time */ - __direct_map_base = 0x40000000; + __direct_map_base = 0x80000000; __direct_map_size = 0x80000000; port->wsba[0].csr = hose->sg_isa->dma_base | 3; port->wsm[0].csr = (hose->sg_isa->size - 1) & 0xfff00000; port->tba[0].csr = virt_to_phys(hose->sg_isa->ptes); - port->wsba[1].csr = 0x40000000 | 1; - port->wsm[1].csr = (0x40000000 - 1) & 0xfff00000; - port->tba[1].csr = 0; + port->wsba[1].csr = hose->sg_pci->dma_base | 3; + port->wsm[1].csr = (hose->sg_pci->size - 1) & 0xfff00000; + port->tba[1].csr = virt_to_phys(hose->sg_pci->ptes); port->wsba[2].csr = 0x80000000 | 1; - port->wsm[2].csr = (0x40000000 - 1) & 0xfff00000; - port->tba[2].csr = 0x40000000; + port->wsm[2].csr = (0x80000000 - 1) & 0xfff00000; + port->tba[2].csr = 0x80000000; + + port->wsba[3].csr = 0; - port->wsba[3].csr = hose->sg_pci->dma_base | 3; - port->wsm[3].csr = (hose->sg_pci->size - 1) & 0xfff00000; - port->tba[3].csr = virt_to_phys(hose->sg_pci->ptes); + /* Enable the Monster Window to make DAC pci64 possible. 
*/ + port->pctl.csr |= pctl_m_mwin; titan_pci_tbi(hose, 0, -1); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_tsunami.c linux/arch/alpha/kernel/core_tsunami.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/core_tsunami.c Tue Jun 12 04:15:27 2001 +++ linux/arch/alpha/kernel/core_tsunami.c Wed Sep 5 12:42:08 2001 @@ -279,16 +279,6 @@ #define FN __FUNCTION__ static void __init -tsunami_monster_window_enable(tsunami_pchip * pchip) -{ - volatile unsigned long * csr = &pchip->pctl.csr; - - *csr |= pctl_m_mwin; - mb(); - *csr; -} - -static void __init tsunami_init_one_pchip(tsunami_pchip *pchip, int index) { struct pci_controller *hose; @@ -358,47 +348,34 @@ * Note: Window 3 is scatter-gather only * * Window 0 is scatter-gather 8MB at 8MB (for isa) - * Window 1 is direct access 1GB at 1GB - * Window 2 is direct access 1GB at 2GB - * Window 3 is scatter-gather 128MB at 3GB - * ??? We ought to scale window 3 memory. - * - * We must actually use 2 windows to direct-map the 2GB space, - * because of an idiot-syncrasy of the CYPRESS chip. It may - * respond to a PCI bus address in the last 1MB of the 4GB - * address range. + * Window 1 is scatter-gather (up to) 1GB at 1GB + * Window 2 is direct access 2GB at 2GB */ hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); - { - unsigned long size = 0x08000000; - if (max_low_pfn > (0x80000000 >> PAGE_SHIFT)) - size = 0x40000000; - hose->sg_pci = iommu_arena_new(hose, 0xc0000000, size, 0); - } - - __direct_map_base = 0x40000000; + hose->sg_pci = iommu_arena_new(hose, 0x40000000, + size_for_memory(0x40000000), 0); + + __direct_map_base = 0x80000000; __direct_map_size = 0x80000000; pchip->wsba[0].csr = hose->sg_isa->dma_base | 3; pchip->wsm[0].csr = (hose->sg_isa->size - 1) & 0xfff00000; pchip->tba[0].csr = virt_to_phys(hose->sg_isa->ptes); - pchip->wsba[1].csr = 0x40000000 | 1; - pchip->wsm[1].csr = (0x40000000 - 1) & 0xfff00000; - pchip->tba[1].csr = 0; + pchip->wsba[1].csr = hose->sg_pci->dma_base | 3; + pchip->wsm[1].csr = (hose->sg_pci->size - 1) & 0xfff00000; + pchip->tba[1].csr = virt_to_phys(hose->sg_pci->ptes); pchip->wsba[2].csr = 0x80000000 | 1; - pchip->wsm[2].csr = (0x40000000 - 1) & 0xfff00000; - pchip->tba[2].csr = 0x40000000; + pchip->wsm[2].csr = (0x80000000 - 1) & 0xfff00000; + pchip->tba[2].csr = 0x80000000; - pchip->wsba[3].csr = hose->sg_pci->dma_base | 3; - pchip->wsm[3].csr = (hose->sg_pci->size - 1) & 0xfff00000; - pchip->tba[3].csr = virt_to_phys(hose->sg_pci->ptes); - - tsunami_pci_tbi(hose, 0, -1); + pchip->wsba[3].csr = 0; /* Enable the Monster Window to make DAC pci64 possible. */ - tsunami_monster_window_enable(pchip); + pchip->pctl.csr |= pctl_m_mwin; + + tsunami_pci_tbi(hose, 0, -1); } void __init diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/pci.c linux/arch/alpha/kernel/pci.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/pci.c Mon May 21 22:38:41 2001 +++ linux/arch/alpha/kernel/pci.c Wed Sep 5 12:42:08 2001 @@ -76,40 +76,34 @@ dev->resource[3].end = dev->resource[3].start + 7; } -/* - * Notorious Cy82C693 chip. One of its numerous bugs: although - * Cypress IDE controller doesn't support native mode, it has - * programmable addresses of IDE command/control registers. - * This violates PCI specifications, confuses IDE subsystem - * and causes resource conflict between primary HD_CMD register - * and floppy controller. Ugh. - * Fix that. 
- */ static void __init -quirk_cypress_ide_ports(struct pci_dev *dev) +quirk_cypress(struct pci_dev *dev) { - if (dev->class >> 8 != PCI_CLASS_STORAGE_IDE) - return; - dev->resource[0].flags = 0; - dev->resource[1].flags = 0; -} + /* The Notorious Cy82C693 chip. One of its numerous bugs: although + Cypress IDE controller doesn't support native mode, it has + programmable addresses of IDE command/control registers. This + violates PCI specifications, confuses IDE subsystem and causes + resource conflict between primary HD_CMD register and floppy + controller. Ugh. Fix that. */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) { + dev->resource[0].flags = 0; + dev->resource[1].flags = 0; + } -static void __init -quirk_vga_enable_rom(struct pci_dev *dev) -{ - /* If it's a VGA, enable its BIOS ROM at C0000. - But if its a Cirrus 543x/544x DISABLE it, since - enabling ROM disables the memory... */ - if ((dev->class >> 8) == PCI_CLASS_DISPLAY_VGA && - (dev->vendor != PCI_VENDOR_ID_CIRRUS || - (dev->device < 0x00a0) || (dev->device > 0x00ac))) - { - u32 reg; - - pci_read_config_dword(dev, dev->rom_base_reg, ®); - reg |= PCI_ROM_ADDRESS_ENABLE; - pci_write_config_dword(dev, dev->rom_base_reg, reg); - dev->resource[PCI_ROM_RESOURCE].flags |= PCI_ROM_ADDRESS_ENABLE; + /* Another bug. The Cypress bridge responds on the PCI bus + in the address range 0xffff0000-0xffffffff (conventional + x86 BIOS ROM). No way to turn this off, so if we use a + large direct-map window, or a large SG window, we must + avoid these addresses. */ + else if (dev->class >> 8 == PCI_CLASS_BRIDGE_ISA) { + if (__direct_map_base + __direct_map_size >= 0xffff0000) + __direct_map_size = 0xffff0000 - __direct_map_base; + else { + struct pci_controller *hose = dev->sysdata; + struct pci_iommu_arena *pci = hose->sg_pci; + if (pci && pci->dma_base + pci->size >= 0xffff0000) + pci->size = 0xffff0000 - pci->dma_base; + } } } @@ -121,8 +115,7 @@ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M5229, quirk_ali_ide_ports }, { PCI_FIXUP_HEADER, PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, - quirk_cypress_ide_ports }, - { PCI_FIXUP_FINAL, PCI_ANY_ID, PCI_ANY_ID, quirk_vga_enable_rom }, + quirk_cypress }, { 0 } }; @@ -158,26 +151,22 @@ if (start - hose->mem_space->start < PCIBIOS_MIN_MEM) start = PCIBIOS_MIN_MEM + hose->mem_space->start; - /* - * The following holds at least for the Low Cost - * Alpha implementation of the PCI interface: - * - * In sparse memory address space, the first - * octant (16MB) of every 128MB segment is - * aliased to the very first 16 MB of the - * address space (i.e., it aliases the ISA - * memory address space). Thus, we try to - * avoid allocating PCI devices in that range. - * Can be allocated in 2nd-7th octant only. - * Devices that need more than 112MB of - * address space must be accessed through - * dense memory space only! - */ - /* Align to multiple of size of minimum base. */ alignto = MAX(0x1000, size); start = ALIGN(start, alignto); - if (size <= 7 * 16*MB) { + + /* The following holds at least for the Low Cost Alpha + implementation of the PCI interface: + + In sparse memory address space, the first octant (16MB) + of every 128MB segment is aliased to the very first 16 MB + of the address space (i.e., it aliases the ISA memory + address space). Thus, we try to avoid allocating PCI + devices in that range. Can be allocated in 2nd-7th octant + only. Devices that need more than 112MB of address space + must be accessed through dense memory space only! 
*/ + + if (hose->sparse_mem_base && size <= 7 * 16*MB) { if (((start / (16*MB)) & 0x7) == 0) { start &= ~(128*MB - 1); start += 16*MB; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/pci_impl.h linux/arch/alpha/kernel/pci_impl.h --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/pci_impl.h Fri May 25 18:54:50 2001 +++ linux/arch/alpha/kernel/pci_impl.h Wed Sep 5 12:42:08 2001 @@ -161,3 +161,5 @@ extern const char *const pci_io_names[]; extern const char *const pci_mem_names[]; extern const char pci_hae0_name[]; + +extern unsigned long size_for_memory(unsigned long max); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/pci_iommu.c linux/arch/alpha/kernel/pci_iommu.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/pci_iommu.c Sun Aug 12 19:51:41 2001 +++ linux/arch/alpha/kernel/pci_iommu.c Thu Sep 6 09:29:50 2001 @@ -17,17 +17,18 @@ #define DEBUG_ALLOC 0 #if DEBUG_ALLOC > 0 -# define DBGA(args...) printk(KERN_DEBUG ##args) +# define DBGA(args...) printk(KERN_DEBUG args) #else # define DBGA(args...) #endif #if DEBUG_ALLOC > 1 -# define DBGA2(args...) printk(KERN_DEBUG ##args) +# define DBGA2(args...) printk(KERN_DEBUG args) #else # define DBGA2(args...) #endif #define DEBUG_NODIRECT 0 +#define DEBUG_FORCEDAC 0 static inline unsigned long @@ -41,26 +42,19 @@ { return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT; } - -static void __init -iommu_arena_fixup(struct pci_iommu_arena * arena) -{ - unsigned long base, size; - /* - * The Cypress chip has a quirk, it get confused by addresses - * above -1M so reserve the pagetables that maps pci addresses - * above -1M. - */ - base = arena->dma_base; - size = arena->size; - if (base + size > 0xfff00000) { - int i, fixup_start = (0xfff00000 - base) >> PAGE_SHIFT; - for (i= 0; i < (0x100000 >> PAGE_SHIFT); i++) - arena->ptes[fixup_start+i] = IOMMU_INVALID_PTE; - } -} +/* Return the minimum of MAX or the first power of two larger + than main memory. */ +unsigned long +size_for_memory(unsigned long max) +{ + unsigned long mem = max_low_pfn << PAGE_SHIFT; + if (mem < max) + max = 1UL << ceil_log2(mem); + return max; +} + struct pci_iommu_arena * iommu_arena_new(struct pci_controller *hose, dma_addr_t base, unsigned long window_size, unsigned long align) @@ -90,8 +84,6 @@ unless there are chip bugs. */ arena->align_entry = 1; - iommu_arena_fixup(arena); - return arena; } @@ -165,8 +157,9 @@ Once the device is given the dma address, the device owns this memory until either pci_unmap_single or pci_dma_sync_single is performed. */ -dma_addr_t -pci_map_single(struct pci_dev *pdev, void *cpu_addr, long size, int direction) +static dma_addr_t +pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, + int dac_allowed) { struct pci_controller *hose = pdev ? pdev->sysdata : pci_isa_hose; dma_addr_t max_dma = pdev ? pdev->dma_mask : 0x00ffffff; @@ -175,10 +168,7 @@ unsigned long paddr; dma_addr_t ret; - if (direction == PCI_DMA_NONE) - BUG(); - - paddr = virt_to_phys(cpu_addr); + paddr = __pa(cpu_addr); #if !DEBUG_NODIRECT /* First check to see if we can use the direct map window. */ @@ -186,13 +176,23 @@ && paddr + size <= __direct_map_size) { ret = paddr + __direct_map_base; - DBGA2("pci_map_single: [%p,%lx] -> direct %x from %p\n", + DBGA2("pci_map_single: [%p,%lx] -> direct %lx from %p\n", cpu_addr, size, ret, __builtin_return_address(0)); return ret; } #endif + /* Next, use DAC if selected earlier. 
*/ + if (dac_allowed) { + ret = paddr + alpha_mv.pci_dac_offset; + + DBGA2("pci_map_single: [%p,%lx] -> DAC %lx from %p\n", + cpu_addr, size, ret, __builtin_return_address(0)); + + return ret; + } + /* If the machine doesn't define a pci_tbi routine, we have to assume it doesn't support sg mapping. */ if (! alpha_mv.mv_pci_tbi) { @@ -219,12 +219,30 @@ ret = arena->dma_base + dma_ofs * PAGE_SIZE; ret += (unsigned long)cpu_addr & ~PAGE_MASK; - DBGA("pci_map_single: [%p,%lx] np %ld -> sg %x from %p\n", - cpu_addr, size, npages, ret, __builtin_return_address(0)); + DBGA2("pci_map_single: [%p,%lx] np %ld -> sg %lx from %p\n", + cpu_addr, size, npages, ret, __builtin_return_address(0)); return ret; } +dma_addr_t +pci_map_single(struct pci_dev *pdev, void *cpu_addr, size_t size, int dir) +{ + if (dir == PCI_DMA_NONE) + BUG(); + return pci_map_single_1(pdev, cpu_addr, size, + (pdev->dma_mask >> 32) != 0); +} + +dma_addr_t +pci_map_page(struct pci_dev *pdev, struct page *page, unsigned long offset, + size_t size, int dir) +{ + if (dir == PCI_DMA_NONE) + BUG(); + return pci_map_single_1(pdev, (char *)page_address(page) + offset, + size, (pdev->dma_mask >> 32) != 0); +} /* Unmap a single streaming mode DMA translation. The DMA_ADDR and SIZE must match what was provided for in a previous pci_map_single @@ -233,7 +251,7 @@ wrote there. */ void -pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, long size, +pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, int direction) { unsigned long flags; @@ -244,17 +262,21 @@ if (direction == PCI_DMA_NONE) BUG(); -#if !DEBUG_NODIRECT if (dma_addr >= __direct_map_base && dma_addr < __direct_map_base + __direct_map_size) { /* Nothing to do. */ - DBGA2("pci_unmap_single: direct [%x,%lx] from %p\n", + DBGA2("pci_unmap_single: direct [%lx,%lx] from %p\n", dma_addr, size, __builtin_return_address(0)); return; } -#endif + + if (dma_addr > 0xffffffff) { + DBGA2("pci64_unmap_single: DAC [%lx,%lx] from %p\n", + dma_addr, size, __builtin_return_address(0)); + return; + } arena = hose->sg_pci; if (!arena || dma_addr < arena->dma_base) @@ -262,7 +284,7 @@ dma_ofs = (dma_addr - arena->dma_base) >> PAGE_SHIFT; if (dma_ofs * PAGE_SIZE >= arena->size) { - printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %x " + printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %lx " " base %x size %x\n", dma_addr, arena->dma_base, arena->size); return; @@ -275,21 +297,24 @@ iommu_arena_free(arena, dma_ofs, npages); - - /* - If we're freeing ptes above the `next_entry' pointer (they + /* If we're freeing ptes above the `next_entry' pointer (they may have snuck back into the TLB since the last wrap flush), - we need to flush the TLB before reallocating the latter. - */ + we need to flush the TLB before reallocating the latter. */ if (dma_ofs >= arena->next_entry) alpha_mv.mv_pci_tbi(hose, dma_addr, dma_addr + size - 1); spin_unlock_irqrestore(&arena->lock, flags); - DBGA("pci_unmap_single: sg [%x,%lx] np %ld from %p\n", - dma_addr, size, npages, __builtin_return_address(0)); + DBGA2("pci_unmap_single: sg [%lx,%lx] np %ld from %p\n", + dma_addr, size, npages, __builtin_return_address(0)); } +void +pci_unmap_page(struct pci_dev *pdev, dma_addr_t dma_addr, + size_t size, int direction) +{ + pci_unmap_single(pdev, dma_addr, size, direction); +} /* Allocate and map kernel buffer using consistent mode DMA for PCI device. Returns non-NULL cpu-view pointer to the buffer if @@ -297,7 +322,7 @@ else DMA_ADDRP is undefined. 
*/ void * -pci_alloc_consistent(struct pci_dev *pdev, long size, dma_addr_t *dma_addrp) +pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) { void *cpu_addr; long order = get_order(size); @@ -313,8 +338,7 @@ } memset(cpu_addr, 0, size); - *dma_addrp = pci_map_single(pdev, cpu_addr, size, - PCI_DMA_BIDIRECTIONAL); + *dma_addrp = pci_map_single_1(pdev, cpu_addr, size, 0); if (*dma_addrp == 0) { free_pages((unsigned long)cpu_addr, order); return NULL; @@ -326,7 +350,6 @@ return cpu_addr; } - /* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must be values that were returned from pci_alloc_consistent. SIZE must be the same as what as passed into pci_alloc_consistent. @@ -334,7 +357,7 @@ DMA_ADDR past this call are illegal. */ void -pci_free_consistent(struct pci_dev *pdev, long size, void *cpu_addr, +pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu_addr, dma_addr_t dma_addr) { pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); @@ -354,27 +377,35 @@ Write dma_length of each leader with the combined lengths of the mergable followers. */ +#define SG_ENT_VIRT_ADDRESS(SG) \ + ((SG)->address \ + ? (SG)->address \ + : page_address((SG)->page) + (SG)->offset) + +#define SG_ENT_PHYS_ADDRESS(SG) \ + __pa(SG_ENT_VIRT_ADDRESS(SG)) + static void sg_classify(struct scatterlist *sg, struct scatterlist *end, int virt_ok) { - unsigned long next_vaddr; + unsigned long next_paddr; struct scatterlist *leader; long leader_flag, leader_length; leader = sg; leader_flag = 0; leader_length = leader->length; - next_vaddr = (unsigned long)leader->address + leader_length; + next_paddr = SG_ENT_PHYS_ADDRESS(leader) + leader_length; for (++sg; sg < end; ++sg) { unsigned long addr, len; - addr = (unsigned long) sg->address; + addr = SG_ENT_PHYS_ADDRESS(sg); len = sg->length; - if (next_vaddr == addr) { + if (next_paddr == addr) { sg->dma_address = -1; leader_length += len; - } else if (((next_vaddr | addr) & ~PAGE_MASK) == 0 && virt_ok) { + } else if (((next_paddr | addr) & ~PAGE_MASK) == 0 && virt_ok) { sg->dma_address = -2; leader_flag = 1; leader_length += len; @@ -386,7 +417,7 @@ leader_length = len; } - next_vaddr = addr + len; + next_paddr = addr + len; } leader->dma_address = leader_flag; @@ -399,9 +430,9 @@ static inline int sg_fill(struct scatterlist *leader, struct scatterlist *end, struct scatterlist *out, struct pci_iommu_arena *arena, - dma_addr_t max_dma) + dma_addr_t max_dma, int dac_allowed) { - unsigned long paddr = virt_to_phys(leader->address); + unsigned long paddr = SG_ENT_PHYS_ADDRESS(leader); long size = leader->dma_length; struct scatterlist *sg; unsigned long *ptes; @@ -416,13 +447,24 @@ out->dma_address = paddr + __direct_map_base; out->dma_length = size; - DBGA(" sg_fill: [%p,%lx] -> direct %x\n", - leader->address, size, out->dma_address); + DBGA(" sg_fill: [%p,%lx] -> direct %lx\n", + __va(paddr), size, out->dma_address); return 0; } #endif + /* If physically contiguous and DAC is available, use it. */ + if (leader->dma_address == 0 && dac_allowed) { + out->dma_address = paddr + alpha_mv.pci_dac_offset; + out->dma_length = size; + + DBGA(" sg_fill: [%p,%lx] -> DAC %lx\n", + __va(paddr), size, out->dma_address); + + return 0; + } + /* Otherwise, we'll use the iommu to make the pages virtually contiguous. */ @@ -435,17 +477,16 @@ return -1; /* Otherwise, break up the remaining virtually contiguous - hunks into individual direct maps. */ + hunks into individual direct maps and retry. */ sg_classify(leader, end, 0); - /* Retry. 
*/ - return sg_fill(leader, end, out, arena, max_dma); + return sg_fill(leader, end, out, arena, max_dma, dac_allowed); } out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr; out->dma_length = size; - DBGA(" sg_fill: [%p,%lx] -> sg %x np %ld\n", - leader->address, size, out->dma_address, npages); + DBGA(" sg_fill: [%p,%lx] -> sg %lx np %ld\n", + __va(paddr), size, out->dma_address, npages); /* All virtually contiguous. We need to find the length of each physically contiguous subsegment to fill in the ptes. */ @@ -457,7 +498,7 @@ #endif size = sg->length; - paddr = virt_to_phys(sg->address); + paddr = SG_ENT_PHYS_ADDRESS(sg); while (sg+1 < end && (int) sg[1].dma_address == -1) { size += sg[1].length; @@ -472,11 +513,11 @@ #if DEBUG_ALLOC > 0 DBGA(" (%ld) [%p,%x] np %ld\n", - last_sg - leader, last_sg->address, + last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg), last_sg->length, npages); while (++last_sg <= sg) { DBGA(" (%ld) [%p,%x] cont\n", - last_sg - leader, last_sg->address, + last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg), last_sg->length); } #endif @@ -493,15 +534,19 @@ struct pci_controller *hose; struct pci_iommu_arena *arena; dma_addr_t max_dma; + int dac_allowed; if (direction == PCI_DMA_NONE) BUG(); + dac_allowed = ((pdev->dma_mask >> 32) != 0); + /* Fast path single entry scatterlists. */ if (nents == 1) { sg->dma_length = sg->length; sg->dma_address - = pci_map_single(pdev, sg->address, sg->length, direction); + = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg), + sg->length, dac_allowed); return sg->dma_address != 0; } @@ -529,7 +574,7 @@ for (out = sg; sg < end; ++sg) { if ((int) sg->dma_address < 0) continue; - if (sg_fill(sg, end, out, arena, max_dma) < 0) + if (sg_fill(sg, end, out, arena, max_dma, dac_allowed) < 0) goto error; out++; } @@ -544,7 +589,7 @@ return out - start; -error: + error: printk(KERN_WARNING "pci_map_sg failed: " "could not allocate dma page tables\n"); @@ -555,7 +600,6 @@ return 0; } - /* Unmap a set of streaming mode DMA translations. Again, cpu read rules concerning calls here are the same as for pci_unmap_single() above. */ @@ -588,7 +632,8 @@ spin_lock_irqsave(&arena->lock, flags); for (end = sg + nents; sg < end; ++sg) { - unsigned long addr, size; + dma64_addr_t addr; + size_t size; long npages, ofs; dma_addr_t tend; @@ -597,7 +642,13 @@ if (!size) break; -#if !DEBUG_NODIRECT + if (addr > 0xffffffff) { + /* It's a DAC address -- nothing to do. */ + DBGA(" (%ld) DAC [%lx,%lx]\n", + sg - end + nents, addr, size); + continue; + } + if (addr >= __direct_map_base && addr < __direct_map_base + __direct_map_size) { /* Nothing to do. */ @@ -605,7 +656,6 @@ sg - end + nents, addr, size); continue; } -#endif DBGA(" (%ld) sg [%lx,%lx]\n", sg - end + nents, addr, size); @@ -619,29 +669,27 @@ if (fend < tend) fend = tend; } - /* - If we're freeing ptes above the `next_entry' pointer (they + /* If we're freeing ptes above the `next_entry' pointer (they may have snuck back into the TLB since the last wrap flush), - we need to flush the TLB before reallocating the latter. - */ + we need to flush the TLB before reallocating the latter. */ if ((fend - arena->dma_base) >> PAGE_SHIFT >= arena->next_entry) alpha_mv.mv_pci_tbi(hose, fbeg, fend); spin_unlock_irqrestore(&arena->lock, flags); - DBGA("pci_unmap_sg: %d entries\n", nents - (end - sg)); + DBGA("pci_unmap_sg: %ld entries\n", nents - (end - sg)); } + /* Return whether the given PCI device DMA address mask can be supported properly. 
*/ int -pci_dma_supported(struct pci_dev *pdev, dma_addr_t mask) +pci_dma_supported(struct pci_dev *pdev, u64 mask) { struct pci_controller *hose; struct pci_iommu_arena *arena; -#if !DEBUG_NODIRECT /* If there exists a direct map, and the mask fits either MAX_DMA_ADDRESS defined such that GFP_DMA does something useful, or the total system memory as shifted by the @@ -650,7 +698,6 @@ && (__direct_map_base + MAX_DMA_ADDRESS-IDENT_ADDR-1 <= mask || __direct_map_base + (max_low_pfn<sysdata : pci_isa_hose; @@ -662,4 +709,50 @@ return 1; return 0; +} + +/* True if the machine supports DAC addressing, and DEV can + make use of it given MASK. */ + +int +pci_dac_dma_supported(struct pci_dev *dev, u64 mask) +{ + dma64_addr_t dac_offset = alpha_mv.pci_dac_offset; + int ok = 1; + + /* If this is not set, the machine doesn't support DAC at all. */ + if (dac_offset == 0) + ok = 0; + + /* The device has to be able to address our DAC bit. */ + if ((dac_offset & dev->dma_mask) != dac_offset) + ok = 0; + + /* If both conditions above are met, we are fine. */ + DBGA("pci_dac_dma_supported %s from %p\n", + ok ? "yes" : "no", __builtin_return_address(0)); + + return ok; +} + +dma64_addr_t +pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, + unsigned long offset, int direction) +{ + return (alpha_mv.pci_dac_offset + + __pa(page_address(page)) + + (dma64_addr_t) offset); +} + +struct page * +pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + unsigned long paddr = (dma_addr & PAGE_MASK) - alpha_mv.pci_dac_offset; + return virt_to_page(__va(paddr)); +} + +unsigned long +pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + return (dma_addr & ~PAGE_MASK); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_cabriolet.c linux/arch/alpha/kernel/sys_cabriolet.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_cabriolet.c Thu Jan 25 00:16:23 2001 +++ linux/arch/alpha/kernel/sys_cabriolet.c Wed Sep 5 12:42:08 2001 @@ -389,6 +389,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: PYXIS_DAC_OFFSET, nr_irqs: 35, device_interrupt: cabriolet_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_dp264.c linux/arch/alpha/kernel/sys_dp264.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_dp264.c Sun Aug 12 19:38:47 2001 +++ linux/arch/alpha/kernel/sys_dp264.c Wed Sep 5 12:42:08 2001 @@ -574,6 +574,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: TSUNAMI_DAC_OFFSET, nr_irqs: 64, device_interrupt: dp264_device_interrupt, @@ -598,6 +599,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: TSUNAMI_DAC_OFFSET, nr_irqs: 64, device_interrupt: dp264_device_interrupt, @@ -621,6 +623,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: TSUNAMI_DAC_OFFSET, nr_irqs: 64, device_interrupt: dp264_device_interrupt, @@ -644,6 +647,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: TSUNAMI_DAC_OFFSET, nr_irqs: 64, device_interrupt: dp264_device_interrupt, @@ -672,6 +676,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + 
pci_dac_offset: TSUNAMI_DAC_OFFSET, nr_irqs: 64, device_interrupt: dp264_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_eiger.c linux/arch/alpha/kernel/sys_eiger.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_eiger.c Fri Mar 2 20:12:07 2001 +++ linux/arch/alpha/kernel/sys_eiger.c Wed Sep 5 12:42:08 2001 @@ -233,6 +233,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: TSUNAMI_DAC_OFFSET, nr_irqs: 128, device_interrupt: eiger_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_miata.c linux/arch/alpha/kernel/sys_miata.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_miata.c Sun May 20 02:43:05 2001 +++ linux/arch/alpha/kernel/sys_miata.c Wed Sep 5 12:42:08 2001 @@ -256,6 +256,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: PYXIS_DAC_OFFSET, nr_irqs: 48, device_interrupt: pyxis_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_rawhide.c linux/arch/alpha/kernel/sys_rawhide.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_rawhide.c Tue Jun 12 04:15:27 2001 +++ linux/arch/alpha/kernel/sys_rawhide.c Wed Sep 5 12:42:08 2001 @@ -254,6 +254,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: MCPCIA_DEFAULT_MEM_BASE, + pci_dac_offset: MCPCIA_DAC_OFFSET, nr_irqs: 128, device_interrupt: rawhide_srm_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_ruffian.c linux/arch/alpha/kernel/sys_ruffian.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_ruffian.c Thu Feb 8 21:56:29 2001 +++ linux/arch/alpha/kernel/sys_ruffian.c Wed Sep 5 12:42:08 2001 @@ -220,6 +220,7 @@ max_dma_address: ALPHA_RUFFIAN_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: PYXIS_DAC_OFFSET, nr_irqs: 48, device_interrupt: pyxis_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_sx164.c linux/arch/alpha/kernel/sys_sx164.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_sx164.c Fri Oct 27 19:55:01 2000 +++ linux/arch/alpha/kernel/sys_sx164.c Wed Sep 5 12:42:08 2001 @@ -129,7 +129,9 @@ struct percpu_struct *cpu = (struct percpu_struct*) ((char*)hwrpb + hwrpb->processor_offset); - if (alpha_using_srm && (cpu->pal_revision & 0xffff) == 0x117) { + if (amask(AMASK_MAX) != 0 + && alpha_using_srm + && (cpu->pal_revision & 0xffff) == 0x117) { __asm__ __volatile__( "lda $16,8($31)\n" "call_pal 9\n" /* Allow PALRES insns in kernel mode */ @@ -160,6 +162,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: PYXIS_DAC_OFFSET, nr_irqs: 48, device_interrupt: pyxis_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_titan.c linux/arch/alpha/kernel/sys_titan.c --- /opt/kernel/linux-2.4.10-pre4/arch/alpha/kernel/sys_titan.c Fri Mar 2 20:12:07 2001 +++ linux/arch/alpha/kernel/sys_titan.c Wed Sep 5 12:42:08 2001 @@ -378,6 +378,7 @@ max_dma_address: ALPHA_MAX_DMA_ADDRESS, min_io_address: DEFAULT_IO_BASE, min_mem_address: DEFAULT_MEM_BASE, + pci_dac_offset: TITAN_DAC_OFFSET, nr_irqs: 80, /* 64 + 16 */ 
device_interrupt: privateer_device_interrupt, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- /opt/kernel/linux-2.4.10-pre4/arch/i386/kernel/setup.c Wed Sep 5 12:41:17 2001 +++ linux/arch/i386/kernel/setup.c Wed Sep 5 12:56:53 2001 @@ -156,6 +156,8 @@ static int disable_x86_serial_nr __initdata = 1; static int disable_x86_fxsr __initdata = 0; +unsigned long max_pfn; + /* * This is set up by the setup-routine at boot-time */ @@ -772,7 +774,7 @@ void __init setup_arch(char **cmdline_p) { unsigned long bootmap_size, low_mem_size; - unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long start_pfn, max_low_pfn; int i; #ifdef CONFIG_VISWS diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/ia64/sn/io/pci_dma.c linux/arch/ia64/sn/io/pci_dma.c --- /opt/kernel/linux-2.4.10-pre4/arch/ia64/sn/io/pci_dma.c Thu Apr 12 21:16:35 2001 +++ linux/arch/ia64/sn/io/pci_dma.c Wed Sep 5 12:42:08 2001 @@ -182,7 +182,7 @@ } /* - * On sn1 we use the alt_address entry of the scatterlist to store + * On sn1 we use the orig_address entry of the scatterlist to store * the physical address corresponding to the given virtual address */ int diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/ccio-dma.c linux/arch/parisc/kernel/ccio-dma.c --- /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/ccio-dma.c Fri Feb 9 20:29:44 2001 +++ linux/arch/parisc/kernel/ccio-dma.c Wed Sep 5 12:42:08 2001 @@ -638,7 +638,7 @@ } -static int ccio_dma_supported( struct pci_dev *dev, dma_addr_t mask) +static int ccio_dma_supported( struct pci_dev *dev, u64 mask) { if (dev == NULL) { printk(MODULE_NAME ": EISA/ISA/et al not supported\n"); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/ccio-rm-dma.c linux/arch/parisc/kernel/ccio-rm-dma.c --- /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/ccio-rm-dma.c Tue Dec 5 21:29:39 2000 +++ linux/arch/parisc/kernel/ccio-rm-dma.c Wed Sep 5 12:42:08 2001 @@ -93,7 +93,7 @@ } -static int ccio_dma_supported( struct pci_dev *dev, dma_addr_t mask) +static int ccio_dma_supported( struct pci_dev *dev, u64 mask) { if (dev == NULL) { printk(MODULE_NAME ": EISA/ISA/et al not supported\n"); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/pci-dma.c linux/arch/parisc/kernel/pci-dma.c --- /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/pci-dma.c Fri Feb 9 20:29:44 2001 +++ linux/arch/parisc/kernel/pci-dma.c Wed Sep 5 12:42:08 2001 @@ -77,7 +77,7 @@ static inline void dump_resmap(void) {;} #endif -static int pa11_dma_supported( struct pci_dev *dev, dma_addr_t mask) +static int pa11_dma_supported( struct pci_dev *dev, u64 mask) { return 1; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/sba_iommu.c linux/arch/parisc/kernel/sba_iommu.c --- /opt/kernel/linux-2.4.10-pre4/arch/parisc/kernel/sba_iommu.c Fri Feb 9 20:29:44 2001 +++ linux/arch/parisc/kernel/sba_iommu.c Wed Sep 5 12:42:08 2001 @@ -779,7 +779,7 @@ } static int -sba_dma_supported( struct pci_dev *dev, dma_addr_t mask) +sba_dma_supported( struct pci_dev *dev, u64 mask) { if (dev == NULL) { printk(MODULE_NAME ": EISA/ISA/et al not supported\n"); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/iommu_common.c linux/arch/sparc64/kernel/iommu_common.c --- /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/iommu_common.c Wed Sep 
5 12:41:17 2001 +++ linux/arch/sparc64/kernel/iommu_common.c Wed Sep 5 12:42:08 2001 @@ -12,7 +12,7 @@ */ #ifdef VERIFY_SG -int verify_lengths(struct scatterlist *sg, int nents, int npages) +static int verify_lengths(struct scatterlist *sg, int nents, int npages) { int sg_len, dma_len; int i, pgcount; @@ -22,8 +22,8 @@ sg_len += sg[i].length; dma_len = 0; - for (i = 0; i < nents && sg[i].dvma_length; i++) - dma_len += sg[i].dvma_length; + for (i = 0; i < nents && sg[i].dma_length; i++) + dma_len += sg[i].dma_length; if (sg_len != dma_len) { printk("verify_lengths: Error, different, sg[%d] dma[%d]\n", @@ -32,13 +32,13 @@ } pgcount = 0; - for (i = 0; i < nents && sg[i].dvma_length; i++) { + for (i = 0; i < nents && sg[i].dma_length; i++) { unsigned long start, end; - start = sg[i].dvma_address; + start = sg[i].dma_address; start = start & IO_PAGE_MASK; - end = sg[i].dvma_address + sg[i].dvma_length; + end = sg[i].dma_address + sg[i].dma_length; end = (end + (IO_PAGE_SIZE - 1)) & IO_PAGE_MASK; pgcount += ((end - start) >> IO_PAGE_SHIFT); @@ -55,15 +55,16 @@ return 0; } -int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, int nents, iopte_t **__iopte) +static int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, int nents, iopte_t **__iopte) { struct scatterlist *sg = *__sg; iopte_t *iopte = *__iopte; - u32 dlen = dma_sg->dvma_length; - u32 daddr = dma_sg->dvma_address; + u32 dlen = dma_sg->dma_length; + u32 daddr; unsigned int sglen; unsigned long sgaddr; + daddr = dma_sg->dma_address; sglen = sg->length; sgaddr = (unsigned long) sg->address; while (dlen > 0) { @@ -136,7 +137,7 @@ return nents; } -int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte) +static int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte) { struct scatterlist *dma_sg = sg; struct scatterlist *orig_dma_sg = dma_sg; @@ -147,7 +148,7 @@ if (nents <= 0) break; dma_sg++; - if (dma_sg->dvma_length == 0) + if (dma_sg->dma_length == 0) break; } @@ -174,14 +175,15 @@ verify_maps(sg, nents, iopte) < 0) { int i; - printk("verify_sglist: Crap, messed up mappings, dumping, iodma at %08x.\n", - (u32) (sg->dvma_address & IO_PAGE_MASK)); + printk("verify_sglist: Crap, messed up mappings, dumping, iodma at "); + printk("%016lx.\n", sg->dma_address & IO_PAGE_MASK); + for (i = 0; i < nents; i++) { printk("sg(%d): address(%p) length(%x) " - "dma_address[%08x] dma_length[%08x]\n", + "dma_address[%016lx] dma_length[%016lx]\n", i, sg[i].address, sg[i].length, - sg[i].dvma_address, sg[i].dvma_length); + sg[i].dma_address, sg[i].dma_length); } } @@ -204,8 +206,8 @@ sg++; addr = (unsigned long) sg->address; if (! 
VCONTIG(prev, addr)) { - dma_sg->dvma_address = dent_addr; - dma_sg->dvma_length = dent_len; + dma_sg->dma_address = dent_addr; + dma_sg->dma_length = dent_len; dma_sg++; dent_addr = ((dent_addr + @@ -218,8 +220,8 @@ dent_len += sg->length; prev = addr + sg->length; } - dma_sg->dvma_address = dent_addr; - dma_sg->dvma_length = dent_len; + dma_sg->dma_address = dent_addr; + dma_sg->dma_length = dent_len; return ((unsigned long) dent_addr + (unsigned long) dent_len + diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/iommu_common.h linux/arch/sparc64/kernel/iommu_common.h --- /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/iommu_common.h Wed Sep 5 12:41:17 2001 +++ linux/arch/sparc64/kernel/iommu_common.h Wed Sep 5 12:42:08 2001 @@ -34,10 +34,7 @@ #undef VERIFY_SG #ifdef VERIFY_SG -int verify_lengths(struct scatterlist *sg, int nents, int npages); -int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, int nents, iopte_t **__iopte); -int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte); -void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages); +extern void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages); #endif /* Two addresses are "virtually contiguous" if and only if: @@ -47,4 +44,4 @@ #define VCONTIG(__X, __Y) (((__X) == (__Y)) || \ (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL) -unsigned long prepare_sg(struct scatterlist *sg, int nents); +extern unsigned long prepare_sg(struct scatterlist *sg, int nents); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/pci_iommu.c linux/arch/sparc64/kernel/pci_iommu.c --- /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/pci_iommu.c Wed Sep 5 12:41:17 2001 +++ linux/arch/sparc64/kernel/pci_iommu.c Wed Sep 5 12:42:08 2001 @@ -378,7 +378,8 @@ ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); #ifdef DEBUG_PCI_IOMMU if (iopte_val(*base) == IOPTE_INVALID) - printk("pci_unmap_single called on non-mapped region %08x,%08x from %016lx\n", bus_addr, sz, __builtin_return_address(0)); + printk("pci_unmap_single called on non-mapped region %08x,%08x from %016lx\n", + bus_addr, sz, __builtin_return_address(0)); #endif bus_addr &= IO_PAGE_MASK; @@ -423,18 +424,25 @@ spin_unlock_irqrestore(&iommu->lock, flags); } -static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, unsigned long iopte_protection) +#define SG_ENT_PHYS_ADDRESS(SG) \ + ((SG)->address ? \ + __pa((SG)->address) : \ + (__pa(page_address((SG)->page)) + (SG)->offset)) + +static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, + int nused, int nelems, unsigned long iopte_protection) { struct scatterlist *dma_sg = sg; + struct scatterlist *sg_end = sg + nelems; int i; for (i = 0; i < nused; i++) { unsigned long pteval = ~0UL; u32 dma_npages; - dma_npages = ((dma_sg->dvma_address & (IO_PAGE_SIZE - 1UL)) + - dma_sg->dvma_length + - ((u32)(IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT; + dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) + + dma_sg->dma_length + + ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT; do { unsigned long offset; signed int len; @@ -447,7 +455,7 @@ for (;;) { unsigned long tmp; - tmp = (unsigned long) __pa(sg->address); + tmp = SG_ENT_PHYS_ADDRESS(sg); len = sg->length; if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) { pteval = tmp & IO_PAGE_MASK; @@ -479,10 +487,11 @@ * adjusting pteval along the way. Stop when we * detect a page crossing event. 
*/ - while ((pteval << (64 - IO_PAGE_SHIFT)) != 0UL && - pteval == __pa(sg->address) && + while (sg < sg_end && + (pteval << (64 - IO_PAGE_SHIFT)) != 0UL && + (pteval == SG_ENT_PHYS_ADDRESS(sg)) && ((pteval ^ - (__pa(sg->address) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { + (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { pteval += sg->length; sg++; } @@ -511,8 +520,13 @@ /* Fast path single entry scatterlists. */ if (nelems == 1) { - sglist->dvma_address = pci_map_single(pdev, sglist->address, sglist->length, direction); - sglist->dvma_length = sglist->length; + sglist->dma_address = + pci_map_single(pdev, + (sglist->address ? + sglist->address : + (page_address(sglist->page) + sglist->offset)), + sglist->length, direction); + sglist->dma_length = sglist->length; return 1; } @@ -540,8 +554,8 @@ used = nelems; sgtmp = sglist; - while (used && sgtmp->dvma_length) { - sgtmp->dvma_address += dma_base; + while (used && sgtmp->dma_length) { + sgtmp->dma_address += dma_base; sgtmp++; used--; } @@ -559,7 +573,7 @@ iopte_protection = IOPTE_CONSISTENT(ctx); if (direction != PCI_DMA_TODEVICE) iopte_protection |= IOPTE_WRITE; - fill_sg (base, sglist, used, iopte_protection); + fill_sg (base, sglist, used, nelems, iopte_protection); #ifdef VERIFY_SG verify_sglist(sglist, nelems, base, npages); #endif @@ -591,20 +605,20 @@ iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - bus_addr = sglist->dvma_address & IO_PAGE_MASK; + bus_addr = sglist->dma_address & IO_PAGE_MASK; for (i = 1; i < nelems; i++) - if (sglist[i].dvma_length == 0) + if (sglist[i].dma_length == 0) break; i--; - npages = (IO_PAGE_ALIGN(sglist[i].dvma_address + sglist[i].dvma_length) - bus_addr) >> IO_PAGE_SHIFT; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; base = iommu->page_table + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); #ifdef DEBUG_PCI_IOMMU if (iopte_val(*base) == IOPTE_INVALID) - printk("pci_unmap_sg called on non-mapped region %08x,%d from %016lx\n", sglist->dvma_address, nelems, __builtin_return_address(0)); + printk("pci_unmap_sg called on non-mapped region %016lx,%d from %016lx\n", sglist->dma_address, nelems, __builtin_return_address(0)); #endif spin_lock_irqsave(&iommu->lock, flags); @@ -616,7 +630,7 @@ /* Step 1: Kick data out of streaming buffers if necessary. 
*/ if (strbuf->strbuf_enabled) { - u32 vaddr = bus_addr; + u32 vaddr = (u32) bus_addr; PCI_STC_FLUSHFLAG_INIT(strbuf); if (strbuf->strbuf_ctxflush && @@ -735,7 +749,7 @@ iopte_t *iopte; iopte = iommu->page_table + - ((sglist[0].dvma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + ((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT); ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; } @@ -754,13 +768,13 @@ unsigned long i, npages; u32 bus_addr; - bus_addr = sglist[0].dvma_address & IO_PAGE_MASK; + bus_addr = sglist[0].dma_address & IO_PAGE_MASK; for(i = 1; i < nelems; i++) - if (!sglist[i].dvma_length) + if (!sglist[i].dma_length) break; i--; - npages = (IO_PAGE_ALIGN(sglist[i].dvma_address + sglist[i].dvma_length) - bus_addr) >> IO_PAGE_SHIFT; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE) pci_iommu_write(strbuf->strbuf_pflush, bus_addr); } @@ -774,10 +788,10 @@ spin_unlock_irqrestore(&iommu->lock, flags); } -int pci_dma_supported(struct pci_dev *pdev, dma_addr_t device_mask) +int pci_dma_supported(struct pci_dev *pdev, u64 device_mask) { struct pcidev_cookie *pcp = pdev->sysdata; - u32 dma_addr_mask; + u64 dma_addr_mask; if (pdev == NULL) { dma_addr_mask = 0xffffffff; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/sbus.c linux/arch/sparc64/kernel/sbus.c --- /opt/kernel/linux-2.4.10-pre4/arch/sparc64/kernel/sbus.c Wed Sep 5 12:41:17 2001 +++ linux/arch/sparc64/kernel/sbus.c Wed Sep 5 12:42:08 2001 @@ -376,18 +376,24 @@ spin_unlock_irqrestore(&iommu->lock, flags); } -static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, unsigned long iopte_bits) +#define SG_ENT_PHYS_ADDRESS(SG) \ + ((SG)->address ? \ + __pa((SG)->address) : \ + (__pa(page_address((SG)->page)) + (SG)->offset)) + +static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, unsigned long iopte_bits) { struct scatterlist *dma_sg = sg; + struct scatterlist *sg_end = sg + nelems; int i; for (i = 0; i < nused; i++) { unsigned long pteval = ~0UL; u32 dma_npages; - dma_npages = ((dma_sg->dvma_address & (IO_PAGE_SIZE - 1UL)) + - dma_sg->dvma_length + - ((u32)(IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT; + dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) + + dma_sg->dma_length + + ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT; do { unsigned long offset; signed int len; @@ -400,7 +406,7 @@ for (;;) { unsigned long tmp; - tmp = (unsigned long) __pa(sg->address); + tmp = (unsigned long) SG_ENT_PHYS_ADDRESS(sg); len = sg->length; if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) { pteval = tmp & IO_PAGE_MASK; @@ -432,10 +438,11 @@ * adjusting pteval along the way. Stop when we * detect a page crossing event. */ - while ((pteval << (64 - IO_PAGE_SHIFT)) != 0UL && - pteval == __pa(sg->address) && + while (sg < sg_end && + (pteval << (64 - IO_PAGE_SHIFT)) != 0UL && + (pteval == SG_ENT_PHYS_ADDRESS(sg)) && ((pteval ^ - (__pa(sg->address) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { + (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { pteval += sg->length; sg++; } @@ -461,8 +468,13 @@ /* Fast path single entry scatterlists. */ if (nents == 1) { - sg->dvma_address = sbus_map_single(sdev, sg->address, sg->length, dir); - sg->dvma_length = sg->length; + sg->dma_address = + sbus_map_single(sdev, + (sg->address ? 
+ sg->address : + (page_address(sg->page) + sg->offset)), + sg->length, dir); + sg->dma_length = sg->length; return 1; } @@ -478,8 +490,8 @@ sgtmp = sg; used = nents; - while (used && sgtmp->dvma_length) { - sgtmp->dvma_address += dma_base; + while (used && sgtmp->dma_length) { + sgtmp->dma_address += dma_base; sgtmp++; used--; } @@ -489,7 +501,7 @@ if (dir != SBUS_DMA_TODEVICE) iopte_bits |= IOPTE_WRITE; - fill_sg(iopte, sg, used, iopte_bits); + fill_sg(iopte, sg, used, nents, iopte_bits); #ifdef VERIFY_SG verify_sglist(sg, nents, iopte, npages); #endif @@ -512,17 +524,17 @@ /* Fast path single entry scatterlists. */ if (nents == 1) { - sbus_unmap_single(sdev, sg->dvma_address, sg->dvma_length, direction); + sbus_unmap_single(sdev, sg->dma_address, sg->dma_length, direction); return; } - dvma_base = sg[0].dvma_address & IO_PAGE_MASK; + dvma_base = sg[0].dma_address & IO_PAGE_MASK; for (i = 0; i < nents; i++) { - if (sg[i].dvma_length == 0) + if (sg[i].dma_length == 0) break; } i--; - size = IO_PAGE_ALIGN(sg[i].dvma_address + sg[i].dvma_length) - dvma_base; + size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - dvma_base; iommu = sdev->bus->iommu; spin_lock_irqsave(&iommu->lock, flags); @@ -550,13 +562,13 @@ u32 base; int i; - base = sg[0].dvma_address & IO_PAGE_MASK; + base = sg[0].dma_address & IO_PAGE_MASK; for (i = 0; i < nents; i++) { - if (sg[i].dvma_length == 0) + if (sg[i].dma_length == 0) break; } i--; - size = IO_PAGE_ALIGN(sg[i].dvma_address + sg[i].dvma_length) - base; + size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; spin_lock_irqsave(&iommu->lock, flags); strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/acorn/block/mfmhd.c linux/drivers/acorn/block/mfmhd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/acorn/block/mfmhd.c Sun Aug 12 19:38:48 2001 +++ linux/drivers/acorn/block/mfmhd.c Wed Sep 5 11:13:21 2001 @@ -932,26 +932,25 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; -#ifdef DEBUG - /*if ((dev>>6)==1) */ console_printf("mfm_request: raw vals: dev=%d (block=512 bytes) block=%d nblocks=%d\n", dev, block, nsect); -#endif - if (dev >= (mfm_drives << 6) || - block >= mfm[dev].nr_sects || ((block+nsect) > mfm[dev].nr_sects)) { - if (dev >= (mfm_drives << 6)) - printk("mfm: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); + + if (dev >= (mfm_drives << 6) || (dev & 0x3f) || + block >= mfm[dev].nr_sects || + (block+nsect > mfm[dev].nr_sects)) { + if (dev >= (mfm_drives << 6) || (dev & 0x3f)) + printk("mfm: bad minor number: device=%s\n", + kdevname(CURRENT->rq_dev)); else - printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", (dev >> 6)+'a', - block, nsect, mfm[dev].nr_sects); + printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", + (dev >> 6)+'a', block, nsect, + mfm[dev].nr_sects); printk("mfm: continue 1\n"); end_request(0); Busy = 0; continue; } - block += mfm[dev].start_sect; - - /* DAG: Linux doesn't cope with this - even though it has an array telling - it the hardware block size - silly */ + /* DAG: Linux doesn't cope with this - even though it has + an array telling it the hardware block size - silly */ block <<= 1; /* Now in 256 byte sectors */ nsect <<= 1; /* Ditto */ @@ -1180,22 +1179,21 @@ static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg) { - struct hd_geometry *geo = (struct hd_geometry *) arg; kdev_t dev; - int device, major, minor, err; + int device; if 
(!inode || !(dev = inode->i_rdev)) return -EINVAL; - major = MAJOR(dev); - minor = MINOR(dev); - - device = DEVICE_NR(MINOR(inode->i_rdev)), err; + device = DEVICE_NR(MINOR(inode->i_rdev)); if (device >= mfm_drives) return -EINVAL; switch (cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *) arg; + if (!arg) return -EINVAL; if (put_user (mfm_info[device].heads, &geo->heads)) @@ -1204,31 +1202,21 @@ return -EFAULT; if (put_user (mfm_info[device].cylinders, &geo->cylinders)) return -EFAULT; - if (put_user (mfm[minor].start_sect, &geo->start)) + if (put_user (get_start_sect(inode->i_rdev), &geo->start)) return -EFAULT; return 0; - - case BLKGETSIZE: - return put_user (mfm[minor].nr_sects, (long *)arg); - - case BLKFRASET: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - max_readahead[major][minor] = arg; - return 0; - - case BLKFRAGET: - return put_user(max_readahead[major][minor], (long *) arg); - - case BLKSECTGET: - return put_user(max_sectors[major][minor], (long *) arg); + } case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; return mfm_reread_partitions(dev); + case BLKGETSIZE: + case BLKSECTGET: case BLKFLSBUF: + case BLKFRASET: + case BLKFRAGET: case BLKROSET: case BLKROGET: case BLKRASET: @@ -1294,8 +1282,10 @@ if ((heads < 1) || (mfm_info[drive].cylinders > 1024)) { printk("mfm%c: Insane disc shape! Setting to 512/4/32\n",'a' + (dev >> 6)); - /* These values are fairly arbitary, but are there so that if your - * lucky you can pick apart your disc to find out what is going on - + /* + * These values are fairly arbitary, but are there so + * that if you're lucky you can pick apart your disc + * to find out what is going on - * I reckon these figures won't hurt MOST drives */ mfm_info[drive].sectors = 32; @@ -1306,7 +1296,8 @@ mfm_specify (); mfm_geometry (drive); mfm[drive << 6].start_sect = 0; - mfm[drive << 6].nr_sects = mfm_info[drive].cylinders * mfm_info[drive].heads * mfm_info[drive].sectors / 2; + mfm[drive << 6].nr_sects = mfm_info[drive].cylinders + * mfm_info[drive].heads * mfm_info[drive].sectors / 2; } } @@ -1347,7 +1338,8 @@ mfm_drives == 1 ? "" : "s"); mfm_gendisk.nr_real = mfm_drives; - if (request_irq(mfm_irq, mfm_interrupt_handler, SA_INTERRUPT, "MFM harddisk", NULL)) + if (request_irq(mfm_irq, mfm_interrupt_handler, SA_INTERRUPT, + "MFM harddisk", NULL)) printk("mfm: unable to get IRQ%d\n", mfm_irq); if (mfm_irqenable) @@ -1450,10 +1442,7 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB?) 
read ahread */ -#ifndef MODULE - mfm_gendisk.next = gendisk_head; - gendisk_head = &mfm_gendisk; -#endif + add_gendisk(&mfm_gendisk); Busy = 0; lastspecifieddrive = -1; @@ -1469,8 +1458,9 @@ */ static int mfm_reread_partitions(kdev_t dev) { - unsigned int start, i, maxp, target = DEVICE_NR(MINOR(dev)); + unsigned int target = DEVICE_NR(MINOR(dev)); unsigned long flags; + int res; save_flags_cli(flags); if (mfm_info[target].busy || mfm_info[target].access_count > 1) { @@ -1480,24 +1470,19 @@ mfm_info[target].busy = 1; restore_flags (flags); - maxp = mfm_gendisk.max_p; - start = target << mfm_gendisk.minor_shift; - - for (i = maxp - 1; i >= 0; i--) { - int minor = start + i; - invalidate_device (MKDEV(MAJOR_NR, minor), 1); - mfm_gendisk.part[minor].start_sect = 0; - mfm_gendisk.part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; /* Divide by 2, since sectors are 2 times smaller than usual ;-) */ - grok_partitions(&mfm_gendisk, target, 1<<6, mfm_info[target].heads * + grok_partitions(dev, mfm_info[target].heads * mfm_info[target].cylinders * mfm_info[target].sectors / 2); +leave: mfm_info[target].busy = 0; wake_up (&mfm_wait_open); - return 0; + return res; } #ifdef MODULE @@ -1512,6 +1497,7 @@ outw (0, mfm_irqenable); /* Required to enable IRQs from MFM podule */ free_irq(mfm_irq, NULL); unregister_blkdev(MAJOR_NR, "mfm"); + del_gendisk(&mfm_gendisk); if (ecs) ecard_release(ecs); if (mfm_addr) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/DAC960.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/block/DAC960.c Wed Sep 5 11:13:21 2001 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -302,9 +303,9 @@ static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } @@ -1806,76 +1807,6 @@ /* - DAC960_BackMergeFunction is the Back Merge Function for the DAC960 driver. -*/ - -static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (Request->bhtail->b_data + Request->bhtail->b_size == BufferHeader->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_FrontMergeFunction is the Front Merge Function for the DAC960 driver. -*/ - -static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (BufferHeader->b_data + BufferHeader->b_size == Request->bh->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_MergeRequestsFunction is the Merge Requests Function for the - DAC960 driver. 
-*/ - -static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - IO_Request_T *NextRequest, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - int TotalSegments = Request->nr_segments + NextRequest->nr_segments; - if (Request->bhtail->b_data + Request->bhtail->b_size - == NextRequest->bh->b_data) - TotalSegments--; - if (TotalSegments > MaxSegments || - TotalSegments > Controller->DriverScatterGatherLimit) - return false; - Request->nr_segments = TotalSegments; - return true; -} - - -/* DAC960_RegisterBlockDevice registers the Block Device structures associated with Controller. */ @@ -1883,7 +1814,6 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) { int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - GenericDiskInfo_T *GenericDiskInfo; RequestQueue_T *RequestQueue; int MinorNumber; /* @@ -1900,27 +1830,22 @@ Initialize the I/O Request Queue. */ RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber); - blk_init_queue(RequestQueue, DAC960_RequestFunction); + blk_init_queue(RequestQueue, DAC960_RequestFunction, "dac960"); blk_queue_headactive(RequestQueue, 0); - RequestQueue->back_merge_fn = DAC960_BackMergeFunction; - RequestQueue->front_merge_fn = DAC960_FrontMergeFunction; - RequestQueue->merge_requests_fn = DAC960_MergeRequestsFunction; RequestQueue->queuedata = Controller; + RequestQueue->max_segments = Controller->DriverScatterGatherLimit; + RequestQueue->max_sectors = Controller->MaxBlocksPerCommand; Controller->RequestQueue = RequestQueue; /* Initialize the Disk Partitions array, Partition Sizes array, Block Sizes array, and Max Sectors per Request array. */ for (MinorNumber = 0; MinorNumber < DAC960_MinorCount; MinorNumber++) - { Controller->BlockSizes[MinorNumber] = BLOCK_SIZE; - Controller->MaxSectorsPerRequest[MinorNumber] = - Controller->MaxBlocksPerCommand; - } + Controller->GenericDiskInfo.part = Controller->DiskPartitions; Controller->GenericDiskInfo.sizes = Controller->PartitionSizes; blksize_size[MajorNumber] = Controller->BlockSizes; - max_sectors[MajorNumber] = Controller->MaxSectorsPerRequest; /* Initialize Read Ahead to 128 sectors. */ @@ -1936,15 +1861,10 @@ Controller->GenericDiskInfo.next = NULL; Controller->GenericDiskInfo.fops = &DAC960_BlockDeviceOperations; /* - Install the Generic Disk Information structure at the end of the list. + Install the Generic Disk Information structure. */ - if ((GenericDiskInfo = gendisk_head) != NULL) - { - while (GenericDiskInfo->next != NULL) - GenericDiskInfo = GenericDiskInfo->next; - GenericDiskInfo->next = &Controller->GenericDiskInfo; - } - else gendisk_head = &Controller->GenericDiskInfo; + add_gendisk(&Controller->GenericDiskInfo); + /* Indicate the Block Device Registration completed successfully, */ @@ -1969,27 +1889,16 @@ */ blk_cleanup_queue(BLK_DEFAULT_QUEUE(MajorNumber)); /* + Remove the Generic Disk Information structure from the list. + */ + del_gendisk(&Controller->GenericDiskInfo); + /* Remove the Disk Partitions array, Partition Sizes array, Block Sizes array, Max Sectors per Request array, and Max Segments per Request array. */ Controller->GenericDiskInfo.part = NULL; Controller->GenericDiskInfo.sizes = NULL; - blk_size[MajorNumber] = NULL; - blksize_size[MajorNumber] = NULL; - max_sectors[MajorNumber] = NULL; - /* - Remove the Generic Disk Information structure from the list. 
- */ - if (gendisk_head != &Controller->GenericDiskInfo) - { - GenericDiskInfo_T *GenericDiskInfo = gendisk_head; - while (GenericDiskInfo != NULL && - GenericDiskInfo->next != &Controller->GenericDiskInfo) - GenericDiskInfo = GenericDiskInfo->next; - if (GenericDiskInfo != NULL) - GenericDiskInfo->next = GenericDiskInfo->next->next; - } - else gendisk_head = Controller->GenericDiskInfo.next; + blk_clear(MajorNumber); } @@ -2627,23 +2536,24 @@ CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2717,23 +2627,24 @@ .ScatterGatherSegments; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2761,7 +2672,7 @@ while (true) { if (list_empty(RequestQueueHead)) return false; - Request = blkdev_entry_next_request(RequestQueueHead); + Request = elv_next_request(RequestQueue); Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -2772,12 +2683,10 @@ else Command->CommandType = DAC960_WriteCommand; Command->Completion = Request->waiting; Command->LogicalDriveNumber = DAC960_LogicalDriveNumber(Request->rq_dev); - Command->BlockNumber = - Request->sector - + Controller->GenericDiskInfo.part[MINOR(Request->rq_dev)].start_sect; + Command->BlockNumber = Request->sector; Command->BlockCount = Request->nr_sectors; Command->SegmentCount = Request->nr_segments; - Command->BufferHeader = Request->bh; + Command->BufferHeader = Request->bio; Command->RequestBuffer = Request->buffer; blkdev_dequeue_request(Request); blkdev_release_request(Request); @@ -2830,8 +2739,10 @@ static inline 
void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader, boolean SuccessfulIO) { - blk_finished_io(BufferHeader->b_size >> 9); - BufferHeader->b_end_io(BufferHeader, SuccessfulIO); + if (SuccessfulIO) + BufferHeader->bi_flags |= BIO_UPTODATE; + blk_finished_io(bio_sectors(BufferHeader)); + BufferHeader->bi_end_io(BufferHeader); } @@ -2885,13 +2796,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %d..%d\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -2918,8 +2829,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -2933,7 +2844,7 @@ else if ((CommandStatus == DAC960_V1_IrrecoverableDataError || CommandStatus == DAC960_V1_BadDataEncountered) && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; @@ -2947,10 +2858,10 @@ Command->CommandType = DAC960_WriteRetryCommand; CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; } - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); DAC960_QueueCommand(Command); return; } @@ -2963,8 +2874,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -2978,8 +2889,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -2996,14 +2907,14 @@ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(NextBufferHeader->b_data); + Virtual_to_Bus32(bio_data(NextBufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3662,13 +3573,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %d..%d\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -3922,8 +3833,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -3937,19 +3848,19 @@ else if (Command->V2.RequestSense.SenseKey == DAC960_SenseKey_MediumError && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { if (CommandType == DAC960_ReadCommand) Command->CommandType = DAC960_ReadRetryCommand; else Command->CommandType = DAC960_WriteRetryCommand; - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->SCSI_10.CommandControlBits .AdditionalScatterGatherListMemory = false; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentByteCount = CommandMailbox->SCSI_10.DataTransferSize; @@ -3967,8 +3878,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -3982,8 +3893,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -3998,16 +3909,16 @@ if (NextBufferHeader != NULL) { Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentDataPointer = - Virtual_to_Bus64(NextBufferHeader->b_data); + Virtual_to_Bus64(bio_data(NextBufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentByteCount = @@ -5035,7 +4946,8 @@ int LogicalDriveNumber = DAC960_LogicalDriveNumber(Inode->i_rdev); DiskGeometry_T Geometry, *UserGeometry; DAC960_Controller_T *Controller; - int PartitionNumber; + int res; + if (File != NULL && (File->f_flags & O_NONBLOCK)) return DAC960_UserIOCTL(Inode, File, Request, Argument); if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5084,16 +4996,10 @@ LogicalDeviceInfo->ConfigurableDeviceSizeIn512ByteBlocksOrMB / (Geometry.heads * Geometry.sectors); } - Geometry.start = - Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)].start_sect; + Geometry.start = get_start_sect(Inode->i_rdev); return (copy_to_user(UserGeometry, &Geometry, sizeof(DiskGeometry_T)) ? -EFAULT : 0); case BLKGETSIZE: - /* Get Device Size. */ - if ((long *) Argument == NULL) return -EINVAL; - return put_user(Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)] - .nr_sects, - (long *) Argument); case BLKRAGET: case BLKRASET: case BLKFLSBUF: @@ -5106,46 +5012,17 @@ if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (Controller->LogicalDriveUsageCount[LogicalDriveNumber] > 1) return -EBUSY; - for (PartitionNumber = 0; - PartitionNumber < DAC960_MaxPartitions; - PartitionNumber++) - { - KernelDevice_T Device = DAC960_KernelDevice(ControllerNumber, - LogicalDriveNumber, - PartitionNumber); - int MinorNumber = DAC960_MinorNumber(LogicalDriveNumber, - PartitionNumber); - if (Controller->GenericDiskInfo.part[MinorNumber].nr_sects == 0) - continue; - /* - Flush all changes and invalidate buffered state. - */ - invalidate_device(Device, 1); - /* - Clear existing partition sizes. - */ - if (PartitionNumber > 0) - { - Controller->GenericDiskInfo.part[MinorNumber].start_sect = 0; - Controller->GenericDiskInfo.part[MinorNumber].nr_sects = 0; - } - /* - Reset the Block Size so that the partition table can be read. 
- */ - set_blocksize(Device, BLOCK_SIZE); - } + res = wipe_partitions(Inode->i_rdev); + if (res) /* nothing */ + return res; + if (Controller->FirmwareType == DAC960_V1_Controller) - grok_partitions(&Controller->GenericDiskInfo, - LogicalDriveNumber, - DAC960_MaxPartitions, - Controller->V1.LogicalDriveInformation - [LogicalDriveNumber] - .LogicalDriveSize); + grok_partitions(Inode->i_rdev, + Controller->V1.LogicalDriveInformation + [LogicalDriveNumber] + .LogicalDriveSize); else - grok_partitions( - &Controller->GenericDiskInfo, - LogicalDriveNumber, - DAC960_MaxPartitions, + grok_partitions(Inode->i_rdev, Controller->V2.LogicalDeviceInformation[LogicalDriveNumber] ->ConfigurableDeviceSizeIn512ByteBlocksOrMB); return 0; @@ -5268,11 +5145,11 @@ while (Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID]) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, !Controller->V1.DirectCommandActive [DCDB.Channel][DCDB.TargetID]); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID] = true; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/DAC960.h linux/drivers/block/DAC960.h --- /opt/kernel/linux-2.4.10-pre4/drivers/block/DAC960.h Mon Aug 6 19:34:38 2001 +++ linux/drivers/block/DAC960.h Tue Aug 7 10:32:32 2001 @@ -2136,7 +2136,7 @@ of the Linux Kernel and I/O Subsystem. */ -typedef struct buffer_head BufferHeader_T; +typedef struct bio BufferHeader_T; typedef struct file File_T; typedef struct block_device_operations BlockDeviceOperations_T; typedef struct completion Completion_T; @@ -2414,7 +2414,6 @@ DiskPartition_T DiskPartitions[DAC960_MinorCount]; int PartitionSizes[DAC960_MinorCount]; int BlockSizes[DAC960_MinorCount]; - int MaxSectorsPerRequest[DAC960_MinorCount]; unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; unsigned char UserStatusBuffer[DAC960_UserMessageSize]; } @@ -2448,7 +2447,7 @@ void DAC960_AcquireControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2460,13 +2459,13 @@ void DAC960_ReleaseControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } /* DAC960_AcquireControllerLockRF acquires exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. */ static inline @@ -2478,7 +2477,7 @@ /* DAC960_ReleaseControllerLockRF releases exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. 
*/ static inline @@ -2497,7 +2496,7 @@ void DAC960_AcquireControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2510,7 +2509,7 @@ void DAC960_ReleaseControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/Makefile linux/drivers/block/Makefile --- /opt/kernel/linux-2.4.10-pre4/drivers/block/Makefile Fri Dec 29 23:07:21 2000 +++ linux/drivers/block/Makefile Fri Aug 3 12:04:42 2001 @@ -10,9 +10,9 @@ O_TARGET := block.o -export-objs := ll_rw_blk.o blkpg.o loop.o DAC960.o +export-objs := elevator.o ll_rw_blk.o blkpg.o loop.o DAC960.o -obj-y := ll_rw_blk.o blkpg.o genhd.o elevator.o +obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/acsi.c linux/drivers/block/acsi.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/acsi.c Wed Aug 15 10:22:15 2001 +++ linux/drivers/block/acsi.c Wed Sep 5 11:13:21 2001 @@ -1011,7 +1011,6 @@ goto repeat; } - block += acsi_part[dev].start_sect; target = acsi_info[DEVICE_NR(dev)].target; lun = acsi_info[DEVICE_NR(dev)].lun; @@ -1123,7 +1122,7 @@ put_user( 64, &geo->heads ); put_user( 32, &geo->sectors ); put_user( acsi_info[dev].size >> 11, &geo->cylinders ); - put_user( acsi_part[MINOR(inode->i_rdev)].start_sect, &geo->start ); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; } @@ -1134,10 +1133,7 @@ put_user( 0, &((Scsi_Idlun *) arg)->host_unique_id ); return 0; - case BLKGETSIZE: /* Return device size */ - return put_user(acsi_part[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -1792,8 +1788,7 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ - acsi_gendisk.next = gendisk_head; - gendisk_head = &acsi_gendisk; + add_gendisk(&acsi_gendisk); #ifdef CONFIG_ATARI_SLM err = slm_init(); @@ -1817,8 +1812,6 @@ void cleanup_module(void) { - struct gendisk ** gdp; - del_timer( &acsi_timer ); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); atari_stram_free( acsi_buffer ); @@ -1826,13 +1819,7 @@ if (devfs_unregister_blkdev( MAJOR_NR, "ad" ) != 0) printk( KERN_ERR "acsi: cleanup_module failed\n"); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &acsi_gendisk) - break; - if (!*gdp) - printk( KERN_ERR "acsi: entry in disk chain missing!\n" ); - else - *gdp = (*gdp)->next; + del_gendisk(&acsi_gendisk); } #endif @@ -1863,7 +1850,7 @@ { int device; struct gendisk * gdev; - int max_p, start, i; + int res; struct acsi_info_struct *aip; device = DEVICE_NR(MINOR(dev)); @@ -1878,16 +1865,7 @@ DEVICE_BUSY = 1; sti(); - max_p = gdev->max_p; - start = device << gdev->minor_shift; - - for( i = max_p - 1; i >= 0 ; i-- ) { - if (gdev->part[start + i].nr_sects != 0) { - invalidate_device(MKDEV(MAJOR_NR, start + i), 1); - gdev->part[start + i].nr_sects = 0; - } - gdev->part[start+i].start_sect = 0; - }; + res = wipe_partitions(dev); stdma_lock( NULL, NULL ); @@ -1902,12 +1880,13 @@ ENABLE_IRQ(); stdma_release(); - - 
grok_partitions(gdev, device, (aip->type==HARDDISK)?1<<4:1, aip->size); + + if (!res) + grok_partitions(dev, aip->size); DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/amiflop.c linux/drivers/block/amiflop.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/amiflop.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/amiflop.c Fri Aug 3 12:04:42 2001 @@ -1890,10 +1890,9 @@ free_irq(IRQ_AMIGA_DSKBLK, NULL); custom.dmacon = DMAF_DISK; /* disable DMA */ amiga_chip_free(raw_buf); - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); release_mem_region(CUSTOM_PHYSADDR+0x20, 8); unregister_blkdev(MAJOR_NR, "fd"); + blk_clear(MAJOR_NR); } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/blkpg.c linux/drivers/block/blkpg.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/blkpg.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/block/blkpg.c Wed Sep 5 11:13:21 2001 @@ -54,17 +54,6 @@ * Note that several drives may have the same major. */ -/* a linear search, superfluous when dev is a pointer */ -static struct gendisk *get_gendisk(kdev_t dev) { - struct gendisk *g; - int m = MAJOR(dev); - - for (g = gendisk_head; g; g = g->next) - if (g->major == m) - break; - return g; -} - /* * Add a partition. * @@ -206,8 +195,13 @@ int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) { + unsigned long longval; + struct gendisk *g; int intval; + if (!dev) + return -EINVAL; + switch (cmd) { case BLKROSET: if (!capable(CAP_SYS_ADMIN)) @@ -216,6 +210,7 @@ return -EFAULT; set_device_ro(dev, intval); return 0; + case BLKROGET: intval = (is_read_only(dev) != 0); return put_user(intval, (int *)(arg)); @@ -223,20 +218,47 @@ case BLKRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if(!dev || arg > 0xff) + if(arg > 0xff) return -EINVAL; read_ahead[MAJOR(dev)] = arg; return 0; + case BLKRAGET: if (!arg) return -EINVAL; return put_user(read_ahead[MAJOR(dev)], (long *) arg); + case BLKFRASET: + { + int *mr; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + mr[MINOR(dev)] = arg; + return 0; + } + + case BLKFRAGET: + { + int *mr; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + return put_user(mr[MINOR(dev)], (long *) arg); + } + + case BLKSECTGET: + { + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -ENODEV; + return put_user(q->max_sectors, (unsigned short *) arg); + } + case BLKFLSBUF: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!dev) - return -EINVAL; fsync_dev(dev); invalidate_buffers(dev); return 0; @@ -246,7 +268,6 @@ intval = get_hardsect_size(dev); return put_user(intval, (int *) arg); -#if 0 case BLKGETSIZE: /* Today get_gendisk() requires a linear scan; add this when dev has pointer type. 
*/ @@ -256,7 +277,6 @@ else longval = g->part[MINOR(dev)].nr_sects; return put_user(longval, (long *) arg); -#endif #if 0 case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) @@ -267,12 +287,35 @@ case BLKPG: return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg); + /* + * deprecated, use the /proc/iosched interface instead + */ case BLKELVGET: - return blkelvget_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); case BLKELVSET: - return blkelvset_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); + return -ENOTTY; + + case BLKHASHPROF: { +#ifdef BIO_HASH_PROFILING + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -EINVAL; + if (copy_to_user((struct bio_hash_stats *) arg, &q->queue_hash.st, sizeof(struct bio_hash_stats))) + return -EFAULT; +#endif + return 0; + } + + case BLKHASHCLEAR: { +#ifdef BIO_HASH_PROFILING + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -EINVAL; + spin_lock_irq(&q->queue_lock); + memset(&q->queue_hash.st, 0, sizeof(struct bio_hash_stats)); + spin_unlock_irq(&q->queue_lock); +#endif + return 0; + } case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/cciss.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/block/cciss.c Thu Sep 6 09:47:34 2001 @@ -83,7 +83,7 @@ #define MAX_CONFIG_WAIT 1000 #define READ_AHEAD 128 -#define NR_CMDS 128 /* #commands that can be outstanding */ +#define NR_CMDS 384 /* #commands that can be outstanding */ #define MAX_CTLR 8 #define CCISS_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ @@ -145,7 +145,6 @@ " IRQ: %d\n" " Logical drives: %d\n" " Current Q depth: %d\n" - " Current # commands on controller %d\n" " Max Q depth since init: %d\n" " Max # commands on controller since init: %d\n" " Max SG entries since init: %d\n\n", @@ -156,8 +155,7 @@ (unsigned long)h->vaddr, (unsigned int)h->intr, h->num_luns, - h->Qdepth, h->commands_outstanding, - h->maxQsinceinit, h->max_outstanding, h->maxSG); + h->Qdepth, h->maxQsinceinit, h->max_outstanding, h->maxSG); pos += size; len += size; for(i=0; inum_luns; i++) { @@ -235,7 +233,7 @@ i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); if (i == NR_CMDS) return NULL; - } while(test_and_set_bit(i%32, h->cmd_pool_bits+(i/32)) != 0); + } while(test_and_set_bit(i & 31, h->cmd_pool_bits+(i/32)) != 0); #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss: using command buffer %d\n", i); #endif @@ -306,13 +304,10 @@ /* for each partition */ for(j=0; jblocksizes[(i<hardsizes[ (i<block_size; - } hba[ctlr]->gendisk.nr_real++; + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->block_size; } } /* @@ -377,8 +372,6 @@ { int ctlr = MAJOR(inode->i_rdev) - MAJOR_NR; int dsk = MINOR(inode->i_rdev) >> NWD_SHIFT; - int diskinfo[4]; - struct hd_geometry *geo = (struct hd_geometry *)arg; #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg); @@ -386,6 +379,10 @@ switch(cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *)arg; + int diskinfo[4]; + if (hba[ctlr]->drv[dsk].cylinders) { diskinfo[0] = hba[ctlr]->drv[dsk].heads; diskinfo[1] = hba[ctlr]->drv[dsk].sectors; @@ -393,18 +390,17 @@ } else { diskinfo[0] = 0xff; diskinfo[1] = 0x3f; - diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); } + diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); + } put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], 
&geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].start_sect, &geo->start); - return 0; - case BLKGETSIZE: - if (!arg) return -EINVAL; - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects, (long*)arg); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; + } case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); + case BLKGETSIZE: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -413,7 +409,7 @@ case BLKRASET: case BLKRAGET: case BLKPG: - return( blk_ioctl(inode->i_rdev, cmd, arg)); + return blk_ioctl(inode->i_rdev, cmd, arg); case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; @@ -455,16 +451,7 @@ // printk("cciss_ioctl: delay and count cannot be 0\n"); return( -EINVAL); } - spin_lock_irqsave(&io_request_lock, flags); - /* Can only safely update if no commands outstanding */ - if (c->commands_outstanding > 0 ) - { -// printk("cciss_ioctl: cannot change coalasing " -// "%d commands outstanding on controller\n", -// c->commands_outstanding); - spin_unlock_irqrestore(&io_request_lock, flags); - return(-EINVAL); - } + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ writel( intinfo.delay, &(c->cfgtable->HostWrite.CoalIntDelay)); @@ -480,7 +467,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -511,7 +498,7 @@ if (copy_from_user(NodeName, (void *) arg, sizeof( NodeName_type))) return -EFAULT; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ for(i=0;i<16;i++) @@ -527,7 +514,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -654,11 +641,11 @@ c->SG[0].Ext = 0; // we are not chaining } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* Wait for completion */ while(c->cmd_type != CMD_IOCTL_DONE) @@ -706,42 +693,32 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = MINOR(dev) >> NWD_SHIFT; ctlr = MAJOR(dev) - MAJOR_NR; gdev = &(hba[ctlr]->gendisk); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); return -EBUSY; } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (res) + goto leave; - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } /* setup 
partitions per disk */ - grok_partitions(gdev, target, MAX_PART, - hba[ctlr]->drv[target].nr_blocks); + grok_partitions(dev, hba[ctlr]->drv[target].nr_blocks); +leave: hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } static int frevalidate_logvol(kdev_t dev) @@ -772,15 +749,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -789,7 +766,6 @@ memset(hba[ctlr]->hd, 0, sizeof(struct hd_struct) * 256); memset(hba[ctlr]->sizes, 0, sizeof(int) * 256); memset(hba[ctlr]->blocksizes, 0, sizeof(int) * 256); - memset(hba[ctlr]->hardsizes, 0, sizeof(int) * 256); memset(hba[ctlr]->drv, 0, sizeof(drive_info_struct) * CISS_MAX_LUN); hba[ctlr]->gendisk.nr_real = 0; @@ -1085,11 +1061,11 @@ while(( c = h->reqQ) != NULL ) { /* can't do anything if fifo is full */ - if ((h->access.fifo_full(h))) - { - printk(KERN_WARNING "cciss: fifo full \n"); - return; + if ((h->access.fifo_full(h))) { + printk("cciss: fifo full\n"); + break; } + /* Get the frist entry from the Request Q */ removeQ(&(h->reqQ), c); h->Qdepth--; @@ -1102,17 +1078,16 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers( struct bio *bio, int status) { - struct buffer_head *xbh; + struct bio *xbh; - while(bh) - { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + while(bio) { + xbh = bio->bi_next; + bio->bi_next = NULL; + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, status); + bio = xbh; } } /* checks the status of the job and calls complete buffers to mark all @@ -1131,7 +1106,7 @@ { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, + pci_unmap_page(hba[cmd->ctlr]->pdev, temp64.val, cmd->SG[i].Len, (cmd->Request.Type.Direction == XFER_READ) ? 
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -1210,79 +1185,33 @@ status=0; } } - complete_buffers(cmd->bh, status); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + complete_buffers(cmd->bio, status); } /* * Get a request and submit it to the controller. - * Currently we do one request at a time. Ideally we would like to send - * everything to the controller on the first call, but there is a danger - * of holding the io_request_lock for to long. */ static void do_cciss_request(request_queue_t *q) { ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg, sect; - char *lastdataend; - struct buffer_head *bh; + unsigned long long lastdataend; + struct bio *bio; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + struct scatterlist tmp_sg[MAXSGENTRIES]; + int i, dir; - // Loop till the queue is empty if or it is plugged - while (1) - { - if (q->plugged || list_empty(queue_head)) { - start_io(h); - return; - } + if (blk_queue_plugged(q)) + goto startio; + +queue: + if (list_empty(queue_head)) + goto startio; - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > MAXSGENTRIES) BUG(); @@ -1291,18 +1220,15 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); - start_io(h); - return; + complete_buffers(creq->bio, 0); + goto startio; } if (( c = cmd_alloc(h, 1)) == NULL) - { - start_io(h); - return; - } + goto startio; + c->cmd_type = CMD_RWREQ; - bh = c->bh = creq->bh; + bio = c->bio = creq->bio; /* fill in the request */ log_unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -1317,43 +1243,43 @@ (creq->cmd == READ) ? XFER_READ: XFER_WRITE; c->Request.Timeout = 0; // Don't time out c->Request.CDB[0] = (creq->cmd == READ) ? 
CCISS_READ : CCISS_WRITE; - start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; + start_blk = creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); + if (bio == NULL) + panic("cciss: bio== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ - seg = 0; - lastdataend = NULL; - sect = 0; - while(bh) - { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) + seg = sect = 0; + lastdataend = ~0ULL; + while(bio) { + sect += bio_sectors(bio); + if (bio_to_phys(bio) == lastdataend) { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; - lastdataend += bh->b_size; - } else - { + tmp_sg[seg-1].length += bio_size(bio); + lastdataend += bio_size(bio); + } else { if (seg == MAXSGENTRIES) BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].length = bio_size(bio); + tmp_sg[seg].offset = bio_offset(bio); + tmp_sg[seg].page = bio_page(bio); + lastdataend = bio_to_phys(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* get the DMA records for the setup */ + if (c->Request.Type.Direction == XFER_READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->SG[i].Len = tmp_sg[i].length; + temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].offset, tmp_sg[i].length, + dir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1377,10 +1303,8 @@ c->Request.CDB[8]= sect & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; - blkdev_dequeue_request(creq); - /* * ehh, we can't really end the request here since it's not * even started yet. for now it shouldn't hurt though @@ -1394,7 +1318,10 @@ h->Qdepth++; if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - } // while loop + + goto queue; +startio: + start_io(h); } static void do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs) @@ -1413,7 +1340,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); while( h->access.intr_pending(h)) { while((a = h->access.command_completed(h)) != FIFO_EMPTY) @@ -1446,11 +1373,16 @@ } } } + /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); } /* * We cannot read the structure directly, for portablity we must use @@ -1872,7 +1804,18 @@ sprintf(hba[i]->devname, "cciss%d", i); hba[i]->ctlr = i; hba[i]->pdev = pdev; - + + /* configure PCI DMA stuff */ + if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) + printk("cciss: using DAC cycles\n"); + else if (!pci_set_dma_mask(pdev, 0xffffffff)) + printk("cciss: not using DAC cycles\n"); + else { + printk("cciss: no suitable DMA available\n"); + free_hba(i); + return -ENODEV; + } + if( register_blkdev(MAJOR_NR+i, hba[i]->devname, &cciss_fops)) { printk(KERN_ERR "cciss: Unable to get major number " @@ -1941,20 +1884,16 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_cciss_request); + blk_init_queue(q, do_cciss_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + q->max_segments = MAXSGENTRIES; + blk_queue_max_sectors(q, 512); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; - hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; hba[i]->gendisk.major_name = "cciss"; @@ -1965,8 +1904,7 @@ hba[i]->gendisk.nr_real = hba[i]->num_luns; /* Get on the disk list */ - hba[i]->gendisk.next = gendisk_head; - gendisk_head = &(hba[i]->gendisk); + add_gendisk(&(hba[i]->gendisk)); cciss_geninit(i); for(j=0; jdriver_data == NULL) { @@ -2005,23 +1942,11 @@ unregister_blkdev(MAJOR_NR+i, hba[i]->devname); remove_proc_entry(hba[i]->devname, proc_cciss); - /* remove it from the disk list */ - if (gendisk_head == &(hba[i]->gendisk)) - { - gendisk_head = hba[i]->gendisk.next; - } else - { - for(g=gendisk_head; g ; g=g->next) - { - if(g->next == &(hba[i]->gendisk)) - { - g->next = hba[i]->gendisk.next; - } - } - } - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), - hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); + del_gendisk(&(hba[i]->gendisk)); + + pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), + hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof( ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); kfree(hba[i]->cmd_pool_bits); @@ -2029,32 +1954,31 @@ } static struct pci_driver cciss_pci_driver = { - name: "cciss", - probe: cciss_init_one, - remove: cciss_remove_one, - id_table: cciss_pci_device_id, /* id_table */ + name: "cciss", + probe: cciss_init_one, + remove: cciss_remove_one, + id_table: cciss_pci_device_id, /* id_table */ }; /* -* This is it. Register the PCI driver information for the cards we control -* the OS will call our registered routines when it finds one of our cards. -*/ + * This is it. 
Register the PCI driver information for the cards we control + * the OS will call our registered routines when it finds one of our cards. + */ int __init cciss_init(void) { - printk(KERN_INFO DRIVER_NAME "\n"); + /* Register for out PCI devices */ if (pci_register_driver(&cciss_pci_driver) > 0 ) return 0; else return -ENODEV; - } +} EXPORT_NO_SYMBOLS; static int __init init_cciss_module(void) { - return ( cciss_init()); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/cciss.h linux/drivers/block/cciss.h --- /opt/kernel/linux-2.4.10-pre4/drivers/block/cciss.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss.h Thu Sep 6 13:30:30 2001 @@ -15,11 +15,6 @@ #define MAJOR_NR COMPAQ_CISS_MAJOR -struct my_sg { - int len; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -85,9 +80,8 @@ struct gendisk gendisk; // indexed by minor numbers struct hd_struct hd[256]; - int sizes[256]; + int sizes[256]; int blocksizes[256]; - int hardsizes[256]; }; /* Defining the diffent access_menthods */ @@ -247,5 +241,8 @@ char *product_name; struct access_method *access; }; + +#define CCISS_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif /* CCISS_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- /opt/kernel/linux-2.4.10-pre4/drivers/block/cciss_cmd.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss_cmd.h Fri Aug 3 12:04:42 2001 @@ -7,7 +7,7 @@ //general boundary defintions #define SENSEINFOBYTES 32//note that this value may vary between host implementations -#define MAXSGENTRIES 31 +#define MAXSGENTRIES 32 #define MAXREPLYQS 256 //Command Status value @@ -228,7 +228,7 @@ int cmd_type; struct _CommandList_struct *prev; struct _CommandList_struct *next; - struct buffer_head * bh; + struct bio * bio; } CommandList_struct; //Configuration Table Structure diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/cpqarray.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/block/cpqarray.c Wed Sep 5 13:06:25 2001 @@ -99,7 +99,6 @@ static struct hd_struct * ida; static int * ida_sizes; static int * ida_blocksizes; -static int * ida_hardsizes; static struct gendisk ida_gendisk[MAX_CTLR]; static struct proc_dir_entry *proc_array; @@ -144,7 +143,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -175,12 +174,11 @@ ida_sizes[(ctlr<nr_blks; - for(j=0; j<16; j++) { + for(j=0; j<16; j++) ida_blocksizes[(ctlr<blk_size; - } + + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->blk_size; ida_gendisk[ctlr].nr_real++; } @@ -311,7 +309,6 @@ void cleanup_module(void) { int i; - struct gendisk *g; char buff[4]; for(i=0; icmd_pool_dhandle); kfree(hba[i]->cmd_pool_bits); - if (gendisk_head == &ida_gendisk[i]) { - gendisk_head = ida_gendisk[i].next; - } else { - for(g=gendisk_head; g; g=g->next) { - if (g->next == &ida_gendisk[i]) { - g->next = ida_gendisk[i].next; - break; - } - } - } + del_gendisk(&ida_gendisk[i]); } remove_proc_entry("cpqarray", proc_root_driver); kfree(ida); 
kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -441,20 +387,9 @@ return(num_cntlrs_reg); } - ida_hardsizes = kmalloc(sizeof(int)*nr_ctlr*NWD*16, GFP_KERNEL); - if(ida_hardsizes==NULL) - { - kfree(ida); - kfree(ida_sizes); - kfree(ida_blocksizes); - printk( KERN_ERR "cpqarray: out of memory"); - return(num_cntlrs_reg); - } - memset(ida, 0, sizeof(struct hd_struct)*nr_ctlr*NWD*16); memset(ida_sizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_blocksizes, 0, sizeof(int)*nr_ctlr*NWD*16); - memset(ida_hardsizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_gendisk, 0, sizeof(struct gendisk)*MAX_CTLR); /* @@ -512,7 +447,6 @@ { kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } return(num_cntlrs_reg); @@ -531,16 +465,13 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_ida_request); + blk_init_queue(q, do_ida_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); - hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -550,8 +481,7 @@ ida_gendisk[i].nr_real = 0; /* Get on the disk list */ - ida_gendisk[i].next = gendisk_head; - gendisk_head = &ida_gendisk[i]; + add_gendisk(&ida_gendisk[i]); init_timer(&hba[i]->timer); hba[i]->timer.expires = jiffies + IDA_TIMER; @@ -923,22 +853,27 @@ ctlr_info_t *h = q->queuedata; cmdlist_t *c; int seg, sect; - char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; + struct bio *bio; struct request *creq; - struct my_sg tmp_sg[SG_MAX]; - int i; + struct scatterlist tmp_sg[SG_MAX]; + unsigned long lastdataend; + int i, dir; -// Loop till the queue is empty if or it is plugged + if (blk_queue_plugged(q)) { + start_io(h); + return; + } + +// Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > SG_MAX) BUG(); 
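
[ For reference: with this patch the array drivers configure their limits on the queue itself instead of through the old global hardsect_size/max_sectors arrays, as in the cpqarray/cciss init code just above. The following is only a minimal sketch of that setup sequence, assuming the usual block-layer headers; the request_fn name and the limit values are illustrative, not taken from either driver. ]

/*
 * Sketch: per-queue setup under the reworked block layer.
 * All names and limit values below are illustrative only.
 */
static void example_request_fn(request_queue_t *q);

static void example_setup_queue(request_queue_t *q, struct pci_dev *pdev,
				void *driver_data)
{
	/* the queue now carries a name, passed to blk_init_queue() */
	blk_init_queue(q, example_request_fn, "example");
	blk_queue_headactive(q, 0);

	/* bounce highmem pages the device cannot reach directly */
	blk_queue_bounce_limit(q, pdev->dma_mask);

	/* per-queue limits replace the old global per-major arrays */
	blk_queue_max_sectors(q, 128);
	blk_queue_max_segments(q, 32);
	blk_queue_hardsect_size(q, 512);

	q->queuedata = driver_data;
}
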
@@ -947,7 +882,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -958,47 +893,51 @@ return; } - bh = creq->bh; + bio = creq->bio; c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; c->hdr.size = sizeof(rblk_t) >> 2; c->size += sizeof(rblk_t); - c->req.hdr.blk = ida[(h->ctlr<rq_dev)].start_sect + creq->sector; - c->bh = bh; + c->req.hdr.blk = creq->sector; + c->bio = bio; DBGPX( - if (bh == NULL) - panic("bh == NULL?"); + if (bio == NULL) + panic("bio == NULL?"); printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; - sect = 0; - while(bh) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; + seg = sect = 0; + lastdataend = ~0UL; + while(bio) { + sect += bio_sectors(bio); + if (bio_to_phys(bio) == lastdataend) { + tmp_sg[seg-1].length += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == SG_MAX) BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].length = bio_size(bio); + tmp_sg[seg].page = bio_page(bio); + tmp_sg[seg].offset = bio_offset(bio); + lastdataend = bio_to_phys(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* Now do all the DMA Mappings */ + if (creq->cmd == READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for( i=0; i < seg; i++) { - c->req.sg[i].size = tmp_sg[i].size; - c->req.sg[i].addr = (__u32) pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, - (creq->cmd == READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->req.sg[i].size = tmp_sg[i].length; + c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, + tmp_sg[i].page, + tmp_sg[i].offset, + tmp_sg[i].length, dir); } DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); c->req.hdr.sg_cnt = seg; @@ -1060,17 +999,17 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + xbh = bio->bi_next; + bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, ok); - bh = xbh; + bio = xbh; } } /* @@ -1079,7 +1018,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) { int ok=1; - int i; + int i, ddir; if (cmd->req.hdr.rcode & RCODE_NONFATAL && (hba[cmd->ctlr]->misc_tflags & MISC_NONFATAL_WARN) == 0) { @@ -1101,13 +1040,15 @@ } if (timeout) ok = 0; /* unmap the DMA mapping for all the scatter gather elements */ + if (cmd->req.hdr.cmd == IDA_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; for(i=0; ireq.hdr.sg_cnt; i++) - { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, - cmd->req.sg[i].addr, cmd->req.sg[i].size, - (cmd->req.hdr.cmd == IDA_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); - } - complete_buffers(cmd->bh, ok); + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, + cmd->req.sg[i].size, ddir); + + complete_buffers(cmd->bio, ok); } /* @@ -1132,7 +1073,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); if (istat & FIFO_NOT_EMPTY) { while((a = h->access.command_completed(h))) { a1 = a; a &= ~3; @@ -1175,8 +1116,12 @@ /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); } /* @@ -1222,14 +1167,10 @@ put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(ida[(ctlr<i_rdev)].start_sect, &geo->start); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; case IDAGETDRVINFO: return copy_to_user(&io->c.drv,&hba[ctlr]->drv[dsk],sizeof(drv_info_t)); - case BLKGETSIZE: - if (!arg) return -EINVAL; - put_user(ida[(ctlr<i_rdev)].nr_sects, (long*)arg); - return 0; case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); case IDAPASSTHRU: @@ -1265,6 +1206,7 @@ return(0); } + case BLKGETSIZE: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -1371,11 +1313,11 @@ } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* Wait for completion */ while(c->type != CMD_IOCTL_DONE) @@ -1589,15 +1531,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -1606,7 +1548,6 @@ memset(ida+(ctlr*256), 0, sizeof(struct hd_struct)*NWD*16); memset(ida_sizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(ida_blocksizes+(ctlr*256), 0, sizeof(int)*NWD*16); - memset(ida_hardsizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(hba[ctlr]->drv, 0, sizeof(drv_info_t)*NWD); ida_gendisk[ctlr].nr_real = 0; @@ -1634,17 +1575,15 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); ctlr = MAJOR(dev) - MAJOR_NR; gdev = &ida_gendisk[ctlr]; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); @@ -1652,25 +1591,14 @@ } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, hba[ctlr]->drv[target].nr_blks); - /* reset the blocksize 
so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } - - /* 16 minors per disk... */ - grok_partitions(gdev, target, 16, hba[ctlr]->drv[target].nr_blks); hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- /opt/kernel/linux-2.4.10-pre4/drivers/block/cpqarray.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cpqarray.h Thu Sep 6 13:30:33 2001 @@ -56,11 +56,6 @@ #ifdef __KERNEL__ -struct my_sg { - int size; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -121,6 +116,9 @@ struct timer_list timer; unsigned int misc_tflags; }; + +#define IDA_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif #endif /* CPQARRAY_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/elevator.c Fri Jul 20 05:59:41 2001 +++ linux/drivers/block/elevator.c Fri Sep 7 08:59:16 2001 @@ -18,48 +18,61 @@ * Removed tests for max-bomb-segments, which was breaking elvtune * when run without -bN * + * Jens: + * - Rework again to work with bio instead of buffer_heads + * - added merge by hash-lookup + * - loose bi_dev comparisons, partition handling is right now + * - completely modularize elevator setup and teardown + * */ - +#include #include #include #include #include +#include #include +#include +#include #include /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; - next = rq->queue.next; + /* + * if .next is a valid request + */ + next = rq->queuelist.next; if (next == head) return 0; + next_rq = list_entry(next, struct request, queuelist); + /* - * if the device is different (usually on a different partition), - * just check if bh is after rq + * if the device is different (not a normal case) just check if + * bio is after rq */ - next_rq = blkdev_entry_to_request(next); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_sector > rq->sector; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device. if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,67 +81,137 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } +/* + * can we safely merge with this request? 
+ */ +inline int elv_rq_merge_ok(struct request *rq, struct bio *bio, int rw) +{ + if (rq && rq->rq_dev == bio->bi_dev && rw == rq->cmd && !rq->waiting + && !rq->special) + return 1; + + return 0; +} int elevator_linus_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head *head, struct bio *bio) { struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio); + struct elv_linus_data *edat = q->elevator.elevator_data; + struct request *__rq; + struct bio *bio_hash; + int rw = bio_rw(bio); + + /* + * first try a back merge, then front, then give up and scan. this + * will of course fail for different size bios on the same queue, + * however that isn't an issue + */ + if (edat->flags & ELV_LINUS_BACK_MERGE) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_back_lookups); + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector - count); + if (bio_hash) { + __bio_put(bio_hash); + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_back_hits); + __rq = bio_hash->bi_req; + if (&__rq->queuelist == head && blk_queue_headlive(q)) + goto front; + else if (!elv_rq_merge_ok(__rq, bio, rw) ||bio->bi_next) + goto front; + + /* + * looks ok to merge + */ + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_back_merges); + *req = __rq; + bio->bi_req = __rq; + return ELEVATOR_BACK_MERGE; + } + } + } + +front: + if (edat->flags & ELV_LINUS_FRONT_MERGE) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_front_lookups); + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector + count); + if (bio_hash) { + __bio_put(bio_hash); + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_front_hits); + __rq = bio_hash->bi_req; + if (&__rq->queuelist == head && blk_queue_headlive(q)) + goto scan; + else if (!elv_rq_merge_ok(__rq, bio, rw)) + goto scan; + + /* + * looks ok to merge + */ + if (__rq->sector - count == bio->bi_sector) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_front_merges); + *req = __rq; + bio->bi_req = __rq; + return ELEVATOR_FRONT_MERGE; + } + } + } + /* + * no merge possible, scan for insertion + */ +scan: while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = list_entry(entry, struct request, queuelist); /* - * simply "aging" of requests in queue + * get next entry into L1 cache */ - if (__rq->elevator_sequence-- <= 0) - break; + prefetch(entry->prev); if (__rq->waiting) continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) - *req = __rq; - if (__rq->cmd != rw) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->elevator_sequence < count) - break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - ret = ELEVATOR_BACK_MERGE; + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; + + /* + * simply "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) break; - } else if (__rq->sector - count == bh->b_rsector) { - ret = ELEVATOR_FRONT_MERGE; - __rq->elevator_sequence -= count; - *req = __rq; + else if (__rq->elevator_sequence < count) break; - } } - return ret; + return ELEVATOR_NO_MERGE; } +extern int queue_nr_requests; + void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) { - struct list_head *entry = &req->queue, *head = &q->queue_head; + struct list_head *entry, *head = &q->queue_head; + int i = 
queue_nr_requests * 2; + + if (req->q && req->q != q) + BUG(); /* * second pass scan of requests that got passed over, if any */ + entry = &req->queuelist; while ((entry = entry->next) != head) { - struct request *tmp = blkdev_entry_to_request(entry); + struct request *tmp =list_entry(entry,struct request,queuelist); + prefetch(entry->next); tmp->elevator_sequence -= count; + if (i-- < 0) + BUG(); } } @@ -138,85 +221,117 @@ req->elevator_sequence = next->elevator_sequence; } +extern int queue_nr_requests; +int elv_linus_init(request_queue_t *q, elevator_t *e) +{ + struct elv_linus_data *edata; + int ret; + + edata = kmalloc(sizeof(struct elv_linus_data), GFP_KERNEL); + if (!edata) + return -ENOMEM; + + if ((ret = bio_hash_init(&q->queue_hash, queue_nr_requests >> 2))) { + kfree(edata); + return -ENOMEM; + } + + /* + * default to doing both front and back merges + */ + edata->flags = ELV_LINUS_BACK_MERGE | ELV_LINUS_FRONT_MERGE; + e->elevator_data = edata; + return 0; +} + +void elv_linus_exit(request_queue_t *q, elevator_t *e) +{ + kfree(e->elevator_data); + bio_hash_cleanup(&q->queue_hash); +} + /* * See if we can find a request that this buffer can be coalesced with. */ int elevator_noop_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { - struct list_head *entry; - unsigned int count = bh->b_size >> 9; + struct bio *bio_hash; + struct request *__rq = NULL; + int rw, count, ret; + + count = bio_sectors(bio); + rw = bio_rw(bio); + ret = ELEVATOR_NO_MERGE; + + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_back_lookups); + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector - count); + if (bio_hash) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_back_hits); + __bio_put(bio_hash); + __rq = bio_hash->bi_req; + if (&__rq->queuelist == head && blk_queue_headlive(q)) + goto front; + else if (!elv_rq_merge_ok(__rq, bio, rw) || bio->bi_next) + goto front; - if (list_empty(&q->queue_head)) - return ELEVATOR_NO_MERGE; + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_back_merges); + ret = ELEVATOR_BACK_MERGE; + goto out; + } + } - entry = &q->queue_head; - while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); +front: + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_front_lookups); + bio_hash = __bio_hash_find(&q->queue_hash, bio->bi_sector + count); + if (bio_hash) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_front_hits); + __bio_put(bio_hash); + __rq = bio_hash->bi_req; + if (&__rq->queuelist == head && blk_queue_headlive(q)) + goto out; + else if (!elv_rq_merge_ok(__rq, bio, rw)) + goto out; - if (__rq->cmd != rw) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->waiting) - continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - *req = __rq; - return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; + if (__rq->sector - count == bio->bi_sector) { + BIO_HASH_PROF_INC(&q->queue_hash, q_nr_front_merges); + ret = ELEVATOR_FRONT_MERGE; + goto out; } } - *req = blkdev_entry_to_request(q->queue_head.prev); - return ELEVATOR_NO_MERGE; +out: + *req = bio->bi_req = __rq; + return ret; } void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} void elevator_noop_merge_req(struct request *req, struct request *next) {} 
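
[ A condensed sketch of the hash-lookup merge strategy both elevators above now use, assuming the __bio_hash_find()/bi_req/elv_rq_merge_ok() helpers introduced by this patch; the live-queue-head checks and profiling counters are left out. A queued request whose tail ends at bio->bi_sector is a back-merge candidate, one that starts right after the bio is a front-merge candidate. ]

/*
 * Sketch only: locate a merge candidate by sector hash instead of
 * scanning the queue. Holdoff and size checks omitted.
 */
static int example_hash_merge(request_queue_t *q, struct request **req,
			      struct bio *bio)
{
	unsigned int count = bio_sectors(bio);
	int rw = bio_rw(bio);
	struct request *__rq;
	struct bio *hit;

	/* back merge: a request ending exactly where this bio begins */
	hit = __bio_hash_find(&q->queue_hash, bio->bi_sector - count);
	if (hit) {
		__rq = hit->bi_req;
		__bio_put(hit);
		if (elv_rq_merge_ok(__rq, bio, rw) &&
		    __rq->sector + __rq->nr_sectors == bio->bi_sector) {
			*req = __rq;
			return ELEVATOR_BACK_MERGE;
		}
	}

	/* front merge: a request starting exactly where this bio ends */
	hit = __bio_hash_find(&q->queue_hash, bio->bi_sector + count);
	if (hit) {
		__rq = hit->bi_req;
		__bio_put(hit);
		if (elv_rq_merge_ok(__rq, bio, rw) &&
		    __rq->sector - count == bio->bi_sector) {
			*req = __rq;
			return ELEVATOR_FRONT_MERGE;
		}
	}

	return ELEVATOR_NO_MERGE;
}
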
-int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) +int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type,char *name) { - blkelv_ioctl_arg_t output; + *e = type; - output.queue_ID = elevator->queue_ID; - output.read_latency = elevator->read_latency; - output.write_latency = elevator->write_latency; - output.max_bomb_segments = 0; + INIT_LIST_HEAD(&q->queue_head); + strncpy(e->queue_name, name, 15); - if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; + if (e->elevator_init_fn) + return e->elevator_init_fn(q, e); return 0; } -int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +void elevator_exit(request_queue_t *q, elevator_t *e) { - blkelv_ioctl_arg_t input; - - if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; - - if (input.read_latency < 0) - return -EINVAL; - if (input.write_latency < 0) - return -EINVAL; - - elevator->read_latency = input.read_latency; - elevator->write_latency = input.write_latency; - return 0; + if (e->elevator_exit_fn) + e->elevator_exit_fn(q, e); } -void elevator_init(elevator_t * elevator, elevator_t type) +int elevator_global_init(void) { - static unsigned int queue_ID; - - *elevator = type; - elevator->queue_ID = queue_ID++; + return 0; } + +module_init(elevator_global_init); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/floppy.c linux/drivers/block/floppy.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/floppy.c Sun Aug 5 22:12:40 2001 +++ linux/drivers/block/floppy.c Tue Aug 7 10:32:32 2001 @@ -570,7 +570,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; -static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2282,7 +2282,6 @@ static void request_done(int uptodate) { int block; - unsigned long flags; probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); @@ -2301,7 +2300,6 @@ DRS->maxtrack = 1; /* unlock chained buffers */ - spin_lock_irqsave(&io_request_lock, flags); while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; @@ -2309,7 +2307,6 @@ CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - spin_unlock_irqrestore(&io_request_lock, flags); if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ @@ -2334,9 +2331,7 @@ DRWE->last_error_sector = CURRENT->sector; DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(&io_request_lock, flags); end_request(0); - spin_unlock_irqrestore(&io_request_lock, flags); } } @@ -2382,7 +2377,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2429,7 +2424,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2437,19 +2432,19 @@ /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2458,13 +2453,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2475,7 +2470,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2484,8 +2479,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2496,7 +2491,7 @@ printk("current_count_sectors=%ld\n", current_count_sectors); printk("remaining=%d\n", remaining >> 9); printk("CURRENT->nr_sectors=%ld\n",CURRENT->nr_sectors); - printk("CURRENT->current_nr_sectors=%ld\n", + printk("CURRENT->current_nr_sectors=%u\n", CURRENT->current_nr_sectors); printk("max_sector=%d\n", max_sector); printk("ssize=%d\n", ssize); @@ -2505,9 +2500,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2519,8 +2514,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2541,15 +2536,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2641,7 +2636,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2649,17 +2644,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have 
phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2681,7 +2676,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2689,11 +2684,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2709,12 +2704,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2722,8 +2717,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2736,7 +2731,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2751,7 +2746,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
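
[ floppy.c now walks CURRENT->bio with the bio accessors rather than dereferencing buffer_head fields, as in buffer_chain_size() and copy_buffer() above. The general chain-walking pattern under this patch looks roughly like the sketch below; the function name and the printk are illustrative only. ]

/*
 * Sketch: iterating a request's bio chain with the accessor macros
 * (bio_size/bio_sectors/bi_next) instead of b_size/b_reqnext.
 * Purely illustrative; it only sums and prints the totals.
 */
static void example_walk_request(struct request *rq)
{
	struct bio *bio = rq->bio;
	unsigned int bytes = 0, sectors = 0;

	while (bio) {
		bytes   += bio_size(bio);	/* was bh->b_size */
		sectors += bio_sectors(bio);	/* was bh->b_size >> 9 */
		bio = bio->bi_next;		/* was bh->b_reqnext */
	}

	printk(KERN_DEBUG "example: %u sectors (%u bytes) in request\n",
	       sectors, bytes);
}
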
@@ -2770,8 +2765,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2789,12 +2784,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2841,7 +2836,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2859,8 +2854,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2913,8 +2908,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); @@ -4168,7 +4161,7 @@ blk_size[MAJOR_NR] = floppy_sizes; blksize_size[MAJOR_NR] = floppy_blocksizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST, "floppy"); reschedule_timeout(MAXTIMEOUT, "floppy init", MAXTIMEOUT); config_types(); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/ida_cmd.h linux/drivers/block/ida_cmd.h --- /opt/kernel/linux-2.4.10-pre4/drivers/block/ida_cmd.h Wed Jul 25 23:12:01 2001 +++ linux/drivers/block/ida_cmd.h Thu Sep 6 13:30:30 2001 @@ -93,7 +93,7 @@ int ctlr; struct cmdlist *prev; struct cmdlist *next; - struct buffer_head *bh; + struct bio *bio; int type; } cmdlist_t; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/ll_rw_blk.c Sat Aug 4 07:37:09 2001 +++ linux/drivers/block/ll_rw_blk.c Fri Sep 7 08:57:03 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -51,27 +53,13 @@ */ DECLARE_TASK_QUEUE(tq_disk); -/* - * Protect the request list against multiple users.. 
- * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... - */ -spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; - /* This specifies how many sectors to read ahead on the disk. */ int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: - * *request_fn - * *current_request + * request_queue + * *queue */ struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ @@ -83,7 +71,7 @@ * * if (!blk_size[MAJOR]) then no minor size checking is done. */ -int * blk_size[MAX_BLKDEV]; +int *blk_size[MAX_BLKDEV]; /* * blksize_size contains the size of all block-devices: @@ -95,18 +83,9 @@ int * blksize_size[MAX_BLKDEV]; /* - * hardsect_size contains the size of the hardware sector of a device. - * - * hardsect_size[MAJOR][MINOR] - * - * if (!hardsect_size[MAJOR]) - * then 512 bytes is assumed. - * else - * sector_size is hardsect_size[MAJOR][MINOR] - * This is currently set by some scsi devices and read by the msdos fs driver. - * Other uses may appear later. + * blk_gendisk contains pointers to the gendisk structures */ -int * hardsect_size[MAX_BLKDEV]; +struct gendisk *blk_gendisk[MAX_BLKDEV]; /* * The following tunes the read-ahead algorithm in mm/filemap.c @@ -114,24 +93,24 @@ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * How many reqeusts do we allocate per queue, * and how many do we "batch" on freeing them? */ -static int queue_nr_requests, batch_requests; - -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} +int queue_nr_requests, batch_requests; +unsigned long blk_max_low_pfn, blk_max_pfn; -inline request_queue_t *__blk_get_queue(kdev_t dev) +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location. + * + **/ +inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -141,69 +120,6 @@ return &blk_dev[MAJOR(dev)].request_queue; } -/* - * NOTE: the device-specific queue() functions - * have to be atomic! - */ -request_queue_t *blk_get_queue(kdev_t dev) -{ - request_queue_t *ret; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock,flags); - ret = __blk_get_queue(dev); - spin_unlock_irqrestore(&io_request_lock,flags); - - return ret; -} - -static int __blk_cleanup_queue(struct list_head *head) -{ - struct request *rq; - int i = 0; - - if (list_empty(head)) - return 0; - - do { - rq = list_entry(head->next, struct request, table); - list_del(&rq->table); - kmem_cache_free(request_cachep, rq); - i++; - } while (!list_empty(head)); - - return i; -} - -/** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released - * - * Description: - * blk_cleanup_queue is the pair to blk_init_queue(). 
It should - * be called when a request queue is being released; typically - * when a block device is being de-registered. Currently, its - * primary task it to free all the &struct request structures that - * were allocated to the queue. - * Caveat: - * Hopefully the low level driver will have finished any - * outstanding requests first... - **/ -void blk_cleanup_queue(request_queue_t * q) -{ - int count = queue_nr_requests; - - count -= __blk_cleanup_queue(&q->request_freelist[READ]); - count -= __blk_cleanup_queue(&q->request_freelist[WRITE]); - count -= __blk_cleanup_queue(&q->pending_freelist[READ]); - count -= __blk_cleanup_queue(&q->pending_freelist[WRITE]); - - if (count) - printk("blk_cleanup_queue: leaked requests (%d)\n", count); - - memset(q, 0, sizeof(*q)); -} - /** * blk_queue_headactive - indicate whether head of request queue may be active * @q: The queue which this applies to. @@ -227,10 +143,9 @@ * * When a queue is plugged the head will be assumed to be inactive. **/ - void blk_queue_headactive(request_queue_t * q, int active) { - q->head_active = active; + set_bit(QUEUE_FLAG_HEADACTIVE, &q->queue_flags); } /** @@ -239,7 +154,7 @@ * @mfn: the alternate make_request function * * Description: - * The normal way for &struct buffer_heads to be passed to a device + * The normal way for &struct bios to be passed to a device * driver is for them to be collected into requests on a request * queue, and then to allow the device driver to select requests * off that queue when it is ready. This works well for many block @@ -251,19 +166,127 @@ * * Caveat: * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory", either by calling bh_kmap() to get - * a kernel mapping, to by calling create_bounce() to create a - * buffer in normal memory. + * with buffers in "highmemory". This can be accomplished by either calling + * bio_kmap() to get a temporary kernel mapping, or by calling + * blk_queue_bounce() to create a buffer in normal memory. **/ - void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { + /* + * set defaults + */ + q->max_segments = MAX_SEGMENTS; q->make_request_fn = mfn; + blk_queue_max_sectors(q, MAX_SECTORS); + blk_queue_hardsect_size(q, 512); + + q->queue_state = Queue_up; + init_waitqueue_head(&q->queue_wait); +} + +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @dma_addr: bus address limit + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page. + **/ +void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) +{ + unsigned long mb = dma_addr >> 20; + struct page *bounce_page = mem_map + (dma_addr >> PAGE_SHIFT); + + /* + * just make sure that no pages are considered above this one... + */ + if (dma_addr == BLK_BOUNCE_ANY) + bounce_page = (struct page *) BLK_BOUNCE_ANY; + + /* + * keep this for debugging for now... 
+ */ + if (dma_addr != BLK_BOUNCE_HIGH) { + printk("blk: queue %p, ", q); + if (dma_addr == BLK_BOUNCE_ANY) + printk("no I/O memory limit\n"); + else + printk("I/O limit %luMb (mask %Lx)\n", mb, (u64) dma_addr); + } + + q->bounce_limit = bounce_page; +} + +/** + * blk_queue_max_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit + * + * Description: + * Enables a low level driver to set an upper limit on the size of + * received requests. + **/ +void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) +{ + q->max_sectors = max_sectors; +} + +/** + * blk_queue_max_segments - set max segments for a request for this queue + * @q: the request queue for the device + * @max_segments: max number of segments + * + * Description: + * Enables a low level driver to set an upper limit on the number of + * data segments in a request + **/ +void blk_queue_max_segments(request_queue_t *q, unsigned short max_segments) +{ + q->max_segments = max_segments; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/** + * blk_queue_hardsect_size - set hardware sector size for the queue + * @q: the request queue for the device + * @size: the hardware sector size, in bytes + * + * Description: + * This should typically be set to the lowest possible sector size + * that the hardware can operate on (possible without reverting to + * even internal read-modify-write operations). Usually the default + * of 512 covers most hardware. + **/ +void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) { - if (req->nr_segments < max_segments) { + q->hardsect_size = size; +} + +/* + * can we merge the two segments, or do we need to start a new one? 
+ */ +inline int blk_same_segment(request_queue_t *q, struct bio *bio, + struct bio *nxt) +{ + if (!BIO_CONTIG(bio, nxt)) + return 0; + + if (BIO_PHYS_4G(bio, nxt)) + return 1; + + return 0; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) +{ + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -271,36 +294,65 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (req->nr_sectors + bio_size(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (req->nr_sectors + bio_size(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (blk_same_segment(q, req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; return 1; } +/** + * blk_wake_queue - restart a queue that wasn't fully emptied at request_fn time + * @q: The &request_queue_t in question + * + * Description: + * Sometimes hardware can run out of resources, so no more commands can + * be queued. If a driver breaks out of request_fn while there are still + * requests left to be serviced, the queue is left in a state where it is + * still unplugged and will not be recalled by the block layer: it will not + * be replugged, and thus request_fn will not be run again. Once a driver has + * freed enough resources to start queueing new requests again, it must + * call blk_wake_queue to start processing again. + **/ +void blk_wake_queue(request_queue_t *q) +{ +#if 1 + if (!blk_set_plugged(q)) + queue_task(&q->plug_tq, &tq_disk); +#else + q->request_fn(q); +#endif +} + /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests * @@ -309,16 +361,13 @@ * This is called with interrupts off and no requests on the queue.
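A sketch of the calling pattern blk_wake_queue() is meant for (illustrative only; the mydev_* helpers are invented): request_fn bails out when the hardware has no free slots, and the completion interrupt kicks the queue so request_fn gets recalled:

static void mydev_request_fn(request_queue_t *q)
{
	struct request *rq;

	while (!elv_queue_empty(q)) {
		if (!mydev_hw_slots_free())
			return;		/* out of resources, leave the rest queued */

		rq = blkdev_entry_to_request(q->queue_head.next);
		blkdev_dequeue_request(rq);
		mydev_start_io(rq);
	}
}

static void mydev_intr(int irq, void *dev_id, struct pt_regs *regs)
{
	request_queue_t *q = dev_id;

	mydev_finish_io(q);
	blk_wake_queue(q);	/* a slot is free again, get request_fn recalled */
}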
* (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static int blk_plug_device(request_queue_t *q) { - /* - * no need to replug device - */ - if (!list_empty(&q->queue_head) || q->plugged) - return; + if (!elv_queue_empty(q)) + return 0; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + blk_wake_queue(q); + return 1; } /* @@ -326,24 +375,91 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) - q->request_fn(q); - } + if (blk_set_unplugged(q) && !elv_queue_empty(q)) + q->request_fn(q); } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger requests queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); __generic_unplug_device(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static void blk_init_free_list(request_queue_t *q) +static int __blk_cleanup_rqlist(struct list_head *head) +{ + struct request *rq; + int i = 0; + + if (list_empty(head)) + return 0; + + do { + rq = list_entry(head->next, struct request, queuelist); + list_del(&rq->queuelist); + kmem_cache_free(request_cachep, rq); + i++; + } while (!list_empty(head)); + + return i; +} + +static int __blk_cleanup_queue(request_queue_t *q) +{ + int count; + + count = __blk_cleanup_rqlist(&q->request_freelist[READ]); + count += __blk_cleanup_rqlist(&q->request_freelist[WRITE]); + count += __blk_cleanup_rqlist(&q->pending_freelist[READ]); + count += __blk_cleanup_rqlist(&q->pending_freelist[WRITE]); + + return count; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. Currently, its + * primary task it to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... 
+ **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(q); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + elevator_exit(q, &q->elevator); + + memset(q, 0, sizeof(*q)); +} + +static int blk_init_free_list(request_queue_t *q) { struct request *rq; int i; @@ -359,21 +475,27 @@ */ for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); - if (rq == NULL) { - /* We'll get a `leaked requests' message from blk_cleanup_queue */ - printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); - break; - } + if (!rq) + goto nomem; + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->table, &q->request_freelist[i & 1]); + if (i < queue_nr_requests >> 1) + list_add(&rq->queuelist, &q->request_freelist[READ]); + else + list_add(&rq->queuelist, &q->request_freelist[WRITE]); } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->wait_for_request[READ]); + init_waitqueue_head(&q->wait_for_request[WRITE]); spin_lock_init(&q->queue_lock); + return 0; +nomem: + __blk_cleanup_queue(q); + return 1; } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -396,8 +518,8 @@ * requests on the queue, it is responsible for arranging that the requests * get dealt with eventually. * - * A global spin lock $io_request_lock must be held while manipulating the - * requests on the request queue. + * The queue spin lock must be held while manipulating the requests on the + * request queue. * * The request on the head of the queue is by default assumed to be * potentially active, and it is not considered for re-ordering or merging @@ -408,33 +530,42 @@ * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, char *name) { - INIT_LIST_HEAD(&q->queue_head); - elevator_init(&q->elevator, ELEVATOR_LINUS); - blk_init_free_list(q); + int ret = -ENOMEM; + + if (blk_init_free_list(q)) + goto out_err; + + if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS, name))) + goto cleanup_queue; + q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. + * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + + blk_queue_make_request(q, __make_request); + blk_set_unplugged(q); + blk_mark_headactive(q); + return 0; +cleanup_queue: + blk_cleanup_queue(q); +out_err: + return ret; } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, table); +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* - * Get a free request. 
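With the global io_request_lock gone, a request_fn that has to block now drops the per-queue lock instead; this is the same pattern the nbd conversion later in this patch uses. A sketch only, with myblk_do_io() invented:

static void myblk_request_fn(request_queue_t *q)
{
	struct request *rq;

	while (!elv_queue_empty(q)) {
		rq = blkdev_entry_to_request(q->queue_head.next);
		blkdev_dequeue_request(rq);

		/*
		 * request_fn is entered with q->queue_lock held and interrupts
		 * disabled; drop the lock around anything that may sleep and
		 * retake it before looking at the queue again
		 */
		spin_unlock_irq(&q->queue_lock);
		myblk_do_io(rq);
		spin_lock_irq(&q->queue_lock);
	}
}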
io_request_lock must be held and interrupts + * Get a free request. queue lock must be held and interrupts * disabled on the way in. */ static inline struct request *get_request(request_queue_t *q, int rw) @@ -443,7 +574,7 @@ if (!list_empty(&q->request_freelist[rw])) { rq = blkdev_free_rq(&q->request_freelist[rw]); - list_del(&rq->table); + list_del(&rq->queuelist); rq->rq_status = RQ_ACTIVE; rq->special = NULL; rq->q = q; @@ -460,34 +591,24 @@ register struct request *rq; DECLARE_WAITQUEUE(wait, current); - add_wait_queue_exclusive(&q->wait_for_request, &wait); + spin_lock_prefetch(&q->queue_lock); + + add_wait_queue_exclusive(&q->wait_for_request[rw], &wait); for (;;) { __set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (rq) break; generic_unplug_device(q); schedule(); } - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->wait_for_request[rw], &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -536,7 +657,7 @@ /* * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the + * queue lock is held and interrupts disabled, as we muck with the * request queue list. * * By this point, req->cmd is always either READ/WRITE, never READA, @@ -545,21 +666,22 @@ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { + elevator_t *e = &q->elevator; + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); + if (!blk_queue_plugged(q) && blk_queue_headlive(q) + && insert_here == &q->queue_head) BUG(); - } /* * elevator indicated where it wants this request to be * inserted at elevator_merge time */ - list_add(&req->queue, insert_here); + e->elevator_add_req_fn(q, req, insert_here); } -inline void blk_refill_freelist(request_queue_t *q, int rw) +static void blk_refill_freelist(request_queue_t *q, int rw) { if (q->pending_free[rw]) { list_splice(&q->pending_freelist[rw], &q->request_freelist[rw]); @@ -569,7 +691,7 @@ } /* - * Must be called with io_request_lock held and interrupts disabled + * Must be called with queue lock held and interrupts disabled */ inline void blkdev_release_request(struct request *req) { @@ -587,12 +709,12 @@ /* * Add to pending free list and batch wakeups */ - list_add(&req->table, &q->pending_freelist[rw]); + list_add(&req->queuelist, &q->pending_freelist[rw]); if (++q->pending_free[rw] >= batch_requests) { int wake_up = q->pending_free[rw]; blk_refill_freelist(q, rw); - wake_up_nr(&q->wait_for_request, wake_up); + wake_up_nr(&q->wait_for_request[rw], wake_up); } } } @@ -600,10 +722,7 @@ /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static void attempt_merge(request_queue_t *q, struct request *req) { struct request *next; @@ -612,8 +731,8 @@ return; if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || 
req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) + || req->nr_sectors + next->nr_sectors > q->max_sectors + || next->waiting || next->special) return; /* * If we are not allowed to merge these requests, then @@ -621,135 +740,143 @@ * will have been updated to the appropriate number, * and we shouldn't do it here too. */ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; + if (q->merge_requests_fn(q, req, next)) { + q->elevator.elevator_merge_req_fn(req, next); + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + blkdev_dequeue_request(next); + blkdev_release_request(next); + } +} - q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - blkdev_release_request(next); +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) +{ + if (&rq->queuelist != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + struct list_head *prev = rq->queuelist.prev; + + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) { - struct list_head * prev; + if (rq->queuelist.next != &q->queue_head) + attempt_merge(q, rq); +} +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request for these queues, but the driver can + * call this function and make it happen any way. Only the driver knows + * when it is safe to do so. 
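As a usage sketch (not part of the patch; the mydev_* names are invented), a head-active driver could give the front request one last chance to grow right before committing it to the hardware:

static void mydev_start_next(request_queue_t *q)
{
	struct request *rq = blkdev_entry_to_request(q->queue_head.next);

	/*
	 * nothing has been started on rq yet, so it is still safe to merge
	 * more into it; blk_attempt_remerge() takes q->queue_lock itself
	 */
	blk_attempt_remerge(q, rq);

	mydev_start_io(rq);
}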
+ **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + unsigned long flags; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + spin_lock_irqsave(&q->queue_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static int __make_request(request_queue_t *q, struct bio *bio) { - unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; + struct request *req, *freereq = NULL; + int el_ret, latency = 0, rw, count; struct list_head *head, *insert_here; - int latency; elevator_t *elevator = &q->elevator; + sector_t sector; - count = bh->b_size >> 9; - sector = bh->b_rsector; - - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: - rw_ahead = 1; - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } - - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); + sector = bio->bi_sector; + count = bio_sectors(bio); + rw = bio_rw(bio); - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. - */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + latency = elevator_request_latency(elevator, rw); -/* look for a free request. 
*/ /* - * Try to coalesce the new request with old requests + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma in theory) */ - max_sectors = get_max_sectors(bh->b_rdev); + bio = blk_queue_bounce(q, bio); again: + spin_lock_prefetch(&q->queue_lock); req = NULL; head = &q->queue_head; + + spin_lock_irq(&q->queue_lock); + /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic + * barrier write must not be passed - so insert with 0 latency + * and invalidate the entire existing merge hash */ - spin_lock_irq(&io_request_lock); + if ((bio->bi_flags & BIO_BARRIER) && !freereq) { + latency = 0; + __bio_hash_inval(&q->queue_hash); + } insert_here = head->prev; - if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + if (blk_plug_device(q)) goto get_rq; - } else if (q->head_active && !q->plugged) + else if (blk_queue_headlive(q) && !blk_queue_plugged(q)) head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + el_ret = elevator->elevator_merge_fn(q, &req, head, bio); switch (el_ret) { - case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (!q->back_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; + /* + * remove this if we want the bio hash to serve as + * a generel pending-io lookup tool + */ + __bio_hash_remove(&req->biotail->bi_hash); + req->biotail->bi_next = bio; + req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (!q->front_merge_fn(q, req, bio)) break; elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; - req->current_nr_sectors = count; + /* + * see comment above + */ + __bio_hash_remove(&req->bio->bi_hash); + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid. if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... 
+ */ + req->buffer = bio_data(bio); + req->current_nr_sectors = req->hard_cur_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + attempt_front_merge(q, head, req); goto out; /* @@ -762,7 +889,7 @@ * of the queue */ if (req) - insert_here = &req->queue; + insert_here = &req->queuelist; break; default: @@ -780,107 +907,126 @@ req = freereq; freereq = NULL; } else if ((req = get_request(q, rw)) == NULL) { - spin_unlock_irq(&io_request_lock); - if (rw_ahead) + spin_unlock_irq(&q->queue_lock); + if (bio->bi_flags & BIO_RW_AHEAD) { + bio->bi_flags |= BIO_RW_BLOCK; goto end_io; + } freereq = __get_request_wait(q, rw); goto again; } + bio->bi_req = req; + /* fill up the request-info, and add it to the queue */ req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; + req->current_nr_sectors = req->hard_cur_sectors = count; req->nr_segments = 1; /* Always 1 for a new request. */ req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; blk_started_io(count); add_request(q, req, insert_here); out: if (freereq) blkdev_release_request(freereq); - spin_unlock_irq(&io_request_lock); + if (__bio_hash_add_unique(&q->queue_hash, bio)) + printk("ll_rw_blk: %lu for %s already there\n", bio->bi_sector, kdevname(bio->bi_dev)); + spin_unlock_irq(&q->queue_lock); return 0; end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio); return 0; } + +/* + * If bio->bi_dev is a partition, remap the location + */ +static inline void blk_partition_remap(struct bio *bio) +{ + int major, minor, drive, minor0; + struct gendisk *g; + kdev_t dev0; + + major = MAJOR(bio->bi_dev); + if ((g = blk_gendisk[major])) { + minor = MINOR(bio->bi_dev); + drive = (minor >> g->minor_shift); + minor0 = (drive << g->minor_shift); /* whole disk device */ + /* that is, minor0 = (minor & ~((1<minor_shift)-1)); */ + dev0 = MKDEV(major, minor0); + if (dev0 != bio->bi_dev) { + bio->bi_dev = dev0; + bio->bi_sector += g->part[minor].start_sect; + } + /* lots of checks are possible */ + } +} + /** - * generic_make_request: hand a buffer head to it's device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device. + * generic_make_request: hand a buffer to it's device driver for I/O + * @bio: The bio describing the location in memory and on the device. * * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct buffer_head and a &rw value. The - * %READ and %WRITE options are (hopefully) obvious in meaning. The - * %READA value means that a read is required, but that the driver is - * free to fail the request if, for example, it cannot get needed - * resources immediately. + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. * * generic_make_request() does not return any status. 
The * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bh->b_end_io + * completion, is delivered asynchronously through the bio->bi_end_io * function described (one day) else where. * - * The caller of generic_make_request must make sure that b_page, - * b_addr, b_size are set to describe the memory buffer, that b_rdev - * and b_rsector are set to describe the device address, and the - * b_end_io and optionally b_private are set to describe how - * completion notification should be signaled. BH_Mapped should also - * be set (to confirm that b_dev and b_blocknr are valid). - * - * generic_make_request and the drivers it calls may use b_reqnext, - * and may change b_rdev and b_rsector. So the values of these fields + * The caller of generic_make_request must make sure that bi_io_vec + * are set to describe the memory buffer, and that bi_dev and bi_sector are + & set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may change bi_dev and + * bi_rsector for remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. - * Because of this, the caller should record the device address - * information in b_dev and b_blocknr. * - * Apart from those fields mentioned above, no other fields, and in - * particular, no other flags, are changed by generic_make_request or - * any lower level drivers. * */ -void generic_make_request (int rw, struct buffer_head * bh) +void generic_make_request(struct bio *bio) { - int major = MAJOR(bh->b_rdev); - int minorsize = 0; + int major = MAJOR(bio->bi_dev); + int minor = MINOR(bio->bi_dev); request_queue_t *q; + int rw = bio_rw(bio); + sector_t minorsize = 0; - if (!bh->b_end_io) - BUG(); - - /* Test device size, when known. */ + /* Test device or partition size, when known. */ if (blk_size[major]) - minorsize = blk_size[major][MINOR(bh->b_rdev)]; + minorsize = blk_size[major][minor]; if (minorsize) { unsigned long maxsector = (minorsize << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; + unsigned long sector = bio->bi_sector; + unsigned int count = bio_sectors(bio); if (maxsector < count || maxsector - count < sector) { - /* Yecch */ - bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); - - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - /* Yecch again */ - bh->b_end_io(bh, 0); - return; + if (blk_size[major][minor]) { + + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device. */ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%Lu\n", + kdevname(bio->bi_dev), rw, + (sector + count)>>1, + (u64) blk_size[major][minor]); + } + bio->bi_flags |= BIO_EOF; + goto end_io; } } @@ -888,63 +1034,130 @@ * Resolve the mapping until finished. 
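The resolve-until-finished loop described here gives a stacking driver two options: remap the bio and return non-zero so that generic_make_request() looks up the queue for the new bi_dev and calls down again, or submit the I/O itself and return zero (as the loop driver below does). A sketch of the remapping style, with the myremap_* names invented:

static int myremap_make_request(request_queue_t *q, struct bio *bio)
{
	/* redirect to the backing device, shifted by a fixed offset */
	bio->bi_dev = myremap_backing_dev;
	bio->bi_sector += myremap_start_sector;

	return 1;	/* not finished: let generic_make_request() resubmit */
}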
(drivers are * still free to implement/resolve their own stacking * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. + * + * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + enum blk_queue_state state; + + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + "generic_make_request: Trying to access nonexistent block-device %s (%Lu)\n", + kdevname(bio->bi_dev), (u64) bio->bi_sector); +end_io: + bio->bi_end_io(bio); break; } - } while (q->make_request_fn(q, rw, bh)); + + /* + * change state to flush queue, for instance + */ + state = q->queue_state; + while (state != Queue_up) { + BUG(); /* just testing... */ + wait_event(q->queue_wait, q->queue_state != state); + state = q->queue_state; + } + + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); + + } while (q->make_request_fn(q, bio)); } +/* + * our default bio end_io callback handler for a buffer_head mapping. it's + * pretty simple, because no bio will ever contain more than one bio_vec + */ +static void end_bio_bh_io_sync(struct bio *bio) +{ + struct buffer_head *bh = bio->bi_private; + + bh->b_end_io(bh, bio->bi_flags & BIO_UPTODATE); + __bio_put(bio); +} /** - * submit_bh: submit a buffer_head to the block device later for I/O + * submit_bio: submit a bio to the block device layer for I/O * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O + * @bio: The &struct bio which describes the I/O * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. */ +void submit_bio(int rw, struct bio *bio) +{ + int count = bio_sectors(bio); + + /* + * do some validity checks... 
+ */ + if (!bio->bi_end_io) + BUG(); + if (bio_size(bio) > PAGE_SIZE) { + printk("bio: invalid size %d\n", bio_size(bio)); + BUG(); + } else if ((bio_offset(bio) + bio_size(bio)) > PAGE_SIZE) { + printk("bio: size/off %d/%d\n", bio_size(bio), bio_offset(bio)); + BUG(); + } + + if (rw & WRITE) { + kstat.pgpgout += count; + bio->bi_flags |= BIO_WRITE; + } else { + kstat.pgpgin += count; + bio->bi_flags |= BIO_READ; + if (rw == READA) + bio->bi_flags |= BIO_RW_AHEAD; + } + + generic_make_request(bio); +} + +/** + * submit_bh: submit a buffer_head to the block device layer for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + **/ void submit_bh(int rw, struct buffer_head * bh) { - int count = bh->b_size >> 9; + struct bio *bio; if (!test_bit(BH_Lock, &bh->b_state)) BUG(); + if (!buffer_mapped(bh)) + BUG(); + if (!bh->b_end_io) + BUG(); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio -- do the initial mapping, + * submit_bio -> generic_make_request may further map this bio around */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; + bio = bio_alloc(GFP_NOIO); - generic_make_request(rw, bh); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_dev = bh->b_dev; + bio->bi_next = NULL; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec.bv_page = bh->b_page; + bio->bi_io_vec.bv_len = bh->b_size; + bio->bi_io_vec.bv_offset = bh_offset(bh); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } + submit_bio(rw, bio); } /** @@ -976,8 +1189,9 @@ * * Caveat: * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - + * a multiple of the current approved size for the device. + * + **/ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) { unsigned int major; @@ -1000,7 +1214,7 @@ /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1061,12 +1275,51 @@ extern int stram_device_init (void); #endif +/* + * must be called with queue lock held! + */ +inline int __end_that_request_first(struct request *req, int uptodate) +{ + struct bio *bio; + int nsect; + + req->errors = 0; + if (!uptodate) + printk("end_request: I/O error, dev %s, sector %lu\n", + kdevname(req->rq_dev), req->sector); + + if ((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); + blk_finished_io(nsect); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio->bi_req = NULL; + bio_endio(bio, uptodate); + if ((bio = req->bio) != NULL) { + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + + req->current_nr_sectors = bio_sectors(bio); + req->hard_cur_sectors = req->current_nr_sectors; + if (req->nr_sectors < req->current_nr_sectors) { + printk("end_request: buffer-list destroyed\n"); + printk("%s: %lu < %u\n", kdevname(req->rq_dev), req->nr_sectors, req->current_nr_sectors); + req->nr_sectors = req->current_nr_sectors; + } + req->buffer = bio_data(bio); + return 1; + } + } + return 0; +} /** * end_that_request_first - end I/O on one buffer. 
+ * &q: queue that finished request * @req: the request being processed * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error * * Description: * Ends I/O on the first buffer attached to @req, and sets it up @@ -1081,45 +1334,21 @@ * blk_finished_io() appropriately. **/ -int end_that_request_first (struct request *req, int uptodate, char *name) +int end_that_request_first(request_queue_t *q, struct request *rq, int uptodate) { - struct buffer_head * bh; - int nsect; + unsigned long flags; + int ret; - req->errors = 0; - if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); + spin_lock_irqsave(&q->queue_lock, flags); + ret = __end_that_request_first(rq, uptodate); + spin_unlock_irqrestore(&q->queue_lock, flags); - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("end_request: buffer-list destroyed\n"); - } - req->buffer = bh->b_data; - return 1; - } - } - return 0; + return ret; } void end_that_request_last(struct request *req) { - if (req->waiting != NULL) - complete(req->waiting); - + complete(req->waiting); blkdev_release_request(req); } @@ -1142,7 +1371,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1152,7 +1380,7 @@ */ queue_nr_requests = 64; if (total_ram > MB(32)) - queue_nr_requests = 128; + queue_nr_requests = 256; /* * Batch frees according to queue length @@ -1160,122 +1388,37 @@ batch_requests = queue_nr_requests >> 3; printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif + blk_max_low_pfn = max_low_pfn; + blk_max_pfn = max_pfn; + #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? 
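A sketch of a driver completion path using the new signatures (illustrative only; it assumes a head-active style driver that left the request on the queue while servicing it, and that q->queue_lock is not already held, since end_that_request_first() now takes it internally):

static void mydev_end_request(request_queue_t *q, struct request *rq, int uptodate)
{
	unsigned long flags;

	/* more bios left on this request? then it is not finished yet */
	if (end_that_request_first(q, rq, uptodate))
		return;

	/* whole request done: take it off the queue and release it */
	spin_lock_irqsave(&q->queue_lock, flags);
	blkdev_dequeue_request(rq);
	end_that_request_last(rq);
	spin_unlock_irqrestore(&q->queue_lock, flags);
}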
*/ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; -EXPORT_SYMBOL(io_request_lock); EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(__end_that_request_first); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(blk_wake_queue); +EXPORT_SYMBOL(blk_attempt_remerge); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_queue_max_sectors); +EXPORT_SYMBOL(blk_queue_max_segments); +EXPORT_SYMBOL(blk_queue_hardsect_size); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/loop.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/block/loop.c Fri Sep 7 09:01:00 2001 @@ -76,7 +76,7 @@ #define MAJOR_NR LOOP_MAJOR static int max_loop = 8; -static struct loop_device *loop_dev; +static struct loop_device *loop_dev, **loop_lookup; static int *loop_sizes; static int *loop_blksizes; static devfs_handle_t devfs_handle; /* For the directory */ @@ -87,10 +87,12 @@ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block) { - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); + if (raw_buf != loop_buf) { + if (cmd == READ) + memcpy(loop_buf, raw_buf, size); + else + memcpy(raw_buf, loop_buf, size); + } return 0; } @@ -118,6 +120,7 @@ static int none_status(struct loop_device *lo, struct loop_info *info) { + lo->lo_flags |= LO_FLAGS_BH_REMAP; return 0; } @@ -165,8 +168,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -179,8 +181,8 @@ index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; size = PAGE_CACHE_SIZE - offset; @@ -252,18 +254,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { 
struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; + desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -299,42 +300,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio->bi_flags & BIO_WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_put_buffer(struct buffer_head *bh) +static void loop_end_io_transfer(struct bio *); +static void loop_put_buffer(struct bio *bio) { - if (bh) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + /* + * check bi_end_io, may just be a remapped bio + */ + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + __bio_put(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -343,65 +348,57 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer(struct bio *bio) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = loop_lookup[MINOR(bio->bi_dev)]; - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (bio->bi_flags & (BIO_UPTODATE | BIO_WRITE)) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + bio_endio(rbh, bio->bi_flags & BIO_UPTODATE); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; - - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; + struct page *page; + struct bio *bio; - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); + /* + * for xfer_funcs that can operate on the same bh, do that + */ + if (lo->lo_flags & LO_FLAGS_BH_REMAP) { + bio = rbh; + goto out_bh; + } - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; - spin_unlock_irq(&lo->lo_lock); - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_NOIO); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -409,66 +406,62 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) + page = alloc_page(GFP_NOIO); + if (page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec.bv_page = page; + bio->bi_io_vec.bv_len = bio_size(rbh); + bio->bi_io_vec.bv_offset = bio_offset(rbh); + + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; + +out_bh: + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_flags |= rbh->bi_flags & BIO_RW_MASK; + spin_lock_irq(&lo->lo_lock); + bio->bi_dev = lo->lo_device; + spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; atomic_inc(&lo->lo_pending); spin_unlock_irq(&lo->lo_lock); - if (rw == WRITE) { + if (rbh->bi_flags & BIO_WRITE) { if (lo->lo_flags & LO_FLAGS_READ_ONLY) goto err; - } else if (rw == READA) { - rw = READ; - } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + } else if (!(rbh->bi_flags & BIO_READ)) { + printk(KERN_ERR "loop: unknown command (%lx)\n", rbh->bi_flags); goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + rbh = blk_queue_bounce(q, 
rbh); /* * file backed, queue for loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -476,16 +469,14 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - bh->b_private = rbh; - IV = loop_get_iv(lo, bh->b_rsector); - if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + IV = loop_get_iv(lo, rbh->bi_sector); + if (rbh->bi_flags & BIO_WRITE) { + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } - generic_make_request(rw, bh); + generic_make_request(bh); return 0; err: @@ -493,14 +484,14 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + bio_io_error(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -508,19 +499,17 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + bio_endio(bio, !ret); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + bio_endio(rbh, !ret); + loop_put_buffer(bio); } } @@ -533,7 +522,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -567,12 +556,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -601,7 +590,7 @@ error = -EBUSY; if (lo->lo_state != Lo_unbound) goto out; - + error = -EBADF; file = fget(arg); if (!file) @@ -621,7 +610,6 @@ * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. 
*/ - error = -EINVAL; if (!aops->readpage) goto out_putf; @@ -650,6 +638,7 @@ figure_loop_size(lo); lo->old_gfp_mask = inode->i_mapping->gfp_mask; inode->i_mapping->gfp_mask = GFP_NOIO; + loop_lookup[MINOR(lo_device)] = lo; bs = 0; if (blksize_size[MAJOR(lo_device)]) @@ -659,7 +648,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -853,7 +842,7 @@ err = -EINVAL; break; } - err = put_user(loop_sizes[lo->lo_number] << 1, (long *) arg); + err = put_user(loop_sizes[lo->lo_number] << 1, (long *)arg); break; case BLKBSZGET: case BLKBSZSET: @@ -988,13 +977,17 @@ if (!loop_dev) return -ENOMEM; + loop_lookup = kmalloc(max_loop*sizeof(struct loop_device *),GFP_KERNEL); + if (!loop_lookup) + goto out_mem; + loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1010,6 +1003,7 @@ memset(loop_sizes, 0, max_loop * sizeof(int)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_lookup, 0, max_loop * sizeof(struct loop_device *)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; for (i = 0; i < max_loop; i++) @@ -1018,9 +1012,9 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: + kfree(loop_lookup); kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; @@ -1033,6 +1027,7 @@ printk(KERN_WARNING "loop: cannot unregister blkdev\n"); kfree(loop_dev); + kfree(loop_lookup); kfree(loop_sizes); kfree(loop_blksizes); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/nbd.c Sat Jun 30 01:15:41 2001 +++ linux/drivers/block/nbd.c Fri Aug 3 12:59:51 2001 @@ -166,14 +166,14 @@ FAIL("Sendmsg failed for control."); if (req->cmd == WRITE) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(1, sock, bh->b_data, bh->b_size, bh->b_reqnext == NULL ? 0 : MSG_MORE); + result = nbd_xmit(1, sock, bio_data(bio), bio_size(bio), bio->bi_next == NULL ? 
0 : MSG_MORE); if (result <= 0) FAIL("Send data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } return; @@ -206,14 +206,14 @@ if (ntohl(reply.error)) FAIL("Other side returned error."); if (req->cmd == READ) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(0, lo->sock, bh->b_data, bh->b_size, MSG_WAITALL); + result = nbd_xmit(0, lo->sock, bio_data(bio), bio_size(bio), MSG_WAITALL); if (result <= 0) HARDFAIL("Recv data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } DEBUG("done.\n"); return req; @@ -251,7 +251,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -286,7 +286,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); @@ -334,22 +334,22 @@ #endif req->errors = 0; blkdev_dequeue_request(req); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down (&lo->queue_lock); - list_add(&req->queue, &lo->queue_head); + list_add(&req->queuelist, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? */ up (&lo->queue_lock); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; error_out: req->errors++; blkdev_dequeue_request(req); - spin_unlock(&io_request_lock); + spin_unlock(&q->queue_lock); nbd_end_request(req); - spin_lock(&io_request_lock); + spin_lock(&q->queue_lock); } return; } @@ -500,7 +500,7 @@ #endif blksize_size[MAJOR_NR] = nbd_blksizes; blk_size[MAJOR_NR] = nbd_sizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request, "nbd"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_NBD; i++) { nbd_dev[i].refcnt = 0; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/paride/pd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/block/paride/pd.c Fri Aug 3 12:04:42 2001 @@ -329,7 +329,6 @@ static int pd_cmd; /* current command READ/WRITE */ static int pd_unit; /* unit of current request */ static int pd_dev; /* minor of current request */ -static int pd_poffs; /* partition offset of current minor */ static char * pd_buf; /* buffer for request in progress */ static DECLARE_WAIT_QUEUE_HEAD(pd_wait_open); @@ -455,8 +454,7 @@ pd_gendisk.major = major; pd_gendisk.major_name = name; - pd_gendisk.next = gendisk_head; - gendisk_head = &pd_gendisk; + add_gendisk(&pd_gendisk); for(i=0;ii_rdev)) return -EINVAL; - dev = MINOR(inode->i_rdev); + if (!inode || !inode->i_rdev) + return -EINVAL; unit = DEVICE_NR(inode->i_rdev); - if (dev >= PD_DEVS) return -EINVAL; - if (!PD.present) return -ENODEV; + if (!PD.present) + return -ENODEV; - switch (cmd) { + switch (cmd) { case CDROMEJECT: if (PD.access == 1) pd_eject(unit); return 0; - case HDIO_GETGEO: - if (!geo) return -EINVAL; - err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); - if (err) return err; + case HDIO_GETGEO: + if (!geo) return -EINVAL; + err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); + if (err) return err; if (PD.alt_geom) { - put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), + put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), (short *) &geo->cylinders); - put_user(PD_LOG_HEADS, (char *) &geo->heads); - put_user(PD_LOG_SECTS, (char *) &geo->sectors); + put_user(PD_LOG_HEADS, 
(char *) &geo->heads); + put_user(PD_LOG_SECTS, (char *) &geo->sectors); } else { - put_user(PD.cylinders, (short *) &geo->cylinders); - put_user(PD.heads, (char *) &geo->heads); - put_user(PD.sectors, (char *) &geo->sectors); + put_user(PD.cylinders, (short *) &geo->cylinders); + put_user(PD.heads, (char *) &geo->heads); + put_user(PD.sectors, (char *) &geo->sectors); } - put_user(pd_hd[dev].start_sect,(long *)&geo->start); - return 0; - case BLKGETSIZE: - if (!arg) return -EINVAL; - err = verify_area(VERIFY_WRITE,(long *) arg,sizeof(long)); - if (err) return (err); - put_user(pd_hd[dev].nr_sects,(long *) arg); - return (0); - case BLKRRPART: + put_user(get_start_sect(inode->i_rdev), (long *)&geo->start); + return 0; + case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return pd_revalidate(inode->i_rdev); + return pd_revalidate(inode->i_rdev); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -546,9 +539,9 @@ case BLKFLSBUF: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); - default: - return -EINVAL; - } + default: + return -EINVAL; + } } static int pd_release (struct inode *inode, struct file *file) @@ -586,36 +579,32 @@ } static int pd_revalidate(kdev_t dev) +{ + int unit, res; + long flags; -{ int p, unit, minor; - long flags; - - unit = DEVICE_NR(dev); - if ((unit >= PD_UNITS) || (!PD.present)) return -ENODEV; - - save_flags(flags); - cli(); - if (PD.access > 1) { - restore_flags(flags); - return -EBUSY; - } - pd_valid = 0; - restore_flags(flags); + unit = DEVICE_NR(dev); + if ((unit >= PD_UNITS) || !PD.present) + return -ENODEV; - for (p=(PD_PARTNS-1);p>=0;p--) { - minor = p + unit*PD_PARTNS; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - pd_hd[minor].start_sect = 0; - pd_hd[minor].nr_sects = 0; - } + save_flags(flags); + cli(); + if (PD.access > 1) { + restore_flags(flags); + return -EBUSY; + } + pd_valid = 0; + restore_flags(flags); - if (pd_identify(unit)) - grok_partitions(&pd_gendisk,unit,1<next)) - if (*gdp == &pd_gendisk) break; - if (*gdp) *gdp = (*gdp)->next; + devfs_unregister_blkdev(MAJOR_NR, name); + del_gendisk(&pd_gendisk); - for (unit=0;unitcmd; - pd_poffs = pd_hd[pd_dev].start_sect; - pd_block += pd_poffs; pd_buf = CURRENT->buffer; pd_retries = 0; @@ -963,7 +947,7 @@ (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || - (CURRENT->sector+pd_poffs != pd_block)) + (CURRENT->sector != pd_block)) printk("%s: OUCH: request list changed unexpectedly\n", PD.name); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/paride/pf.c Sun Feb 4 19:05:29 2001 +++ linux/drivers/block/paride/pf.c Fri Aug 3 12:04:42 2001 @@ -463,7 +463,7 @@ if (PF.access == 1) { pf_eject(unit); return 0; - } + } case HDIO_GETGEO: if (!geo) return -EINVAL; err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); @@ -483,10 +483,7 @@ return 0; case BLKGETSIZE: if (!arg) return -EINVAL; - err = verify_area(VERIFY_WRITE,(long *) arg,sizeof(long)); - if (err) return (err); - put_user(PF.capacity,(long *) arg); - return (0); + return put_user(PF.capacity,(long *) arg); case BLKROSET: case BLKROGET: case BLKRASET: diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/ps2esdi.c linux/drivers/block/ps2esdi.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/ps2esdi.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/block/ps2esdi.c Wed Sep 5 11:13:21 2001 @@ -65,6 +65,7 @@ 
#define TYPE_0_CMD_BLK_LENGTH 2 #define TYPE_1_CMD_BLK_LENGTH 4 +#define PS2ESDI_LOCK (&((BLK_DEFAULT_QUEUE(MAJOR_NR))->queue_lock)) static void reset_ctrl(void); @@ -117,7 +118,6 @@ static char ps2esdi_valid[MAX_HD]; static int ps2esdi_sizes[MAX_HD << 6]; static int ps2esdi_blocksizes[MAX_HD << 6]; -static int ps2esdi_maxsect[MAX_HD << 6]; static int ps2esdi_drives; static struct hd_struct ps2esdi[MAX_HD << 6]; static u_short io_base; @@ -183,9 +183,9 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ - /* some minor housekeeping - setup the global gendisk structure */ - ps2esdi_gendisk.next = gendisk_head; - gendisk_head = &ps2esdi_gendisk; + /* setup the global gendisk structure */ + add_gendisk(&ps2esdi_gendisk); + ps2esdi_geninit(); return 0; } /* ps2esdi_init */ @@ -221,18 +221,18 @@ } void -cleanup_module(void) -{ - if(ps2esdi_slot) - { +cleanup_module(void) { + if(ps2esdi_slot) { mca_mark_as_unused(ps2esdi_slot); mca_set_adapter_procfn(ps2esdi_slot, NULL, NULL); } release_region(io_base, 4); free_dma(dma_arb_level); - free_irq(PS2ESDI_IRQ, NULL) + free_irq(PS2ESDI_IRQ, NULL); devfs_unregister_blkdev(MAJOR_NR, "ed"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + del_gendisk(&ps2esdi_gendisk); + blk_clear(MAJOR_NR); } #endif /* MODULE */ @@ -415,16 +415,13 @@ ps2esdi_gendisk.nr_real = ps2esdi_drives; - /* 128 was old default, maybe maxsect=255 is ok too? - Paul G. */ - for (i = 0; i < (MAX_HD << 6); i++) { - ps2esdi_maxsect[i] = 128; + for (i = 0; i < (MAX_HD << 6); i++) ps2esdi_blocksizes[i] = 1024; - } request_dma(dma_arb_level, "ed"); request_region(io_base, 4, "ed"); blksize_size[MAJOR_NR] = ps2esdi_blocksizes; - max_sectors[MAJOR_NR] = ps2esdi_maxsect; + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 128); for (i = 0; i < ps2esdi_drives; i++) { register_disk(&ps2esdi_gendisk,MKDEV(MAJOR_NR,i<<6),1<<6, @@ -482,7 +479,7 @@ if (virt_to_bus(CURRENT->buffer + CURRENT->current_nr_sectors * 512) > 16 * MB) { printk("%s: DMA above 16MB not supported\n", DEVICE_NAME); - end_request(FAIL); + __end_request(CURRENT, FAIL); } /* check for above 16Mb dmas */ else if ((CURRENT_DEV < ps2esdi_drives) && (CURRENT->sector + CURRENT->current_nr_sectors <= @@ -495,13 +492,9 @@ CURRENT->current_nr_sectors); #endif - - block = CURRENT->sector + ps2esdi[MINOR(CURRENT->rq_dev)].start_sect; - -#if 0 - printk("%s: blocknumber : %d\n", DEVICE_NAME, block); -#endif + block = CURRENT->sector; count = CURRENT->current_nr_sectors; + switch (CURRENT->cmd) { case READ: ps2esdi_readwrite(READ, CURRENT_DEV, block, count); @@ -511,7 +504,7 @@ break; default: printk("%s: Unknown command\n", DEVICE_NAME); - end_request(FAIL); + __end_request(CURRENT, FAIL); break; } /* handle different commands */ } @@ -519,7 +512,7 @@ else { printk("Grrr. error. 
ps2esdi_drives: %d, %lu %lu\n", ps2esdi_drives, CURRENT->sector, ps2esdi[MINOR(CURRENT->rq_dev)].nr_sects); - end_request(FAIL); + __end_request(CURRENT, FAIL); } } /* main strategy routine */ @@ -584,7 +577,7 @@ if (ps2esdi_out_cmd_blk(cmd_blk)) { printk("%s: Controller failed\n", DEVICE_NAME); if ((++CURRENT->errors) >= MAX_RETRIES) - end_request(FAIL); + __end_request(CURRENT, FAIL); } /* check for failure to put out the command block */ else { @@ -958,10 +951,10 @@ break; } if(ending != -1) { - spin_lock_irqsave(&io_request_lock, flags); - end_request(ending); + spin_lock_irqsave(PS2ESDI_LOCK, flags); + __end_request(CURRENT, ending); do_ps2esdi_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(PS2ESDI_LOCK, flags); } } /* handle interrupts */ @@ -1100,20 +1093,10 @@ put_user(ps2esdi_info[dev].head, (char *) &geometry->heads); put_user(ps2esdi_info[dev].sect, (char *) &geometry->sectors); put_user(ps2esdi_info[dev].cyl, (short *) &geometry->cylinders); - put_user(ps2esdi[MINOR(inode->i_rdev)].start_sect, + put_user(get_start_sect(inode->i_rdev), (long *) &geometry->start); - return (0); - } - break; - - case BLKGETSIZE: - if (arg) { - if ((err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)))) - return (err); - put_user(ps2esdi[MINOR(inode->i_rdev)].nr_sects, (long *) arg); - - return (0); + return 0; } break; @@ -1122,6 +1105,7 @@ return -EACCES; return (ps2esdi_reread_partitions(inode->i_rdev)); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -1140,8 +1124,7 @@ static int ps2esdi_reread_partitions(kdev_t dev) { int target = DEVICE_NR(dev); - int start = target << ps2esdi_gendisk.minor_shift; - int partition; + int res; cli(); ps2esdi_valid[target] = (access_count[target] != 1); @@ -1149,21 +1132,16 @@ if (ps2esdi_valid[target]) return (-EBUSY); - for (partition = ps2esdi_gendisk.max_p - 1; - partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - ps2esdi_gendisk.part[minor].start_sect = 0; - ps2esdi_gendisk.part[minor].nr_sects = 0; - } - - grok_partitions(&ps2esdi_gendisk, target, 1<<6, - ps2esdi_info[target].head * ps2esdi_info[target].cyl * ps2esdi_info[target].sect); - + res = wipe_partitions(dev); + if (res == 0) + grok_partitions(dev, ps2esdi_info[target].head + * ps2esdi_info[target].cyl + * ps2esdi_info[target].sect); + ps2esdi_valid[target] = 1; wake_up(&ps2esdi_wait_open); - return (0); + return (res); } static void ps2esdi_reset_timer(unsigned long unused) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/rd.c linux/drivers/block/rd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/rd.c Mon Jul 16 01:15:44 2001 +++ linux/drivers/block/rd.c Fri Aug 3 13:00:07 2001 @@ -98,7 +98,7 @@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ -static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ static devfs_handle_t devfs_handle; static struct inode *rd_inode[NUM_RAMDISKS]; /* Protected device inodes */ @@ -194,22 +194,21 @@ * 19-JAN-1998 Richard Gooch Added devfs support * */ -static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +static int rd_make_request(request_queue_t * q, struct bio *sbh) { unsigned int 
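
The pd and ps2esdi revalidate hunks above (and the xd and hd ones later in this patch) all converge on the same two-call sequence instead of the old open-coded loop over minors. A minimal sketch, with my_capacity() as a placeholder for the driver's own size calculation:

	static int my_revalidate(kdev_t dev)
	{
		int res;

		/* invalidate and zero every partition of this unit */
		res = wipe_partitions(dev);
		if (res == 0)
			/* re-read the partition table against the unit capacity */
			grok_partitions(dev, my_capacity(dev));

		return res;
	}
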
minor; unsigned long offset, len; struct buffer_head *rbh; char *bdata; + int rw = bio_rw(sbh); - - minor = MINOR(sbh->b_rdev); + minor = MINOR(sbh->bi_dev); if (minor >= NUM_RAMDISKS) goto fail; - - offset = sbh->b_rsector << 9; - len = sbh->b_size; + offset = sbh->bi_sector << 9; + len = bio_size(sbh); if ((offset + len) > rd_length[minor]) goto fail; @@ -221,25 +220,26 @@ goto fail; } - rbh = getblk(sbh->b_rdev, sbh->b_rsector/(sbh->b_size>>9), sbh->b_size); + rbh = getblk(sbh->bi_dev,sbh->bi_sector/bio_sectors(sbh),bio_size(sbh)); /* I think that it is safe to assume that rbh is not in HighMem, though * sbh might be - NeilBrown */ - bdata = bh_kmap(sbh); - if (rw == READ) { - if (sbh != rbh) - memcpy(bdata, rbh->b_data, rbh->b_size); - } else - if (sbh != rbh) - memcpy(rbh->b_data, bdata, rbh->b_size); - bh_kunmap(sbh); + bdata = kmap(bio_page(sbh)); + + if (rw == READ) + memcpy(bdata, rbh->b_data, rbh->b_size); + else + memcpy(rbh->b_data, bdata, rbh->b_size); + + kunmap(bio_page(sbh)); + mark_buffer_protected(rbh); brelse(rbh); - sbh->b_end_io(sbh,1); + bio_endio(sbh, 1); return 0; fail: - sbh->b_end_io(sbh,0); + bio_io_error(sbh); return 0; } @@ -370,7 +370,8 @@ for (i = 0 ; i < NUM_RAMDISKS; i++) { if (rd_inode[i]) { - /* withdraw invalidate_buffers() and prune_icache() immunity */ + /* withdraw invalidate_buffers() and prune_icache() + immunity */ atomic_dec(&rd_inode[i]->i_bdev->bd_openers); /* remove stale pointer to module address space */ rd_inode[i]->i_bdev->bd_op = NULL; @@ -381,9 +382,7 @@ devfs_unregister (devfs_handle); unregister_blkdev( MAJOR_NR, "ramdisk" ); - hardsect_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); } #endif @@ -428,7 +427,6 @@ register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &fd_fops, rd_size<<1); #endif - hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/block/xd.c linux/drivers/block/xd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/block/xd.c Fri May 25 00:14:08 2001 +++ linux/drivers/block/xd.c Fri Aug 3 12:04:42 2001 @@ -173,8 +173,7 @@ devfs_handle = devfs_mk_dir (NULL, xd_gendisk.major_name, NULL); blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ - xd_gendisk.next = gendisk_head; - gendisk_head = &xd_gendisk; + add_gendisk(&xd_gendisk); xd_geninit(); return 0; @@ -258,7 +257,6 @@ } xd_gendisk.nr_real = xd_drives; - } /* xd_open: open a device */ @@ -296,7 +294,7 @@ if (CURRENT_DEV < xd_drives && CURRENT->sector + CURRENT->nr_sectors <= xd_struct[MINOR(CURRENT->rq_dev)].nr_sects) { - block = CURRENT->sector + xd_struct[MINOR(CURRENT->rq_dev)].start_sect; + block = CURRENT->sector; count = CURRENT->nr_sectors; switch (CURRENT->cmd) { @@ -333,18 +331,16 @@ g.heads = xd_info[dev].heads; g.sectors = xd_info[dev].sectors; g.cylinders = xd_info[dev].cylinders; - g.start = xd_struct[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(geometry, &g, sizeof g) ? 
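
The ramdisk hunk above converts rd_make_request() to take a struct bio directly and to finish it with bio_endio()/bio_io_error(). A simplified sketch of such a make_request function, not part of the patch; MY_DEV_BYTES and my_dev_mem are placeholders for a driver-private memory area:

	static int my_make_request(request_queue_t *q, struct bio *bio)
	{
		unsigned long offset = bio->bi_sector << 9;	/* byte offset on the device */
		unsigned long len = bio_size(bio);
		char *data;

		if (offset + len > MY_DEV_BYTES) {
			bio_io_error(bio);		/* out of range: fail the bio */
			return 0;
		}

		/* the data page may live in highmem, so map it around the copy */
		data = kmap(bio_page(bio)) + bio_offset(bio);
		if (bio_rw(bio) == READ)
			memcpy(data, my_dev_mem + offset, len);
		else
			memcpy(my_dev_mem + offset, data, len);
		kunmap(bio_page(bio));

		bio_endio(bio, 1);			/* completed, uptodate */
		return 0;			/* 0 == handled, do not queue a request */
	}
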
-EFAULT : 0; } - case BLKGETSIZE: - if (!arg) return -EINVAL; - return put_user(xd_struct[MINOR(inode->i_rdev)].nr_sects,(long *) arg); case HDIO_SET_DMA: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (xdc_busy) return -EBUSY; nodma = !arg; if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); + xd_dma_mem_free((unsigned long)xd_dma_buffer, + xd_maxsectors * 0x200); xd_dma_buffer = 0; } return 0; @@ -357,6 +353,7 @@ return -EACCES; return xd_reread_partitions(inode->i_rdev); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -387,11 +384,9 @@ static int xd_reread_partitions(kdev_t dev) { int target; - int start; - int partition; + int res; target = DEVICE_NR(dev); - start = target << xd_gendisk.minor_shift; cli(); xd_valid[target] = (xd_access[target] != 1); @@ -399,20 +394,16 @@ if (xd_valid[target]) return -EBUSY; - for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - xd_gendisk.part[minor].start_sect = 0; - xd_gendisk.part[minor].nr_sects = 0; - }; - - grok_partitions(&xd_gendisk, target, 1<<6, - xd_info[target].heads * xd_info[target].cylinders * xd_info[target].sectors); + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, xd_info[target].heads + * xd_info[target].cylinders + * xd_info[target].sectors); xd_valid[target] = 1; wake_up(&xd_wait_open); - return 0; + return res; } /* xd_readwrite: handle a read/write request */ @@ -1112,18 +1103,9 @@ static void xd_done (void) { - struct gendisk ** gdp; - - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - blk_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - read_ahead[MAJOR_NR] = 0; - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &xd_gendisk) - break; - if (*gdp) - *gdp = (*gdp)->next; + del_gendisk(&xd_gendisk); + blk_clear(MAJOR_NR); release_region(xd_iobase,4); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/cdrom/cdu31a.c linux/drivers/cdrom/cdu31a.c --- /opt/kernel/linux-2.4.10-pre4/drivers/cdrom/cdu31a.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/cdrom/cdu31a.c Fri Sep 7 08:52:30 2001 @@ -1675,7 +1675,7 @@ restore_flags(flags); if (!QUEUE_EMPTY && CURRENT->rq_status != RQ_INACTIVE) { - end_request(0); + __end_request(CURRENT, 0); } restore_flags(flags); #if DEBUG @@ -1694,7 +1694,10 @@ /* Make sure we have a valid TOC. */ sony_get_toc(); - spin_unlock_irq(&io_request_lock); + /* yes lets release the lock and then muck with the queue etc. I won't + * bother auditing this driver, it's decrepit and full of races anyway. + * /jens */ + spin_unlock_irq(&q->queue_lock); /* Make sure the timer is cancelled. */ del_timer(&cdu31a_abort_timer); @@ -1853,7 +1856,7 @@ } end_do_cdu31a_request: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); #if 0 /* After finished, cancel any pending operations. 
*/ abort_read(); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/cdrom/cm206.c linux/drivers/cdrom/cm206.c --- /opt/kernel/linux-2.4.10-pre4/drivers/cdrom/cm206.c Fri Feb 9 20:30:22 2001 +++ linux/drivers/cdrom/cm206.c Fri Aug 3 12:04:42 2001 @@ -823,10 +823,10 @@ return; if (CURRENT->cmd != READ) { debug(("Non-read command %d on cdrom\n", CURRENT->cmd)); - end_request(0); + __end_request(CURRENT, 0); continue; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); error=0; for (i=0; inr_sectors; i++) { int e1, e2; @@ -849,8 +849,8 @@ debug(("cm206_request: %d %d\n", e1, e2)); } } - spin_lock_irq(&io_request_lock); - end_request(!error); + spin_lock_irq(&q->queue_lock); + __end_request(CURRENT, !error); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/cdrom/sbpcd.c linux/drivers/cdrom/sbpcd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/cdrom/sbpcd.c Tue Jul 17 00:13:32 2001 +++ linux/drivers/cdrom/sbpcd.c Fri Aug 3 12:04:42 2001 @@ -4882,7 +4882,7 @@ #undef DEBUG_GTL static inline void sbpcd_end_request(struct request *req, int uptodate) { list_add(&req->queue, &req->q->queue_head); - end_request(uptodate); + __end_request(req, uptodate); } /*==========================================================================*/ /* @@ -4924,7 +4924,7 @@ sbpcd_end_request(req, 0); if (req -> sector == -1) sbpcd_end_request(req, 0); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down(&ioctl_read_sem); if (req->cmd != READ) @@ -4964,7 +4964,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5005,7 +5005,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5021,7 +5021,7 @@ #endif up(&ioctl_read_sem); sbp_sleep(0); /* wait a bit, try again */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 0); goto request_loop; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/char/raw.c linux/drivers/char/raw.c --- /opt/kernel/linux-2.4.10-pre4/drivers/char/raw.c Thu Jun 28 02:10:55 2001 +++ linux/drivers/char/raw.c Fri Aug 3 12:04:42 2001 @@ -134,10 +134,8 @@ if (is_mounted(rdev)) { if (blksize_size[MAJOR(rdev)]) sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)]; - } else { - if (hardsect_size[MAJOR(rdev)]) - sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)]; - } + } else + sector_size = get_hardsect_size(rdev); set_blocksize(rdev, sector_size); raw_devices[minor].sector_size = sector_size; @@ -282,16 +280,14 @@ struct kiobuf * iobuf; int new_iobuf; int err = 0; - unsigned long blocknr, blocks; + unsigned long blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; - int sector_size, sector_bits, sector_mask; - int max_sectors; + sector_t blocknr; /* * First, a few checks on device size limits @@ -316,7 +312,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -334,18 +329,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the 
IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single kiobuf as we go to perform each chunk of - * IO. - */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -357,10 +344,7 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); @@ -370,6 +354,8 @@ size -= err; buf += err; } + + blocknr += blocks; unmap_kiobuf(iobuf); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/i2o/i2o_block.c linux/drivers/i2o/i2o_block.c --- /opt/kernel/linux-2.4.10-pre4/drivers/i2o/i2o_block.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/i2o/i2o_block.c Wed Sep 5 13:19:43 2001 @@ -112,15 +112,16 @@ #define I2O_BSA_DSC_VOLUME_CHANGED 0x000D #define I2O_BSA_DSC_TIMEOUT 0x000E +#define I2O_UNIT(dev) (i2ob_dev[MINOR((dev)) & 0xf0]) +#define I2O_LOCK(unit) (i2ob_dev[(unit)].req_queue->queue_lock) + /* * Some of these can be made smaller later */ static int i2ob_blksizes[MAX_I2OB<<4]; -static int i2ob_hardsizes[MAX_I2OB<<4]; static int i2ob_sizes[MAX_I2OB<<4]; static int i2ob_media_change_flag[MAX_I2OB]; -static u32 i2ob_max_sectors[MAX_I2OB<<4]; static int i2ob_context; @@ -250,9 +251,9 @@ unsigned long mptr; u64 offset; struct request *req = ireq->req; - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; int count = req->nr_sectors<<9; - char *last = NULL; + unsigned long last = ~0UL; unsigned short size = 0; // printk(KERN_INFO "i2ob_send called\n"); @@ -281,30 +282,30 @@ if(req->cmd == READ) { __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x10000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x10000000|bio_size(bio), mptr); else - __raw_writel(0xD0000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD0000000|bio_size(bio), mptr); + __raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } /* * Heuristic for now since the block layer doesnt give @@ -320,30 +321,30 @@ else if(req->cmd == WRITE) { __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x14000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x14000000|bio_size(bio), mptr); else - __raw_writel(0xD4000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD4000000|bio_size(bio), mptr); + 
__raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } if(c->battery) @@ -407,7 +408,8 @@ * unlocked. */ - while (end_that_request_first( req, !req->errors, "i2o block" )); + while (__end_that_request_first( req, !req->errors)) + ; /* * It is now ok to complete the request. @@ -415,61 +417,6 @@ end_that_request_last( req ); } -/* - * Request merging functions - */ -static inline int i2ob_new_segment(request_queue_t *q, struct request *req, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; -} - -static int i2ob_back_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_front_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (bh->b_data + bh->b_size == req->bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_merge_requests(request_queue_t *q, - struct request *req, - struct request *next, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - int total_segments = req->nr_segments + next->nr_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) - total_segments--; - - if (total_segments > max_segments) - return 0; - - req->nr_segments = total_segments; - return 1; -} - static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit) { unsigned long msg; @@ -527,10 +474,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* Now flush the message by making it a NOP */ m[0]&=0x00FFFFFF; @@ -551,12 +498,12 @@ if(msg->function == I2O_CMD_BLOCK_CFLUSH) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); dev->constipated=0; DEBUG(("unconstipated\n")); if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -572,10 +519,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n"); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -621,7 +568,7 @@ */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); if(err==4) { /* @@ -666,7 +613,7 @@ */ i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* 
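
The i2ob_send() hunks above (and the IDE DMA code later in this patch) share one idiom: physically contiguous bios are merged into a single scatter entry by comparing bio_to_phys() of the current bio with the end of the previous one. A standalone sketch of that idiom; emit() is a hypothetical callback that records one finished segment:

	static int count_segments(struct request *req,
				  void (*emit)(unsigned long phys, unsigned int len))
	{
		struct bio *bio;
		unsigned long last_end = ~0UL;	/* impossible address: forces a new segment */
		unsigned long seg_phys = 0;
		unsigned int seg_len = 0;
		int nsegs = 0;

		for (bio = req->bio; bio; bio = bio->bi_next) {
			if (bio_to_phys(bio) == last_end) {
				seg_len += bio_size(bio);	/* extends the previous segment */
			} else {
				if (seg_len)
					emit(seg_phys, seg_len);	/* flush previous segment */
				seg_phys = bio_to_phys(bio);
				seg_len = bio_size(bio);
				nsegs++;
			}
			last_end = bio_to_phys(bio) + bio_size(bio);
		}
		if (seg_len)
			emit(seg_phys, seg_len);		/* flush the last segment */

		return nsegs;
	}
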
* and out @@ -674,7 +621,7 @@ return; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, bsa_errors[m[4]&0XFFFF]); if(m[4]&0x00FF0000) @@ -689,8 +636,8 @@ * Dequeue the request. We use irqsave locks as one day we * may be running polled controllers from a BH... */ - - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); atomic_dec(&i2ob_queues[c->unit]->queue_depth); @@ -702,7 +649,7 @@ if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); } /* @@ -781,8 +728,7 @@ for(i = unit; i <= unit+15; i++) { i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } @@ -816,11 +762,11 @@ if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 ) i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(unit), flags); i2ob_sizes[unit] = (int)(size>>10); i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(unit), flags); break; } @@ -873,13 +819,14 @@ static void i2ob_timer_handler(unsigned long q) { + request_queue_t *req_queue = (request_queue_t *) q; unsigned long flags; /* * We cannot touch the request queue or the timer - * flag without holding the io_request_lock. + * flag without holding the queue_lock */ - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&req_queue->queue_lock,flags); /* * Clear the timer started flag so that @@ -890,12 +837,12 @@ /* * Restart any requests. */ - i2ob_request((request_queue_t*)q); + i2ob_request(req_queue); /* * Free the lock. */ - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&req_queue->queue_lock,flags); } static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev) @@ -1124,32 +1071,23 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct i2ob_device *dev; - int minor; - /* Anyone capable of this syscall can do *real bad* things */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!inode) + if (!inode || !inode->i_rdev) return -EINVAL; - minor = MINOR(inode->i_rdev); - if (minor >= (MAX_I2OB<<4)) - return -ENODEV; - dev = &i2ob_dev[minor]; switch (cmd) { - case BLKGETSIZE: - return put_user(i2ob[minor].nr_sects, (long *) arg); - case HDIO_GETGEO: { struct hd_geometry g; - int u=minor&0xF0; + int u = MINOR(inode->i_rdev) & 0xF0; i2o_block_biosparam(i2ob_sizes[u]<<1, &g.cylinders, &g.heads, &g.sectors); - g.start = i2ob[minor].start_sect; - return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0; + g.start = get_start_sect(inode->i_rdev); + return copy_to_user((void *)arg, &g, sizeof(g)) + ? 
-EFAULT : 0; } case BLKRRPART: @@ -1157,6 +1095,7 @@ return -EACCES; return do_i2ob_revalidate(inode->i_rdev,1); + case BLKGETSIZE: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1343,8 +1282,6 @@ i2ob_query_device(dev, 0x0000, 5, &flags, 4); i2ob_query_device(dev, 0x0000, 6, &status, 4); i2ob_sizes[unit] = (int)(size>>10); - for(i=unit; i <= unit+15 ; i++) - i2ob_hardsizes[i] = blocksize; i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); @@ -1358,23 +1295,25 @@ for(i=unit;i<=unit+15;i++) { + request_queue_t *q = i2ob_dev[unit].req_queue; + if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy) { - i2ob_max_sectors[i] = 32; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 32); + blk_queue_max_sectors(q, 8); i2ob_dev[i].depth = 4; } else if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req) { - i2ob_max_sectors[i] = 8; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 8); + blk_queue_max_segments(q, 8); } else { /* MAX_SECTORS was used but 255 is a dumb number for striped RAID */ - i2ob_max_sectors[i]=256; - i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2; + blk_queue_max_sectors(q, 256); + blk_queue_max_segments(q, (d->controller->status_block->inbound_frame_size - 8)/2); } } @@ -1419,7 +1358,7 @@ } printk(".\n"); printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", - d->dev_name, i2ob_max_sectors[unit]); + d->dev_name, i2ob_dev[unit].req_queue->max_sectors); /* * If this is the first I2O block device found on this IOP, @@ -1439,7 +1378,7 @@ */ dev->req_queue = &i2ob_queues[c->unit]->req_queue; - grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9)); + grok_partitions(MKDEV(MAJOR_NR, unit), (long)(size>>9)); /* * Register for the events we're interested in and that the @@ -1457,6 +1396,7 @@ */ static int i2ob_init_iop(unsigned int unit) { + char name[16]; int i; i2ob_queues[unit] = (struct i2ob_iop_queue*) @@ -1480,11 +1420,9 @@ i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0]; atomic_set(&i2ob_queues[unit]->queue_depth, 0); - blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request); + sprintf(name, "i2o%d", unit); + blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request, name); blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0); - i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge; - i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge; - i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests; i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit]; return 0; @@ -1495,11 +1433,11 @@ */ static request_queue_t* i2ob_get_queue(kdev_t dev) { - int unit = MINOR(dev)&0xF0; - - return i2ob_dev[unit].req_queue; + return I2O_UNIT(dev).req_queue; } + + /* * Probe the I2O subsytem for block class devices */ @@ -1697,7 +1635,7 @@ int i = 0; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); /* * Need to do this...we somtimes get two events from the IRTOS @@ -1719,7 +1657,7 @@ if(unit >= MAX_I2OB<<4) { printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n"); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -1732,12 +1670,11 @@ { i2ob_dev[i].i2odev = NULL; i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } - 
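
With the driver-private back_merge/front_merge/merge_requests hooks and the global max_sectors[] array gone, the i2o_block hunks above express their limits per queue. A sketch of the resulting queue setup for a hypothetical driver (struct my_dev, my_request_fn and the limit values are illustrative only):

	static void my_setup_queue(struct my_dev *dev)
	{
		request_queue_t *q = &dev->req_queue;

		/* queues are now named, so messages can identify them */
		blk_init_queue(q, my_request_fn, "mydev");
		blk_queue_headactive(q, 0);

		/* per-queue limits replace max_sectors[] and the merge_fn hooks */
		blk_queue_max_sectors(q, 128);
		blk_queue_max_segments(q, 32);
	}
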
spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * Sync the device...this will force all outstanding I/Os @@ -1901,12 +1838,10 @@ */ blksize_size[MAJOR_NR] = i2ob_blksizes; - hardsect_size[MAJOR_NR] = i2ob_hardsizes; blk_size[MAJOR_NR] = i2ob_sizes; - max_sectors[MAJOR_NR] = i2ob_max_sectors; blk_dev[MAJOR_NR].queue = i2ob_get_queue; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request, "i2o"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_I2OB << 4; i++) { @@ -1919,7 +1854,6 @@ i2ob_dev[i].tail = NULL; i2ob_dev[i].depth = MAX_I2OB_DEPTH; i2ob_blksizes[i] = 1024; - i2ob_max_sectors[i] = 2; } /* @@ -1975,9 +1909,8 @@ /* * Adding i2ob_gendisk into the gendisk list. - */ - i2ob_gendisk.next = gendisk_head; - gendisk_head = &i2ob_gendisk; + */ + add_gendisk(&i2ob_gendisk); return 0; } @@ -1990,7 +1923,6 @@ void cleanup_module(void) { - struct gendisk *gdp; int i; if(evt_running) { @@ -2047,20 +1979,6 @@ */ blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - /* - * Why isnt register/unregister gendisk in the kernel ??? - */ - - if (gendisk_head == &i2ob_gendisk) { - gendisk_head = i2ob_gendisk.next; - } - else { - for (gdp = gendisk_head; gdp; gdp = gdp->next) - if (gdp->next == &i2ob_gendisk) - { - gdp->next = i2ob_gendisk.next; - break; - } - } + del_gendisk(&i2ob_gendisk); } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/i2o/i2o_core.c linux/drivers/i2o/i2o_core.c --- /opt/kernel/linux-2.4.10-pre4/drivers/i2o/i2o_core.c Thu Aug 16 18:50:24 2001 +++ linux/drivers/i2o/i2o_core.c Wed Sep 5 11:13:21 2001 @@ -125,6 +125,7 @@ * Function table to send to bus specific layers * See for explanation of this */ +#ifdef CONFIG_I2O_PCI_MODULE static struct i2o_core_func_table i2o_core_functions = { i2o_install_controller, @@ -135,7 +136,6 @@ i2o_delete_controller }; -#ifdef CONFIG_I2O_PCI_MODULE extern int i2o_pci_core_attach(struct i2o_core_func_table *); extern void i2o_pci_core_detach(void); #endif /* CONFIG_I2O_PCI_MODULE */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/aec62xx.c linux/drivers/ide/aec62xx.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/aec62xx.c Tue Jun 20 16:52:36 2000 +++ linux/drivers/ide/aec62xx.c Fri Aug 3 12:04:42 2001 @@ -557,6 +557,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) hwif->dmaproc = &aec62xx_dmaproc; + hwif->highmem = 1; #else /* !CONFIG_BLK_DEV_IDEDMA */ hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/cmd64x.c linux/drivers/ide/cmd64x.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/cmd64x.c Fri Jul 28 01:40:57 2000 +++ linux/drivers/ide/cmd64x.c Fri Aug 3 12:04:42 2001 @@ -795,5 +795,7 @@ default: break; } + + hwif->highmem = 1; #endif /* CONFIG_BLK_DEV_IDEDMA */ } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/cs5530.c linux/drivers/ide/cs5530.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/cs5530.c Wed Jan 3 01:58:45 2001 +++ linux/drivers/ide/cs5530.c Fri Aug 3 12:04:42 2001 @@ -352,9 +352,10 @@ unsigned int basereg, d0_timings; #ifdef CONFIG_BLK_DEV_IDEDMA - hwif->dmaproc = &cs5530_dmaproc; + hwif->dmaproc = &cs5530_dmaproc; + hwif->highmem = 1; #else - hwif->autodma = 0; + hwif->autodma = 0; #endif /* CONFIG_BLK_DEV_IDEDMA */ hwif->tuneproc = &cs5530_tuneproc; diff -urN --exclude-from 
/home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/cy82c693.c linux/drivers/ide/cy82c693.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/cy82c693.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/cy82c693.c Fri Aug 3 12:04:42 2001 @@ -441,6 +441,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &cy82c693_dmaproc; if (!noautodma) hwif->autodma = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/hd.c linux/drivers/ide/hd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/hd.c Sat Apr 28 20:27:53 2001 +++ linux/drivers/ide/hd.c Fri Aug 3 12:04:42 2001 @@ -107,7 +107,6 @@ static int hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -560,19 +559,18 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (dev >= (NR_HD<<6) || block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { -#ifdef DEBUG - if (dev >= (NR_HD<<6)) + if (dev >= (NR_HD<<6) || (dev & 0x3f) || + block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { + if (dev >= (NR_HD<<6) || (dev & 0x3f)) printk("hd: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else printk("hd%c: bad access: block=%d, count=%d\n", (MINOR(CURRENT->rq_dev)>>6)+'a', block, nsect); -#endif end_request(0); goto repeat; } - block += hd[dev].start_sect; + dev >>= 6; if (special_op[dev]) { if (do_special_op(dev)) @@ -634,20 +632,16 @@ g.heads = hd_info[dev].head; g.sectors = hd_info[dev].sect; g.cylinders = hd_info[dev].cyl; - g.start = hd[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(loc, &g, sizeof g) ? 
-EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) return -EINVAL; - return put_user(hd[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return revalidate_hddisk(inode->i_rdev, 1); + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -734,11 +728,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -841,9 +833,9 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ - hd_gendisk.next = gendisk_head; - gendisk_head = &hd_gendisk; + add_gendisk(&hd_gendisk); init_timer(&device_timer); device_timer.function = hd_times_out; hd_geninit(); @@ -870,9 +862,7 @@ { int target; struct gendisk * gdev; - int max_p; - int start; - int i; + int res; long flags; target = DEVICE_NR(dev); @@ -887,25 +877,20 @@ DEVICE_BUSY = 1; restore_flags(flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; - - for (i=max_p - 1; i >=0 ; i--) { - int minor = start + i; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(gdev, target, 1<<6, CAPACITY); + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } static int parse_hd_setup (char *line) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/hpt34x.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/hpt34x.c Fri Aug 3 12:04:42 2001 @@ -425,6 +425,7 @@ hwif->autodma = 0; hwif->dmaproc = &hpt34x_dmaproc; + hwif->highmem = 1; } else { hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/hpt366.c Wed Aug 15 05:01:07 2001 +++ linux/drivers/ide/hpt366.c Wed Sep 5 11:13:21 2001 @@ -730,6 +730,7 @@ hwif->autodma = 1; else hwif->autodma = 0; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-cd.c Thu Aug 16 18:30:45 2001 +++ linux/drivers/ide/ide-cd.c Wed Sep 5 11:13:21 2001 @@ -926,7 +926,7 @@ /* If we're not done filling the current buffer, complain. Otherwise, complete the command normally. */ if (rq->current_nr_sectors > 0) { - printk ("%s: cdrom_read_intr: data underrun (%ld blocks)\n", + printk ("%s: cdrom_read_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); } else @@ -959,8 +959,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. 
*/ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), - sectors_to_transfer); + nskip = MIN(rq->current_nr_sectors - bio_sectors(rq->bio), sectors_to_transfer); while (nskip > 0) { /* We need to throw away a sector. */ @@ -1058,7 +1057,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. */ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1097,9 +1096,9 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { - printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", + printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); return ide_stopped; @@ -1192,66 +1191,17 @@ return cdrom_start_packet_command (drive, 0, cdrom_start_seek_continuation); } -static inline int cdrom_merge_requests(struct request *rq, struct request *nxt) -{ - int ret = 1; - - /* - * partitions not really working, but better check anyway... - */ - if (rq->cmd == nxt->cmd && rq->rq_dev == nxt->rq_dev) { - rq->nr_sectors += nxt->nr_sectors; - rq->hard_nr_sectors += nxt->nr_sectors; - rq->bhtail->b_reqnext = nxt->bh; - rq->bhtail = nxt->bhtail; - list_del(&nxt->queue); - blkdev_release_request(nxt); - ret = 0; - } - - return ret; -} - -/* - * the current request will always be the first one on the list - */ -static void cdrom_attempt_remerge(ide_drive_t *drive, struct request *rq) -{ - struct list_head *entry; - struct request *nxt; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock, flags); - - while (1) { - entry = rq->queue.next; - if (entry == &drive->queue.queue_head) - break; - - nxt = blkdev_entry_to_request(entry); - if (rq->sector + rq->nr_sectors != nxt->sector) - break; - else if (rq->nr_sectors + nxt->nr_sectors > SECTORS_MAX) - break; - - if (cdrom_merge_requests(rq, nxt)) - break; - } - - spin_unlock_irqrestore(&io_request_lock, flags); -} - /* Fix up a possibly partially-processed request so that we can - start it over entirely, or even put it back on the request queue. */ + start it over entirely */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio); rq->hard_nr_sectors = rq->nr_sectors; rq->hard_sector = rq->sector; } @@ -1281,7 +1231,7 @@ if (cdrom_read_from_buffer(drive)) return ide_stopped; - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); /* Clear the local sector buffer. 
*/ info->nsectors_buffered = 0; @@ -1577,7 +1527,7 @@ */ uptodate = 1; if (rq->current_nr_sectors > 0) { - printk("%s: write_intr: data underrun (%ld blocks)\n", + printk("%s: write_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); uptodate = 0; } @@ -1674,7 +1624,7 @@ * remerge requests, often the plugging will not have had time * to do this properly */ - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); info->nsectors_buffered = 0; @@ -2202,7 +2152,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2711,7 +2663,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2875,7 +2826,7 @@ MOD_INC_USE_COUNT; if (info->buffer == NULL) info->buffer = (char *) kmalloc(SECTOR_BUFFER_SIZE, GFP_KERNEL); - if ((info->buffer == NULL) || (rc = cdrom_fops.open(ip, fp))) { + if ((info->buffer == NULL) || (rc = cdrom_fops.open(ip, fp))) { drive->usage--; MOD_DEC_USE_COUNT; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-disk.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/ide-disk.c Wed Sep 5 11:18:28 2001 @@ -27,6 +27,7 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 Highmem I/O support, Jens Axboe */ #define IDEDISK_VERSION "1.10" @@ -139,7 +140,9 @@ byte stat; int i; unsigned int msect, nsect; + unsigned long flags; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -150,8 +153,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -160,14 +163,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); + to = ide_map_buffer(rq, &flags); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to, &flags); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -201,14 +205,16 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + unsigned long flags; + char *to = ide_map_buffer(rq, &flags); + idedisk_output_data (drive, to, SECTOR_WORDS); + 
ide_unmap_buffer(to, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -238,28 +244,28 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + unsigned long flags; + if (nsect > mcount) nsect = mcount; mcount -= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq, &flags); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; /* Do we move to the next bh after this? */ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -268,6 +274,7 @@ * re-entering us on the last transfer. */ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer, &flags); } while (mcount); return 0; @@ -367,6 +374,9 @@ */ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { + unsigned long flags; + char *buffer; + if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); OUT_BYTE(0x00, IDE_FEATURE_REG); @@ -444,16 +454,17 @@ hwgroup->wrq = *rq; /* scratchpad */ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { - unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; } } else { ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + buffer = ide_map_buffer(rq, &flags); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer, &flags); } return ide_started; } @@ -482,7 +493,8 @@ { if (drive->removable && !drive->usage) { invalidate_buffers(inode->i_rdev); - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && + ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) drive->doorlocking = 0; } MOD_DEC_USE_COUNT; @@ -495,9 +507,7 @@ static void idedisk_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<nowerr = arg; drive->bad_wstat = arg ? 
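
The ide-disk hunks above stop advancing rq->buffer by hand and instead map the current bio page around each PIO transfer with the ide_map_buffer()/ide_unmap_buffer() helpers this patch adds, which is what makes highmem pages reachable from PIO. A small sketch of the calling pattern, modelled on the hunks above but not taken from them:

	static void pio_one_sector(ide_drive_t *drive, struct request *rq, int writing)
	{
		unsigned long flags;
		char *buf;

		/* the current bio page may be in highmem; map it only for the
		   duration of the programmed I/O transfer */
		buf = ide_map_buffer(rq, &flags);

		if (writing)
			idedisk_output_data(drive, buf, SECTOR_WORDS);
		else
			idedisk_input_data(drive, buf, SECTOR_WORDS);

		ide_unmap_buffer(buf, &flags);
	}
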
BAD_R_STAT : BAD_W_STAT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&DRIVE_LOCK(drive)); return 0; } @@ -691,7 +701,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL); ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-dma.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/ide-dma.c Thu Sep 6 14:42:40 2001 @@ -183,25 +183,6 @@ #endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ /* - * Our Physical Region Descriptor (PRD) table should be large enough - * to handle the biggest I/O request we are likely to see. Since requests - * can have no more than 256 sectors, and since the typical blocksize is - * two or more sectors, we could get by with a limit of 128 entries here for - * the usual worst case. Most requests seem to include some contiguous blocks, - * further reducing the number of table entries required. - * - * The driver reverts to PIO mode for individual requests that exceed - * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling - * 100% of all crazy scenarios here is not necessary. - * - * As it turns out though, we must allocate a full 4KB page for this, - * so the two PRD tables (ide0 & ide1) will each get half of that, - * allowing each to have about 256 entries (8 bytes each) from this. - */ -#define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) - -/* * dma_intr() is the handler for disk read/write DMA interrupts */ ide_startstop_t ide_dma_intr (ide_drive_t *drive) @@ -229,35 +210,40 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; + struct bio *bio = rq->bio; + unsigned long lastdataend = ~0UL; int nents = 0; - if (hwif->sg_dma_active) - BUG(); - if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; - do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; - - if (nents >= PRD_ENTRIES) - return 0; - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; + bio = rq->bio; + do { + /* + * continue segment from before? 
+ */ + if (bio_to_phys(bio) == lastdataend) { + sg[nents - 1].length += bio_size(bio); + lastdataend += bio_size(bio); + } else { + /* + * start new segment + */ + if (nents >= PRD_ENTRIES) + BUG(); + + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].address = NULL; + sg[nents].page = bio_page(bio); + sg[nents].length = bio_size(bio); + sg[nents].offset = bio_offset(bio); + lastdataend = bio_to_phys(bio) + bio_size(bio); + nents++; } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; - nents++; - } while (bh != NULL); + } while ((bio = bio->bi_next) != NULL); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } @@ -269,9 +255,10 @@ */ int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func) { - unsigned int *table = HWIF(drive)->dmatable_cpu; + ide_hwif_t *hwif = HWIF(drive); + unsigned int *table = hwif->dmatable_cpu; #ifdef CONFIG_BLK_DEV_TRM290 - unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290); + unsigned int is_trm290_chipset = (hwif->chipset == ide_trm290); #else const int is_trm290_chipset = 0; #endif @@ -279,13 +266,15 @@ int i; struct scatterlist *sg; - HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); + if (hwif->sg_dma_active) + BUG(); + hwif->sg_nents = i = ide_build_sglist(hwif, HWGROUP(drive)->rq); if (!i) return 0; - sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + sg = hwif->sg_table; + while (i) { u32 cur_addr; u32 cur_len; @@ -299,55 +288,50 @@ */ while (cur_len) { - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); - - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { - /* - * Most chipsets correctly interpret a length of 0x0000 as 64KB, - * but at least one (e.g. CS5530) misinterprets it as zero (!). - * So here we break the 64KB entry into two 32KB entries instead. - */ - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } - *table++ = cpu_to_le32(0x8000); - *table++ = cpu_to_le32(cur_addr + 0x8000); - xcount = 0x8000; + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + + if (count++ >= PRD_ENTRIES) + BUG(); + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + if (xcount == 0x0000) { + /* + * Most chipsets correctly interpret a length of + * 0x0000 as 64KB, but at least one (e.g. CS5530) + * misinterprets it as zero (!). So here we break + * the 64KB entry into two 32KB entries instead. 
+ */ + if (count++ >= PRD_ENTRIES) { + pci_unmap_sg(hwif->pci_dev, sg, + hwif->sg_nents, + hwif->sg_dma_direction); + return 0; } - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; + + *table++ = cpu_to_le32(0x8000); + *table++ = cpu_to_le32(cur_addr + 0x8000); + xcount = 0x8000; } + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; i--; } - if (count) { - if (!is_trm290_chipset) - *--table |= cpu_to_le32(0x80000000); - return count; - } - printk("%s: empty DMA table?\n", drive->name); -use_pio_instead: - pci_unmap_sg(HWIF(drive)->pci_dev, - HWIF(drive)->sg_table, - HWIF(drive)->sg_nents, - HWIF(drive)->sg_dma_direction); - HWIF(drive)->sg_dma_active = 0; - return 0; /* revert to PIO for this request */ + if (!count) + printk("%s: empty DMA table?\n", drive->name); + else if (!is_trm290_chipset) + *--table |= cpu_to_le32(0x80000000); + + return count; } /* Teardown mappings after DMA has completed. */ @@ -512,6 +496,18 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ +static void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + dma64_addr_t addr = BLK_BOUNCE_HIGH; + + if (!PCI_DMA_BUS_IS_PHYS) + addr = BLK_BOUNCE_ANY; + if (on && drive->media == ide_disk && HWIF(drive)->highmem) + addr = HWIF(drive)->pci_dev->dma_mask; + + blk_queue_bounce_limit(&drive->queue, addr); +} + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. * @@ -530,19 +526,20 @@ */ int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { -// ide_hwgroup_t *hwgroup = HWGROUP(drive); - ide_hwif_t *hwif = HWIF(drive); - unsigned long dma_base = hwif->dma_base; - byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + ide_hwif_t *hwif = HWIF(drive); + unsigned long dma_base = hwif->dma_base; + byte unit = (drive->select.b.unit & 0x01); + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); + set_high = 0; case ide_dma_off_quietly: outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: + ide_toggle_bounce(drive, set_high); drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-floppy.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/ide-floppy.c Wed Sep 5 11:13:21 2001 @@ -1714,9 +1714,7 @@ */ static void idefloppy_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1889,10 +1886,7 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } + blk_queue_max_sectors(&drive->queue, 64); /* * Guess what? The IOMEGA Clik! 
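
The ide_toggle_bounce() hunk above is where the highmem policy is set: the queue is told the highest address the device can reach, and the block layer bounces any page above that limit. A simplified sketch of the same decision, leaving out the PCI_DMA_BUS_IS_PHYS special case handled in the patch:

	static void my_set_bounce(request_queue_t *q, struct pci_dev *pdev, int use_dma)
	{
		/* default: bounce anything that lives in highmem (plain PIO) */
		dma64_addr_t limit = BLK_BOUNCE_HIGH;

		if (use_dma)
			limit = pdev->dma_mask;	/* DMA: only bounce pages above the mask */

		blk_queue_bounce_limit(q, limit);
	}
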
drive also needs the diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-probe.c Tue Aug 14 00:00:37 2001 +++ linux/drivers/ide/ide-probe.c Wed Sep 5 11:13:21 2001 @@ -594,9 +594,21 @@ static void ide_init_queue(ide_drive_t *drive) { request_queue_t *q = &drive->queue; + int max_sectors; q->queuedata = HWGROUP(drive); - blk_init_queue(q, do_ide_request); + blk_init_queue(q, do_ide_request, drive->name); + + /* IDE can do up to 128K per request, pdc4030 needs smaller limit */ +#ifdef CONFIG_BLK_DEV_PDC4030 + max_sectors = 127; +#else + max_sectors = 255; +#endif + blk_queue_max_sectors(q, max_sectors); + + /* IDE DMA can do PRD_ENTRIES number of segments */ + q->max_segments = PRD_ENTRIES; } /* @@ -670,7 +682,7 @@ hwgroup->rq = NULL; hwgroup->handler = NULL; hwgroup->drive = NULL; - hwgroup->busy = 0; + hwgroup->flags = 0; init_timer(&hwgroup->timer); hwgroup->timer.function = &ide_timer_expiry; hwgroup->timer.data = (unsigned long) hwgroup; @@ -700,6 +712,13 @@ hwif->next = hwgroup->hwif->next; hwgroup->hwif->next = hwif; + if (!hwgroup->hwif) { + hwgroup->hwif = HWIF(hwgroup->drive); +#ifdef DEBUG + printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name); +#endif + } + restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */ for (index = 0; index < MAX_DRIVES; ++index) { ide_drive_t *drive = &hwif->drives[index]; if (!drive->present) @@ -710,13 +729,6 @@ hwgroup->drive->next = drive; ide_init_queue(drive); } - if (!hwgroup->hwif) { - hwgroup->hwif = HWIF(hwgroup->drive); -#ifdef DEBUG - printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name); -#endif - } - restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */ #if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__) printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name, @@ -747,9 +759,9 @@ */ static void init_gendisk (ide_hwif_t *hwif) { - struct gendisk *gd, **gdp; + struct gendisk *gd; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +774,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors * sizeof(struct hd_struct)); /* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. 
*/ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } @@ -800,8 +804,8 @@ if (gd->flags) memset (gd->flags, 0, sizeof *gd->flags * units); - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ; - hwif->gd = *gdp = gd; /* link onto tail of list */ + hwif->gd = gd; + add_gendisk(gd); for (unit = 0; unit < units; ++unit) { if (hwif->drives[unit].present) { @@ -870,13 +874,6 @@ read_ahead[hwif->major] = 8; /* (4kB) */ hwif->present = 1; /* success */ -#if (DEBUG_SPINLOCK > 0) -{ - static int done = 0; - if (!done++) - printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */ -} -#endif return hwif->present; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-proc.c linux/drivers/ide/ide-proc.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-proc.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/ide-proc.c Wed Sep 5 11:13:21 2001 @@ -190,7 +190,7 @@ if (hwif->mate && hwif->mate->hwgroup) mategroup = (ide_hwgroup_t *)(hwif->mate->hwgroup); cli(); /* all CPUs; ensure all writes are done together */ - while (mygroup->busy || (mategroup && mategroup->busy)) { + while (test_bit(IDE_BUSY, &mygroup->flags) || (mategroup && test_bit(IDE_BUSY, &mategroup->flags))) { sti(); /* all CPUs */ if (0 < (signed long)(jiffies - timeout)) { printk("/proc/ide/%s/config: channel(s) busy, cannot write\n", hwif->name); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-tape.c linux/drivers/ide/ide-tape.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide-tape.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/ide-tape.c Wed Sep 5 11:13:21 2001 @@ -1887,8 +1887,7 @@ printk("ide-tape: %s: skipping over config parition..\n", tape->name); #endif tape->onstream_write_error = OS_PART_ERROR; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } remove_stage = 1; @@ -1904,8 +1903,7 @@ tape->nr_pending_stages++; tape->next_stage = tape->first_stage; rq->current_nr_sectors = rq->nr_sectors; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } } else if (rq->cmd == IDETAPE_READ_RQ) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/ide.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/ide/ide.c Wed Sep 5 11:49:05 2001 @@ -113,6 +113,8 @@ * Version 6.31 Debug Share INTR's and request queue streaming * Native ATA-100 support * Prep for Cascades Project + * Version 6.32 4GB highmem support for DMA, and mapping of those for + * PIO transfer (Jens Axboe) * * Some additional driver compile-time options are in ./include/linux/ide.h * @@ -121,8 +123,8 @@ * */ -#define REVISION "Revision: 6.31" -#define VERSION "Id: ide.c 6.31 2000/06/09" +#define REVISION "Revision: 6.32" +#define VERSION "Id: ide.c 6.32 2001/05/24" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -170,6 +172,7 @@ static int idebus_parameter; /* holds the "idebus=" parameter */ static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ static int initializing; /* set while initializing built-in drivers */ +spinlock_t ide_lock = SPIN_LOCK_UNLOCKED; #ifdef CONFIG_BLK_DEV_IDEPCI static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */ @@ -551,7 +554,7 @@ unsigned long flags; ide_drive_t *drive = hwgroup->drive; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); rq = hwgroup->rq; /* @@ -563,13 +566,13 @@ 
hwgroup->hwif->dmaproc(ide_dma_on, drive); } - if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { + if (!__end_that_request_first(rq, uptodate)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -585,7 +588,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n", drive->name, hwgroup->handler, handler); @@ -594,7 +597,7 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -844,9 +847,9 @@ unsigned long flags; struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); rq = HWGROUP(drive)->rq; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); if (rq->cmd == IDE_DRIVE_CMD) { byte *args = (byte *) rq->buffer; @@ -869,11 +872,11 @@ args[6] = IN_BYTE(IDE_SELECT_REG); } } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; end_that_request_last(rq); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -1193,8 +1196,8 @@ static ide_startstop_t start_request (ide_drive_t *drive) { ide_startstop_t startstop; - unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); + unsigned long block; + struct request *rq = HWGROUP(drive)->rq; unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); @@ -1217,16 +1220,11 @@ } #endif block = rq->sector; - blockend = block + rq->nr_sectors; + /* Strange disk manager remap */ if ((rq->cmd == READ || rq->cmd == WRITE) && (drive->media == ide_disk || drive->media == ide_floppy)) { - if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { - printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name, - (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors); - goto kill_rq; - } - block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0; + block += drive->sect0; } /* Yecch - this will shift the entire interval, possibly killing some innocent following sector */ @@ -1238,7 +1236,8 @@ #endif SELECT_DRIVE(hwif, drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, + BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk("%s: drive not ready for command\n", drive->name); return startstop; } @@ -1249,7 +1248,8 @@ if (drive->driver != NULL) { return (DRIVER(drive)->do_request(drive, rq, block)); } - printk("%s: media type %d not supported\n", drive->name, drive->media); + printk("%s: media type %d not supported\n", + drive->name, drive->media); goto kill_rq; } return do_special(drive); @@ -1266,10 +1266,10 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - 
spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return start_request(drive); } @@ -1303,7 +1303,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1332,7 +1332,7 @@ /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * Caller must have already done spin_lock_irqsave(&DRIVE_LOCK(drive), ...) * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -1344,26 +1344,21 @@ * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses the single global io_request_lock spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses the queue spinlock to protect access to the request + * queues. * * The first thread into the driver for a particular hwgroup sets the - * hwgroup->busy flag to indicate that this hwgroup is now active, + * hwgroup->flags IDE_BUSY flag to indicate that this hwgroup is now active, * and then initiates processing of the top request from the request queue. * * Other threads attempting entry notice the busy setting, and will simply - * queue their new requests and exit immediately. Note that hwgroup->busy - * remains set even when the driver is merely awaiting the next interrupt. + * queue their new requests and exit immediately. Note that hwgroup->flags + * remains busy even when the driver is merely awaiting the next interrupt. * Thus, the meaning is "this hwgroup is busy processing a request". * * When processing of a request completes, the completing thread or IRQ-handler * will start the next request from the queue. If no more work remains, - * the driver will clear the hwgroup->busy flag and exit. - * - * The io_request_lock (spinlock) is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * the driver will clear the hwgroup->flags IDE_BUSY flag and exit. */ static void ide_do_request(ide_hwgroup_t *hwgroup, int masked_irq) { @@ -1375,8 +1370,7 @@ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */ - while (!hwgroup->busy) { - hwgroup->busy = 1; + while (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) { drive = choose_drive(hwgroup); if (drive == NULL) { unsigned long sleep = 0; @@ -1399,13 +1393,13 @@ if (timer_pending(&hwgroup->timer)) printk("ide_set_handler: timer already active\n"); #endif - hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */ + set_bit(IDE_SLEEP, &hwgroup->flags); mod_timer(&hwgroup->timer, sleep); - /* we purposely leave hwgroup->busy==1 while sleeping */ + /* we purposely leave hwgroup busy while sleeping */ } else { /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? 
*/ ide_release_lock(&ide_lock); /* for atari only */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } return; /* no more work for this hwgroup (for now) */ } @@ -1419,9 +1413,14 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ + if (blk_queue_plugged(&drive->queue)) /* paranoia */ printk("%s: Huh? nuking plugged queue\n", drive->name); - hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + + /* + * just continuing an interrupted request maybe + */ + hwgroup->rq = elv_next_request(&drive->queue); + /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1432,14 +1431,14 @@ */ if (masked_irq && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&io_request_lock); + spin_unlock(&DRIVE_LOCK(drive)); ide__sti(); /* allow other IRQs while we start this request */ startstop = start_request(drive); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); if (masked_irq && hwif->irq != masked_irq) enable_irq(hwif->irq); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } @@ -1492,16 +1491,16 @@ (void) hwif->dmaproc(ide_dma_off_quietly, drive); /* - * un-busy drive etc (hwgroup->busy is cleared on return) and + * un-busy drive etc (hwgroup is un-busy'ed on return) and * make sure request is sane */ rq = HWGROUP(drive)->rq; HWGROUP(drive)->rq = NULL; rq->errors = 0; - rq->sector = rq->bh->b_rsector; - rq->current_nr_sectors = rq->bh->b_size >> 9; - rq->buffer = rq->bh->b_data; + rq->sector = rq->bio->bi_sector; + rq->current_nr_sectors = bio_sectors(rq->bio); + //rq->buffer = rq->bh->b_data; } /* @@ -1517,7 +1516,11 @@ unsigned long flags; unsigned long wait; - spin_lock_irqsave(&io_request_lock, flags); + /* + * a global lock protects timers etc -- shouldn't get contention + * worth mentioning + */ + spin_lock_irqsave(&ide_lock, flags); del_timer(&hwgroup->timer); if ((handler = hwgroup->handler) == NULL) { @@ -1527,10 +1530,8 @@ * or we were "sleeping" to give other devices a chance. * Either way, we don't really want to complain about anything. 
*/ - if (hwgroup->sleeping) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; - } + if (test_and_clear_bit(IDE_SLEEP, &hwgroup->flags)) + clear_bit(IDE_BUSY, &hwgroup->flags); } else { ide_drive_t *drive = hwgroup->drive; if (!drive) { @@ -1539,17 +1540,16 @@ } else { ide_hwif_t *hwif; ide_startstop_t startstop; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_timer_expiry: hwgroup was not busy??\n", drive->name); if ((expiry = hwgroup->expiry) != NULL) { /* continue */ if ((wait = expiry(drive)) != 0) { /* reset timer */ hwgroup->timer.expires = jiffies + wait; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } } @@ -1559,7 +1559,7 @@ * the handler() function, which means we need to globally * mask the specific IRQ: */ - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); hwif = HWIF(drive); #if DISABLE_IRQ_NOSYNC disable_irq_nosync(hwif->irq); @@ -1585,13 +1585,15 @@ set_recovery_timer(hwif); drive->service_time = jiffies - drive->service_start; enable_irq(hwif->irq); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } + spin_unlock_irqrestore(&ide_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(hwgroup->drive), flags); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(hwgroup->drive), flags); } /* @@ -1654,11 +1656,11 @@ ide_handler_t *handler; ide_startstop_t startstop; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwif = hwgroup->hwif; if (!ide_ack_intr(hwif)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } @@ -1692,7 +1694,7 @@ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); #endif /* CONFIG_BLK_DEV_IDEPCI */ } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } drive = hwgroup->drive; @@ -1700,7 +1702,7 @@ /* * This should NEVER happen, and there isn't much we could do about it here. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } if (!drive_is_ready(drive)) { @@ -1710,21 +1712,20 @@ * the IRQ before their status register is up to date. Hopefully we have * enough advance overhead that the latter isn't a problem. 
*/ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_intr: hwgroup was not busy??\n", drive->name); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); if (drive->unmask) ide__sti(); /* local CPU only */ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&DRIVE_LOCK(drive)); /* * Note that handler() may have set things up for another @@ -1737,13 +1738,13 @@ drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); ide_do_request(hwgroup, hwif->irq); } else { printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name); } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); } /* @@ -1753,9 +1754,6 @@ ide_drive_t *get_info_ptr (kdev_t i_rdev) { int major = MAJOR(i_rdev); -#if 0 - int minor = MINOR(i_rdev) & PARTN_MASK; -#endif unsigned int h; for (h = 0; h < MAX_HWIFS; ++h) { @@ -1764,11 +1762,7 @@ unsigned unit = DEVICE_NR(i_rdev); if (unit < MAX_DRIVES) { ide_drive_t *drive = &hwif->drives[unit]; -#if 0 - if ((drive->present) && (drive->part[minor].nr_sects)) -#else if (drive->present) -#endif return drive; } break; @@ -1828,7 +1822,7 @@ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<waiting = &wait; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&DRIVE_LOCK(drive), flags); if (list_empty(queue_head) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; @@ -1838,9 +1832,9 @@ } else queue_head = queue_head->next; } - list_add(&rq->queue, queue_head); + list_add(&rq->queuelist, queue_head); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags); if (action == ide_wait) { wait_for_completion(&wait); /* wait for it to be serviced */ return rq->errors ? -EIO : 0; /* return -EIO if errors */ @@ -1849,6 +1843,16 @@ } +/* Common for ide-floppy.c and ide-disk.c */ +void ide_revalidate_drive (ide_drive_t *drive) +{ + struct gendisk *g = HWIF(drive)->gd; + int minor = (drive->select.b.unit << g->minor_shift); + kdev_t dev = MKDEV(g->major, minor); + + grok_partitions(dev, current_capacity(drive)); +} + /* * This routine is called to flush all partitions and partition tables * for a changed disk, and then re-read the new partition table. 
@@ -1861,40 +1865,33 @@
 {
 	ide_drive_t *drive;
 	ide_hwgroup_t *hwgroup;
-	unsigned int p, major, minor;
-	long flags;
+	unsigned long flags;
+	int res;
 	if ((drive = get_info_ptr(i_rdev)) == NULL)
 		return -ENODEV;
-	major = MAJOR(i_rdev);
-	minor = drive->select.b.unit << PARTN_BITS;
 	hwgroup = HWGROUP(drive);
-	spin_lock_irqsave(&io_request_lock, flags);
+	spin_lock_irqsave(&DRIVE_LOCK(drive), flags);
 	if (drive->busy || (drive->usage > 1)) {
-		spin_unlock_irqrestore(&io_request_lock, flags);
+		spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags);
 		return -EBUSY;
-	};
+	}
 	drive->busy = 1;
 	MOD_INC_USE_COUNT;
-	spin_unlock_irqrestore(&io_request_lock, flags);
+	spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags);
-	for (p = 0; p < (1<<PARTN_BITS); ++p) {
-		if (drive->part[p].nr_sects > 0) {
-			kdev_t devp = MKDEV(major, minor+p);
-			invalidate_device(devp, 1);
-			set_blocksize(devp, 1024);
-		}
-		drive->part[p].start_sect = 0;
-		drive->part[p].nr_sects = 0;
-	};
+	res = wipe_partitions(i_rdev);
+	if (res)
+		goto leave;
 	if (DRIVER(drive)->revalidate)
 		DRIVER(drive)->revalidate(drive);
+ leave:
 	drive->busy = 0;
 	wake_up(&drive->wqueue);
 	MOD_DEC_USE_COUNT;
-	return 0;
+	return res;
 }
 static void revalidate_drives (void)
@@ -2059,7 +2056,7 @@
 void ide_unregister (unsigned int index)
 {
-	struct gendisk *gd, **gdp;
+	struct gendisk *gd;
 	ide_drive_t *drive, *d;
 	ide_hwif_t *hwif, *g;
 	ide_hwgroup_t *hwgroup;
@@ -2174,18 +2171,13 @@
 	 */
 	unregister_blkdev(hwif->major, hwif->name);
 	kfree(blksize_size[hwif->major]);
-	kfree(max_sectors[hwif->major]);
 	kfree(max_readahead[hwif->major]);
 	blk_dev[hwif->major].data = NULL;
 	blk_dev[hwif->major].queue = NULL;
-	blksize_size[hwif->major] = NULL;
-	for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next))
-		if (*gdp == hwif->gd)
-			break;
-	if (*gdp == NULL)
-		printk("gd not in disk chain!\n");
-	else {
-		gd = *gdp; *gdp = gd->next;
+	blk_clear(hwif->major);
+	gd = hwif->gd;
+	if (gd) {
+		del_gendisk(gd);
 		kfree(gd->sizes);
 		kfree(gd->part);
 		if (gd->de_arr)
@@ -2193,6 +2185,7 @@
 		if (gd->flags)
 			kfree (gd->flags);
 		kfree(gd);
+		hwif->gd = NULL;
 	}
 	old_hwif = *hwif;
 	init_hwif_data (index);	/* restore hwif data to pristine status */
@@ -2411,7 +2404,7 @@
 	unsigned long flags;
 	if ((setting->rw & SETTING_READ)) {
-		spin_lock_irqsave(&io_request_lock, flags);
+		spin_lock_irqsave(&DRIVE_LOCK(drive), flags);
 		switch(setting->data_type) {
 		case TYPE_BYTE:
 			val = *((u8 *) setting->data);
@@ -2424,7 +2417,7 @@
 			val = *((u32 *) setting->data);
 			break;
 		}
-		spin_unlock_irqrestore(&io_request_lock, flags);
+		spin_unlock_irqrestore(&DRIVE_LOCK(drive), flags);
 	}
 	return val;
 }
@@ -2434,11 +2427,11 @@
 	ide_hwgroup_t *hwgroup = HWGROUP(drive);
 	unsigned long timeout = jiffies + (3 * HZ);
-	spin_lock_irq(&io_request_lock);
+	spin_lock_irq(&DRIVE_LOCK(drive));
-	while (hwgroup->busy) {
+	while (test_bit(IDE_BUSY, &hwgroup->flags)) {
 		unsigned long lflags;
-		spin_unlock_irq(&io_request_lock);
+		spin_unlock_irq(&DRIVE_LOCK(drive));
 		__save_flags(lflags);	/* local CPU only */
 		__sti();	/* local CPU only; needed for jiffies */
 		if (0 < (signed long)(jiffies - timeout)) {
@@ -2447,7 +2440,7 @@
 			return -EBUSY;
 		}
 		__restore_flags(lflags);	/* local CPU only */
-		spin_lock_irq(&io_request_lock);
+		spin_lock_irq(&DRIVE_LOCK(drive));
 	}
 	return 0;
 }
@@ -2488,7 +2481,7 @@
 		*p = val;
 		break;
 	}
-	spin_unlock_irq(&io_request_lock);
+	spin_unlock_irq(&DRIVE_LOCK(drive));
 	return 0;
 }
@@ -2628,24 +2621,14 @@
 	{
 		struct hd_geometry *loc = (struct hd_geometry *) arg;
 		unsigned short bios_cyl = drive->bios_cyl; /* truncate */
-		if (!loc || (drive->media != ide_disk && drive->media !=
ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; - return 0; - } - - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; + if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) + return -EINVAL; + if (put_user(drive->bios_head, &loc->heads) || + put_user(drive->bios_sect, &loc->sectors) || + put_user(bios_cyl, &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) + return -EFAULT; return 0; } @@ -2661,9 +2644,6 @@ return 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (long *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return ide_revalidate_disk(inode->i_rdev); @@ -2781,6 +2761,7 @@ } return 0; + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -2790,6 +2771,8 @@ case BLKELVSET: case BLKBSZGET: case BLKBSZSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case HDIO_GET_BUSSTATE: diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/pdc202xx.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/pdc202xx.c Wed Sep 5 11:13:21 2001 @@ -891,6 +891,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { hwif->dmaproc = &pdc202xx_dmaproc; + hwif->highmem = 1; if (!noautodma) hwif->autodma = 1; } else { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/piix.c linux/drivers/ide/piix.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/piix.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/piix.c Wed Sep 5 11:13:21 2001 @@ -521,6 +521,7 @@ if (!hwif->dma_base) return; + hwif->highmem = 1; #ifndef CONFIG_BLK_DEV_IDEDMA hwif->autodma = 0; #else /* CONFIG_BLK_DEV_IDEDMA */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/serverworks.c linux/drivers/ide/serverworks.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/serverworks.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/serverworks.c Wed Sep 5 11:14:56 2001 @@ -568,6 +568,7 @@ if (!noautodma) hwif->autodma = 1; hwif->dmaproc = &svwks_dmaproc; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/sis5513.c linux/drivers/ide/sis5513.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/sis5513.c Mon Aug 6 19:11:58 2001 +++ linux/drivers/ide/sis5513.c Tue Aug 7 10:32:32 2001 @@ -671,6 +671,7 @@ case PCI_DEVICE_ID_SI_5591: if (!noautodma) hwif->autodma = 1; + hwif->highmem = 1; hwif->dmaproc = &sis5513_dmaproc; break; #endif /* 
CONFIG_BLK_DEV_IDEDMA */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/slc90e66.c linux/drivers/ide/slc90e66.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/slc90e66.c Mon Jul 16 01:22:23 2001 +++ linux/drivers/ide/slc90e66.c Fri Aug 3 12:04:42 2001 @@ -373,6 +373,7 @@ return; hwif->autodma = 0; + hwif->highmem = 1; #ifdef CONFIG_BLK_DEV_IDEDMA if (!noautodma) hwif->autodma = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/ide/via82cxxx.c linux/drivers/ide/via82cxxx.c --- /opt/kernel/linux-2.4.10-pre4/drivers/ide/via82cxxx.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/ide/via82cxxx.c Wed Sep 5 11:13:21 2001 @@ -513,6 +513,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &via82cxxx_dmaproc; #ifdef CONFIG_IDEDMA_AUTO if (!noautodma) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/md/lvm.c linux/drivers/md/lvm.c --- /opt/kernel/linux-2.4.10-pre4/drivers/md/lvm.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/md/lvm.c Wed Sep 5 11:13:21 2001 @@ -394,8 +394,6 @@ */ int lvm_init(void) { - struct gendisk *gendisk_ptr = NULL; - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); return -EIO; @@ -415,27 +413,19 @@ lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); if (lvm_proc_dir != NULL) { - lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); + lvm_proc_vg_subdir = + create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, + lvm_proc_dir); pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); - if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; + if (pde != NULL) + pde->read_proc = &lvm_proc_get_global_info; } lvm_init_vars(); lvm_geninit(&lvm_gendisk); /* insert our gendisk at the corresponding major */ - if (gendisk_head != NULL) { - gendisk_ptr = gendisk_head; - while (gendisk_ptr->next != NULL && - gendisk_ptr->major > lvm_gendisk.major) { - gendisk_ptr = gendisk_ptr->next; - } - lvm_gendisk.next = gendisk_ptr->next; - gendisk_ptr->next = &lvm_gendisk; - } else { - gendisk_head = &lvm_gendisk; - lvm_gendisk.next = NULL; - } + add_gendisk(&lvm_gendisk); #ifdef LVM_HD_NAME /* reference from drivers/block/genhd.c */ @@ -447,7 +437,7 @@ /* optional read root VGDA */ /* - if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); + if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); */ printk(KERN_INFO @@ -469,8 +459,6 @@ */ static void lvm_cleanup(void) { - struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; - devfs_unregister (lvm_devfs_handle); if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { @@ -480,21 +468,8 @@ printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); } - - gendisk_ptr = gendisk_ptr_prev = gendisk_head; - while (gendisk_ptr != NULL) { - if (gendisk_ptr == &lvm_gendisk) - break; - gendisk_ptr_prev = gendisk_ptr; - gendisk_ptr = gendisk_ptr->next; - } - /* delete our gendisk from chain */ - if (gendisk_ptr == &lvm_gendisk) - gendisk_ptr_prev->next = gendisk_ptr->next; - - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + del_gendisk(&lvm_gendisk); + blk_clear(MAJOR_NR); remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); @@ -506,7 +481,6 @@ #endif printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); - return; } /* lvm_cleanup() */ diff -urN --exclude-from /home/axboe/exclude 
/opt/kernel/linux-2.4.10-pre4/drivers/md/md.c linux/drivers/md/md.c --- /opt/kernel/linux-2.4.10-pre4/drivers/md/md.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/md/md.c Wed Sep 5 11:13:21 2001 @@ -222,18 +222,6 @@ return mddev; } -struct gendisk * find_gendisk (kdev_t dev) -{ - struct gendisk *tmp = gendisk_head; - - while (tmp != NULL) { - if (tmp->major == MAJOR(dev)) - return (tmp); - tmp = tmp->next; - } - return (NULL); -} - mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) { mdk_rdev_t * rdev; @@ -281,7 +269,7 @@ /* * ok, add this new device name to the list */ - hd = find_gendisk (dev); + hd = get_gendisk (dev); dname->name = NULL; if (hd) dname->name = disk_name (hd, MINOR(dev), dname->namebuf); @@ -569,7 +557,7 @@ static kdev_t dev_unit(kdev_t dev) { unsigned int mask; - struct gendisk *hd = find_gendisk(dev); + struct gendisk *hd = get_gendisk(dev); if (!hd) return 0; @@ -2639,7 +2627,7 @@ (short *) &loc->cylinders); if (err) goto abort_unlock; - err = md_put_user (md_hd_struct[minor].start_sect, + err = md_put_user (get_start_sect(dev), (long *) &loc->start); goto done_unlock; } @@ -3515,13 +3503,13 @@ read_ahead[MAJOR_NR] = INT_MAX; - md_gendisk.next = gendisk_head; - gendisk_head = &md_gendisk; + add_gendisk(&md_gendisk); md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); if (!md_recovery_thread) - printk(KERN_ALERT "md: bug: couldn't allocate md_recovery_thread\n"); + printk(KERN_ALERT + "md: bug: couldn't allocate md_recovery_thread\n"); md_register_reboot_notifier(&md_notifier); raid_table_header = register_sysctl_table(raid_root_table, 1); @@ -3854,23 +3842,12 @@ #ifdef CONFIG_PROC_FS remove_proc_entry("mdstat", NULL); #endif - - gendisk_ptr = &gendisk_head; - while (*gendisk_ptr) { - if (*gendisk_ptr == &md_gendisk) { - *gendisk_ptr = md_gendisk.next; - break; - } - gendisk_ptr = & (*gendisk_ptr)->next; - } + + del_gendisk(&md_gendisk); blk_dev[MAJOR_NR].queue = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; - max_readahead[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); free_device_names(); - } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/mtd/ftl.c linux/drivers/mtd/ftl.c --- /opt/kernel/linux-2.4.10-pre4/drivers/mtd/ftl.c Tue Jun 12 19:30:27 2001 +++ linux/drivers/mtd/ftl.c Fri Aug 3 12:04:42 2001 @@ -1171,7 +1171,7 @@ put_user(1, (char *)&geo->heads); put_user(8, (char *)&geo->sectors); put_user((sect>>3), (short *)&geo->cylinders); - put_user(ftl_hd[minor].start_sect, (u_long *)&geo->start); + put_user(get_start_sect(inode->i_rdev), (u_long *)&geo->start); break; case BLKGETSIZE: ret = verify_area(VERIFY_WRITE, (long *)arg, sizeof(long)); @@ -1211,42 +1211,27 @@ ======================================================================*/ -static int ftl_reread_partitions(int minor) +static int ftl_reread_partitions(kdev_t dev) { + int minor = MINOR(dev); partition_t *part = myparts[minor >> 4]; - int i, whole; + int res; DEBUG(0, "ftl_cs: ftl_reread_partition(%d)\n", minor); if ((atomic_read(&part->open) > 1)) { return -EBUSY; } - whole = minor & ~(MAX_PART-1); - i = MAX_PART - 1; - while (i-- > 0) { - if (ftl_hd[whole+i].nr_sects > 0) { - kdev_t rdev = MKDEV(FTL_MAJOR, whole+i); - - invalidate_device(rdev, 1); - } - ftl_hd[whole+i].start_sect = 0; - ftl_hd[whole+i].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; scan_header(part); register_disk(&ftl_gendisk, whole >> PART_BITS, MAX_PART, &ftl_blk_fops, 
		  le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE);
-#ifdef PCMCIA_DEBUG
-    for (i = 0; i < MAX_PART; i++) {
-	if (ftl_hd[whole+i].nr_sects > 0)
-	    printk(KERN_INFO "  %d: start %ld size %ld\n", i,
-		   ftl_hd[whole+i].start_sect,
-		   ftl_hd[whole+i].nr_sects);
-    }
-#endif
-    return 0;
+    return res;
 }
 /*======================================================================
@@ -1428,8 +1413,7 @@
     blksize_size[FTL_MAJOR] = ftl_blocksizes;
     ftl_gendisk.major = FTL_MAJOR;
     blk_init_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR), &do_ftl_request);
-    ftl_gendisk.next = gendisk_head;
-    gendisk_head = &ftl_gendisk;
+    add_gendisk(&ftl_gendisk, FTL_MAJOR);
     register_mtd_user(&ftl_notifier);
@@ -1438,19 +1422,13 @@
 mod_exit_t cleanup_ftl(void)
 {
-    struct gendisk *gd, **gdp;
-
     unregister_mtd_user(&ftl_notifier);
     unregister_blkdev(FTL_MAJOR, "ftl");
     blk_cleanup_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR));
-    blksize_size[FTL_MAJOR] = NULL;
+    blk_clear(FTL_MAJOR);
-    for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next))
-	if (*gdp == &ftl_gendisk) {
-	    gd = *gdp; *gdp = gd->next;
-	    break;
-	}
+    del_gendisk(&ftl_gendisk);
 }
 module_init(init_ftl);
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/mtd/mtdblock.c linux/drivers/mtd/mtdblock.c
--- /opt/kernel/linux-2.4.10-pre4/drivers/mtd/mtdblock.c	Sat Apr 28 20:27:54 2001
+++ linux/drivers/mtd/mtdblock.c	Fri Aug 3 13:00:36 2001
@@ -28,7 +28,7 @@
 #if LINUX_VERSION_CODE < 0x20300
 #define QUEUE_PLUGGED (blk_dev[MAJOR_NR].plug_tq.sync)
 #else
-#define QUEUE_PLUGGED (blk_dev[MAJOR_NR].request_queue.plugged)
+#define QUEUE_PLUGGED (blk_queue_plugged(QUEUE))
 #endif
 #ifdef CONFIG_DEVFS_FS
@@ -392,7 +392,7 @@
 /*
  * This is a special request_fn because it is executed in a process context
- * to be able to sleep independently of the caller. The io_request_lock
+ * to be able to sleep independently of the caller. The queue_lock
  * is held upon entry and exit.
  * The head of our request queue is considered active so there is no need
  * to dequeue requests before we are done.
@@ -406,7 +406,7 @@
     for (;;) {
 	INIT_REQUEST;
 	req = CURRENT;
-	spin_unlock_irq(&io_request_lock);
+	spin_unlock_irq(&QUEUE->queue_lock);
 	mtdblk = mtdblks[MINOR(req->rq_dev)];
 	res = 0;
@@ -448,7 +448,7 @@
 	}
 end_req:
-	spin_lock_irq(&io_request_lock);
+	spin_lock_irq(&QUEUE->queue_lock);
 	end_request(res);
     }
 }
@@ -485,16 +485,16 @@
 	while (!leaving) {
 	    add_wait_queue(&thr_wq, &wait);
 	    set_current_state(TASK_INTERRUPTIBLE);
-	    spin_lock_irq(&io_request_lock);
+	    spin_lock_irq(&QUEUE->queue_lock);
 	    if (QUEUE_EMPTY || QUEUE_PLUGGED) {
-		spin_unlock_irq(&io_request_lock);
+		spin_unlock_irq(&QUEUE->queue_lock);
 		schedule();
 		remove_wait_queue(&thr_wq, &wait);
 	    } else {
 		remove_wait_queue(&thr_wq, &wait);
 		set_current_state(TASK_RUNNING);
 		handle_mtdblock_request();
-		spin_unlock_irq(&io_request_lock);
+		spin_unlock_irq(&QUEUE->queue_lock);
 	    }
 	}
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/mtd/nftlcore.c linux/drivers/mtd/nftlcore.c
--- /opt/kernel/linux-2.4.10-pre4/drivers/mtd/nftlcore.c	Tue Jun 12 19:30:27 2001
+++ linux/drivers/mtd/nftlcore.c	Fri Aug 3 13:00:46 2001
@@ -59,11 +59,6 @@
 /* .. for the Linux partition table handling.
*/ struct hd_struct part_table[256]; -#if LINUX_VERSION_CODE < 0x20328 -static void dummy_init (struct gendisk *crap) -{} -#endif - static struct gendisk nftl_gendisk = { major: MAJOR_NR, major_name: "nftl", @@ -166,7 +161,8 @@ #if LINUX_VERSION_CODE < 0x20328 resetup_one_dev(&nftl_gendisk, firstfree); #else - grok_partitions(&nftl_gendisk, firstfree, 1<nr_sects); + grok_partitions(MKDEV(MAJOR_NR,firstfree<nr_sects); #endif } @@ -774,7 +770,7 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { struct NFTLrecord *nftl; - int p; + int res; nftl = NFTLs[MINOR(inode->i_rdev) >> NFTL_PARTN_BITS]; @@ -787,14 +783,9 @@ g.heads = nftl->heads; g.sectors = nftl->sectors; g.cylinders = nftl->cylinders; - g.start = part_table[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user((void *)arg, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) return -EINVAL; - return put_user(part_table[MINOR(inode->i_rdev)].nr_sects, - (long *) arg); - case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(inode->i_rdev); @@ -811,23 +802,11 @@ * or we won't be able to re-use the partitions, * if there was a change and we don't want to reboot */ - p = (1< 0) { - kdev_t devp = MKDEV(MAJOR(inode->i_dev), MINOR(inode->i_dev)+p); - if (part_table[p].nr_sects > 0) - invalidate_device (devp, 1); + res = wipe_partitions(inode->i_rdev); + if (!res) + grok_partitions(inode->i_rdev, nftl->nr_sects); - part_table[MINOR(inode->i_dev)+p].start_sect = 0; - part_table[MINOR(inode->i_dev)+p].nr_sects = 0; - } - -#if LINUX_VERSION_CODE < 0x20328 - resetup_one_dev(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS); -#else - grok_partitions(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS, - 1<nr_sects); -#endif - return 0; + return res; #if (LINUX_VERSION_CODE < 0x20303) RO_IOCTLS(inode->i_rdev, arg); /* ref. linux/blk.h */ @@ -845,7 +824,7 @@ void nftl_request(RQFUNC_ARG) { - unsigned int dev, block, nsect; + unsigned int dev, unit, block, nsect; struct NFTLrecord *nftl; char *buffer; struct request *req; @@ -857,10 +836,11 @@ /* We can do this because the generic code knows not to touch the request at the head of the queue */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); DEBUG(MTD_DEBUG_LEVEL2, "NFTL_request\n"); - DEBUG(MTD_DEBUG_LEVEL3, "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", + DEBUG(MTD_DEBUG_LEVEL3, + "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", (req->cmd == READ) ? 
"Read " : "Write", req->sector, req->current_nr_sectors); @@ -870,8 +850,8 @@ buffer = req->buffer; res = 1; /* succeed */ - if (dev >= MAX_NFTLS * (1<> NFTL_PARTN_BITS; + if (unit >= MAX_NFTLS || dev != (unit << NFTL_PARTN_BITS)) { printk("nftl: bad minor number: device = %s\n", kdevname(req->rq_dev)); res = 0; /* fail */ @@ -892,8 +872,6 @@ goto repeat; } - block += part_table[dev].start_sect; - if (req->cmd == READ) { DEBUG(MTD_DEBUG_LEVEL2, "NFTL read request of 0x%x sectors @ %x " "(req->nr_sectors == %lx)\n", nsect, block, req->nr_sectors); @@ -939,7 +917,7 @@ } repeat: DEBUG(MTD_DEBUG_LEVEL3, "end_request(%d)\n", res); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -1045,22 +1023,19 @@ #endif if (register_blkdev(MAJOR_NR, "nftl", &nftl_fops)){ - printk("unable to register NFTL block device on major %d\n", MAJOR_NR); + printk("unable to register NFTL block device on major %d\n", + MAJOR_NR); return -EBUSY; } else { -#if LINUX_VERSION_CODE < 0x20320 - blk_dev[MAJOR_NR].request_fn = nftl_request; -#else blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), &nftl_request); -#endif + /* set block size to 1kB each */ for (i = 0; i < 256; i++) { nftl_blocksizes[i] = 1024; } blksize_size[MAJOR_NR] = nftl_blocksizes; - nftl_gendisk.next = gendisk_head; - gendisk_head = &nftl_gendisk; + add_gendisk(&nftl_gendisk); } register_mtd_user(&nftl_notifier); @@ -1070,24 +1045,12 @@ static void __exit cleanup_nftl(void) { - struct gendisk *gd, **gdp; - unregister_mtd_user(&nftl_notifier); unregister_blkdev(MAJOR_NR, "nftl"); -#if LINUX_VERSION_CODE < 0x20320 - blk_dev[MAJOR_NR].request_fn = 0; -#else blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); -#endif - /* remove ourself from generic harddisk list - FIXME: why can't I found this partition on /proc/partition */ - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == &nftl_gendisk) { - gd = *gdp; *gdp = gd->next; - break; - } + del_gendisk(&nftl_gendisk); } module_init(init_nftl); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/net/acenic.c linux/drivers/net/acenic.c --- /opt/kernel/linux-2.4.10-pre4/drivers/net/acenic.c Wed Aug 15 10:22:15 2001 +++ linux/drivers/net/acenic.c Wed Sep 5 12:42:08 2001 @@ -161,10 +161,6 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif -#if (BITS_PER_LONG == 64) -#define ACE_64BIT_PTR 1 -#endif - #ifndef SET_MODULE_OWNER #define SET_MODULE_OWNER(dev) {do{} while(0);} #define ACE_MOD_INC_USE_COUNT MOD_INC_USE_COUNT @@ -199,9 +195,15 @@ *dma_handle = virt_to_bus(virt_ptr); return virt_ptr; } + #define pci_free_consistent(cookie, size, ptr, dma_ptr) kfree(ptr) -#define pci_map_single(cookie, address, size, dir) virt_to_bus(address) -#define pci_unmap_single(cookie, address, size, dir) +#define pci_map_page(cookie, page, off, size, dir) \ + virt_to_bus(page_address(page)+(off)) +#define pci_unmap_page(cookie, address, size, dir) +#define pci_set_dma_mask(dev, mask) \ + (((u64)(mask) & 0xffffffff00000000) == 0 ? 0 : -EIO) +#define pci_dma_supported(dev, mask) \ + (((u64)(mask) & 0xffffffff00000000) == 0 ? 
1 : 0) #endif #if (LINUX_VERSION_CODE < 0x02032b) @@ -259,11 +261,6 @@ #define ace_if_down(dev) {do{} while(0);} #endif -#ifndef pci_set_dma_mask -#define pci_set_dma_mask(dev, mask) dev->dma_mask = mask; -#endif - - #if (LINUX_VERSION_CODE >= 0x02031b) #define NEW_NETINIT #define ACE_PROBE_ARG void @@ -585,7 +582,7 @@ dev->irq = pdev->irq; dev->open = &ace_open; dev->hard_start_xmit = &ace_start_xmit; - dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_HIGHDMA; + dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; if (1) { static void ace_watchdog(struct net_device *dev); dev->tx_timeout = &ace_watchdog; @@ -727,6 +724,8 @@ kfree(dev); continue; } + if (ap->pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; boards_found++; } @@ -793,14 +792,12 @@ struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb; if (skb) { -#ifndef DUMMY_PCI_UNMAP dma_addr_t mapping; mapping = ap->skb->rx_std_skbuff[i].mapping; - pci_unmap_single(ap->pdev, mapping, - ACE_STD_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); -#endif + pci_unmap_page(ap->pdev, mapping, + ACE_STD_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); ap->rx_std_ring[i].size = 0; ap->skb->rx_std_skbuff[i].skb = NULL; @@ -812,14 +809,13 @@ struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb; if (skb) { -#ifndef DUMMY_PCI_UNMAP dma_addr_t mapping; mapping = ap->skb->rx_mini_skbuff[i].mapping; - pci_unmap_single(ap->pdev, mapping, - ACE_MINI_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); -#endif + pci_unmap_page(ap->pdev, mapping, + ACE_MINI_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); + ap->rx_mini_ring[i].size = 0; ap->skb->rx_mini_skbuff[i].skb = NULL; dev_kfree_skb(skb); @@ -829,14 +825,12 @@ for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) { struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb; if (skb) { -#ifndef DUMMY_PCI_UNMAP dma_addr_t mapping; mapping = ap->skb->rx_jumbo_skbuff[i].mapping; - pci_unmap_single(ap->pdev, mapping, - ACE_JUMBO_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); -#endif + pci_unmap_page(ap->pdev, mapping, + ACE_JUMBO_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); ap->rx_jumbo_ring[i].size = 0; ap->skb->rx_jumbo_skbuff[i].skb = NULL; @@ -1195,12 +1189,6 @@ ap->pci_latency); /* - * Make sure to enable the 64 bit DMA mask if we're in a 64bit slot - */ - if (!(pci_state & PCI_32BIT)) - pci_set_dma_mask(ap->pdev, (dma_addr_t)~0ULL); - - /* * Set the max DMA transfer size. Seems that for most systems * the performance is better when no MAX parameter is * set. However for systems enabling PCI write and invalidate, @@ -1294,12 +1282,24 @@ #endif /* + * Configure DMA attributes. + */ + if (!pci_set_dma_mask(ap->pdev, (u64) 0xffffffffffffffff)) { + ap->pci_using_dac = 1; + } else if (!pci_set_dma_mask(ap->pdev, (u64) 0xffffffff)) { + ap->pci_using_dac = 0; + } else { + ecode = -ENODEV; + goto init_error; + } + + /* * Initialize the generic info block and the command+event rings * and the control blocks for the transmit and receive rings * as they need to be setup once and for all. */ if (!(info = pci_alloc_consistent(ap->pdev, sizeof(struct ace_info), - &ap->info_dma))) { + &ap->info_dma))) { ecode = -EAGAIN; goto init_error; } @@ -1340,12 +1340,8 @@ ace_load_firmware(dev); ap->fw_running = 0; - tmp_ptr = (unsigned long) ap->info_dma; -#ifdef ACE_64BIT_PTR + tmp_ptr = (u64) ap->info_dma; writel(tmp_ptr >> 32, ®s->InfoPtrHi); -#else - writel(0, ®s->InfoPtrHi); -#endif writel(tmp_ptr & 0xffffffff, ®s->InfoPtrLo); memset(ap->evt_ring, 0, EVT_RING_ENTRIES * sizeof(struct event)); @@ -1779,13 +1775,14 @@ * Make sure IP header starts on a fresh cache line. 
*/ skb_reserve(skb, 2 + 16); - mapping = pci_map_single(ap->pdev, skb->data, - ACE_STD_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); + mapping = pci_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + ACE_STD_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); ap->skb->rx_std_skbuff[idx].skb = skb; -#ifndef DUMMY_PCI_UNMAP ap->skb->rx_std_skbuff[idx].mapping = mapping; -#endif rd = &ap->rx_std_ring[idx]; set_aceaddr(&rd->addr, mapping); @@ -1843,13 +1840,14 @@ * Make sure the IP header ends up on a fresh cache line */ skb_reserve(skb, 2 + 16); - mapping = pci_map_single(ap->pdev, skb->data, - ACE_MINI_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); + mapping = pci_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + ACE_MINI_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); ap->skb->rx_mini_skbuff[idx].skb = skb; -#ifndef DUMMY_PCI_UNMAP ap->skb->rx_mini_skbuff[idx].mapping = mapping; -#endif rd = &ap->rx_mini_ring[idx]; set_aceaddr(&rd->addr, mapping); @@ -1904,13 +1902,14 @@ * Make sure the IP header ends up on a fresh cache line */ skb_reserve(skb, 2 + 16); - mapping = pci_map_single(ap->pdev, skb->data, - ACE_JUMBO_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); + mapping = pci_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + ACE_JUMBO_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); ap->skb->rx_jumbo_skbuff[idx].skb = skb; -#ifndef DUMMY_PCI_UNMAP ap->skb->rx_jumbo_skbuff[idx].mapping = mapping; -#endif rd = &ap->rx_jumbo_ring[idx]; set_aceaddr(&rd->addr, mapping); @@ -2113,10 +2112,8 @@ skb = rip->skb; rip->skb = NULL; -#ifndef DUMMY_PCI_UNMAP - pci_unmap_single(ap->pdev, rip->mapping, mapsize, - PCI_DMA_FROMDEVICE); -#endif + pci_unmap_page(ap->pdev, rip->mapping, mapsize, + PCI_DMA_FROMDEVICE); skb_put(skb, retdesc->size); #if 0 /* unncessary */ @@ -2180,22 +2177,19 @@ do { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP dma_addr_t mapping; -#endif struct tx_ring_info *info; info = ap->skb->tx_skbuff + idx; skb = info->skb; -#ifndef DUMMY_PCI_UNMAP mapping = info->mapping; if (mapping) { - pci_unmap_single(ap->pdev, mapping, info->maplen, - PCI_DMA_TODEVICE); + pci_unmap_page(ap->pdev, mapping, info->maplen, + PCI_DMA_TODEVICE); info->mapping = 0; } -#endif + if (skb) { ap->stats.tx_packets++; ap->stats.tx_bytes += skb->len; @@ -2472,23 +2466,19 @@ for (i = 0; i < TX_RING_ENTRIES; i++) { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP dma_addr_t mapping; -#endif struct tx_ring_info *info; info = ap->skb->tx_skbuff + i; skb = info->skb; -#ifndef DUMMY_PCI_UNMAP mapping = info->mapping; if (mapping) { memset(ap->tx_ring+i, 0, sizeof(struct tx_desc)); - pci_unmap_single(ap->pdev, mapping, info->maplen, - PCI_DMA_TODEVICE); + pci_unmap_page(ap->pdev, mapping, info->maplen, + PCI_DMA_TODEVICE); info->mapping = 0; } -#endif if (skb) { dev_kfree_skb(skb); info->skb = NULL; @@ -2508,79 +2498,35 @@ return 0; } - -/* - * Following below should be (in more clean form!) in arch/ARCH/kernel/pci_*. - * For now, let it stay here. - */ -#if defined(CONFIG_HIGHMEM) && MAX_SKB_FRAGS -#ifndef DUMMY_PCI_UNMAP -#error Sorry, cannot DMA from high memory on this architecture. 
-#endif - -#if defined(CONFIG_X86) -#define DMAADDR_OFFSET 0 -typedef unsigned long long dmaaddr_high_t; -#elif defined(CONFIG_PPC) -#define DMAADDR_OFFSET PCI_DRAM_OFFSET -typedef unsigned long dmaaddr_high_t; -#endif - - -static inline dmaaddr_high_t -pci_map_single_high(struct pci_dev *hwdev, struct page *page, - int offset, size_t size, int dir) -{ - dmaaddr_high_t phys; - - phys = (page-mem_map) * (dmaaddr_high_t) PAGE_SIZE + offset; - - return (phys + DMAADDR_OFFSET); -} - -#else - -typedef unsigned long dmaaddr_high_t; - -static inline dmaaddr_high_t -pci_map_single_high(struct pci_dev *hwdev, struct page *page, - int offset, size_t size, int dir) -{ - return pci_map_single(hwdev, page_address(page) + offset, size, dir); -} - -#endif - - -static inline dmaaddr_high_t +static inline dma_addr_t ace_map_tx_skb(struct ace_private *ap, struct sk_buff *skb, struct sk_buff *tail, u32 idx) { unsigned long addr; struct tx_ring_info *info; - addr = pci_map_single(ap->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); + addr = pci_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + skb->len, PCI_DMA_TODEVICE); info = ap->skb->tx_skbuff + idx; info->skb = tail; -#ifndef DUMMY_PCI_UNMAP info->mapping = addr; info->maplen = skb->len; -#endif return addr; } static inline void -ace_load_tx_bd(struct tx_desc *desc, dmaaddr_high_t addr, u32 flagsize) +ace_load_tx_bd(struct tx_desc *desc, u64 addr, u32 flagsize) { #if !USE_TX_COAL_NOW flagsize &= ~BD_FLG_COAL_NOW; #endif -#ifdef ACE_64BIT_PTR desc->addr.addrhi = addr >> 32; -#endif desc->addr.addrlo = addr; desc->flagsize = flagsize; } @@ -2642,16 +2588,16 @@ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct tx_ring_info *info; - dmaaddr_high_t phys; + dma_addr_t phys; len += frag->size; info = ap->skb->tx_skbuff + idx; desc = ap->tx_ring + idx; - phys = pci_map_single_high(ap->pdev, frag->page, - frag->page_offset, - frag->size, - PCI_DMA_TODEVICE); + phys = pci_map_page(ap->pdev, frag->page, + frag->page_offset, + frag->size, + PCI_DMA_TODEVICE); flagsize = (frag->size << 16); if (skb->ip_summed == CHECKSUM_HW) @@ -2671,10 +2617,8 @@ } else { info->skb = NULL; } -#ifndef DUMMY_PCI_UNMAP info->mapping = phys; info->maplen = frag->size; -#endif ace_load_tx_bd(desc, phys, flagsize); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/net/acenic.h linux/drivers/net/acenic.h --- /opt/kernel/linux-2.4.10-pre4/drivers/net/acenic.h Thu Jun 28 23:47:10 2001 +++ linux/drivers/net/acenic.h Wed Sep 5 12:42:08 2001 @@ -582,16 +582,9 @@ aceaddr stats2_ptr; }; -#if defined(CONFIG_X86) || defined(CONFIG_PPC) -/* Intel has null pci_unmap_single, no reasons to remember mapping. */ -#define DUMMY_PCI_UNMAP -#endif - struct ring_info { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP dma_addr_t mapping; -#endif }; /* Funny... 
As soon as we add maplen on alpha, it starts to work @@ -600,10 +593,8 @@ */ struct tx_ring_info { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP dma_addr_t mapping; int maplen; -#endif }; /* @@ -691,6 +682,7 @@ u32 last_tx, last_std_rx, last_mini_rx; #endif struct net_device_stats stats; + int pci_using_dac; }; @@ -712,31 +704,11 @@ static inline void set_aceaddr(aceaddr *aa, dma_addr_t addr) { - unsigned long baddr = (unsigned long) addr; -#ifdef ACE_64BIT_PTR + u64 baddr = (u64) addr; aa->addrlo = baddr & 0xffffffff; aa->addrhi = baddr >> 32; -#else - /* Don't bother setting zero every time */ - aa->addrlo = baddr; -#endif - mb(); -} - - -#if 0 -static inline void *get_aceaddr(aceaddr *aa) -{ - unsigned long addr; mb(); -#ifdef ACE_64BIT_PTR - addr = (u64)aa->addrhi << 32 | aa->addrlo; -#else - addr = aa->addrlo; -#endif - return (void *)addr; } -#endif static inline void ace_set_txprd(struct ace_regs *regs, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/net/sk98lin/skge.c linux/drivers/net/sk98lin/skge.c --- /opt/kernel/linux-2.4.10-pre4/drivers/net/sk98lin/skge.c Tue Aug 7 17:30:50 2001 +++ linux/drivers/net/sk98lin/skge.c Wed Sep 5 12:42:08 2001 @@ -443,6 +443,11 @@ if (pci_enable_device(pdev)) continue; + /* Configure DMA attributes. */ + if (pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff) && + pci_set_dma_mask(pdev, (u64) 0xffffffff)) + continue; + if ((dev = init_etherdev(dev, sizeof(DEV_NET))) == 0) { printk(KERN_ERR "Unable to allocate etherdev " "structure!\n"); @@ -1769,10 +1774,12 @@ #endif /* set up descriptor and CONTROL dword */ - PhysAddr = (SK_U64) pci_map_single(&pAC->PciDev, - pMessage->data, - pMessage->len, - PCI_DMA_TODEVICE); + PhysAddr = (SK_U64) pci_map_page(&pAC->PciDev, + virt_to_page(pMessage->data), + ((unsigned long) pMessage->data & + ~PAGE_MASK), + pMessage->len, + PCI_DMA_TODEVICE); pTxd->VDataLow = (SK_U32) (PhysAddr & 0xffffffff); pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32); pTxd->pMBuf = pMessage; @@ -1864,9 +1871,9 @@ /* release the DMA mapping */ PhysAddr = ((SK_U64) pTxd->VDataHigh) << (SK_U64) 32; PhysAddr |= (SK_U64) pTxd->VDataLow; - pci_unmap_single(&pAC->PciDev, PhysAddr, - pTxd->pMBuf->len, - PCI_DMA_TODEVICE); + pci_unmap_page(&pAC->PciDev, PhysAddr, + pTxd->pMBuf->len, + PCI_DMA_TODEVICE); /* free message */ DEV_KFREE_SKB_ANY(pTxd->pMBuf); @@ -1945,10 +1952,12 @@ pRxPort->pRxdRingTail = pRxd->pNextRxd; pRxPort->RxdRingFree--; Length = pAC->RxBufSize; - PhysAddr = (SK_U64) pci_map_single(&pAC->PciDev, - pMsgBlock->data, - pAC->RxBufSize - 2, - PCI_DMA_FROMDEVICE); + PhysAddr = (SK_U64) pci_map_page(&pAC->PciDev, + virt_to_page(pMsgBlock->data), + ((unsigned long) pMsgBlock->data & + ~PAGE_MASK), + pAC->RxBufSize - 2, + PCI_DMA_FROMDEVICE); pRxd->VDataLow = (SK_U32) (PhysAddr & 0xffffffff); pRxd->VDataHigh = (SK_U32) (PhysAddr >> 32); pRxd->pMBuf = pMsgBlock; @@ -2092,9 +2101,9 @@ PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; PhysAddr |= (SK_U64) pRxd->VDataLow; pci_dma_sync_single(&pAC->PciDev, - (dma_addr_t) PhysAddr, - FrameLength, - PCI_DMA_FROMDEVICE); + (dma_addr_t) PhysAddr, + FrameLength, + PCI_DMA_FROMDEVICE); ReQueueRxBuffer(pAC, pRxPort, pMsg, pRxd->VDataHigh, pRxd->VDataLow); @@ -2116,9 +2125,9 @@ skb_reserve(pNewMsg, 2); skb_put(pNewMsg, FrameLength); pci_dma_sync_single(&pAC->PciDev, - (dma_addr_t) PhysAddr, - FrameLength, - PCI_DMA_FROMDEVICE); + (dma_addr_t) PhysAddr, + FrameLength, + PCI_DMA_FROMDEVICE); eth_copy_and_sum(pNewMsg, pMsg->data, FrameLength, 0); ReQueueRxBuffer(pAC, 
pRxPort, pMsg, @@ -2136,10 +2145,10 @@ PhysAddr |= (SK_U64) pRxd->VDataLow; /* release the DMA mapping */ - pci_unmap_single(&pAC->PciDev, - PhysAddr, - pAC->RxBufSize - 2, - PCI_DMA_FROMDEVICE); + pci_unmap_page(&pAC->PciDev, + PhysAddr, + pAC->RxBufSize - 2, + PCI_DMA_FROMDEVICE); /* set length in message */ skb_put(pMsg, FrameLength); @@ -2261,10 +2270,10 @@ /* release the DMA mapping */ PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; PhysAddr |= (SK_U64) pRxd->VDataLow; - pci_unmap_single(&pAC->PciDev, - PhysAddr, - pAC->RxBufSize - 2, - PCI_DMA_FROMDEVICE); + pci_unmap_page(&pAC->PciDev, + PhysAddr, + pAC->RxBufSize - 2, + PCI_DMA_FROMDEVICE); DEV_KFREE_SKB_IRQ(pRxd->pMBuf); pRxd->pMBuf = NULL; pRxPort->RxdRingFree++; @@ -2341,10 +2350,10 @@ if (pRxd->pMBuf != NULL) { PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; PhysAddr |= (SK_U64) pRxd->VDataLow; - pci_unmap_single(&pAC->PciDev, - PhysAddr, - pAC->RxBufSize - 2, - PCI_DMA_FROMDEVICE); + pci_unmap_page(&pAC->PciDev, + PhysAddr, + pAC->RxBufSize - 2, + PCI_DMA_FROMDEVICE); DEV_KFREE_SKB(pRxd->pMBuf); pRxd->pMBuf = NULL; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/net/sungem.c linux/drivers/net/sungem.c --- /opt/kernel/linux-2.4.10-pre4/drivers/net/sungem.c Wed Aug 15 04:57:28 2001 +++ linux/drivers/net/sungem.c Wed Sep 5 12:42:08 2001 @@ -416,7 +416,8 @@ while (entry != limit) { struct sk_buff *skb; struct gem_txd *txd; - u32 dma_addr, dma_len; + dma_addr_t dma_addr; + u32 dma_len; int frag; skb = gp->tx_skbs[entry]; @@ -442,10 +443,10 @@ for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { txd = &gp->init_block->txd[entry]; - dma_addr = (u32) le64_to_cpu(txd->buffer); + dma_addr = le64_to_cpu(txd->buffer); dma_len = le64_to_cpu(txd->control_word) & TXDCTRL_BUFSZ; - pci_unmap_single(gp->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE); + pci_unmap_page(gp->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE); entry = NEXT_TX(entry); } @@ -496,7 +497,7 @@ struct gem_rxd *rxd = &gp->init_block->rxd[entry]; struct sk_buff *skb; u64 status = cpu_to_le64(rxd->status_word); - u32 dma_addr; + dma_addr_t dma_addr; int len; if ((status & RXDCTRL_OWN) != 0) @@ -518,7 +519,7 @@ goto next; } - dma_addr = (u32) cpu_to_le64(rxd->buffer); + dma_addr = cpu_to_le64(rxd->buffer); if (len > RX_COPY_THRESHOLD) { struct sk_buff *new_skb; @@ -527,15 +528,18 @@ drops++; goto drop_it; } - pci_unmap_single(gp->pdev, dma_addr, - RX_BUF_ALLOC_SIZE(gp), PCI_DMA_FROMDEVICE); + pci_unmap_page(gp->pdev, dma_addr, + RX_BUF_ALLOC_SIZE(gp), + PCI_DMA_FROMDEVICE); gp->rx_skbs[entry] = new_skb; new_skb->dev = gp->dev; skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET)); - rxd->buffer = cpu_to_le64(pci_map_single(gp->pdev, - new_skb->data, - RX_BUF_ALLOC_SIZE(gp), - PCI_DMA_FROMDEVICE)); + rxd->buffer = cpu_to_le64(pci_map_page(gp->pdev, + virt_to_page(new_skb->data), + ((unsigned long) new_skb->data & + ~PAGE_MASK), + RX_BUF_ALLOC_SIZE(gp), + PCI_DMA_FROMDEVICE)); skb_reserve(new_skb, RX_OFFSET); /* Trim the original skb for the netif. 
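/*
 * Illustrative sketch, not part of the patch: the recurring conversion the
 * acenic, sk98lin and sungem hunks apply when replacing pci_map_single()
 * with pci_map_page().  A physically contiguous kernel-virtual buffer (for
 * example skb->data) is described by its struct page plus the offset into
 * that page, which also covers highmem pages.  The function name is made up
 * for the example and it assumes <linux/pci.h> and the arch page headers.
 */
static inline dma_addr_t example_map_virt(struct pci_dev *pdev, void *buf,
					  size_t len, int direction)
{
	return pci_map_page(pdev, virt_to_page(buf),
			    ((unsigned long) buf & ~PAGE_MASK),
			    len, direction);
}
/* e.g. example_map_virt(ap->pdev, skb->data, skb->len, PCI_DMA_TODEVICE) */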
*/ @@ -659,37 +663,45 @@ if (skb_shinfo(skb)->nr_frags == 0) { struct gem_txd *txd = &gp->init_block->txd[entry]; - u32 mapping, len; + dma_addr_t mapping; + u32 len; len = skb->len; - mapping = pci_map_single(gp->pdev, skb->data, len, PCI_DMA_TODEVICE); + mapping = pci_map_page(gp->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + len, PCI_DMA_TODEVICE); ctrl |= TXDCTRL_SOF | TXDCTRL_EOF | len; txd->buffer = cpu_to_le64(mapping); txd->control_word = cpu_to_le64(ctrl); entry = NEXT_TX(entry); } else { struct gem_txd *txd; - u32 first_len, first_mapping; + u32 first_len; + dma_addr_t first_mapping; int frag, first_entry = entry; /* We must give this initial chunk to the device last. * Otherwise we could race with the device. */ first_len = skb->len - skb->data_len; - first_mapping = pci_map_single(gp->pdev, skb->data, - first_len, PCI_DMA_TODEVICE); + first_mapping = pci_map_page(gp->pdev, virt_to_page(skb->data), + ((unsigned long) skb->data & ~PAGE_MASK), + first_len, PCI_DMA_TODEVICE); entry = NEXT_TX(entry); for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; - u32 len, mapping; + u32 len; + dma_addr_t mapping; u64 this_ctrl; len = this_frag->size; - mapping = pci_map_single(gp->pdev, - ((void *) page_address(this_frag->page) + - this_frag->page_offset), - len, PCI_DMA_TODEVICE); + mapping = pci_map_page(gp->pdev, + this_frag->page, + this_frag->page_offset, + len, PCI_DMA_TODEVICE); this_ctrl = ctrl; if (frag == skb_shinfo(skb)->nr_frags - 1) this_ctrl |= TXDCTRL_EOF; @@ -946,19 +958,18 @@ struct gem_init_block *gb = gp->init_block; struct sk_buff *skb; int i; - u32 dma_addr; + dma_addr_t dma_addr; for (i = 0; i < RX_RING_SIZE; i++) { struct gem_rxd *rxd; rxd = &gb->rxd[i]; if (gp->rx_skbs[i] != NULL) { - skb = gp->rx_skbs[i]; - dma_addr = (u32) le64_to_cpu(rxd->buffer); - pci_unmap_single(gp->pdev, dma_addr, - RX_BUF_ALLOC_SIZE(gp), - PCI_DMA_FROMDEVICE); + dma_addr = le64_to_cpu(rxd->buffer); + pci_unmap_page(gp->pdev, dma_addr, + RX_BUF_ALLOC_SIZE(gp), + PCI_DMA_FROMDEVICE); dev_kfree_skb_any(skb); gp->rx_skbs[i] = NULL; } @@ -976,10 +987,10 @@ for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { txd = &gb->txd[i]; - dma_addr = (u32) le64_to_cpu(txd->buffer); - pci_unmap_single(gp->pdev, dma_addr, - le64_to_cpu(txd->control_word) & - TXDCTRL_BUFSZ, PCI_DMA_TODEVICE); + dma_addr = le64_to_cpu(txd->buffer); + pci_unmap_page(gp->pdev, dma_addr, + le64_to_cpu(txd->control_word) & + TXDCTRL_BUFSZ, PCI_DMA_TODEVICE); if (frag != skb_shinfo(skb)->nr_frags) i++; @@ -994,7 +1005,7 @@ struct gem_init_block *gb = gp->init_block; struct net_device *dev = gp->dev; int i, gfp_flags = GFP_KERNEL; - u32 dma_addr; + dma_addr_t dma_addr; if (from_irq) gfp_flags = GFP_ATOMIC; @@ -1017,9 +1028,12 @@ gp->rx_skbs[i] = skb; skb->dev = dev; skb_put(skb, (ETH_FRAME_LEN + RX_OFFSET)); - dma_addr = pci_map_single(gp->pdev, skb->data, - RX_BUF_ALLOC_SIZE(gp), - PCI_DMA_FROMDEVICE); + dma_addr = pci_map_page(gp->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + RX_BUF_ALLOC_SIZE(gp), + PCI_DMA_FROMDEVICE); rxd->buffer = cpu_to_le64(dma_addr); rxd->status_word = cpu_to_le64(RXDCTRL_FRESH(gp)); skb_reserve(skb, RX_OFFSET); @@ -1135,13 +1149,15 @@ static void gem_init_dma(struct gem *gp) { + u64 desc_dma = (u64) gp->gblock_dvma; u32 val; val = (TXDMA_CFG_BASE | (0x7ff << 10) | TXDMA_CFG_PMODE); writel(val, gp->regs + TXDMA_CFG); - writel(0, gp->regs + TXDMA_DBHI); - 
writel(gp->gblock_dvma, gp->regs + TXDMA_DBLOW); + writel(desc_dma >> 32, gp->regs + TXDMA_DBHI); + writel(desc_dma & 0xffffffff, gp->regs + TXDMA_DBLOW); + desc_dma += (TX_RING_SIZE * sizeof(struct gem_txd)); writel(0, gp->regs + TXDMA_KICK); @@ -1149,10 +1165,8 @@ ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_512); writel(val, gp->regs + RXDMA_CFG); - writel(0, gp->regs + RXDMA_DBHI); - writel((gp->gblock_dvma + - (TX_RING_SIZE * sizeof(struct gem_txd))), - gp->regs + RXDMA_DBLOW); + writel(desc_dma >> 32, gp->regs + RXDMA_DBHI); + writel(desc_dma & 0xffffffff, gp->regs + RXDMA_DBLOW); writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK); @@ -1560,8 +1574,10 @@ } { - u32 cfg = readl(gp->regs + GREG_BIFCFG); + u32 cfg; + /* XXX Why do I do this? -DaveM XXX */ + cfg = readl(gp->regs + GREG_BIFCFG); cfg |= GREG_BIFCFG_B64DIS; writel(cfg, gp->regs + GREG_BIFCFG); @@ -1619,7 +1635,7 @@ unsigned long gemreg_base, gemreg_len; struct net_device *dev; struct gem *gp; - int i, err; + int i, err, pci_using_dac; if (gem_version_printed++ == 0) printk(KERN_INFO "%s", version); @@ -1632,6 +1648,29 @@ } pci_set_master(pdev); + /* Configure DMA attributes. */ + + /* All of the GEM documentation states that 64-bit DMA addressing + * is fully supported and should work just fine. However the + * front end for RIO based GEMs is different and only supports + * 32-bit addressing. + * + * For now we assume the various PPC GEMs are 32-bit only as well. + */ + if (pdev->vendor == PCI_VENDOR_ID_SUN && + pdev->device == PCI_DEVICE_ID_SUN_GEM && + !pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) { + pci_using_dac = 1; + } else { + err = pci_set_dma_mask(pdev, (u64) 0xffffffff); + if (err) { + printk(KERN_ERR PFX "No usable DMA configuration, " + "aborting.\n"); + return err; + } + pci_using_dac = 0; + } + gemreg_base = pci_resource_start(pdev, 0); gemreg_len = pci_resource_len(pdev, 0); @@ -1715,6 +1754,8 @@ /* GEM can do it all... */ dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + if (pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; return 0; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/pci/pci.c linux/drivers/pci/pci.c --- /opt/kernel/linux-2.4.10-pre4/drivers/pci/pci.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/pci/pci.c Wed Sep 5 12:42:08 2001 @@ -833,17 +833,27 @@ } int -pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) +pci_set_dma_mask(struct pci_dev *dev, u64 mask) { - if(! pci_dma_supported(dev, mask)) - return -EIO; + if (!pci_dma_supported(dev, mask)) + return -EIO; - dev->dma_mask = mask; + dev->dma_mask = mask; - return 0; + return 0; } +int +pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) +{ + if (!pci_dac_dma_supported(dev, mask)) + return -EIO; + + dev->dma_mask = mask; + return 0; +} + /* * Translate the low bits of the PCI base * to the resource type @@ -1677,7 +1687,8 @@ if (!page) return 0; page->vaddr = pci_alloc_consistent (pool->dev, - pool->allocation, &page->dma); + pool->allocation, + &page->dma); if (page->vaddr) { memset (page->bitmap, 0xff, mapsize); // bit set == free if (pool->flags & SLAB_POISON) @@ -1863,14 +1874,14 @@ if ((page = pool_find_page (pool, dma)) == 0) { printk (KERN_ERR "pci_pool_free %s/%s, %p/%x (bad dma)\n", pool->dev ? pool->dev->slot_name : NULL, - pool->name, vaddr, dma); + pool->name, vaddr, (int) (dma & 0xffffffff)); return; } #ifdef CONFIG_PCIPOOL_DEBUG if (((dma - page->dma) + (void *)page->vaddr) != vaddr) { printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%x\n", pool->dev ? 
pool->dev->slot_name : NULL, - pool->name, vaddr, dma); + pool->name, vaddr, (int) (dma & 0xffffffff)); return; } #endif @@ -1955,6 +1966,7 @@ EXPORT_SYMBOL(pci_find_subsys); EXPORT_SYMBOL(pci_set_master); EXPORT_SYMBOL(pci_set_dma_mask); +EXPORT_SYMBOL(pci_dac_set_dma_mask); EXPORT_SYMBOL(pci_assign_resource); EXPORT_SYMBOL(pci_register_driver); EXPORT_SYMBOL(pci_unregister_driver); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/pci/setup-bus.c linux/drivers/pci/setup-bus.c --- /opt/kernel/linux-2.4.10-pre4/drivers/pci/setup-bus.c Sun May 20 02:43:06 2001 +++ linux/drivers/pci/setup-bus.c Wed Sep 5 12:42:08 2001 @@ -23,7 +23,7 @@ #include -#define DEBUG_CONFIG 1 +#define DEBUG_CONFIG 0 #if DEBUG_CONFIG # define DBGC(args) printk args #else diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/pci/setup-res.c linux/drivers/pci/setup-res.c --- /opt/kernel/linux-2.4.10-pre4/drivers/pci/setup-res.c Sun May 20 02:43:06 2001 +++ linux/drivers/pci/setup-res.c Wed Sep 5 12:42:08 2001 @@ -25,7 +25,7 @@ #include -#define DEBUG_CONFIG 1 +#define DEBUG_CONFIG 0 #if DEBUG_CONFIG # define DBGC(args) printk args #else diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/s390/block/dasd.c linux/drivers/s390/block/dasd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/s390/block/dasd.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/s390/block/dasd.c Wed Sep 5 11:13:22 2001 @@ -696,13 +696,6 @@ goto out_hardsect_size; memset (hardsect_size[major], 0, (1 << MINORBITS) * sizeof (int)); - /* init max_sectors */ - max_sectors[major] = - (int *) kmalloc ((1 << MINORBITS) * sizeof (int), GFP_ATOMIC); - if (!max_sectors[major]) - goto out_max_sectors; - memset (max_sectors[major], 0, (1 << MINORBITS) * sizeof (int)); - /* finally do the gendisk stuff */ major_info->gendisk.part = kmalloc ((1 << MINORBITS) * sizeof (struct hd_struct), @@ -714,18 +707,13 @@ INIT_BLK_DEV (major, do_dasd_request, dasd_get_queue, NULL); - major_info->gendisk.major = major; - major_info->gendisk.next = gendisk_head; major_info->gendisk.sizes = blk_size[major]; - gendisk_head = &major_info->gendisk; + major_info->gendisk.major = major; + add_gendisk(&major_info->gendisk); return major; /* error handling - free the prior allocated memory */ out_gendisk: - kfree (max_sectors[major]); - max_sectors[major] = NULL; - - out_max_sectors: kfree (hardsect_size[major]); hardsect_size[major] = NULL; @@ -775,7 +763,6 @@ { int rc = 0; int major; - struct gendisk *dd, *prev = NULL; unsigned long flags; if (major_info == NULL) { @@ -784,32 +771,16 @@ major = major_info->gendisk.major; INIT_BLK_DEV (major, NULL, NULL, NULL); - /* do the gendisk stuff */ - for (dd = gendisk_head; dd; dd = dd->next) { - if (dd == &major_info->gendisk) { - if (prev) - prev->next = dd->next; - else - gendisk_head = dd->next; - break; - } - prev = dd; - } - if (dd == NULL) { - return -ENOENT; - } + del_gendisk(&major_info->gendisk); + kfree (major_info->dasd_device); kfree (major_info->gendisk.part); kfree (blk_size[major]); kfree (blksize_size[major]); kfree (hardsect_size[major]); - kfree (max_sectors[major]); - blk_size[major] = NULL; - blksize_size[major] = NULL; - hardsect_size[major] = NULL; - max_sectors[major] = NULL; + blk_clear(major); rc = devfs_unregister_blkdev (major, DASD_NAME); if (rc < 0) { @@ -1591,19 +1562,11 @@ dasd_end_request (req, 0); dasd_dequeue_request (queue,req); } else { - /* relocate request according to partition table */ - req->sector += - 
device->major_info->gendisk. - part[MINOR (req->rq_dev)].start_sect; cqr = device->discipline->build_cp_from_req (device, req); if (cqr == NULL) { DASD_MESSAGE (KERN_WARNING, device, "CCW creation failed on request %p\n", req); - /* revert relocation of request */ - req->sector -= - device->major_info->gendisk. - part[MINOR (req->rq_dev)].start_sect; break; /* terminate request queue loop */ } @@ -1652,10 +1615,10 @@ dasd_run_bh (dasd_device_t * device) { long flags; - spin_lock_irqsave (&io_request_lock, flags); + spin_lock_irqsave (&device->request_queue.queue_lock, flags); atomic_set (&device->bh_scheduled, 0); dasd_process_queues (device); - spin_unlock_irqrestore (&io_request_lock, flags); + spin_unlock_irqrestore (&device->request_queue.queue_lock, flags); } /* @@ -2536,15 +2499,14 @@ /* SECTION: Management of device list */ int -dasd_fillgeo(int kdev,struct hd_geometry *geo) +dasd_fillgeo(kdev_t kdev,struct hd_geometry *geo) { dasd_device_t *device = dasd_device_from_kdev (kdev); if (!device->discipline->fill_geometry) return -EINVAL; device->discipline->fill_geometry (device, geo); - geo->start = device->major_info->gendisk.part[MINOR(kdev)].start_sect - >> device->sizes.s2b_shift;; + geo->start = get_start_sect(kdev); return 0; } @@ -3186,6 +3148,12 @@ int major = MAJOR(device->kdev); int minor = MINOR(device->kdev); + device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); + device->request_queue->queuedata = device; + blk_init_queue (device->request_queue, do_dasd_request); + blk_queue_headactive (device->request_queue, 0); + elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); + for (i = 0; i < (1 << DASD_PARTN_BITS); i++) { if (i == 0) device->major_info->gendisk.sizes[minor] = @@ -3195,17 +3163,11 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = device->sizes.bp_block; blksize_size[major][minor + i] = device->sizes.bp_block; - max_sectors[major][minor + i] = - device->discipline->max_blocks << - device->sizes.s2b_shift; + blk_queue_max_sectors(device->request_queue, + device->discipline->max_blocks << device->sizes.s2b_shift); device->major_info->gendisk.part[minor+i].start_sect = 0; device->major_info->gendisk.part[minor+i].nr_sects = 0; } - device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); - device->request_queue->queuedata = device; - blk_init_queue (device->request_queue, do_dasd_request); - blk_queue_headactive (device->request_queue, 0); - elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); return rc; } @@ -3221,7 +3183,6 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = 0; blksize_size[major][minor + i] = 0; - max_sectors[major][minor + i] = 0; } if (device->request_queue) { blk_cleanup_queue (device->request_queue); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/s390/block/xpram.c linux/drivers/s390/block/xpram.c --- /opt/kernel/linux-2.4.10-pre4/drivers/s390/block/xpram.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/s390/block/xpram.c Wed Sep 5 11:13:22 2001 @@ -1214,8 +1214,7 @@ { int i; - /* first of all, flush it all and reset all the data structures */ - + /* first of all, flush it all and reset all the data structures */ for (i=0; irequest_queue; + + spin_lock_irqsave (&q->queue_lock, flags_ior); s390irq_spin_lock_irqsave(ti->devinfo.irq,flags_390irq); atomic_set(&ti->bh_scheduled,0); tapeblock_exec_IO(ti); s390irq_spin_unlock_irqrestore(ti->devinfo.irq,flags_390irq); - spin_unlock_irqrestore 
(&io_request_lock, flags_ior); + spin_unlock_irqrestore (&q->queue_lock, flags_ior); } void diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/Config.in Thu Jul 5 20:28:16 2001 +++ linux/drivers/scsi/Config.in Fri Aug 3 12:04:42 2001 @@ -20,10 +20,6 @@ comment 'Some SCSI devices (e.g. CD jukebox) support multiple LUNs' -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -#fi - bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aha1542.c linux/drivers/scsi/aha1542.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aha1542.c Wed May 2 01:05:00 2001 +++ linux/drivers/scsi/aha1542.c Wed Sep 5 12:42:08 2001 @@ -67,12 +67,10 @@ int nseg, int badseg) { - printk(KERN_CRIT "sgpnt[%d:%d] addr %p/0x%lx alt %p/0x%lx length %d\n", + printk(KERN_CRIT "sgpnt[%d:%d] addr %p/0x%lx length %d\n", badseg, nseg, sgpnt[badseg].address, SCSI_PA(sgpnt[badseg].address), - sgpnt[badseg].alt_address, - sgpnt[badseg].alt_address ? SCSI_PA(sgpnt[badseg].alt_address) : 0, sgpnt[badseg].length); /* @@ -716,7 +714,7 @@ unsigned char *ptr; printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i); for (i = 0; i < SCpnt->use_sg; i++) { - printk(KERN_CRIT "%d: %x %x %d\n", i, (unsigned int) sgpnt[i].address, (unsigned int) sgpnt[i].alt_address, + printk(KERN_CRIT "%d: %p %d\n", i, sgpnt[i].address, sgpnt[i].length); }; printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aic7xxx/aic7xxx_linux.c linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aic7xxx/aic7xxx_linux.c Mon Aug 13 02:37:53 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Wed Sep 5 11:13:22 2001 @@ -1165,9 +1165,9 @@ if (host == NULL) return (ENOMEM); - ahc_lock(ahc, &s); *((struct ahc_softc **)host->hostdata) = ahc; ahc->platform_data->host = host; + ahc_lock(ahc, &s); host->can_queue = AHC_MAX_QUEUE; host->cmd_per_lun = 2; host->sg_tablesize = AHC_NSEG; @@ -1313,7 +1313,9 @@ TAILQ_INIT(&ahc->platform_data->completeq); TAILQ_INIT(&ahc->platform_data->device_runq); ahc->platform_data->hw_dma_mask = 0xFFFFFFFF; - ahc_lockinit(ahc); + /* + * ahc_lockinit done by scsi_register, as we don't own that lock + */ ahc_done_lockinit(ahc); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) init_MUTEX_LOCKED(&ahc->platform_data->eh_sem); @@ -1569,22 +1571,17 @@ int ahc_linux_queue(Scsi_Cmnd * cmd, void (*scsi_done) (Scsi_Cmnd *)) { - struct ahc_softc *ahc; + struct ahc_softc *ahc = *(struct ahc_softc **)cmd->host->hostdata; struct ahc_linux_device *dev; - u_long flags; - - ahc = *(struct ahc_softc **)cmd->host->hostdata; /* * Save the callback on completion function. */ cmd->scsi_done = scsi_done; - ahc_lock(ahc, &flags); dev = ahc_linux_get_device(ahc, cmd->channel, cmd->target, cmd->lun, /*alloc*/TRUE); if (dev == NULL) { - ahc_unlock(ahc, &flags); printf("aic7xxx_linux_queue: Unable to allocate device!\n"); return (-ENOMEM); } @@ -1595,7 +1592,6 @@ dev->flags |= AHC_DEV_ON_RUN_LIST; ahc_linux_run_device_queues(ahc); } - ahc_unlock(ahc, &flags); return (0); } @@ -2444,12 +2440,10 @@ flag == SCB_ABORT ? 
"n ABORT" : " TARGET RESET"); /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. + * we used to drop io_request_lock and lock ahc from here, but + * now that the global lock is gone the upper layer have already + * done what ahc_lock would do /jens */ - spin_unlock_irq(&io_request_lock); - - ahc_lock(ahc, &s); /* * First determine if we currently own this command. @@ -2698,7 +2692,7 @@ ahc_unlock(ahc, &s); if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return (retval); } @@ -2741,14 +2735,7 @@ u_long s; int found; - /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. - */ - spin_unlock_irq(&io_request_lock); - ahc = *(struct ahc_softc **)cmd->host->hostdata; - ahc_lock(ahc, &s); found = ahc_reset_channel(ahc, cmd->channel + 'A', /*initiate reset*/TRUE); acmd = TAILQ_FIRST(&ahc->platform_data->completeq); @@ -2761,7 +2748,7 @@ if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return SUCCESS; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Sat May 5 00:16:28 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Fri Aug 3 12:04:42 2001 @@ -81,7 +81,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + can_dma_32: 1 \ } #endif /* _AIC7XXX_LINUX_HOST_H_ */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aic7xxx/aic7xxx_osm.h linux/drivers/scsi/aic7xxx/aic7xxx_osm.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/aic7xxx/aic7xxx_osm.h Sun Aug 12 19:51:42 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h Thu Sep 6 13:45:58 2001 @@ -543,9 +543,6 @@ TAILQ_HEAD(, ahc_linux_device) device_runq; struct ahc_completeq completeq; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0) - spinlock_t spin_lock; -#endif u_int qfrozen; struct timer_list reset_timer; struct semaphore eh_sem; @@ -684,20 +681,20 @@ static __inline void ahc_lockinit(struct ahc_softc *ahc) { - spin_lock_init(&ahc->platform_data->spin_lock); + spin_lock_init(&ahc->platform_data->host->host_lock); } static __inline void ahc_lock(struct ahc_softc *ahc, unsigned long *flags) { *flags = 0; - spin_lock_irqsave(&ahc->platform_data->spin_lock, *flags); + spin_lock_irqsave(&ahc->platform_data->host->host_lock, *flags); } static __inline void ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&ahc->platform_data->spin_lock, *flags); + spin_unlock_irqrestore(&ahc->platform_data->host->host_lock, *flags); } static __inline void @@ -709,14 +706,18 @@ static __inline void ahc_done_lock(struct ahc_softc *ahc, unsigned long *flags) { + struct Scsi_Host *host = ahc->platform_data->host; + *flags = 0; - spin_lock_irqsave(&io_request_lock, *flags); + spin_lock_irqsave(&host->host_lock, *flags); } static __inline void ahc_done_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&io_request_lock, *flags); + struct Scsi_Host *host = ahc->platform_data->host; + + spin_unlock_irqrestore(&host->host_lock, *flags); } #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0) */ diff -urN --exclude-from /home/axboe/exclude 
/opt/kernel/linux-2.4.10-pre4/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/hosts.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/hosts.c Fri Aug 3 12:04:42 2001 @@ -160,6 +160,7 @@ break; } } + spin_lock_init(&retval->host_lock); atomic_set(&retval->host_active,0); retval->host_busy = 0; retval->host_failed = 0; @@ -235,6 +236,7 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + retval->can_dma_32 = tpnt->can_dma_32; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/hosts.h Wed Aug 15 23:23:11 2001 +++ linux/drivers/scsi/hosts.h Thu Sep 6 13:28:54 2001 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned can_dma_32:1; + /* * Name of proc directory */ @@ -317,6 +319,7 @@ struct Scsi_Host * next; Scsi_Device * host_queue; + spinlock_t host_lock; struct task_struct * ehandler; /* Error recovery thread. */ struct semaphore * eh_wait; /* The error recovery thread waits on @@ -390,6 +393,7 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned can_dma_32:1; /* * True if this host was loaded as a loadable module */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/osst.c linux/drivers/scsi/osst.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/osst.c Fri Jul 20 06:18:15 2001 +++ linux/drivers/scsi/osst.c Wed Sep 5 12:42:08 2001 @@ -4933,7 +4933,6 @@ tb->sg[0].address = (unsigned char *)__get_free_pages(priority, order); if (tb->sg[0].address != NULL) { - tb->sg[0].alt_address = NULL; tb->sg[0].length = b_size; break; } @@ -4969,7 +4968,6 @@ tb = NULL; break; } - tb->sg[segs].alt_address = NULL; tb->sg[segs].length = b_size; got += b_size; segs++; @@ -5043,7 +5041,6 @@ normalize_buffer(STbuffer); return FALSE; } - STbuffer->sg[segs].alt_address = NULL; STbuffer->sg[segs].length = b_size; STbuffer->sg_segs += 1; got += b_size; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/qlogicfc.c linux/drivers/scsi/qlogicfc.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/qlogicfc.c Sun Aug 12 19:51:41 2001 +++ linux/drivers/scsi/qlogicfc.c Wed Sep 5 12:42:08 2001 @@ -21,6 +21,9 @@ * * Big endian support and dynamic DMA mapping added * by Jakub Jelinek . + * + * Conversion to final pci64 DMA interfaces + * by David S. Miller . */ /* @@ -63,31 +66,10 @@ #include "sd.h" #include "hosts.h" -#if 1 -/* Once pci64_ DMA mapping interface is in, kill this. 
*/ -typedef dma_addr_t dma64_addr_t; -#define pci64_alloc_consistent(d,s,p) pci_alloc_consistent((d),(s),(p)) -#define pci64_free_consistent(d,s,c,a) pci_free_consistent((d),(s),(c),(a)) -#define pci64_map_single(d,c,s,dir) pci_map_single((d),(c),(s),(dir)) -#define pci64_map_sg(d,s,n,dir) pci_map_sg((d),(s),(n),(dir)) -#define pci64_unmap_single(d,a,s,dir) pci_unmap_single((d),(a),(s),(dir)) -#define pci64_unmap_sg(d,s,n,dir) pci_unmap_sg((d),(s),(n),(dir)) -#if BITS_PER_LONG > 32 #define pci64_dma_hi32(a) ((u32) (0xffffffff & (((u64)(a))>>32))) #define pci64_dma_lo32(a) ((u32) (0xffffffff & (((u64)(a))))) -#else -#define pci64_dma_hi32(a) 0 -#define pci64_dma_lo32(a) (a) -#endif /* BITS_PER_LONG */ -#define pci64_dma_build(hi,lo) (lo) -#define sg_dma64_address(s) sg_dma_address(s) -#define sg_dma64_len(s) sg_dma_len(s) -#if BITS_PER_LONG > 32 -#define PCI64_DMA_BITS 64 -#else -#define PCI64_DMA_BITS 32 -#endif /* BITS_PER_LONG */ -#endif +#define pci64_dma_build(hi,lo) \ + ((dma_addr_t)(((u64)(lo))|(((u64)(hi))<<32))) #include "qlogicfc.h" @@ -245,13 +227,8 @@ }; /* entry header type commands */ -#if PCI64_DMA_BITS > 32 #define ENTRY_COMMAND 0x19 #define ENTRY_CONTINUATION 0x0a -#else -#define ENTRY_COMMAND 0x11 -#define ENTRY_CONTINUATION 0x02 -#endif #define ENTRY_STATUS 0x03 #define ENTRY_MARKER 0x04 @@ -262,23 +239,12 @@ #define EFLAG_BAD_HEADER 4 #define EFLAG_BAD_PAYLOAD 8 -#if PCI64_DMA_BITS > 32 - struct dataseg { u_int d_base; u_int d_base_hi; u_int d_count; }; -#else - -struct dataseg { - u_int d_base; - u_int d_count; -}; - -#endif - struct Command_Entry { struct Entry_header hdr; u_int handle; @@ -303,18 +269,10 @@ #define CFLAG_READ 0x20 #define CFLAG_WRITE 0x40 -#if PCI64_DMA_BITS > 32 -struct Continuation_Entry { - struct Entry_header hdr; - struct dataseg dataseg[DATASEGS_PER_CONT]; -}; -#else struct Continuation_Entry { struct Entry_header hdr; - u32 rsvd; struct dataseg dataseg[DATASEGS_PER_CONT]; }; -#endif struct Marker_Entry { struct Entry_header hdr; @@ -734,7 +692,7 @@ struct isp2x00_hostdata *hostdata; struct pci_dev *pdev; unsigned short device_ids[2]; - dma64_addr_t busaddr; + dma_addr_t busaddr; int i; @@ -746,7 +704,7 @@ tmpt->proc_name = "isp2x00"; if (pci_present() == 0) { - printk("qlogicfc : PCI not present\n"); + printk(KERN_INFO "qlogicfc : PCI not present\n"); return 0; } @@ -756,6 +714,11 @@ if (pci_enable_device(pdev)) continue; + /* Try to configure DMA attributes. 
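/*
 * Illustrative sketch, not part of the patch: the probe-time DMA mask
 * negotiation that the sk98lin, sungem and qlogicfc hunks add.  Try the
 * full 64-bit mask first, fall back to a 32-bit mask, and refuse the device
 * if neither succeeds (pci_set_dma_mask() returns 0 on success).  The
 * function name and the pci_using_dac flag are illustrative only.
 */
static int example_probe_dma_mask(struct pci_dev *pdev, int *pci_using_dac)
{
	if (pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff) == 0) {
		*pci_using_dac = 1;
		return 0;
	}
	if (pci_set_dma_mask(pdev, (u64) 0xffffffff) == 0) {
		*pci_using_dac = 0;
		return 0;
	}
	printk(KERN_ERR "example: no usable DMA configuration, aborting\n");
	return -EIO;
}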
*/ + if (pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff) && + pci_set_dma_mask(pdev, (u64) 0xffffffff)) + continue; + host = scsi_register(tmpt, sizeof(struct isp2x00_hostdata)); if (!host) { printk("qlogicfc%d : could not register host.\n", hosts); @@ -769,11 +732,11 @@ memset(hostdata, 0, sizeof(struct isp2x00_hostdata)); hostdata->pci_dev = pdev; - hostdata->res = pci64_alloc_consistent(pdev, RES_SIZE + REQ_SIZE, &busaddr); + hostdata->res = pci_alloc_consistent(pdev, RES_SIZE + REQ_SIZE, &busaddr); if (!hostdata->res){ printk("qlogicfc%d : could not allocate memory for request and response queue.\n", hosts); - pci64_free_consistent(pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); + pci_free_consistent(pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); scsi_unregister(host); continue; } @@ -806,7 +769,7 @@ hostdata->host_id = hosts; if (isp2x00_init(host) || isp2x00_reset_hardware(host)) { - pci64_free_consistent (pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); + pci_free_consistent (pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); scsi_unregister(host); continue; } @@ -815,7 +778,7 @@ if (request_irq(host->irq, do_isp2x00_intr_handler, SA_INTERRUPT | SA_SHIRQ, "qlogicfc", host)) { printk("qlogicfc%d : interrupt %d already in use\n", hostdata->host_id, host->irq); - pci64_free_consistent (pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); + pci_free_consistent (pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); scsi_unregister(host); continue; } @@ -824,7 +787,7 @@ "in use\n", hostdata->host_id, host->io_port, host->io_port + 0xff); free_irq(host->irq, host); - pci64_free_consistent (pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); + pci_free_consistent (pdev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); scsi_unregister(host); continue; } @@ -983,7 +946,7 @@ u_int port_id; struct sns_cb *req; u_char *sns_response; - dma64_addr_t busaddr; + dma_addr_t busaddr; struct isp2x00_hostdata *hostdata; hostdata = (struct isp2x00_hostdata *) host->hostdata; @@ -1000,7 +963,7 @@ } printk("qlogicfc%d : Fabric found.\n", hostdata->host_id); - req = (struct sns_cb *)pci64_alloc_consistent(hostdata->pci_dev, sizeof(*req) + 608, &busaddr); + req = (struct sns_cb *)pci_alloc_consistent(hostdata->pci_dev, sizeof(*req) + 608, &busaddr); if (!req){ printk("qlogicfc%d : Could not allocate DMA resources for fabric initialization\n", hostdata->host_id); @@ -1102,12 +1065,12 @@ done = 1; } else { printk("qlogicfc%d : Get All Next failed %x.\n", hostdata->host_id, param[0]); - pci64_free_consistent(hostdata->pci_dev, sizeof(*req) + 608, req, busaddr); + pci_free_consistent(hostdata->pci_dev, sizeof(*req) + 608, req, busaddr); return 0; } } - pci64_free_consistent(hostdata->pci_dev, sizeof(*req) + 608, req, busaddr); + pci_free_consistent(hostdata->pci_dev, sizeof(*req) + 608, req, busaddr); return 1; } @@ -1117,7 +1080,7 @@ int isp2x00_release(struct Scsi_Host *host) { struct isp2x00_hostdata *hostdata; - dma64_addr_t busaddr; + dma_addr_t busaddr; ENTER("isp2x00_release"); @@ -1130,7 +1093,7 @@ busaddr = pci64_dma_build(le32_to_cpu(hostdata->control_block.res_queue_addr_high), le32_to_cpu(hostdata->control_block.res_queue_addr_lo)); - pci64_free_consistent(hostdata->pci_dev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); + pci_free_consistent(hostdata->pci_dev, RES_SIZE + REQ_SIZE, hostdata->res, busaddr); LEAVE("isp2x00_release"); @@ -1274,7 +1237,7 @@ if (Cmnd->use_sg) { sg = (struct scatterlist *) Cmnd->request_buffer; - sg_count = pci64_map_sg(hostdata->pci_dev, sg, Cmnd->use_sg, 
scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + sg_count = pci_map_sg(hostdata->pci_dev, sg, Cmnd->use_sg, scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); cmd->segment_cnt = cpu_to_le16(sg_count); ds = cmd->dataseg; /* fill in first two sg entries: */ @@ -1283,11 +1246,9 @@ n = DATASEGS_PER_COMMAND; for (i = 0; i < n; i++) { - ds[i].d_base = cpu_to_le32(pci64_dma_lo32(sg_dma64_address(sg))); -#if PCI64_DMA_BITS > 32 - ds[i].d_base_hi = cpu_to_le32(pci64_dma_hi32(sg_dma64_address(sg))); -#endif - ds[i].d_count = cpu_to_le32(sg_dma64_len(sg)); + ds[i].d_base = cpu_to_le32(pci64_dma_lo32(sg_dma_address(sg))); + ds[i].d_base_hi = cpu_to_le32(pci64_dma_hi32(sg_dma_address(sg))); + ds[i].d_count = cpu_to_le32(sg_dma_len(sg)); ++sg; } sg_count -= DATASEGS_PER_COMMAND; @@ -1309,31 +1270,30 @@ if (n > DATASEGS_PER_CONT) n = DATASEGS_PER_CONT; for (i = 0; i < n; ++i) { - ds[i].d_base = cpu_to_le32(pci64_dma_lo32(sg_dma64_address(sg))); -#if PCI64_DMA_BITS > 32 - ds[i].d_base_hi = cpu_to_le32(pci64_dma_hi32(sg_dma64_address(sg))); -#endif - ds[i].d_count = cpu_to_le32(sg_dma64_len(sg)); + ds[i].d_base = cpu_to_le32(pci64_dma_lo32(sg_dma_address(sg))); + ds[i].d_base_hi = cpu_to_le32(pci64_dma_hi32(sg_dma_address(sg))); + ds[i].d_count = cpu_to_le32(sg_dma_len(sg)); ++sg; } sg_count -= n; } } else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) { - dma64_addr_t busaddr = pci64_map_single(hostdata->pci_dev, Cmnd->request_buffer, Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + struct page *page = virt_to_page(Cmnd->request_buffer); + unsigned long offset = ((unsigned long)Cmnd->request_buffer & + ~PAGE_MASK); + dma_addr_t busaddr = pci_map_page(hostdata->pci_dev, + page, offset, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + Cmnd->SCp.dma_handle = busaddr; - *(dma64_addr_t *)&Cmnd->SCp = busaddr; cmd->dataseg[0].d_base = cpu_to_le32(pci64_dma_lo32(busaddr)); -#if PCI64_DMA_BITS > 32 cmd->dataseg[0].d_base_hi = cpu_to_le32(pci64_dma_hi32(busaddr)); -#endif cmd->dataseg[0].d_count = cpu_to_le32(Cmnd->request_bufflen); cmd->segment_cnt = cpu_to_le16(1); } else { cmd->dataseg[0].d_base = 0; -#if PCI64_DMA_BITS > 32 cmd->dataseg[0].d_base_hi = 0; -#endif cmd->segment_cnt = cpu_to_le16(1); /* Shouldn't this be 0? 
*/ } @@ -1426,16 +1386,17 @@ Scsi_Cmnd *Cmnd = hostdata->handle_ptrs[i]; if (Cmnd->use_sg) - pci64_unmap_sg(hostdata->pci_dev, - (struct scatterlist *)Cmnd->buffer, - Cmnd->use_sg, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + pci_unmap_sg(hostdata->pci_dev, + (struct scatterlist *)Cmnd->buffer, + Cmnd->use_sg, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); else if (Cmnd->request_bufflen && - Cmnd->sc_data_direction != PCI_DMA_NONE) - pci64_unmap_single(hostdata->pci_dev, - *(dma64_addr_t *)&Cmnd->SCp, - Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + Cmnd->sc_data_direction != PCI_DMA_NONE) { + pci_unmap_page(hostdata->pci_dev, + Cmnd->SCp.dma_handle, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + } hostdata->handle_ptrs[i]->result = DID_SOFT_ERROR << 16; @@ -1532,16 +1493,16 @@ hostdata->queued--; if (Cmnd != NULL) { if (Cmnd->use_sg) - pci64_unmap_sg(hostdata->pci_dev, - (struct scatterlist *)Cmnd->buffer, - Cmnd->use_sg, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + pci_unmap_sg(hostdata->pci_dev, + (struct scatterlist *)Cmnd->buffer, + Cmnd->use_sg, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) - pci64_unmap_single(hostdata->pci_dev, - *(dma64_addr_t *)&Cmnd->SCp, - Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + pci_unmap_page(hostdata->pci_dev, + Cmnd->SCp.dma_handle, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); Cmnd->result = 0x0; (*Cmnd->scsi_done) (Cmnd); } else @@ -1587,13 +1548,14 @@ hostdata->queued--; if (Cmnd->use_sg) - pci64_unmap_sg(hostdata->pci_dev, - (struct scatterlist *)Cmnd->buffer, Cmnd->use_sg, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + pci_unmap_sg(hostdata->pci_dev, + (struct scatterlist *)Cmnd->buffer, Cmnd->use_sg, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) - pci64_unmap_single(hostdata->pci_dev, *(dma64_addr_t *)&Cmnd->SCp, - Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + pci_unmap_page(hostdata->pci_dev, + Cmnd->SCp.dma_handle, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); /* * if any of the following are true we do not @@ -1852,7 +1814,7 @@ u_short param[8]; struct isp2x00_hostdata *hostdata; int loop_count; - dma64_addr_t busaddr; + dma_addr_t busaddr; unsigned short risc_code_addr01 = 0x1000 ; ENTER("isp2x00_reset_hardware"); @@ -1964,9 +1926,15 @@ hostdata->wwn |= (u64) (cpu_to_le16(hostdata->control_block.node_name[3]) & 0x00ff) << 8; hostdata->wwn |= (u64) (cpu_to_le16(hostdata->control_block.node_name[3]) & 0xff00) >> 8; - /* FIXME: If the DMA transfer goes one way only, this should use PCI_DMA_TODEVICE and below as well. */ - busaddr = pci64_map_single(hostdata->pci_dev, &hostdata->control_block, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + /* FIXME: If the DMA transfer goes one way only, this should use + * PCI_DMA_TODEVICE and below as well. 
+ */ + busaddr = pci_map_page(hostdata->pci_dev, + virt_to_page(&hostdata->control_block), + ((unsigned long) &hostdata->control_block & + ~PAGE_MASK), + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); param[0] = MBOX_INIT_FIRMWARE; param[2] = (u_short) (pci64_dma_lo32(busaddr) >> 16); @@ -1978,21 +1946,24 @@ isp2x00_mbox_command(host, param); if (param[0] != MBOX_COMMAND_COMPLETE) { printk("qlogicfc%d.c: Ouch 0x%04x\n", hostdata->host_id, param[0]); - pci64_unmap_single(hostdata->pci_dev, busaddr, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + pci_unmap_page(hostdata->pci_dev, busaddr, + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); return 1; } param[0] = MBOX_GET_FIRMWARE_STATE; isp2x00_mbox_command(host, param); if (param[0] != MBOX_COMMAND_COMPLETE) { printk("qlogicfc%d.c: 0x%04x\n", hostdata->host_id, param[0]); - pci64_unmap_single(hostdata->pci_dev, busaddr, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + pci_unmap_page(hostdata->pci_dev, busaddr, + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); return 1; } - pci64_unmap_single(hostdata->pci_dev, busaddr, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + pci_unmap_page(hostdata->pci_dev, busaddr, + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); LEAVE("isp2x00_reset_hardware"); return 0; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/qlogicfc.h Mon Jun 26 21:02:16 2000 +++ linux/drivers/scsi/qlogicfc.h Fri Aug 3 12:04:42 2001 @@ -100,7 +100,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + can_dma_32: 1 \ } #endif /* _QLOGICFC_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi.c Fri Jul 20 06:07:04 2001 +++ linux/drivers/scsi/scsi.c Wed Aug 8 12:54:26 2001 @@ -178,10 +178,17 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. */ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + char name[16]; + + request_queue_t *q = &SDpnt->request_queue; + + sprintf(name, "scsi%d%d%d", SDpnt->id, SDpnt->lun, SDpnt->channel); + blk_init_queue(q, scsi_request_fn, name); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; + q->max_segments = SHpnt->sg_tablesize; } #ifdef MODULE @@ -219,9 +226,7 @@ req = &SCpnt->request; req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */ - if (req->waiting != NULL) { - complete(req->waiting); - } + complete(req->waiting); } /* @@ -612,8 +617,6 @@ unsigned long flags = 0; unsigned long timeout; - ASSERT_LOCK(&io_request_lock, 0); - #if DEBUG unsigned long *ret = 0; #ifdef __mips__ @@ -625,6 +628,8 @@ host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); + /* Assign a unique nonzero serial_number. */ if (++serial_number == 0) serial_number = 1; @@ -678,41 +683,41 @@ * passes a meaningful return value. 
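/*
 * Illustrative sketch, not part of the patch: with the global
 * io_request_lock gone, the mid-layer brackets low-level driver entry
 * points with the per-host lock introduced in hosts.h, as the scsi.c and
 * scsi_error.c hunks below do.  The function name is made up; it assumes
 * the mid-layer's "scsi.h"/"hosts.h" declarations are in scope.
 */
static void example_queue_under_host_lock(struct Scsi_Host *host,
					  Scsi_Cmnd *SCpnt,
					  void (*done)(Scsi_Cmnd *))
{
	unsigned long flags;

	spin_lock_irqsave(&host->host_lock, flags);
	host->hostt->queuecommand(SCpnt, done);
	spin_unlock_irqrestore(&host->host_lock, flags);
}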
*/ if (host->hostt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); rtn = host->hostt->queuecommand(SCpnt, scsi_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); if (rtn != 0) { scsi_delete_timer(SCpnt); scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } } else { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_old_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } } else { int temp; SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command)); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); SCpnt->result = temp; #ifdef DEBUG_DELAY - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); clock = jiffies + 4 * HZ; while (time_before(jiffies, clock)) barrier(); printk("done(host = %d, result = %04x) : routine at %p\n", host->host_no, temp, host->hostt->command); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); #endif if (host->hostt->use_new_eh_code) { scsi_done(SCpnt); } else { scsi_old_done(SCpnt); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); return rtn; @@ -780,7 +785,7 @@ Scsi_Device * SDpnt = SRpnt->sr_device; struct Scsi_Host *host = SDpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCSI_LOG_MLQUEUE(4, { @@ -876,7 +881,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; SRpnt->sr_command = SCpnt; @@ -966,7 +971,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; @@ -1315,11 +1320,11 @@ Scsi_Request * SRpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); - host = SCpnt->host; device = SCpnt->device; + ASSERT_LOCK(&host->host_lock, 0); + /* * We need to protect the decrement, as otherwise a race condition * would exist. Fiddling with SCpnt isn't a problem as the @@ -1327,10 +1332,10 @@ * one execution context, but the device and host structures are * shared. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->host_busy--; /* Indicate that we are free */ device->device_busy--; /* Decrement device usage counter. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); /* * Clear the flags which say that the device/host is no longer @@ -1821,7 +1826,6 @@ Scsi_Device *SDpnt; struct Scsi_Device_Template *sdtpnt; const char *name; - unsigned long flags; int out_of_space = 0; if (tpnt->next || !tpnt->detect) @@ -1831,7 +1835,7 @@ /* If max_sectors isn't set, default to max */ if (!tpnt->max_sectors) - tpnt->max_sectors = MAX_SECTORS; + tpnt->max_sectors = 1024; pcount = next_scsi_host; @@ -1843,10 +1847,11 @@ using the new scsi code. NOTE: the detect routine could redefine the value tpnt->use_new_eh_code. 
(DB, 13 May 1998) */ + /* + * detect should do its own locking + */ if (tpnt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); tpnt->present = tpnt->detect(tpnt); - spin_unlock_irqrestore(&io_request_lock, flags); } else tpnt->present = tpnt->detect(tpnt); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi.h Wed Aug 15 23:23:11 2001 +++ linux/drivers/scsi/scsi.h Thu Sep 6 13:28:54 2001 @@ -386,15 +386,6 @@ #define ASKED_FOR_SENSE 0x20 #define SYNC_RESET 0x40 -#if defined(__mc68000__) || defined(CONFIG_APUS) -#include -#define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) -#else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) -#endif - - /* * This is the crap from the old error handling code. We have it in a special * place so that we can more easily delete it later on. @@ -633,6 +624,8 @@ struct scatterlist *buffer; /* which buffer */ int buffers_residual; /* how many buffers left */ + dma_addr_t dma_handle; + volatile int Status; volatile int Message; volatile int have_data_in; @@ -745,7 +738,8 @@ unsigned request_bufflen; /* Actual request size */ struct timer_list eh_timeout; /* Used to time out the command. */ - void *request_buffer; /* Actual requested buffer */ + void *request_buffer; /* Actual requested buffer */ + void **bounce_buffers; /* Array of bounce buffers when using scatter-gather */ /* These elements define the operation we ultimately want to perform */ unsigned char data_cmnd[MAX_COMMAND_SIZE]; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_debug.c linux/drivers/scsi/scsi_debug.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_debug.c Sun Nov 12 04:01:11 2000 +++ linux/drivers/scsi/scsi_debug.c Wed Sep 5 12:42:08 2001 @@ -154,10 +154,7 @@ if (SCpnt->use_sg) { sgpnt = (struct scatterlist *) SCpnt->buffer; for (i = 0; i < SCpnt->use_sg; i++) { - lpnt = (int *) sgpnt[i].alt_address; - printk(":%p %p %d\n", sgpnt[i].alt_address, sgpnt[i].address, sgpnt[i].length); - if (lpnt) - printk(" (Alt %x) ", lpnt[15]); + printk(":%p %d\n", sgpnt[i].address, sgpnt[i].length); }; } else { printk("nosg: %p %p %d\n", SCpnt->request.buffer, SCpnt->buffer, @@ -175,12 +172,6 @@ printk("\n"); if (flag == 0) return; - lpnt = (unsigned int *) sgpnt[0].alt_address; - for (i = 0; i < sizeof(Scsi_Cmnd) / 4 + 1; i++) { - if ((i & 7) == 0) - printk("\n"); - printk("%x ", *lpnt++); - }; #if 0 printk("\n"); lpnt = (unsigned int *) sgpnt[0].address; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_dma.c linux/drivers/scsi/scsi_dma.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_dma.c Tue Sep 5 23:08:55 2000 +++ linux/drivers/scsi/scsi_dma.c Thu Sep 6 09:52:18 2001 @@ -255,7 +255,7 @@ #endif new_dma_sectors += ((nents * sizeof(struct scatterlist) + 511) >> 9) * - SDpnt->queue_depth; + (SDpnt->queue_depth + 2); if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM) new_dma_sectors += (2048 >> 9) * SDpnt->queue_depth; } else if (SDpnt->type == TYPE_SCANNER || diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_error.c linux/drivers/scsi/scsi_error.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_error.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_error.c Fri Aug 3 12:04:42 2001 @@ -422,8 +422,6 @@ {REQUEST_SENSE, 0, 0, 0, 255, 0}; unsigned char 
scsi_result0[256], *scsi_result = NULL; - ASSERT_LOCK(&io_request_lock, 0); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); @@ -580,16 +578,14 @@ STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout) { unsigned long flags; - struct Scsi_Host *host; - - ASSERT_LOCK(&io_request_lock, 0); + struct Scsi_Host *host = SCpnt->host; - host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); - retry: +retry: /* - * We will use a queued command if possible, otherwise we will emulate the - * queuing and calling of completion function ourselves. + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. */ SCpnt->owner = SCSI_OWNER_LOWLEVEL; @@ -606,9 +602,9 @@ SCpnt->host->eh_action = &sem; SCpnt->request.rq_status = RQ_SCSI_BUSY; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_eh_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); down(&sem); @@ -631,10 +627,10 @@ * abort a timed out command or not. Not sure how * we should treat them differently anyways. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); if (SCpnt->host->hostt->eh_abort_handler) SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); SCpnt->request.rq_status = RQ_SCSI_DONE; SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; @@ -647,13 +643,13 @@ int temp; /* - * We damn well had better never use this code. There is no timeout - * protection here, since we would end up waiting in the actual low - * level driver, we don't know how to wake it up. + * We damn well had better never use this code. There is no + * timeout protection here, since we would end up waiting in + * the actual low level driver, we don't know how to wake it up. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); SCpnt->result = temp; /* Fall through to code below to examine status. */ @@ -661,8 +657,8 @@ } /* - * Now examine the actual status codes to see whether the command actually - * did complete normally. + * Now examine the actual status codes to see whether the command + * actually did complete normally. 
*/ if (SCpnt->eh_state == SUCCESS) { int ret = scsi_eh_completed_normally(SCpnt); @@ -773,9 +769,9 @@ SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); return rtn; } @@ -805,9 +801,9 @@ } SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -838,9 +834,9 @@ return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -884,9 +880,9 @@ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) { return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -1227,7 +1223,7 @@ Scsi_Device *SDpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); /* * Next free up anything directly waiting upon the host. This will be @@ -1244,19 +1240,22 @@ * now that error recovery is done, we will need to ensure that these * requests are started. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { - request_queue_t *q; + request_queue_t *q = &SDpnt->request_queue; + if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) || (host->host_blocked) || (host->host_self_blocked) || (SDpnt->device_blocked)) { break; } - q = &SDpnt->request_queue; + + spin_lock(&q->queue_lock); q->request_fn(q); + spin_unlock(&q->queue_lock); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* @@ -1303,7 +1302,7 @@ Scsi_Cmnd *SCdone; int timed_out; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCdone = NULL; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_lib.c Sun Aug 12 19:51:42 2001 +++ linux/drivers/scsi/scsi_lib.c Wed Sep 5 13:16:20 2001 @@ -61,7 +61,7 @@ * data - private data * at_head - insert request at head or tail of queue * - * Lock status: Assumed that io_request_lock is not held upon entry. + * Lock status: Assumed that queue lock is not held upon entry. * * Returns: Nothing */ @@ -70,7 +70,7 @@ { unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); rq->cmd = SPECIAL; rq->special = data; @@ -84,15 +84,15 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (at_head) - list_add(&rq->queue, &q->queue_head); + list_add(&rq->queuelist, &q->queue_head); else - list_add_tail(&rq->queue, &q->queue_head); + list_add_tail(&rq->queuelist, &q->queue_head); q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } @@ -167,8 +167,6 @@ */ int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); - SCpnt->owner = SCSI_OWNER_MIDLEVEL; SCpnt->reset_chain = NULL; SCpnt->serial_number = 0; @@ -250,9 +248,9 @@ Scsi_Device *SDpnt; struct Scsi_Host *SHpnt; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (SCpnt != NULL) { /* @@ -262,7 +260,7 @@ * the bad sector. */ SCpnt->request.special = (void *) SCpnt; - list_add(&SCpnt->request.queue, &q->queue_head); + list_add(&SCpnt->request.queuelist, &q->queue_head); } /* @@ -280,14 +278,10 @@ * with special case code, then spin off separate versions and * use function pointers to pick the right one. */ - if (SDpnt->single_lun - && list_empty(&q->queue_head) - && SDpnt->device_busy == 0) { + if (SDpnt->single_lun && list_empty(&q->queue_head) && SDpnt->device_busy == 0) { request_queue_t *q; - for (SDpnt = SHpnt->host_queue; - SDpnt; - SDpnt = SDpnt->next) { + for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { if (((SHpnt->can_queue > 0) && (SHpnt->host_busy >= SHpnt->can_queue)) || (SHpnt->host_blocked) @@ -295,6 +289,7 @@ || (SDpnt->device_blocked)) { break; } + q = &SDpnt->request_queue; q->request_fn(q); } @@ -328,7 +323,7 @@ SHpnt->some_device_starved = 0; } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } /* @@ -360,57 +355,35 @@ int requeue, int frequeue) { + request_queue_t *q = &SCpnt->device->request_queue; struct request *req; - struct buffer_head *bh; - Scsi_Device * SDpnt; - int nsect; + unsigned long flags; + + ASSERT_LOCK(&q->queue_lock, 0); - ASSERT_LOCK(&io_request_lock, 0); + spin_lock_irqsave(&q->queue_lock, flags); req = &SCpnt->request; - req->errors = 0; - if (!uptodate) { - printk(" I/O error: dev %s, sector %lu\n", - kdevname(req->rq_dev), req->sector); - } do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector += nsect; - req->nr_sectors -= nsect; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("scsi_end_request: buffer-list destroyed\n"); - } - } + if (!req->bio) { + printk("scsi_end_request: missing bio\n"); + break; } - } while (sectors && bh); + sectors -= bio_sectors(req->bio); + if (!__end_that_request_first(req, 1)) + break; + } while (sectors > 0); + + spin_unlock_irqrestore(&q->queue_lock, flags); /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ - if (req->bh) { - request_queue_t *q; - - if( !requeue ) - { + if (req->bio) { + if (!requeue) return SCpnt; - } - - q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. 
@@ -418,17 +391,14 @@ scsi_queue_next_request(q, SCpnt); return SCpnt; } + /* * This request is done. If there is someone blocked waiting for this - * request, wake them up. Typically used to wake up processes trying - * to swap a page into memory. + * request, wake them up. */ - if (req->waiting != NULL) { - complete(req->waiting); - } - add_blkdev_randomness(MAJOR(req->rq_dev)); + complete(req->waiting); - SDpnt = SCpnt->device; + add_blkdev_randomness(MAJOR(req->rq_dev)); /* * This will goose the queue request function at the end, so we don't @@ -436,12 +406,9 @@ */ __scsi_release_command(SCpnt); - if( frequeue ) { - request_queue_t *q; + if (frequeue) + scsi_queue_next_request(q, NULL); - q = &SDpnt->request_queue; - scsi_queue_next_request(q, NULL); - } return NULL; } @@ -489,27 +456,31 @@ */ static void scsi_release_buffers(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); + struct request *req = &SCpnt->request; + + ASSERT_LOCK(&SCpnt->device->request_queue.queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. */ if (SCpnt->use_sg) { struct scatterlist *sgpnt; + void **bbpnt; int i; sgpnt = (struct scatterlist *) SCpnt->request_buffer; + bbpnt = SCpnt->bounce_buffers; - for (i = 0; i < SCpnt->use_sg; i++) { - if (sgpnt[i].alt_address) { - scsi_free(sgpnt[i].address, sgpnt[i].length); + if (bbpnt) { + for (i = 0; i < SCpnt->use_sg; i++) { + if (bbpnt[i]) + scsi_free(sgpnt[i].address, sgpnt[i].length); } } scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); } else { - if (SCpnt->request_buffer != SCpnt->request.buffer) { - scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); - } + if (SCpnt->request_buffer != req->buffer) + scsi_free(SCpnt->request_buffer,SCpnt->request_bufflen); } /* @@ -545,6 +516,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -559,7 +531,7 @@ * would be used if we just wanted to retry, for example. * */ - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. 
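The scsi_release_buffers() hunk above replaces the per-segment alt_address field with a parallel bounce_buffers[] array hanging off the command. A minimal sketch of that bookkeeping (illustrative helper name; field names as in the hunk): only segments with a non-NULL bounce_buffers entry own a scsi_malloc()'d bounce page that has to be freed.

    static void free_bounced_segments(Scsi_Cmnd *SCpnt)
    {
            struct scatterlist *sgpnt = (struct scatterlist *) SCpnt->request_buffer;
            void **bbpnt = SCpnt->bounce_buffers;
            int i;

            if (!bbpnt)
                    return;                 /* no ISA bouncing was set up */

            for (i = 0; i < SCpnt->use_sg; i++)
                    if (bbpnt[i])           /* this segment was bounced */
                            scsi_free(sgpnt[i].address, sgpnt[i].length);
    }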
@@ -568,26 +540,33 @@ */ if (SCpnt->use_sg) { struct scatterlist *sgpnt; + void **bbpnt; int i; sgpnt = (struct scatterlist *) SCpnt->buffer; + bbpnt = SCpnt->bounce_buffers; - for (i = 0; i < SCpnt->use_sg; i++) { - if (sgpnt[i].alt_address) { - if (SCpnt->request.cmd == READ) { - memcpy(sgpnt[i].alt_address, - sgpnt[i].address, - sgpnt[i].length); + if (bbpnt) { + for (i = 0; i < SCpnt->use_sg; i++) { + if (bbpnt[i]) { + if (SCpnt->request.cmd == READ) { + memcpy(bbpnt[i], + sgpnt[i].address, + sgpnt[i].length); + } + scsi_free(sgpnt[i].address, sgpnt[i].length); } - scsi_free(sgpnt[i].address, sgpnt[i].length); } } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); + if (SCpnt->buffer != req->buffer) { + if (req->cmd == READ) { + unsigned long flags; + char *to = bio_kmap_irq(req->bio, &flags); + + memcpy(to, SCpnt->buffer, SCpnt->bufflen); + bio_kunmap_irq(to, &flags); } scsi_free(SCpnt->buffer, SCpnt->bufflen); } @@ -608,11 +587,10 @@ */ if (good_sectors > 0) { SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", - SCpnt->request.nr_sectors, - good_sectors)); + req->nr_sectors, good_sectors)); SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); - SCpnt->request.errors = 0; + req->errors = 0; /* * If multiple sectors are requested in one buffer, then * they will have been finished off by the first command. @@ -709,7 +687,7 @@ break; case NOT_READY: printk(KERN_INFO "Device %s not ready.\n", - kdevname(SCpnt->request.rq_dev)); + kdevname(req->rq_dev)); SCpnt = scsi_end_request(SCpnt, 0, this_count); return; break; @@ -753,7 +731,7 @@ * We sometimes get this cruft in the event that a medium error * isn't properly reported. */ - SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors); + SCpnt = scsi_end_request(SCpnt, 0, req->current_nr_sectors); return; } } @@ -767,7 +745,7 @@ * Arguments: request - I/O request we are preparing to queue. * * Lock status: No locks assumed to be held, but as it happens the - * io_request_lock is held when this is called. + * q->queue_lock is held when this is called. * * Returns: Nothing * @@ -781,7 +759,7 @@ kdev_t dev = req->rq_dev; int major = MAJOR(dev); - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&req->q->queue_lock, 1); for (spnt = scsi_devicelist; spnt; spnt = spnt->next) { /* @@ -839,7 +817,7 @@ struct Scsi_Host *SHpnt; struct Scsi_Device_Template *STpnt; - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&q->queue_lock, 1); SDpnt = (Scsi_Device *) q->queuedata; if (!SDpnt) { @@ -857,10 +835,17 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* + * if we are at the max queue depth, don't attempt to queue + * more + */ + if (SHpnt->host_busy == SDpnt->queue_depth) + break; + + /* * If the device cannot accept another request, then quit. */ if (SDpnt->device_blocked) { @@ -906,9 +891,9 @@ */ SDpnt->was_reset = 0; if (SDpnt->removable && !in_interrupt()) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; } } @@ -917,14 +902,14 @@ * If we couldn't find a request that could be queued, then we * can also quit. 
*/ - if (list_empty(&q->queue_head)) + if (blk_queue_empty(q)) break; /* - * Loop through all of the requests in this queue, and find - * one that is queueable. + * get next queueable request. cur_rq would be set if we + * previously had to abort for some reason */ - req = blkdev_entry_next_request(&q->queue_head); + req = elv_next_request(q); /* * Find the actual device driver associated with this command. @@ -944,9 +929,8 @@ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) { SCpnt = scsi_allocate_device(SRpnt->sr_device, FALSE, FALSE); - if( !SCpnt ) { + if (!SCpnt) break; - } scsi_init_cmd_from_req(SCpnt, SRpnt); } @@ -966,7 +950,7 @@ * scatter-gather segments here - the * normal case code assumes this to be * correct, as it would be a performance - * lose to always recount. Handling + * loss to always recount. Handling * errors is always unusual, of course. */ recount_segments(SCpnt); @@ -978,9 +962,8 @@ * while the queue is locked and then break out of the * loop. Otherwise loop around and try another request. */ - if (!SCpnt) { + if (!SCpnt) break; - } } /* @@ -1017,7 +1000,7 @@ * another. */ req = NULL; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (SCpnt->request.cmd != SPECIAL) { /* @@ -1047,7 +1030,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1063,7 +1046,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1084,7 +1067,7 @@ * Now we need to grab the lock again. We are about to mess * with the request queue and try to find another command. */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_merge.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_merge.c Wed Sep 5 13:58:49 2001 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). + * Support for highmem I/O: Jens Axboe */ /* @@ -71,58 +72,15 @@ */ #define DMA_SEGMENT_SIZE_LIMITED -#ifdef CONFIG_SCSI_DEBUG_QUEUES -/* - * Enable a bunch of additional consistency checking. Turn this off - * if you are benchmarking. - */ -static int dump_stats(struct request *req, - int use_clustering, - int dma_host, - int segments) -{ - struct buffer_head *bh; - - /* - * Dump the information that we have. We know we have an - * inconsistency. - */ - printk("nr_segments is %x\n", req->nr_segments); - printk("counted segments is %x\n", segments); - printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) - { - printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); - } - panic("Ththththaats all folks. Too dangerous to continue.\n"); -} - - -/* - * Simple sanity check that we will use for the first go around - * in order to ensure that we are doing the counting correctly. - * This can be removed for optimization. 
- */ -#define SANITY_CHECK(req, _CLUSTER, _DMA) \ - if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \ - { \ - printk("Incorrect segment count at 0x%p", current_text_addr()); \ - dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \ - } -#else -#define SANITY_CHECK(req, _CLUSTER, _DMA) -#endif - static void dma_exhausted(Scsi_Cmnd * SCpnt, int i) { int jj; struct scatterlist *sgpnt; + void **bbpnt; int consumed = 0; sgpnt = (struct scatterlist *) SCpnt->request_buffer; + bbpnt = SCpnt->bounce_buffers; /* * Now print out a bunch of stats. First, start with the request @@ -136,15 +94,13 @@ */ for(jj=0; jj < SCpnt->use_sg; jj++) { - printk("[%d]\tlen:%d\taddr:%p\talt:%p\n", + printk("[%d]\tlen:%d\taddr:%p\tbounce:%p\n", jj, sgpnt[jj].length, sgpnt[jj].address, - sgpnt[jj].alt_address); - if( sgpnt[jj].alt_address != NULL ) - { - consumed = (sgpnt[jj].length >> 9); - } + (bbpnt ? bbpnt[jj] : NULL)); + if (bbpnt && bbpnt[jj]) + consumed += sgpnt[jj].length; } printk("Total %d sectors consumed\n", consumed); panic("DMA pool exhausted"); @@ -191,8 +147,7 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; if( remainder != NULL ) { reqsize = *remainder; @@ -201,21 +156,21 @@ /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if( reqsize + bio_size(bio) > PAGE_SIZE ) { ret++; - reqsize = bh->b_size; + reqsize = bio_size(bio); } else { - reqsize += bh->b_size; + reqsize += bio_size(bio); } #else - reqsize += bh->b_size; + reqsize += bio_size(bio); #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -223,11 +178,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bionext) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (BIO_CONTIG(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -241,23 +195,22 @@ * kind of screwed and we need to start * another segment. 
*/ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + if(dma_host && bio_to_phys(bionext) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +257,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_to_phys((X))+bio_size((X)))|((long)bio_to_phys((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,26 +272,26 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. */ - if (req->nr_segments >= max_segments || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_segments >= q->max_segments) return 0; + req->nr_segments++; return 1; } static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two * into a single hardware sg entry, so we have to * check if things fit into sg_tablesize. */ - if (req->nr_hw_segments >= SHpnt->sg_tablesize || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_hw_segments >= q->max_segments || + req->nr_segments >= q->max_segments) return 0; + req->nr_hw_segments++; req->nr_segments++; return 1; @@ -347,20 +299,18 @@ #else static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { - if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { + if (req->nr_segments < q->max_segments) { /* * This will form the start of a new segment. Bump the * counter. */ req->nr_segments++; return 1; - } else { - return 0; } + + return 0; } #endif @@ -371,7 +321,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -380,7 +330,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. 
This @@ -399,8 +349,7 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -412,12 +361,16 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + /* + * FIXME: remember to look into this /jens + */ #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) + return 0; + else if (!BIO_PHYS_4G(req->biotail, bio)) return 0; if (use_clustering) { @@ -427,17 +380,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (BIO_CONTIG(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -450,16 +401,15 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -472,11 +422,12 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) + return 0; + else if (!BIO_PHYS_4G(bio, req->bio)) return 0; if (use_clustering) { @@ -486,15 +437,13 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bio) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (BIO_CONTIG(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -509,10 +458,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } /* @@ -522,12 +471,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. 
- * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -535,15 +484,12 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -576,7 +522,7 @@ * Returns: 1 if it is OK to merge the two requests. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -596,7 +542,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -609,31 +554,30 @@ */ if (req->special || next->special) return 0; + else if (!BIO_PHYS_4G(req->biotail, next->bio)) + return 0; SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || - req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) return 0; - } - if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) { + + if (req->nr_hw_segments + next->nr_hw_segments - 1 > q->max_segments) return 0; - } #else /* * If the two requests together are too large (even assuming that we * can merge the boundary requests into one segment, then don't * allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) { return 0; } #endif @@ -652,8 +596,7 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -662,8 +605,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && BIO_CONTIG(req->biotail, next->bio) + && bio_to_phys(req->biotail) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -675,7 +618,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (BIO_CONTIG(req->biotail, next->bio)) { /* * This one is OK. Let it go. 
*/ @@ -688,17 +631,16 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) return 0; - } + /* If dynamic DMA mapping can merge last segment in req with * first segment in next, then the check for hw segments was * done above already, so we can always merge. */ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS (req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; - } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { + } else if (req->nr_hw_segments + next->nr_hw_segments > q->max_segments) return 0; } else { req->nr_hw_segments += next->nr_hw_segments; @@ -711,8 +653,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) { return 0; } else { /* @@ -732,12 +673,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -745,12 +686,10 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -798,8 +737,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -807,31 +746,10 @@ int sectors; struct scatterlist * sgpnt; int this_count; + void ** bbpnt; - /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. If not, then don't bother. - */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } req = &SCpnt->request; + /* * First we need to know how many scatter gather segments are needed. */ @@ -847,37 +765,48 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } + /* - * Don't bother with scatter-gather if there is only one segment. 
+ * we used to not use scatter-gather for single segment request, + * but now we do (it makes highmem I/O easier to support without + * kmapping pages) */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; - goto single_segment; - } SCpnt->use_sg = count; /* * Allocate the actual scatter-gather table itself. - * scsi_malloc can only allocate in chunks of 512 bytes */ - SCpnt->sglist_len = (SCpnt->use_sg - * sizeof(struct scatterlist) + 511) & ~511; + SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist)); + /* If we could potentially require ISA bounce buffers, allocate + * space for this array here. + */ + if (dma_host) + SCpnt->sglist_len += (SCpnt->use_sg * sizeof(void *)); + + /* scsi_malloc can only allocate in chunks of 512 bytes so + * round it up. + */ + SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511; + sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); /* * Now fill the scatter-gather table. */ if (!sgpnt) { + struct Scsi_Host *SHpnt = SCpnt->host; + /* * If we cannot allocate the scatter-gather table, then * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; + printk("SCSI: depth is %d, # segs %d, # hw segs %d\n", SHpnt->host_busy, req->nr_segments, req->nr_hw_segments); goto single_segment; } /* @@ -887,15 +816,14 @@ memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + req->buffer = NULL; + bioprev = NULL; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + for (count = 0, bio = req->bio; bio; bio = bio->bi_next) { + if (use_clustering && bioprev != NULL) { + if (dma_host && bio_to_phys(bioprev) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (BIO_CONTIG(bioprev, bio)) { /* * This one is OK. Let it go. 
Note that we * do not have the ability to allocate @@ -904,32 +832,36 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + if( bio_to_phys(bio) - 1 < ISA_DMA_THRESHOLD + || sgpnt[count - 1].length + bio_size(bio) <= PAGE_SIZE ) { + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; } #else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; #endif } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + SCpnt->request_bufflen += bio_size(bio); + bioprev = bio; continue; } } } + + sgpnt[count].address = NULL; + sgpnt[count].page = bio_page(bio); + sgpnt[count].offset = bio_offset(bio); + sgpnt[count].length = bio_size(bio); + + if (!dma_host) + SCpnt->request_bufflen += bio_size(bio); + count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; - } - bhprev = bh; + bioprev = bio; } /* @@ -937,26 +869,36 @@ */ if (count != SCpnt->use_sg) { printk("Incorrect number of segments after building list\n"); -#ifdef CONFIG_SCSI_DEBUG_QUEUES - dump_stats(req, use_clustering, dma_host, count); -#endif + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + this_count = req->current_nr_sectors; + goto single_segment; } - if (!dma_host) { + + if (!dma_host) return 1; - } + /* * Now allocate bounce buffers, if needed. */ SCpnt->request_bufflen = 0; + + if (dma_host) + bbpnt = (void **) ((char *)sgpnt + + (SCpnt->use_sg * sizeof(struct scatterlist))); + else + bbpnt = NULL; + + SCpnt->bounce_buffers = bbpnt; + for (i = 0; i < count; i++) { sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > - ISA_DMA_THRESHOLD) { + ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { /* * If this would nearly drain the DMA - * pool, mpty, then let's stop here. + * pool empty, then let's stop here. * Don't make this request any larger. * This is kind of a safety valve that * we use - we could get screwed later @@ -970,9 +912,12 @@ break; } - sgpnt[i].alt_address = sgpnt[i].address; - sgpnt[i].address = - (char *) scsi_malloc(sgpnt[i].length); + /* + * this is not a dma host, so it will never + * be a highmem page + */ + bbpnt[i] = page_address(sgpnt[i].page) +sgpnt[i].offset; + sgpnt[i].address = (char *)scsi_malloc(sgpnt[i].length); /* * If we cannot allocate memory for this DMA bounce * buffer, then queue just what we have done so far. @@ -986,8 +931,8 @@ } break; } - if (SCpnt->request.cmd == WRITE) { - memcpy(sgpnt[i].address, sgpnt[i].alt_address, + if (req->cmd == WRITE) { + memcpy(sgpnt[i].address, bbpnt[i], sgpnt[i].length); } } @@ -1031,21 +976,20 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = req->bio; bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { /* * Yow! Take the absolute minimum here. 
*/ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* @@ -1058,28 +1002,33 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; - if (dma_host) { + bio = req->bio; + buff = req->buffer = bio_data(bio); + + if (dma_host || PageHighMem(bio_page(bio))) { /* * Allocate a DMA bounce buffer. If the allocation fails, fall * back and allocate a really small one - enough to satisfy * the first buffer. */ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bio_to_phys(bio) + bio_size(bio) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); + return 0; } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) { + unsigned long flags; + char *buf = bio_kmap_irq(bio, &flags); + memcpy(buff, buf, this_count << 9); + bio_kunmap_irq(buf, &flags); + } } } SCpnt->request_bufflen = this_count << 9; @@ -1120,11 +1069,9 @@ */ void initialize_merge_fn(Scsi_Device * SDpnt) { - request_queue_t *q; - struct Scsi_Host *SHpnt; - SHpnt = SDpnt->host; - - q = &SDpnt->request_queue; + struct Scsi_Host *SHpnt = SDpnt->host; + request_queue_t *q = &SDpnt->request_queue; + dma64_addr_t bounce_limit; /* * If the host has already selected a merge manager, then don't @@ -1166,4 +1113,19 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ + if (!PCI_DMA_BUS_IS_PHYS) + /* Platforms with virtual-DMA translation + * hardware have no practical limit. + */ + bounce_limit = BLK_BOUNCE_ANY; + else if (SHpnt->can_dma_32 && (SDpnt->type == TYPE_DISK)) + bounce_limit = SHpnt->pci_dev->dma_mask; + else + bounce_limit = BLK_BOUNCE_HIGH; + + blk_queue_bounce_limit(q, bounce_limit); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_obsolete.c linux/drivers/scsi/scsi_obsolete.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_obsolete.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_obsolete.c Fri Aug 3 12:04:42 2001 @@ -145,9 +145,10 @@ void scsi_old_times_out(Scsi_Cmnd * SCpnt) { + struct Scsi_Host *host = SCpnt->host; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); /* Set the serial_number_at_timeout to the current serial_number */ SCpnt->serial_number_at_timeout = SCpnt->serial_number; @@ -164,7 +165,7 @@ break; case IN_ABORT: printk("SCSI host %d abort (pid %ld) timed out - resetting\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS)) break; case IN_RESET: @@ -175,7 +176,7 @@ */ printk("SCSI host %d channel %d reset (pid %ld) timed out - " "trying harder\n", - SCpnt->host->host_no, SCpnt->channel, SCpnt->pid); + host->host_no, SCpnt->channel, SCpnt->pid); SCpnt->internal_timeout &= ~IN_RESET; SCpnt->internal_timeout |= IN_RESET2; scsi_reset(SCpnt, @@ -188,7 +189,7 @@ * Maybe the HBA itself crashed and this will shake it loose. 
*/ printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2); SCpnt->internal_timeout |= IN_RESET3; scsi_reset(SCpnt, @@ -197,19 +198,19 @@ default: printk("SCSI host %d reset (pid %ld) timed out again -\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); printk("probably an unrecoverable SCSI bus or device hang.\n"); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* * From what I can find in scsi_obsolete.c, this function is only called * by scsi_old_done and scsi_reset. Both of these functions run with the - * io_request_lock already held, so we need do nothing here about grabbing + * host_lock already held, so we need do nothing here about grabbing * any locks. */ static void scsi_request_sense(Scsi_Cmnd * SCpnt) @@ -217,7 +218,6 @@ SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE; update_timeout(SCpnt, SENSE_TIMEOUT); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); memset((void *) SCpnt->sense_buffer, 0, @@ -238,9 +238,9 @@ * Ugly, ugly. The newer interfaces all assume that the lock * isn't held. Mustn't disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&SCpnt->host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&SCpnt->host->host_lock); } @@ -646,9 +646,9 @@ * assume that the lock isn't held. Mustn't * disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } break; default: @@ -674,7 +674,7 @@ * use, the upper code is run from a bottom half handler, so * it isn't an issue. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); SRpnt = SCpnt->sc_request; if( SRpnt != NULL ) { SRpnt->sr_result = SRpnt->sr_command->result; @@ -686,7 +686,7 @@ } SCpnt->done(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } #undef CMD_FINISHED #undef REDO @@ -725,10 +725,10 @@ return 0; } if (SCpnt->internal_timeout & IN_ABORT) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_ABORT) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_ABORT; oldto = update_timeout(SCpnt, ABORT_TIMEOUT); @@ -908,10 +908,10 @@ return 0; } if (SCpnt->internal_timeout & IN_RESET) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_RESET) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_RESET; update_timeout(SCpnt, RESET_TIMEOUT); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_queue.c linux/drivers/scsi/scsi_queue.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/scsi_queue.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/scsi/scsi_queue.c Fri Aug 3 12:04:42 2001 @@ -80,6 +80,7 @@ { struct Scsi_Host *host; unsigned long flags; + request_queue_t *q = &cmd->device->request_queue; SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd)); @@ -137,10 +138,10 @@ * Decrement the counters, since these commands are no longer * active on the host/device. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); cmd->host->host_busy--; cmd->device->device_busy--; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); /* * Insert this command at the head of the queue for it's device. diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sd.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/scsi/sd.c Wed Sep 5 11:03:02 2001 @@ -61,10 +61,6 @@ #include -/* - * static const char RCSid[] = "$Header:"; - */ - #define SD_MAJOR(i) (!(i) ? SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i)) #define SCSI_DISKS_PER_MAJOR 16 @@ -72,8 +68,7 @@ #define SD_MINOR_NUMBER(i) ((i) & 255) #define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255) #define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4) -#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1) -#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR) +#define N_USED_SD_MAJORS (1 + ((sd_template.dev_max - 1) >> 4)) #define MAX_RETRIES 5 @@ -89,14 +84,12 @@ static Scsi_Disk *rscsi_disks; static int *sd_sizes; static int *sd_blocksizes; -static int *sd_hardsizes; /* Hardware sector size */ static int check_scsidisk_media_change(kdev_t); static int fop_revalidate_scsidisk(kdev_t); static int sd_init_onedisk(int); - static int sd_init(void); static void sd_finish(void); static int sd_attach(Scsi_Device *); @@ -177,9 +170,11 @@ diskinfo[0] = 0x40; diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; + diskinfo[2] = + rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - /* override with calculated, extended default, or driver values */ + /* override with calculated, extended default, + or driver values */ if(host->hostt->bios_param != NULL) host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], @@ -189,48 +184,14 @@ dev, &diskinfo[0]); if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) - return -EFAULT; - return 0; - } - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - - if(!loc) - return -EINVAL; - - host = rscsi_disks[DEVICE_NR(dev)].device->host; - - /* default to most commonly used values */ - - diskinfo[0] = 0x40; - diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - - /* override with calculated, extended default, or driver values */ - - if(host->hostt->bios_param != NULL) - host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, - &diskinfo[0]); - else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, &diskinfo[0]); - - if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], (unsigned int *) &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) + put_user(diskinfo[1], &loc->sectors) || + put_user(diskinfo[2], &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) return -EFAULT; return 0; } - case BLKGETSIZE: /* Return device size */ - if (!arg) - return -EINVAL; - return put_user(sd[SD_PARTITION(inode->i_rdev)].nr_sects, (long *) arg); - + case BLKGETSIZE: case BLKROSET: case BLKROGET: case BLKRASET: @@ -238,10 +199,12 @@ case BLKFLSBUF: case BLKSSZGET: case BLKPG: - case BLKELVGET: - case BLKELVSET: + case BLKELVGET: + case 
BLKELVSET: case BLKBSZGET: case BLKBSZSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case BLKRRPART: /* Re-read partition tables */ @@ -250,7 +213,8 @@ return revalidate_scsidisk(dev, 1); default: - return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg); + return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device, + cmd, (void *) arg); } } @@ -300,7 +264,7 @@ SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = %d, block = %d\n", devm, block)); dpnt = &rscsi_disks[dev]; - if (devm >= (sd_template.dev_max << 4) || + if (devm >= (sd_template.dev_max << 4) || (devm & 0xf) || !dpnt || !dpnt->device->online || block + SCpnt->request.nr_sectors > sd[devm].nr_sects) { @@ -308,7 +272,7 @@ SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt)); return 0; } - block += sd[devm].start_sect; + if (dpnt->device->changed) { /* * quietly refuse to do anything to a changed disc until the changed @@ -578,7 +542,6 @@ static struct gendisk *sd_gendisks = &sd_gendisk; #define SD_GENDISK(i) sd_gendisks[(i) / SCSI_DISKS_PER_MAJOR] -#define LAST_SD_GENDISK sd_gendisks[N_USED_SD_MAJORS - 1] /* * rw_intr is the interrupt routine for the device driver. @@ -618,8 +581,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; @@ -642,7 +605,7 @@ default: break; } - error_sector -= sd[SD_PARTITION(SCpnt->request.rq_dev)].start_sect; + error_sector &= ~(block_sectors - 1); good_sectors = error_sector - SCpnt->request.sector; if (good_sectors < 0 || good_sectors >= this_count) @@ -970,15 +933,11 @@ * So I have created this table. See ll_rw_blk.c * Jacques Gelinas (Jacques@solucorp.qc.ca) */ - int m; int hard_sector = sector_size; int sz = rscsi_disks[i].capacity * (hard_sector/256); /* There are 16 minors allocated for each major device */ - for (m = i << 4; m < ((i + 1) << 4); m++) { - sd_hardsizes[m] = hard_sector; - } - + blk_queue_hardsect_size(blk_get_queue(SD_MAJOR(i)), hard_sector); printk("SCSI device %s: " "%d %d-byte hdwr sectors (%d MB)\n", nbuff, rscsi_disks[i].capacity, @@ -1063,7 +1022,7 @@ static int sd_init() { - int i; + int i, maxparts; if (sd_template.dev_noticed == 0) return 0; @@ -1074,10 +1033,17 @@ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR) sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR; + /* At most 16 partitions on each scsi disk. 
*/ + maxparts = (sd_template.dev_max << 4); + if (maxparts == 0) + return 0; + if (!sd_registered) { for (i = 0; i < N_USED_SD_MAJORS; i++) { - if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) { - printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i)); + if (devfs_register_blkdev(SD_MAJOR(i), "sd", + &sd_fops)) { + printk("Unable to get major %d for SCSI disk\n", + SD_MAJOR(i)); return 1; } } @@ -1087,93 +1053,78 @@ if (rscsi_disks) return 0; - rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC); - if (!rscsi_disks) - goto cleanup_devfs; - memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk)); + /* allocate memory */ +#define init_mem_lth(x,n) x = kmalloc((n) * sizeof(*x), GFP_ATOMIC) +#define zero_mem_lth(x,n) memset(x, 0, (n) * sizeof(*x)) + + init_mem_lth(rscsi_disks, sd_template.dev_max); + init_mem_lth(sd_sizes, maxparts); + init_mem_lth(sd_blocksizes, maxparts); + init_mem_lth(sd, maxparts); + init_mem_lth(sd_gendisks, N_USED_SD_MAJORS); + + if (!rscsi_disks || !sd_sizes || !sd_blocksizes || !sd || !sd_gendisks) + goto cleanup_mem; + + zero_mem_lth(rscsi_disks, sd_template.dev_max); + zero_mem_lth(sd_sizes, maxparts); + zero_mem_lth(sd, maxparts); - /* for every (necessary) major: */ - sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_sizes) - goto cleanup_disks; - memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int)); - - sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_blocksizes) - goto cleanup_sizes; - - sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_hardsizes) - goto cleanup_blocksizes; - - for (i = 0; i < sd_template.dev_max << 4; i++) { + for (i = 0; i < maxparts; i++) { sd_blocksizes[i] = 1024; - sd_hardsizes[i] = 512; } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blksize_size[SD_MAJOR(i)] = sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4); - hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4); - } - sd = kmalloc((sd_template.dev_max << 4) * - sizeof(struct hd_struct), - GFP_ATOMIC); - if (!sd) - goto cleanup_sd; - memset(sd, 0, (sd_template.dev_max << 4) * sizeof(struct hd_struct)); - - if (N_USED_SD_MAJORS > 1) - sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC); - if (!sd_gendisks) - goto cleanup_sd_gendisks; + request_queue_t *q = blk_get_queue(SD_MAJOR(i)); + int parts_per_major = (SCSI_DISKS_PER_MAJOR << 4); + + blksize_size[SD_MAJOR(i)] = + sd_blocksizes + i * parts_per_major; + blk_queue_hardsect_size(q, 512); + } + for (i = 0; i < N_USED_SD_MAJORS; i++) { + int N = SCSI_DISKS_PER_MAJOR; + sd_gendisks[i] = sd_gendisk; - sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr, - GFP_ATOMIC); - if (!sd_gendisks[i].de_arr) - goto cleanup_gendisks_de_arr; - memset (sd_gendisks[i].de_arr, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr); - sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags, - GFP_ATOMIC); - if (!sd_gendisks[i].flags) - goto cleanup_gendisks_flags; - memset (sd_gendisks[i].flags, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags); + + init_mem_lth(sd_gendisks[i].de_arr, N); + init_mem_lth(sd_gendisks[i].flags, N); + + if (!sd_gendisks[i].de_arr || !sd_gendisks[i].flags) + goto cleanup_gendisks; + + zero_mem_lth(sd_gendisks[i].de_arr, N); + zero_mem_lth(sd_gendisks[i].flags, N); + sd_gendisks[i].major = SD_MAJOR(i); sd_gendisks[i].major_name = "sd"; 
sd_gendisks[i].minor_shift = 4; sd_gendisks[i].max_p = 1 << 4; - sd_gendisks[i].part = sd + (i * SCSI_DISKS_PER_MAJOR << 4); - sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4); + sd_gendisks[i].part = sd + i * (N << 4); + sd_gendisks[i].sizes = sd_sizes + i * (N << 4); sd_gendisks[i].nr_real = 0; - sd_gendisks[i].next = sd_gendisks + i + 1; sd_gendisks[i].real_devices = (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR); } - LAST_SD_GENDISK.next = NULL; return 0; -cleanup_gendisks_flags: - kfree(sd_gendisks[i].de_arr); -cleanup_gendisks_de_arr: - while (--i >= 0 ) { +#undef init_mem_lth +#undef zero_mem_lth + +cleanup_gendisks: + /* kfree can handle NULL, so no test is required here */ + for (i = 0; i < N_USED_SD_MAJORS; i++) { kfree(sd_gendisks[i].de_arr); kfree(sd_gendisks[i].flags); } +cleanup_mem: kfree(sd_gendisks); -cleanup_sd_gendisks: kfree(sd); -cleanup_sd: - kfree(sd_hardsizes); -cleanup_blocksizes: kfree(sd_blocksizes); -cleanup_sizes: kfree(sd_sizes); -cleanup_disks: kfree(rscsi_disks); -cleanup_devfs: for (i = 0; i < N_USED_SD_MAJORS; i++) { devfs_unregister_blkdev(SD_MAJOR(i), "sd"); } @@ -1184,19 +1135,13 @@ static void sd_finish() { - struct gendisk *gendisk; int i; for (i = 0; i < N_USED_SD_MAJORS; i++) { blk_dev[SD_MAJOR(i)].queue = sd_find_queue; + add_gendisk(&(sd_gendisks[i])); } - for (gendisk = gendisk_head; gendisk != NULL; gendisk = gendisk->next) - if (gendisk == sd_gendisks) - break; - if (gendisk == NULL) { - LAST_SD_GENDISK.next = gendisk_head; - gendisk_head = sd_gendisks; - } + for (i = 0; i < sd_template.dev_max; ++i) if (!rscsi_disks[i].capacity && rscsi_disks[i].device) { sd_init_onedisk(i); @@ -1284,9 +1229,7 @@ int revalidate_scsidisk(kdev_t dev, int maxusage) { int target; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); @@ -1296,36 +1239,18 @@ } DEVICE_BUSY = 1; - max_p = sd_gendisks->max_p; - start = target << sd_gendisks->minor_shift; - - for (i = max_p - 1; i >= 0; i--) { - int index = start + i; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - /* - * Reset the blocksize for everything so that we can read - * the partition table. Technically we will determine the - * correct block size when we revalidate, but we do this just - * to make sure that everything remains consistent. 
- */ - sd_blocksizes[index] = 1024; - if (rscsi_disks[target].device->sector_size == 2048) - sd_blocksizes[index] = 2048; - else - sd_blocksizes[index] = 1024; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR, - 1<<4, CAPACITY); - + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; - return 0; + return res; } static int fop_revalidate_scsidisk(kdev_t dev) @@ -1335,6 +1260,7 @@ static void sd_detach(Scsi_Device * SDp) { Scsi_Disk *dpnt; + kdev_t dev; int i, j; int max_p; int start; @@ -1342,18 +1268,13 @@ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) if (dpnt->device == SDp) { - /* If we are disconnecting a disk driver, sync and invalidate - * everything */ max_p = sd_gendisk.max_p; start = i << sd_gendisk.minor_shift; + dev = MKDEV_SD_PARTITION(start); + wipe_partitions(dev); + for (j = max_p - 1; j >= 0; j--) + sd_sizes[start + j] = 0; - for (j = max_p - 1; j >= 0; j--) { - int index = start + j; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - sd_sizes[index] = 0; - } devfs_register_partitions (&SD_GENDISK (i), SD_MINOR_NUMBER (start), 1); /* unregister_disk() */ @@ -1366,7 +1287,6 @@ SD_GENDISK(i).nr_real--; return; } - return; } static int __init init_sd(void) @@ -1377,10 +1297,7 @@ static void __exit exit_sd(void) { - struct gendisk **prev_sdgd_link; - struct gendisk *sdgd; int i; - int removed = 0; scsi_unregister_module(MODULE_SCSI_DEV, &sd_template); @@ -1392,31 +1309,11 @@ kfree(rscsi_disks); kfree(sd_sizes); kfree(sd_blocksizes); - kfree(sd_hardsizes); kfree((char *) sd); - - /* - * Now remove sd_gendisks from the linked list - */ - prev_sdgd_link = &gendisk_head; - while ((sdgd = *prev_sdgd_link) != NULL) { - if (sdgd >= sd_gendisks && sdgd <= &LAST_SD_GENDISK) { - removed++; - *prev_sdgd_link = sdgd->next; - continue; - } - prev_sdgd_link = &sdgd->next; - } - - if (removed != N_USED_SD_MAJORS) - printk("%s %d sd_gendisks in disk chain", - removed > N_USED_SD_MAJORS ? 
"total" : "just", removed); - } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blk_size[SD_MAJOR(i)] = NULL; - hardsect_size[SD_MAJOR(i)] = NULL; - read_ahead[SD_MAJOR(i)] = 0; + del_gendisk(&(sd_gendisks[i])); + blk_clear(SD_MAJOR(i)); } sd_template.dev_max = 0; if (sd_gendisks != &sd_gendisk) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sr.c Wed Sep 5 12:41:17 2001 +++ linux/drivers/scsi/sr.c Wed Sep 5 12:42:08 2001 @@ -88,7 +88,6 @@ static int *sr_sizes; static int *sr_blocksizes; -static int *sr_hardsizes; static int sr_open(struct cdrom_device_info *, int); void get_sectorsize(int); @@ -218,8 +217,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -264,6 +263,7 @@ struct scatterlist *sg, *old_sg = NULL; int i, fsize, bsize, sg_ent, sg_count; char *front, *back; + void **bbpnt, **old_bbpnt = NULL; back = front = NULL; sg_ent = SCpnt->use_sg; @@ -291,17 +291,25 @@ * extend or allocate new scatter-gather table */ sg_count = SCpnt->use_sg; - if (sg_count) + if (sg_count) { old_sg = (struct scatterlist *) SCpnt->request_buffer; - else { + old_bbpnt = SCpnt->bounce_buffers; + } else { sg_count = 1; sg_ent++; } - i = ((sg_ent * sizeof(struct scatterlist)) + 511) & ~511; + /* Get space for scatterlist and bounce buffer array. */ + i = sg_ent * sizeof(struct scatterlist); + i += sg_ent * sizeof(void *); + i = (i + 511) & ~511; + if ((sg = scsi_malloc(i)) == NULL) goto no_mem; + bbpnt = (void **) + ((char *)sg + (sg_ent * sizeof(struct scatterlist))); + /* * no more failing memory allocs possible, we can safely assign * SCpnt values now @@ -312,13 +320,15 @@ i = 0; if (fsize) { - sg[0].address = sg[0].alt_address = front; + sg[0].address = bbpnt[0] = front; sg[0].length = fsize; i++; } if (old_sg) { memcpy(sg + i, old_sg, SCpnt->use_sg * sizeof(struct scatterlist)); - scsi_free(old_sg, ((SCpnt->use_sg * sizeof(struct scatterlist)) + 511) & ~511); + memcpy(bbpnt + i, old_bbpnt, SCpnt->use_sg * sizeof(void *)); + scsi_free(old_sg, (((SCpnt->use_sg * sizeof(struct scatterlist)) + + (SCpnt->use_sg * sizeof(void *))) + 511) & ~511); } else { sg[i].address = SCpnt->request_buffer; sg[i].length = SCpnt->request_bufflen; @@ -326,11 +336,12 @@ SCpnt->request_bufflen += (fsize + bsize); SCpnt->request_buffer = sg; + SCpnt->bounce_buffers = bbpnt; SCpnt->use_sg += i; if (bsize) { sg[SCpnt->use_sg].address = back; - sg[SCpnt->use_sg].alt_address = back; + bbpnt[SCpnt->use_sg] = back; sg[SCpnt->use_sg].length = bsize; SCpnt->use_sg++; } @@ -642,6 +653,7 @@ scsi_CDs[i].needs_sector_size = 0; sr_sizes[i] = scsi_CDs[i].capacity >> (BLOCK_SIZE_BITS - 9); }; + blk_queue_hardsect_size(blk_get_queue(MAJOR_NR), sector_size); scsi_free(buffer, 512); } @@ -790,21 +802,14 @@ if (!sr_blocksizes) goto cleanup_sizes; - sr_hardsizes = kmalloc(sr_template.dev_max * sizeof(int), GFP_ATOMIC); - if (!sr_hardsizes) - goto cleanup_blocksizes; /* * These are good guesses for the time being. 
*/ - for (i = 0; i < sr_template.dev_max; i++) { + for (i = 0; i < sr_template.dev_max; i++) sr_blocksizes[i] = 2048; - sr_hardsizes[i] = 2048; - } + blksize_size[MAJOR_NR] = sr_blocksizes; - hardsect_size[MAJOR_NR] = sr_hardsizes; return 0; -cleanup_blocksizes: - kfree(sr_blocksizes); cleanup_sizes: kfree(sr_sizes); cleanup_cds: @@ -876,7 +881,6 @@ else read_ahead[MAJOR_NR] = 4; /* 4 sector read-ahead */ - return; } static void sr_detach(Scsi_Device * SDp) @@ -884,17 +888,18 @@ Scsi_CD *cpnt; int i; - for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) + for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) { if (cpnt->device == SDp) { /* - * Since the cdrom is read-only, no need to sync the device. + * Since the cdrom is read-only, no need to sync + * the device. * We should be kind to our buffer cache, however. */ invalidate_device(MKDEV(MAJOR_NR, i), 0); /* - * Reset things back to a sane state so that one can re-load a new - * driver (perhaps the same one). + * Reset things back to a sane state so that one can + * re-load a new driver (perhaps the same one). */ unregister_cdrom(&(cpnt->cdi)); cpnt->device = NULL; @@ -905,7 +910,7 @@ sr_sizes[i] = 0; return; } - return; + } } static int __init init_sr(void) @@ -927,13 +932,9 @@ kfree(sr_blocksizes); sr_blocksizes = NULL; - kfree(sr_hardsizes); - sr_hardsizes = NULL; } - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; read_ahead[MAJOR_NR] = 0; + blk_clear(MAJOR_NR); sr_template.dev_max = 0; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/st.c linux/drivers/scsi/st.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/st.c Sun Aug 12 20:21:47 2001 +++ linux/drivers/scsi/st.c Wed Sep 5 12:42:08 2001 @@ -3222,7 +3222,6 @@ tb->sg[0].address = (unsigned char *) __get_free_pages(priority, order); if (tb->sg[0].address != NULL) { - tb->sg[0].alt_address = NULL; tb->sg[0].length = b_size; break; } @@ -3258,7 +3257,6 @@ tb = NULL; break; } - tb->sg[segs].alt_address = NULL; tb->sg[segs].length = b_size; got += b_size; segs++; @@ -3332,7 +3330,6 @@ normalize_buffer(STbuffer); return FALSE; } - STbuffer->sg[segs].alt_address = NULL; STbuffer->sg[segs].length = b_size; STbuffer->sg_segs += 1; got += b_size; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx.c linux/drivers/scsi/sym53c8xx.c --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx.c Thu Jul 5 20:28:16 2001 +++ linux/drivers/scsi/sym53c8xx.c Thu Sep 6 09:52:30 2001 @@ -644,10 +644,10 @@ #define NCR_LOCK_NCB(np, flags) spin_lock_irqsave(&np->smp_lock, flags) #define NCR_UNLOCK_NCB(np, flags) spin_unlock_irqrestore(&np->smp_lock, flags) -#define NCR_LOCK_SCSI_DONE(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define NCR_UNLOCK_SCSI_DONE(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) +#define NCR_LOCK_SCSI_DONE(host, flags) \ + spin_lock_irqsave(&((host)->host_lock), flags) +#define NCR_UNLOCK_SCSI_DONE(host, flags) \ + spin_unlock_irqrestore(&((host)->host_lock), flags) #else @@ -658,8 +658,8 @@ #define NCR_LOCK_NCB(np, flags) do { save_flags(flags); cli(); } while (0) #define NCR_UNLOCK_NCB(np, flags) do { restore_flags(flags); } while (0) -#define NCR_LOCK_SCSI_DONE(np, flags) do {;} while (0) -#define NCR_UNLOCK_SCSI_DONE(np, flags) do {;} while (0) +#define NCR_LOCK_SCSI_DONE(host, flags) do {;} while (0) +#define NCR_UNLOCK_SCSI_DONE(host, flags) do {;} while (0) #endif @@ -989,8 +989,8 @@ if 
(vbp) { dma_addr_t daddr; vp = (m_addr_t) pci_alloc_consistent(mp->bush, - PAGE_SIZE<vaddr = vp; @@ -1140,26 +1140,26 @@ /* Linux version with pci bus iommu kernel interface */ /* To keep track of the dma mapping (sg/single) that has been set */ -#define __data_mapped SCp.phase -#define __data_mapping SCp.have_data_in +#define __data_mapped(cmd) (cmd)->SCp.phase +#define __data_mapping(cmd) (cmd)->SCp.dma_handle static void __unmap_scsi_data(pcidev_t pdev, Scsi_Cmnd *cmd) { int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); - switch(cmd->__data_mapped) { + switch(__data_mapped(cmd)) { case 2: pci_unmap_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); break; case 1: - pci_unmap_single(pdev, cmd->__data_mapping, - cmd->request_bufflen, dma_dir); + pci_unmap_page(pdev, __data_mapping(cmd), + cmd->request_bufflen, dma_dir); break; } - cmd->__data_mapped = 0; + __data_mapped(cmd) = 0; } -static u_long __map_scsi_single_data(pcidev_t pdev, Scsi_Cmnd *cmd) +static dma_addr_t __map_scsi_single_data(pcidev_t pdev, Scsi_Cmnd *cmd) { dma_addr_t mapping; int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); @@ -1167,10 +1167,13 @@ if (cmd->request_bufflen == 0) return 0; - mapping = pci_map_single(pdev, cmd->request_buffer, - cmd->request_bufflen, dma_dir); - cmd->__data_mapped = 1; - cmd->__data_mapping = mapping; + mapping = pci_map_page(pdev, + virt_to_page(cmd->request_buffer), + ((unsigned long)cmd->request_buffer & + ~PAGE_MASK), + cmd->request_bufflen, dma_dir); + __data_mapped(cmd) = 1; + __data_mapping(cmd) = mapping; return mapping; } @@ -1184,8 +1187,8 @@ return 0; use_sg = pci_map_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); - cmd->__data_mapped = 2; - cmd->__data_mapping = use_sg; + __data_mapped(cmd) = 2; + __data_mapping(cmd) = use_sg; return use_sg; } @@ -1194,12 +1197,12 @@ { int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); - switch(cmd->__data_mapped) { + switch(__data_mapped(cmd)) { case 2: pci_dma_sync_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); break; case 1: - pci_dma_sync_single(pdev, cmd->__data_mapping, + pci_dma_sync_single(pdev, __data_mapping(cmd), cmd->request_bufflen, dma_dir); break; } @@ -5031,12 +5034,12 @@ /* ** 64 bit (53C895A or 53C896) ? */ - if (np->features & FE_DAC) -#ifdef SCSI_NCR_USE_64BIT_DAC - np->rv_ccntl1 |= (XTIMOD | EXTIBMV); -#else - np->rv_ccntl1 |= (DDAC); -#endif + if (np->features & FE_DAC) { + if (np->features & FE_DAC_IN_USE) + np->rv_ccntl1 |= (XTIMOD | EXTIBMV); + else + np->rv_ccntl1 |= (DDAC); + } /* ** Phase mismatch handled by SCRIPTS (53C895A, 53C896 or C1010) ? @@ -12070,15 +12073,9 @@ ** code will get more complex later). 
*/ -#ifdef SCSI_NCR_USE_64BIT_DAC #define SCATTER_ONE(data, badd, len) \ (data)->addr = cpu_to_scr(badd); \ (data)->size = cpu_to_scr((((badd) >> 8) & 0xff000000) + len); -#else -#define SCATTER_ONE(data, badd, len) \ - (data)->addr = cpu_to_scr(badd); \ - (data)->size = cpu_to_scr(len); -#endif #define CROSS_16MB(p, n) (((((u_long) p) + n - 1) ^ ((u_long) p)) & ~0xffffff) @@ -12090,7 +12087,7 @@ cp->data_len = cmd->request_bufflen; if (cmd->request_bufflen) { - u_long baddr = map_scsi_single_data(np, cmd); + dma_addr_t baddr = map_scsi_single_data(np, cmd); SCATTER_ONE(data, baddr, cmd->request_bufflen); if (CROSS_16MB(baddr, cmd->request_bufflen)) { @@ -12141,7 +12138,7 @@ data = &cp->phys.data[MAX_SCATTER - use_sg]; for (segn = 0; segn < use_sg; segn++) { - u_long baddr = scsi_sg_dma_address(&scatter[segn]); + dma_addr_t baddr = scsi_sg_dma_address(&scatter[segn]); unsigned int len = scsi_sg_dma_len(&scatter[segn]); SCATTER_ONE(&data[segn], @@ -12180,7 +12177,7 @@ data = &cp->phys.data[MAX_SCATTER - use_sg]; for (segment = 0; segment < use_sg; segment++) { - u_long baddr = scsi_sg_dma_address(&scatter[segment]); + dma_addr_t baddr = scsi_sg_dma_address(&scatter[segment]); unsigned int len = scsi_sg_dma_len(&scatter[segment]); SCATTER_ONE(&data[segment], @@ -13100,14 +13097,6 @@ (int) (PciDeviceFn(pdev) & 0xf8) >> 3, (int) (PciDeviceFn(pdev) & 7)); -#ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING - if (pci_set_dma_mask(pdev, (dma_addr_t) (0xffffffffUL))) { - printk(KERN_WARNING NAME53C8XX - "32 BIT PCI BUS DMA ADDRESSING NOT SUPPORTED\n"); - return -1; - } -#endif - /* ** Read info from the PCI config space. ** pci_read_config_xxx() functions are assumed to be used for @@ -13175,6 +13164,28 @@ break; } +#ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING + /* Configure DMA attributes. For DAC capable boards, we can encode + ** 32+8 bits for SCSI DMA data addresses with the extra bits used + ** in the size field. We use normal 32-bit PCI addresses for + ** descriptors. + */ + if (chip->features & FE_DAC) { + if (pci_set_dma_mask(pdev, (u64) 0xffffffffff)) + chip->features &= ~FE_DAC_IN_USE; + else + chip->features |= FE_DAC_IN_USE; + } + + if (!(chip->features & FE_DAC_IN_USE)) { + if (pci_set_dma_mask(pdev, (u64) 0xffffffff)) { + printk(KERN_WARNING NAME53C8XX + "32 BIT PCI BUS DMA ADDRESSING NOT SUPPORTED\n"); + return -1; + } + } +#endif + /* ** Ignore Symbios chips controlled by SISL RAID controller. ** This controller sets value 0x52414944 at RAM end - 16. 
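The hunk above replaces the old compile-time SCSI_NCR_USE_64BIT_DAC switch with a runtime probe: ask the PCI layer for a 40-bit DAC mask first, and fall back to a plain 32-bit SAC mask if the platform refuses. The same pattern, reduced to a generic probe-time sketch (the mydev_ name is made up for illustration; the masks mirror the ones used in the hunk above):

	static int mydev_setup_dma(struct pci_dev *pdev)
	{
		/* try 32+8 bit DAC addressing first, as the sym53c8xx change does */
		if (pci_set_dma_mask(pdev, (u64) 0xffffffffff) == 0)
			return 1;	/* DAC cycles usable */

		/* otherwise fall back to single address cycles, 32-bit mask */
		if (pci_set_dma_mask(pdev, (u64) 0xffffffff) == 0)
			return 0;	/* SAC only */

		printk(KERN_WARNING "mydev: no usable DMA addressing\n");
		return -1;
	}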
@@ -13611,8 +13622,8 @@ cmd->SCp.ptr = NULL; cmd->SCp.buffer = NULL; #ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING - cmd->__data_mapped = 0; - cmd->__data_mapping = 0; + __data_mapped(cmd) = 0; + __data_mapping(cmd) = 0; #endif NCR_LOCK_NCB(np, flags); @@ -13667,9 +13678,9 @@ if (DEBUG_FLAGS & DEBUG_TINY) printk ("]\n"); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } @@ -13690,9 +13701,9 @@ NCR_UNLOCK_NCB(np, flags); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx.h Wed Aug 15 23:23:24 2001 +++ linux/drivers/scsi/sym53c8xx.h Wed Sep 5 13:17:02 2001 @@ -96,7 +96,8 @@ this_id: 7, \ sg_tablesize: SCSI_NCR_SG_TABLESIZE, \ cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + can_dma_32: 1} #else diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx_comm.h linux/drivers/scsi/sym53c8xx_comm.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx_comm.h Thu Jul 5 20:28:16 2001 +++ linux/drivers/scsi/sym53c8xx_comm.h Wed Sep 5 12:42:08 2001 @@ -2186,7 +2186,7 @@ (int) (PciDeviceFn(pdev) & 7)); #ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING - if (!pci_dma_supported(pdev, (dma_addr_t) (0xffffffffUL))) { + if (!pci_dma_supported(pdev, 0xffffffff)) { printk(KERN_WARNING NAME53C8XX "32 BIT PCI BUS DMA ADDRESSING NOT SUPPORTED\n"); return -1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx_defs.h linux/drivers/scsi/sym53c8xx_defs.h --- /opt/kernel/linux-2.4.10-pre4/drivers/scsi/sym53c8xx_defs.h Wed Aug 15 23:23:24 2001 +++ linux/drivers/scsi/sym53c8xx_defs.h Wed Sep 5 12:42:08 2001 @@ -184,20 +184,6 @@ #endif /* - * Should we enable DAC cycles on Sparc64 platform? - * Until further investigation we do not enable it - * at the moment. - * We may want to enable it for __ia64__ (untested) - */ -#if defined(__ia64__) -# if !defined(SCSI_NCR_USE_64BIT_DAC) -# define SCSI_NCR_USE_64BIT_DAC -# endif -#else -# undef SCSI_NCR_USE_64BIT_DAC -#endif - -/* * Immediate arbitration */ #if defined(CONFIG_SCSI_NCR53C8XX_IARB) @@ -205,13 +191,6 @@ #endif /* - * Should we enable DAC cycles on sparc64 platforms? - * Until further investigation we do not enable it - * anywhere at the moment. - */ -#undef SCSI_NCR_USE_64BIT_DAC - -/* * Sync transfer frequency at startup. * Allow from 5Mhz to 80Mhz default 20 Mhz. 
*/ @@ -746,6 +725,7 @@ #define FE_66MHZ (1<<23) /* 66MHz PCI Support */ #define FE_DAC (1<<24) /* Support DAC cycles (64 bit addressing) */ #define FE_ISTAT1 (1<<25) /* Have ISTAT1, MBOX0, MBOX1 registers */ +#define FE_DAC_IN_USE (1<<26) /* Platform does DAC cycles */ #define FE_CACHE_SET (FE_ERL|FE_CLSE|FE_WRIE|FE_ERMP) #define FE_SCSI_SET (FE_WIDE|FE_ULTRA|FE_ULTRA2|FE_DBLR|FE_QUAD|F_CLK80) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/Makefile linux/fs/Makefile --- /opt/kernel/linux-2.4.10-pre4/fs/Makefile Wed Sep 5 12:41:18 2001 +++ linux/fs/Makefile Wed Sep 5 11:13:23 2001 @@ -7,12 +7,12 @@ O_TARGET := fs.o -export-objs := filesystems.o open.o dcache.o +export-objs := filesystems.o open.o dcache.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ - fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ + bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ + namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/bio.c linux/fs/bio.c --- /opt/kernel/linux-2.4.10-pre4/fs/bio.c Thu Jan 1 01:00:00 1970 +++ linux/fs/bio.c Fri Sep 7 08:43:12 2001 @@ -0,0 +1,545 @@ +/* + * Copyright (C) 2001 Jens Axboe <axboe@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +kmem_cache_t *bio_cachep; +static spinlock_t bio_lock = SPIN_LOCK_UNLOCKED; +static struct bio *bio_pool; +static DECLARE_WAIT_QUEUE_HEAD(bio_pool_wait); + +#define BIO_HASH_SCALE 3 + +/* + * pending further testing...
+ */ +#define bhash_fn(htable, key) \ + ((((key) >> BIO_HASH_SCALE) & (htable)->mask) + (htable)->table) + +/* + * make this changeable (1 or 2 would be a good default, 32 is insane :-) + */ +#define BIO_POOL_PAGES 1 + +int bio_hash_init(bio_hash_table_t *hash_table, int entries) +{ + int bit_size, hash_bits, hash_size; + + memset(hash_table, 0, sizeof(bio_hash_table_t)); + + bit_size = entries * sizeof(void); + hash_bits = 0; + while (bit_size) { + hash_bits++; + bit_size >>= 1; + } + + hash_size = (1UL << hash_bits) * sizeof(void *); + hash_table->table = vmalloc(hash_size); + if (hash_table->table == NULL) + return -ENOMEM; + + memset(hash_table->table, 0, hash_size); + hash_table->mask = (1UL << hash_bits) - 1; + hash_table->valid_counter = 1; + + return 0; +} + +void bio_hash_cleanup(bio_hash_table_t *table) +{ + vfree(table->table); + memset(table, 0, sizeof(bio_hash_table_t)); +} + +inline void __bio_hash_remove(bio_hash_t *entry) +{ + if (entry->valid_counter) { + bio_hash_t *nxt = entry->next_hash; + bio_hash_t **pprev = entry->pprev_hash; + + if (nxt) + nxt->pprev_hash = pprev; + + *pprev = nxt; + entry->pprev_hash = NULL; + entry->valid_counter = 0; + } +} + +inline void bio_hash_remove(request_queue_t *q, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&q->queue_lock, flags); + __bio_hash_remove(&bio->bi_hash); + spin_unlock_irqrestore(&q->queue_lock, flags); +} + +inline void __bio_hash_add(bio_hash_table_t *htable, struct bio *bio) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t **hash = bhash_fn(htable, bio->bi_sector); + bio_hash_t *nxt = *hash; + + *hash = entry; + entry->next_hash = nxt; + entry->pprev_hash = hash; + entry->valid_counter = htable->valid_counter; + + if (nxt) + nxt->pprev_hash = &entry->next_hash; + + BIO_HASH_PROF_INC(htable, nr_inserts); +} + +inline void bio_hash_add(request_queue_t *q, struct bio *bio) +{ + if (in_interrupt()) + BUG(); + + spin_lock_irq(&q->queue_lock); + __bio_hash_add(&q->queue_hash, bio); + spin_unlock_irq(&q->queue_lock); +} + +inline struct bio *__bio_hash_find(bio_hash_table_t *htable, sector_t sector) +{ + bio_hash_t **hash = bhash_fn(htable, sector); + bio_hash_t *next = *hash, *entry; + struct bio *bio; + int nr = 0; + + BIO_HASH_PROF_INC(htable, nr_lookups); + + while ((entry = next)) { + next = entry->next_hash; + prefetch(next); + if (entry->valid_counter == htable->valid_counter) { + bio = hash_entry(entry, struct bio, bi_hash); + if (bio->bi_sector == sector) { +#ifdef BIO_HASH_PROFILING + if (nr > htable->st.max_bucket_size) + htable->st.max_bucket_size = nr; + if (nr <= MAX_PROFILE_BUCKETS) + htable->st.bucket_size[nr]++; +#endif + BIO_HASH_PROF_INC(htable, nr_hits); + bio_get(bio); + return bio; + } + + nr++; + continue; + } + + /* + * prune out-of-date entries as we go along + */ + __bio_hash_remove(entry); + nr++; + } + + return NULL; +} + +inline struct bio *bio_hash_find(request_queue_t *q, sector_t sector) +{ + struct bio *bio; + + /* + * can't see any valid uses for that yet -- if something comes + * up this will trigger and I can re-decide + */ + if (in_interrupt()) + BUG(); + + spin_lock_irq(&q->queue_lock); + bio = __bio_hash_find(&q->queue_hash, sector); + spin_unlock_irq(&q->queue_lock); + + return bio; +} + +inline int __bio_hash_add_unique(bio_hash_table_t *htable, struct bio *bio) +{ + struct bio *alias; + + alias = __bio_hash_find(htable, bio->bi_sector); + if (!alias) { + __bio_hash_add(htable, bio); + return 0; + } + + /* + * release reference to alias + */ + __bio_put(alias); + 
return 1; +} + +inline int bio_hash_add_unique(request_queue_t *q, struct bio *bio) +{ + int ret; + + if (in_interrupt()) + BUG(); + + spin_lock_irq(&q->queue_lock); + ret = __bio_hash_add_unique(&q->queue_hash, bio); + spin_unlock_irq(&q->queue_lock); + + return ret; +} + +/* + * if need be, add bio_pool_get_irq() to match... + */ +static inline struct bio *__bio_pool_get(void) +{ + struct bio *bio; + + if ((bio = bio_pool)) { + bio_pool = bio->bi_next; + bio->bi_next = NULL; + bio->bi_flags = BIO_POOL; + } + + return bio; +} + +static inline struct bio *bio_pool_get(void) +{ + struct bio *bio; + + if (in_interrupt()) + BUG(); + + spin_lock_irq(&bio_lock); + bio = __bio_pool_get(); + spin_unlock_irq(&bio_lock); + + return bio; +} + +static inline void bio_pool_put(struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&bio_lock, flags); + bio->bi_next = bio_pool; + bio_pool = bio; + spin_unlock_irqrestore(&bio_lock, flags); + + /* + * use wake-one + */ + if (waitqueue_active(&bio_pool_wait)) + wake_up_nr(&bio_pool_wait, 1); +} + +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * + * Description: + * bio_alloc will first try it's on internal pool to satisfy the allocation + * and if that fails fall back to the bio slab cache. In the latter case, + * the @gfp_mask specifies the priority of the allocation. In particular, + * if %__GFP_WAIT is set then we will block on the internal pool waiting + * for a &struct bio to become free. + **/ +struct bio *bio_alloc(int gfp_mask) +{ + DECLARE_WAITQUEUE(wait, current); + struct bio *bio; + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + /* + * no such luck, try slab alloc + */ + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) { + bio->bi_flags = 0; + goto gotit; + } + + /* + * hrmpf, not much luck. if __GFP_WAIT is set, wait on + * bio_pool. + */ + if ((gfp_mask & (__GFP_WAIT | __GFP_IO)) == (__GFP_WAIT | __GFP_IO)) { + add_wait_queue_exclusive(&bio_pool_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + bio = bio_pool_get(); + if (bio) + break; + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bio_pool_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + if (bio) { +gotit: + bio->bi_next = NULL; + memset(&bio->bi_io_vec, 0, sizeof(bio->bi_io_vec)); + atomic_set(&bio->bi_cnt, 1); + bio->bi_end_io = NULL; + bio->bi_private = NULL; + } + return bio; +} + +#if 0 +struct bio *bio_alloc(int gfp_mask) +{ + struct bio *bio; + + /* + * first try slab alloc + */ +restart: + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) { + bio->bi_flags = 0; + goto gotit; + } + + wakeup_bdflush(); + + /* + * nope, now fall back to our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + /* + * hrmpf, still nothing. fire pending I/O, and wait for progress + */ + run_task_queue(&tq_disk); + + current->policy |= SCHED_YIELD; + __set_current_state(TASK_RUNNING); + schedule(); + goto restart; + +gotit: + bio->bi_next = NULL; + memset(&bio->bi_io_vec, 0, sizeof(bio->bi_io_vec)); + atomic_set(&bio->bi_cnt, 1); + bio->bi_end_io = NULL; + bio->bi_private = NULL; + return bio; +} +#endif + +/* + * queue lock assumed held! 
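For reference, a caller of the allocator above fills in the single-segment bi_io_vec and hands the bio to submit_bio(); ll_rw_kio() further down does exactly this. A stripped-down sketch, illustrative only (dev, sector, page and the completion callback are placeholders, not code from this patch):

	struct bio *bio = bio_alloc(GFP_NOIO);	/* same mask ll_rw_kio() passes */

	bio->bi_dev = dev;
	bio->bi_sector = sector;
	bio->bi_io_vec.bv_page = page;
	bio->bi_io_vec.bv_len = PAGE_SIZE;
	bio->bi_io_vec.bv_offset = 0;
	bio->bi_end_io = my_end_io;		/* called at completion time */
	bio->bi_private = my_cookie;		/* handed back through bi_private */
	submit_bio(READ, bio);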
+ */ +static inline void __bio_free(struct bio *bio) +{ + __bio_hash_remove(&bio->bi_hash); + + if (bio->bi_flags & BIO_POOL) + bio_pool_put(bio); + else + kmem_cache_free(bio_cachep, bio); +} + +/** + * __bio_put - release a reference to a bio + * @bio: bio to release reference to + * + * Description: + * Put a reference to a &struct bio, either one you have gotten with + * bio_alloc or bio_get. The last put of a bio will free it. Must be + * run with the queue lock held. + **/ +void __bio_put(struct bio *bio) +{ + if (!atomic_read(&bio->bi_cnt)) + BUG(); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->bi_cnt)) { + if (bio->bi_next) + BUG(); + + __bio_free(bio); + } +} + +#ifdef BIO_PAGEIO +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!(bio->bi_flags & BIO_UPTODATE)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a page I/O has completed. + */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + __bio_put(bio); + return 1; +} +#endif + +static void bio_end_io_kio(struct bio *bio) +{ + struct kiobuf *kio = bio->bi_private; + + end_kio_request(kio, bio->bi_flags & BIO_UPTODATE); + __bio_put(bio); +} + +/** + * ll_rw_kio - submit a &struct kiobuf for I/O + * @rw: %READ or %WRITE + * @kio: the kiobuf to do I/O on + * @dev: target device + * @sector: start location on disk + * + * Description: + * ll_rw_kio will map the page list inside the &struct kiobuf to + * &struct bio and queue them for I/O. The kiobuf given must describe + * a continous range of data, and must be fully prepared for I/O. + **/ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long sector) +{ + struct bio *bio; + int i, offset, len, size; + + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_bio: WRITE to ro device %s\n", kdevname(dev)); + kio->errno = -EPERM; + return; + } + + offset = kio->offset & ~PAGE_MASK; + + len = kio->length; + for (i = 0; i < kio->nr_pages; i++) { + bio = bio_alloc(GFP_NOIO); + + bio->bi_dev = dev; + bio->bi_sector = sector; + + size = PAGE_SIZE - offset; + if (size > len) + size = len; + + bio->bi_io_vec.bv_page = kio->maplist[i]; + bio->bi_io_vec.bv_len = size; + bio->bi_io_vec.bv_offset = offset; + + bio->bi_end_io = bio_end_io_kio; + bio->bi_private = kio; + + /* + * kiobuf only has an offset into the first page + */ + offset = 0; + len -= size; + sector += (size >> 9); + + atomic_inc(&kio->io_count); + submit_bio(rw, bio); + } +} + +static void bio_init(void *foo, kmem_cache_t *cachep, unsigned long flg) +{ + if ((flg & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { + struct bio *bio = foo; + + bio->bi_next = NULL; + bio->bi_flags = 0; + bio->bi_end_io = NULL; + } +} + +static int __init bio_init_pool(void) +{ + struct page *page; + struct bio *bio, *bio_map; + int nr = BIO_POOL_PAGES, i, total; + + total = 0; + do { + page = alloc_page(GFP_KERNEL); + + bio_map = page_address(page); + memset(bio_map, 0, PAGE_SIZE); + for (i = 0; i < PAGE_SIZE / sizeof(struct bio); i++) { + bio = bio_map + i; + bio_pool_put(bio); + total++; + } + } while (--nr); + + return total; +} + +static int __init init_bio(void) +{ + int nr; + + bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0, + SLAB_HWCACHE_ALIGN, bio_init, NULL); + if (!bio_cachep) + panic("bio: can't create bio_cachep slab cache\n"); + + nr = bio_init_pool(); + printk("BIO: pool of %d setup, %luKb (%d 
bytes/bio)\n", nr, (BIO_POOL_PAGES * PAGE_SIZE) >> 10, sizeof(struct bio)); + + return 0; +} + +module_init(init_bio); + +EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(__bio_put); +EXPORT_SYMBOL(ll_rw_kio); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/block_dev.c linux/fs/block_dev.c --- /opt/kernel/linux-2.4.10-pre4/fs/block_dev.c Wed Sep 5 12:41:18 2001 +++ linux/fs/block_dev.c Wed Sep 5 11:13:23 2001 @@ -17,7 +17,6 @@ #include -extern int *blk_size[]; extern int *blksize_size[]; #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.10-pre4/fs/buffer.c Wed Sep 5 12:41:18 2001 +++ linux/fs/buffer.c Wed Sep 5 11:13:23 2001 @@ -587,7 +587,8 @@ * will force it bad). This shouldn't really happen currently, but * the code is ready. */ -static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size) +static inline struct buffer_head * __get_hash_table(kdev_t dev, sector_t block, + int size) { struct buffer_head *bh = hash(dev, block); @@ -602,7 +603,7 @@ return bh; } -struct buffer_head * get_hash_table(kdev_t dev, int block, int size) +struct buffer_head * get_hash_table(kdev_t dev, sector_t block, int size) { struct buffer_head *bh; @@ -721,7 +722,6 @@ void set_blocksize(kdev_t dev, int size) { - extern int *blksize_size[]; int i, nlist, slept; struct buffer_head * bh, * bh_next; @@ -768,8 +768,10 @@ if (!atomic_read(&bh->b_count)) { if (buffer_dirty(bh)) printk(KERN_WARNING - "set_blocksize: dev %s buffer_dirty %lu size %hu\n", - kdevname(dev), bh->b_blocknr, bh->b_size); + "set_blocksize: dev %s buffer_dirty %Lu size %hu\n", + kdevname(dev), + (unsigned long long) bh->b_blocknr, + bh->b_size); remove_inode_queue(bh); __remove_from_queues(bh); put_last_free(bh); @@ -779,9 +781,10 @@ clear_bit(BH_Uptodate, &bh->b_state); printk(KERN_WARNING "set_blocksize: " - "b_count %d, dev %s, block %lu, from %p\n", + "b_count %d, dev %s, block %Lu, from %p\n", atomic_read(&bh->b_count), bdevname(bh->b_dev), - bh->b_blocknr, __builtin_return_address(0)); + (unsigned long long) bh->b_blocknr, + __builtin_return_address(0)); } write_unlock(&hash_table_lock); if (slept) @@ -1032,7 +1035,7 @@ * 14.02.92: changed it to sync dirty buffers a bit: better performance * when the filesystem starts to get full of dirty blocks (I hope). */ -struct buffer_head * getblk(kdev_t dev, int block, int size) +struct buffer_head * getblk(kdev_t dev, sector_t block, int size) { struct buffer_head * bh; int isize; @@ -2024,7 +2027,8 @@ goto done; } -int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block) +sector_t generic_block_bmap(struct address_space *mapping, sector_t block, + get_block_t *get_block) { struct buffer_head tmp; struct inode *inode = mapping->host; @@ -2035,57 +2039,6 @@ } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. 
- */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. */ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). * @@ -2097,21 +2050,14 @@ * passed in to completely map the iobufs to disk. */ -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size) +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, sector_t b[], + int size) { - int err; - int length; int transferred; int i; - int bufind; int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; + struct kiobuf * iobuf; struct page * map; - struct buffer_head *tmp, **bhs = NULL; if (!nr) return 0; @@ -2121,92 +2067,37 @@ */ for (i = 0; i < nr; i++) { iobuf = iovec[i]; - if ((iobuf->offset & (size-1)) || - (iobuf->length & (size-1))) + if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1))) return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + if (!map) + return -EFAULT; + } } /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - tmp = bhs[bhind++]; - - tmp->b_dev = B_FREE; - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - length -= size; - offset += size; - - atomic_inc(&iobuf->io_count); - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? 
*/ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - } - - finished: - if (transferred) - return transferred; - return err; + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); + } + + /* + * now they are all submitted, wait for completion + */ + transferred = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + kiobuf_wait_for_io(iobuf); + transferred += iobuf->length; + } + + return transferred; } /* @@ -2221,7 +2112,7 @@ * FIXME: we need a swapper_inode->get_block function to remove * some of the bmap kludges and interface ugliness here. */ -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) +int brw_page(int rw, struct page *page, kdev_t dev, sector_t b[], int size) { struct buffer_head *head, *bh; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/iobuf.c linux/fs/iobuf.c --- /opt/kernel/linux-2.4.10-pre4/fs/iobuf.c Fri Apr 27 23:23:25 2001 +++ linux/fs/iobuf.c Fri Aug 3 12:04:42 2001 @@ -8,7 +8,6 @@ #include #include -#include void end_kio_request(struct kiobuf *kiobuf, int uptodate) { @@ -26,52 +25,23 @@ { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if (alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +59,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/partitions/check.c linux/fs/partitions/check.c --- /opt/kernel/linux-2.4.10-pre4/fs/partitions/check.c Sun Aug 5 22:12:41 2001 +++ linux/fs/partitions/check.c Tue Aug 7 10:32:32 2001 @@ -1,4 +1,6 @@ /* + * fs/partitions/check.c + * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King @@ -34,12 +36,11 @@ #include "ibm.h" #include "ultrix.h" -extern int *blk_size[]; - struct gendisk *gendisk_head; int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ -static int (*check_part[])(struct gendisk *hd, kdev_t dev, unsigned long first_sect, int first_minor) = { +static int (*check_part[])(struct gendisk *hd, kdev_t dev, + unsigned long first_sect, int first_minor) = { #ifdef CONFIG_ACORN_PARTITION acorn_partition, #endif @@ -272,8 +273,9 @@ char buf[64]; len += sprintf(page + len, - "%4d %4d %10d %s\n", - dsk->major, n, dsk->sizes[n], + "%4d %4d %10Lu %s\n", + dsk->major, n, + (u64) dsk->sizes[n], disk_name(dsk, n, buf)); if (len < offset) offset -= len, len = 0; 
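With the rewrite above, brw_kiovec() simply submits each kiobuf through ll_rw_kio() and waits for its io_count to drain; note that only b[i], the starting block of each kiobuf, is used now. A caller-side sketch (illustrative; the kiobuf is assumed to be mapped and locked already, and first_block/blocksize are placeholders):

	struct kiobuf *iobuf;
	sector_t blocks[1];
	int transferred;

	if (alloc_kiovec(1, &iobuf))
		return -ENOMEM;

	/* ... fill iobuf->maplist[], iobuf->offset and iobuf->length ... */

	blocks[0] = first_block;	/* one starting block per kiobuf */
	transferred = brw_kiovec(READ, 1, &iobuf, dev, blocks, blocksize);

	free_kiovec(1, &iobuf);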
@@ -431,32 +433,85 @@ { if (!gdev) return; - grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size); + grok_partitions(dev, size); } -void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size) +void grok_partitions(kdev_t dev, long size) { - int i; - int first_minor = drive << dev->minor_shift; - int end_minor = first_minor + dev->max_p; + int i, minors, first_minor, end_minor; + struct gendisk *g = get_gendisk(dev); - if(!dev->sizes) - blk_size[dev->major] = NULL; + if (!g) + return; + + minors = 1 << g->minor_shift; + first_minor = MINOR(dev); + if (first_minor & (minors-1)) { + printk("grok_partitions: bad device 0x%02x:%02x\n", + MAJOR(dev), first_minor); + first_minor &= ~(minors-1); + } + end_minor = first_minor + minors; + + if (!g->sizes) + blk_size[g->major] = NULL; + + g->part[first_minor].nr_sects = size; - dev->part[first_minor].nr_sects = size; /* No such device or no minors to use for partitions */ if (!size || minors == 1) return; - check_partition(dev, MKDEV(dev->major, first_minor), 1 + first_minor); + check_partition(g, MKDEV(g->major, first_minor), 1 + first_minor); /* * We need to set the sizes array before we will be able to access * any of the partitions on this device. */ - if (dev->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ + if (g->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ for (i = first_minor; i < end_minor; i++) - dev->sizes[i] = dev->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); - blk_size[dev->major] = dev->sizes; + g->sizes[i] = g->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); + blk_size[g->major] = g->sizes; + } +} + +int wipe_partitions(kdev_t dev) +{ + struct gendisk *g; + kdev_t devp; + int p, major, minor, minor0, max_p, res; + + g = get_gendisk(dev); + if (g == NULL) + return -EINVAL; + + max_p = 1 << g->minor_shift; + major = MAJOR(dev); + minor = MINOR(dev); + minor0 = minor & ~(max_p - 1); + if (minor0 != minor) /* for now only whole-disk reread */ + return -EINVAL; /* %%% later.. */ + + /* invalidate stuff */ + for (p = max_p - 1; p >= 0; p--) { + minor = minor0 + p; + devp = MKDEV(major,minor); +#if 0 /* %%% superfluous? */ + if (g->part[minor].nr_sects == 0) + continue; +#endif + res = invalidate_device(devp, 1); + if (res) + return res; + g->part[minor].start_sect = 0; + g->part[minor].nr_sects = 0; + } + + /* some places do blksize_size[major][minor] = 1024, + as preparation for reading partition table - superfluous */ + /* sd.c used to set blksize_size to 2048 in case + rscsi_disks[target].device->sector_size == 2048 */ + + return 0; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/partitions/check.h linux/fs/partitions/check.h --- /opt/kernel/linux-2.4.10-pre4/fs/partitions/check.h Thu Feb 17 00:42:06 2000 +++ linux/fs/partitions/check.h Fri Aug 3 12:04:42 2001 @@ -1,5 +1,5 @@ /* - * add_partition adds a partitions details to the devices partition + * add_gd_partition adds a partitions details to the devices partition * description. 
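The wipe_partitions()/grok_partitions() pair above gives drivers a self-contained revalidate sequence: invalidate and zero the old partition view, then re-read it at the new size. A sketch of how a driver revalidate routine might use it (illustrative; mydev_capacity() is a made-up helper returning the capacity in sectors):

	static int mydev_revalidate(kdev_t dev)
	{
		int res = wipe_partitions(dev);

		if (res)
			return res;

		grok_partitions(dev, mydev_capacity(dev));
		return 0;
	}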
*/ void add_gd_partition(struct gendisk *hd, int minor, int start, int size); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/fs/udf/lowlevel.c linux/fs/udf/lowlevel.c --- /opt/kernel/linux-2.4.10-pre4/fs/udf/lowlevel.c Tue Jun 12 04:15:27 2001 +++ linux/fs/udf/lowlevel.c Fri Aug 3 12:04:42 2001 @@ -1,5 +1,5 @@ /* - * lowlevel.c + * fs/udf/lowlevel.c * * PURPOSE * Low Level Device Routines for the UDF filesystem diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/bitops.h linux/include/asm-alpha/bitops.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/bitops.h Tue Jul 10 06:26:24 2001 +++ linux/include/asm-alpha/bitops.h Wed Sep 5 12:42:08 2001 @@ -20,7 +20,7 @@ * bit 0 is the LSB of addr; bit 64 is the LSB of (addr+1). */ -extern __inline__ void +static inline void set_bit(unsigned long nr, volatile void * addr) { unsigned long temp; @@ -41,7 +41,7 @@ /* * WARNING: non atomic version. */ -extern __inline__ void +static inline void __set_bit(unsigned long nr, volatile void * addr) { int *m = ((int *) addr) + (nr >> 5); @@ -52,7 +52,7 @@ #define smp_mb__before_clear_bit() smp_mb() #define smp_mb__after_clear_bit() smp_mb() -extern __inline__ void +static inline void clear_bit(unsigned long nr, volatile void * addr) { unsigned long temp; @@ -81,7 +81,7 @@ *m ^= 1 << (nr & 31); } -extern __inline__ void +static inline void change_bit(unsigned long nr, volatile void * addr) { unsigned long temp; @@ -99,7 +99,7 @@ :"Ir" (1UL << (nr & 31)), "m" (*m)); } -extern __inline__ int +static inline int test_and_set_bit(unsigned long nr, volatile void *addr) { unsigned long oldbit; @@ -129,7 +129,7 @@ /* * WARNING: non atomic version. */ -extern __inline__ int +static inline int __test_and_set_bit(unsigned long nr, volatile void * addr) { unsigned long mask = 1 << (nr & 0x1f); @@ -140,7 +140,7 @@ return (old & mask) != 0; } -extern __inline__ int +static inline int test_and_clear_bit(unsigned long nr, volatile void * addr) { unsigned long oldbit; @@ -170,7 +170,7 @@ /* * WARNING: non atomic version. */ -extern __inline__ int +static inline int __test_and_clear_bit(unsigned long nr, volatile void * addr) { unsigned long mask = 1 << (nr & 0x1f); @@ -195,7 +195,7 @@ return (old & mask) != 0; } -extern __inline__ int +static inline int test_and_change_bit(unsigned long nr, volatile void * addr) { unsigned long oldbit; @@ -220,7 +220,7 @@ return oldbit != 0; } -extern __inline__ int +static inline int test_bit(int nr, volatile void * addr) { return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; @@ -233,7 +233,7 @@ * Do a binary search on the bits. Due to the nature of large * constants on the alpha, it is worthwhile to split the search. */ -extern inline unsigned long ffz_b(unsigned long x) +static inline unsigned long ffz_b(unsigned long x) { unsigned long sum = 0; @@ -245,7 +245,7 @@ return sum; } -extern inline unsigned long ffz(unsigned long word) +static inline unsigned long ffz(unsigned long word) { #if defined(__alpha_cix__) && defined(__alpha_fix__) /* Whee. EV67 can calculate it directly. */ @@ -272,12 +272,32 @@ * differs in spirit from the above ffz (man ffs). */ -extern inline int ffs(int word) +static inline int ffs(int word) { int result = ffz(~word); return word ? result+1 : 0; } +/* Compute powers of two for the given integer. 
*/ +static inline int floor_log2(unsigned long word) +{ + long bit; +#if defined(__alpha_cix__) && defined(__alpha_fix__) + __asm__("ctlz %1,%0" : "=r"(bit) : "r"(word)); + return 63 - bit; +#else + for (bit = -1; word ; bit++) + word >>= 1; + return bit; +#endif +} + +static inline int ceil_log2(unsigned int word) +{ + long bit = floor_log2(word); + return bit + (word > (1UL << bit)); +} + /* * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word @@ -285,7 +305,7 @@ #if defined(__alpha_cix__) && defined(__alpha_fix__) /* Whee. EV67 can calculate it directly. */ -extern __inline__ unsigned long hweight64(unsigned long w) +static inline unsigned long hweight64(unsigned long w) { unsigned long result; __asm__("ctpop %1,%0" : "=r"(result) : "r"(w)); @@ -306,7 +326,7 @@ /* * Find next zero bit in a bitmap reasonably efficiently.. */ -extern inline unsigned long +static inline unsigned long find_next_zero_bit(void * addr, unsigned long size, unsigned long offset) { unsigned long * p = ((unsigned long *) addr) + (offset >> 6); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_cia.h linux/include/asm-alpha/core_cia.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_cia.h Sun Mar 19 19:26:21 2000 +++ linux/include/asm-alpha/core_cia.h Wed Sep 5 12:42:08 2001 @@ -263,6 +263,9 @@ #define PYXIS_IIC_CTRL (IDENT_ADDR + 0x87A00002C0UL) #define PYXIS_RESET (IDENT_ADDR + 0x8780000900UL) +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define PYXIS_DAC_OFFSET (1UL << 40) + /* * Data structure for handling CIA machine checks. */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_mcpcia.h linux/include/asm-alpha/core_mcpcia.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_mcpcia.h Fri Mar 17 22:01:38 2000 +++ linux/include/asm-alpha/core_mcpcia.h Wed Sep 5 12:42:08 2001 @@ -181,6 +181,8 @@ #define MCPCIA_IO_BIAS MCPCIA_IO(4) #define MCPCIA_MEM_BIAS MCPCIA_DENSE(4) +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define MCPCIA_DAC_OFFSET (1UL << 40) /* * Data structure for handling MCPCIA machine checks: diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_titan.h linux/include/asm-alpha/core_titan.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_titan.h Tue Jun 20 02:59:33 2000 +++ linux/include/asm-alpha/core_titan.h Wed Sep 5 12:42:08 2001 @@ -308,14 +308,18 @@ * devices can use their familiar numbers and have them map to bus 0. */ -#define TITAN_IO_BIAS TITAN_IO(0) -#define TITAN_MEM_BIAS TITAN_MEM(0) +#define TITAN_IO_BIAS TITAN_IO(0) +#define TITAN_MEM_BIAS TITAN_MEM(0) /* The IO address space is larger than 0xffff */ #define TITAN_IO_SPACE (TITAN_CONF(0) - TITAN_IO(0)) /* TIG Space */ #define TITAN_TIG_SPACE (TITAN_BASE + 0x100000000UL) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +/* ??? Just a guess. Ought to confirm it hasn't been moved. 
*/ +#define TITAN_DAC_OFFSET (1UL << 40) /* * Data structure for handling TITAN machine checks: diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_tsunami.h linux/include/asm-alpha/core_tsunami.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/core_tsunami.h Fri May 25 00:24:37 2001 +++ linux/include/asm-alpha/core_tsunami.h Wed Sep 5 12:42:08 2001 @@ -275,7 +275,7 @@ /* The IO address space is larger than 0xffff */ #define TSUNAMI_IO_SPACE (TSUNAMI_CONF(0) - TSUNAMI_IO(0)) -/* Offset between ram physical addresses and pci64 DAC bus addresses */ +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ #define TSUNAMI_DAC_OFFSET (1UL << 40) /* diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/machvec.h linux/include/asm-alpha/machvec.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/machvec.h Fri Mar 2 20:12:07 2001 +++ linux/include/asm-alpha/machvec.h Wed Sep 5 12:42:08 2001 @@ -39,6 +39,7 @@ unsigned long iack_sc; unsigned long min_io_address; unsigned long min_mem_address; + unsigned long pci_dac_offset; void (*mv_pci_tbi)(struct pci_controller *hose, dma_addr_t start, dma_addr_t end); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/pci.h linux/include/asm-alpha/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/pci.h Mon May 21 22:38:41 2001 +++ linux/include/asm-alpha/pci.h Thu Sep 6 09:29:50 2001 @@ -15,6 +15,7 @@ struct pci_bus; struct resource; struct pci_iommu_arena; +struct page; /* A controller. Used to manage multiple PCI busses. */ @@ -60,12 +61,17 @@ /* IOMMU controls. */ +/* The PCI address space does not equal the physical memory address space. + The networking and block device layers use this boolean for bounce buffer + decisions. */ +#define PCI_DMA_BUS_IS_PHYS 0 + /* Allocate and map kernel buffer using consistant mode DMA for PCI device. Returns non-NULL cpu-view pointer to the buffer if successful and sets *DMA_ADDRP to the pci side dma address as well, else DMA_ADDRP is undefined. */ -extern void *pci_alloc_consistent(struct pci_dev *, long, dma_addr_t *); +extern void *pci_alloc_consistent(struct pci_dev *, size_t, dma_addr_t *); /* Free and unmap a consistant DMA buffer. CPU_ADDR and DMA_ADDR must be values that were returned from pci_alloc_consistant. SIZE must @@ -73,14 +79,18 @@ References to the memory and mappings assosciated with CPU_ADDR or DMA_ADDR past this call are illegal. */ -extern void pci_free_consistent(struct pci_dev *, long, void *, dma_addr_t); +extern void pci_free_consistent(struct pci_dev *, size_t, void *, dma_addr_t); /* Map a single buffer of the indicate size for PCI DMA in streaming mode. The 32-bit PCI bus mastering address to use is returned. Once the device is given the dma address, the device owns this memory until either pci_unmap_single or pci_dma_sync_single is performed. */ -extern dma_addr_t pci_map_single(struct pci_dev *, void *, long, int); +extern dma_addr_t pci_map_single(struct pci_dev *, void *, size_t, int); + +/* Likewise, but for a page instead of an address. */ +extern dma_addr_t pci_map_page(struct pci_dev *, struct page *, + unsigned long, size_t, int); /* Unmap a single streaming mode DMA translation. The DMA_ADDR and SIZE must match what was provided for in a previous pci_map_single @@ -88,7 +98,8 @@ the cpu to the buffer are guarenteed to see whatever the device wrote there. 
*/ -extern void pci_unmap_single(struct pci_dev *, dma_addr_t, long, int); +extern void pci_unmap_single(struct pci_dev *, dma_addr_t, size_t, int); +extern void pci_unmap_page(struct pci_dev *, dma_addr_t, size_t, int); /* Map a set of buffers described by scatterlist in streaming mode for PCI DMA. This is the scather-gather version of the above @@ -121,7 +132,7 @@ point you give the PCI dma address back to the card, the device again owns the buffer. */ -extern inline void +static inline void pci_dma_sync_single(struct pci_dev *dev, dma_addr_t dma_addr, long size, int direction) { @@ -132,7 +143,7 @@ translations after a transfer. The same as pci_dma_sync_single but for a scatter-gather list, same rules and usage. */ -extern inline void +static inline void pci_dma_sync_sg(struct pci_dev *dev, struct scatterlist *sg, int nents, int direction) { @@ -144,7 +155,22 @@ only drive the low 24-bits during PCI bus mastering, then you would pass 0x00ffffff as the mask to this function. */ -extern int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask); +extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); + +/* True if the machine supports DAC addressing, and DEV can + make use of it given MASK. */ +extern int pci_dac_dma_supported(struct pci_dev *hwdev, u64 mask); + +/* Convert to/from DAC dma address and struct page. */ +extern dma64_addr_t pci_dac_page_to_dma(struct pci_dev *, struct page *, unsigned long, int); +extern struct page *pci_dac_dma_to_page(struct pci_dev *, dma64_addr_t); +extern unsigned long pci_dac_dma_to_offset(struct pci_dev *, dma64_addr_t); + +static __inline__ void +pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) +{ + /* Nothing to do. */ +} /* Return the index of the PCI controller for device PDEV. */ extern int pci_controller_num(struct pci_dev *pdev); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/scatterlist.h linux/include/asm-alpha/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/scatterlist.h Tue Feb 8 05:09:05 2000 +++ linux/include/asm-alpha/scatterlist.h Wed Sep 5 12:36:31 2001 @@ -4,17 +4,23 @@ #include struct scatterlist { - char *address; /* Source/target vaddr. */ - char *alt_address; /* Location of actual if address is a - dma indirect buffer, else NULL. */ - dma_addr_t dma_address; + /* This will disappear in 2.5.x */ + char *address; + + /* These two are only valid if ADDRESS member of this + struct is NULL. */ + struct page *page; + unsigned int offset; + unsigned int length; + + dma_addr_t dma_address; unsigned int dma_length; }; -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->dma_length) - +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->dma_length) + #define ISA_DMA_THRESHOLD (~0UL) #endif /* !(_ALPHA_SCATTERLIST_H) */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/types.h linux/include/asm-alpha/types.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-alpha/types.h Tue Feb 8 05:09:05 2000 +++ linux/include/asm-alpha/types.h Wed Sep 5 12:42:08 2001 @@ -47,10 +47,8 @@ #define BITS_PER_LONG 64 -/* PCI dma addresses are 32-bits wide. Ignore PCI64 for now, since - we'll typically be sending it all through iommu tables anyway. 
*/ - -typedef u32 dma_addr_t; +typedef u64 dma_addr_t; +typedef u64 dma64_addr_t; #endif /* __KERNEL__ */ #endif /* _ALPHA_TYPES_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-arm/pci.h linux/include/asm-arm/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-arm/pci.h Sun Aug 12 20:14:00 2001 +++ linux/include/asm-arm/pci.h Wed Sep 5 12:42:08 2001 @@ -152,7 +152,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -static inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-arm/scatterlist.h linux/include/asm-arm/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-arm/scatterlist.h Sun Sep 3 20:19:11 2000 +++ linux/include/asm-arm/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -5,7 +5,6 @@ struct scatterlist { char *address; /* virtual address */ - char *alt_address; /* indirect dma address, or NULL */ dma_addr_t dma_address; /* dma address */ unsigned int length; /* length */ }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-i386/highmem.h linux/include/asm-i386/highmem.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-i386/highmem.h Wed Aug 15 23:21:21 2001 +++ linux/include/asm-i386/highmem.h Wed Sep 5 13:06:36 2001 @@ -74,17 +74,15 @@ kunmap_high(page); } -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -static inline void *kmap_atomic(struct page *page, enum km_type type) +static inline void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; + /* + * could be moved outside __cli context, but then caller would + * have to check... + */ if (page < highmem_start_page) return page_address(page); @@ -100,7 +98,26 @@ return (void*) vaddr; } -static inline void kunmap_atomic(void *kvaddr, enum km_type type) +/* + * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap + * gives a more generic (and caching) interface. But kmap_atomic can + * be used in IRQ contexts, so in some (very limited) cases we need + * it. 
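Completing highmem block I/O from interrupt context is one of those cases, which is why KM_BIO_IRQ is added to kmap_types.h below. An illustrative fragment only (the bounce copy shown is not code from this patch):

	/* e.g. in an end_io handler: copy bounced data back into a highmem page */
	char *vto = kmap_atomic(page, KM_BIO_IRQ);

	memcpy(vto + offset, bounce_buf, len);
	kunmap_atomic(vto, KM_BIO_IRQ);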
+ */ +static inline void *kmap_atomic(struct page *page, enum km_type type) +{ + //unsigned long flags; + void *vaddr; + + //__save_flags(flags); + //__cli(); + vaddr = __kmap_atomic(page, type); + //__restore_flags(flags); + + return vaddr; +} + +static inline void __kunmap_atomic(void *kvaddr, enum km_type type) { #if HIGHMEM_DEBUG unsigned long vaddr = (unsigned long) kvaddr; @@ -118,6 +135,18 @@ */ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); +#endif +} + +static inline void kunmap_atomic(void *kvaddr, enum km_type type) +{ +#if HIGHMEM_DEBUG + //unsigned long flags; + + //__save_flags(flags); + //__cli(); + __kunmap_atomic(kvaddr, type); + //__restore_flags(flags); #endif } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-i386/kmap_types.h Wed Sep 5 12:41:18 2001 +++ linux/include/asm-i386/kmap_types.h Wed Sep 5 11:13:02 2001 @@ -3,9 +3,9 @@ enum km_type { KM_BOUNCE_READ, - KM_BOUNCE_WRITE, KM_SKB_DATA, KM_SKB_DATA_SOFTIRQ, + KM_BIO_IRQ, KM_USER0, KM_USER1, KM_TYPE_NR diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-i386/pci.h linux/include/asm-i386/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-i386/pci.h Wed Aug 15 23:21:44 2001 +++ linux/include/asm-i386/pci.h Fri Sep 7 08:59:34 2001 @@ -34,6 +34,12 @@ struct pci_dev; +/* The PCI address space does equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (1) + /* Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices, * NULL for PCI-like buses (ISA, EISA). @@ -84,6 +90,27 @@ /* Nothing to do */ } +/* + * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical + * to pci_map_single, but takes a struct page instead of a virtual address + */ +static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + + return (page - mem_map) * PAGE_SIZE + offset; +} + +static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the * above pci_map_single interface. Here the scatter gather list @@ -102,8 +129,26 @@ static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) { + int i; + if (direction == PCI_DMA_NONE) BUG(); + + /* + * temporary 2.4 hack + */ + for (i = 0; i < nents; i++ ) { + if (sg[i].address && sg[i].page) + BUG(); + else if (!sg[i].address && !sg[i].page) + BUG(); + + if (sg[i].address) + sg[i].dma_address = virt_to_bus(sg[i].address); + else + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + } + return nents; } @@ -157,7 +202,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -static inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { /* * we fall back to GFP_DMA when the mask isn't all 1s, @@ -170,13 +215,42 @@ return 1; } +/* This is always fine. 
*/ +#define pci_dac_dma_supported(pci_dev, mask) (1) + +static __inline__ dma64_addr_t +pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) +{ + return ((dma64_addr_t) page_to_bus(page) + + (dma64_addr_t) offset); +} + +static __inline__ struct page * +pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + unsigned long poff = (dma_addr >> PAGE_SHIFT); + + return mem_map + poff; +} + +static __inline__ unsigned long +pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + return (dma_addr & ~PAGE_MASK); +} + +static __inline__ void +pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) +{ + /* Nothing to do. */ +} + /* These macros should be used after a pci_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. + * returns. */ -#define sg_dma_address(sg) (virt_to_bus((sg)->address)) +#define sg_dma_address(sg) ((sg)->dma_address) #define sg_dma_len(sg) ((sg)->length) /* Return the index of the PCI controller for device. */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-i386/processor.h linux/include/asm-i386/processor.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-i386/processor.h Wed Aug 15 23:21:11 2001 +++ linux/include/asm-i386/processor.h Wed Sep 5 13:06:35 2001 @@ -477,4 +477,32 @@ __asm__ __volatile__("rep;nop"); } +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + #endif /* __ASM_I386_PROCESSOR_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-i386/scatterlist.h linux/include/asm-i386/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-i386/scatterlist.h Mon Dec 30 12:01:10 1996 +++ linux/include/asm-i386/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -2,9 +2,12 @@ #define _I386_SCATTERLIST_H struct scatterlist { - char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ + char * address; /* Location data is to be transferred to, NULL for + * highmem page */ + struct page * page; /* Location for highmem page, if any */ + unsigned int offset;/* for highmem, page offset */ + + dma_addr_t dma_address; unsigned int length; }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-i386/types.h linux/include/asm-i386/types.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-i386/types.h Thu Jan 27 17:58:15 2000 +++ linux/include/asm-i386/types.h Wed Sep 5 12:42:08 2001 @@ -27,6 +27,8 @@ */ #ifdef __KERNEL__ +#include + typedef signed char s8; typedef unsigned char u8; @@ -41,9 +43,14 @@ #define BITS_PER_LONG 32 -/* Dma addresses are 32-bits wide. */ +/* DMA addresses come in generic and 64-bit flavours. 
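The pci_map_page()/pci_unmap_page() pair added above lets a driver set up streaming DMA to a struct page (possibly a highmem page out of a bio) without ever needing a kernel virtual address for it. Typical use, sketched with placeholder names:

	dma_addr_t busaddr;

	busaddr = pci_map_page(pdev, page, offset, len, PCI_DMA_FROMDEVICE);

	/* ... program the device with busaddr and wait for the transfer ... */

	pci_unmap_page(pdev, busaddr, len, PCI_DMA_FROMDEVICE);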
*/ +#ifdef CONFIG_HIGHMEM +typedef u64 dma_addr_t; +#else typedef u32 dma_addr_t; +#endif +typedef u64 dma64_addr_t; #endif /* __KERNEL__ */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-i386/uaccess.h linux/include/asm-i386/uaccess.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-i386/uaccess.h Wed Aug 15 23:21:12 2001 +++ linux/include/asm-i386/uaccess.h Fri Sep 7 08:58:40 2001 @@ -6,6 +6,7 @@ */ #include #include +#include #include #define VERIFY_READ 0 @@ -526,6 +527,7 @@ static inline unsigned long __constant_copy_to_user(void *to, const void *from, unsigned long n) { + prefetch(from); if (access_ok(VERIFY_WRITE, to, n)) __constant_copy_user(to,from,n); return n; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-ia64/pci.h linux/include/asm-ia64/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-ia64/pci.h Wed May 16 19:31:27 2001 +++ linux/include/asm-ia64/pci.h Wed Sep 5 12:42:08 2001 @@ -52,7 +52,7 @@ * you would pass 0x00ffffff as the mask to this function. */ static inline int -pci_dma_supported (struct pci_dev *hwdev, dma_addr_t mask) +pci_dma_supported (struct pci_dev *hwdev, u64 mask) { return 1; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-ia64/scatterlist.h linux/include/asm-ia64/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-ia64/scatterlist.h Sat Aug 12 04:09:06 2000 +++ linux/include/asm-ia64/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -8,11 +8,6 @@ struct scatterlist { char *address; /* location data is to be transferred to */ - /* - * Location of actual buffer if ADDRESS points to a DMA - * indirection buffer, NULL otherwise: - */ - char *alt_address; char *orig_address; /* Save away the original buffer address (used by pci-dma.c) */ unsigned int length; /* buffer length */ }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-m68k/machdep.h linux/include/asm-m68k/machdep.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-m68k/machdep.h Tue Nov 28 02:57:34 2000 +++ linux/include/asm-m68k/machdep.h Fri Aug 3 12:04:42 2001 @@ -5,7 +5,6 @@ struct kbd_repeat; struct mktime; struct hwclk_time; -struct gendisk; struct buffer_head; extern void (*mach_sched_init) (void (*handler)(int, void *, struct pt_regs *)); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-m68k/scatterlist.h linux/include/asm-m68k/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-m68k/scatterlist.h Tue May 11 18:57:14 1999 +++ linux/include/asm-m68k/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; unsigned long dvma_address; }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-mips/pci.h linux/include/asm-mips/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-mips/pci.h Mon Jul 2 22:56:40 2001 +++ linux/include/asm-mips/pci.h Wed Sep 5 12:42:08 2001 @@ -206,7 +206,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. 
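With the mask argument widened to u64, the 24-bit example from the comment above looks like this in a hypothetical driver:

	if (!pci_dma_supported(pdev, 0x00ffffff)) {
		printk(KERN_WARNING "mydev: machine cannot do 24-bit DMA\n");
		return -EIO;
	}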
*/ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { /* * we fall back to GFP_DMA when the mask isn't all 1s, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-mips/scatterlist.h linux/include/asm-mips/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-mips/scatterlist.h Tue Dec 16 21:46:12 1997 +++ linux/include/asm-mips/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; __u32 dvma_address; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-mips64/pci.h linux/include/asm-mips64/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-mips64/pci.h Wed Jul 4 20:50:39 2001 +++ linux/include/asm-mips64/pci.h Wed Sep 5 12:42:08 2001 @@ -195,7 +195,7 @@ #endif } -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { /* * we fall back to GFP_DMA when the mask isn't all 1s, diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-mips64/scatterlist.h linux/include/asm-mips64/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-mips64/scatterlist.h Fri Feb 25 07:53:35 2000 +++ linux/include/asm-mips64/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; __u32 dvma_address; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-parisc/pci.h linux/include/asm-parisc/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-parisc/pci.h Wed May 16 19:31:27 2001 +++ linux/include/asm-parisc/pci.h Wed Sep 5 12:42:08 2001 @@ -113,7 +113,7 @@ ** See Documentation/DMA-mapping.txt */ struct pci_dma_ops { - int (*dma_supported)(struct pci_dev *dev, dma_addr_t mask); + int (*dma_supported)(struct pci_dev *dev, u64 mask); void *(*alloc_consistent)(struct pci_dev *dev, size_t size, dma_addr_t *iova); void (*free_consistent)(struct pci_dev *dev, size_t size, void *vaddr, dma_addr_t iova); dma_addr_t (*map_single)(struct pci_dev *dev, void *addr, size_t size, int direction); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-parisc/scatterlist.h linux/include/asm-parisc/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-parisc/scatterlist.h Tue Dec 5 21:29:39 2000 +++ linux/include/asm-parisc/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; /* an IOVA can be 64-bits on some PA-Risc platforms. */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-ppc/pci.h linux/include/asm-ppc/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-ppc/pci.h Tue May 22 00:02:06 2001 +++ linux/include/asm-ppc/pci.h Wed Sep 5 12:42:08 2001 @@ -108,7 +108,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. 
*/ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-ppc/scatterlist.h linux/include/asm-ppc/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-ppc/scatterlist.h Tue May 22 00:02:06 2001 +++ linux/include/asm-ppc/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -9,8 +9,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-s390/scatterlist.h linux/include/asm-s390/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-s390/scatterlist.h Tue Feb 13 23:13:44 2001 +++ linux/include/asm-s390/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-s390x/cache.h linux/include/asm-s390x/cache.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-s390x/cache.h Sun Aug 5 22:12:41 2001 +++ linux/include/asm-s390x/cache.h Fri Aug 3 12:02:58 2001 @@ -12,6 +12,6 @@ #define __ARCH_S390_CACHE_H #define L1_CACHE_BYTES 256 -#define L1_CACHE_SHIFT 8 +#define L1_CACHE_SHIFT 16 #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-s390x/scatterlist.h linux/include/asm-s390x/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-s390x/scatterlist.h Tue Feb 13 23:13:44 2001 +++ linux/include/asm-s390x/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-sh/pci.h linux/include/asm-sh/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-sh/pci.h Wed Jun 27 22:55:29 2001 +++ linux/include/asm-sh/pci.h Wed Sep 5 12:42:08 2001 @@ -167,7 +167,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-sh/scatterlist.h linux/include/asm-sh/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-sh/scatterlist.h Sun Mar 5 18:33:55 2000 +++ linux/include/asm-sh/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. 
NULL otherwise */ unsigned int length; }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-sparc/pci.h linux/include/asm-sparc/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-sparc/pci.h Wed May 16 19:31:27 2001 +++ linux/include/asm-sparc/pci.h Wed Sep 5 12:42:08 2001 @@ -108,7 +108,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-sparc/scatterlist.h linux/include/asm-sparc/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-sparc/scatterlist.h Tue Feb 1 08:37:19 2000 +++ linux/include/asm-sparc/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -6,8 +6,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; __u32 dvma_address; /* A place to hang host-specific addresses at. */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-sparc64/pci.h linux/include/asm-sparc64/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-sparc64/pci.h Wed May 16 19:31:27 2001 +++ linux/include/asm-sparc64/pci.h Wed Sep 5 12:42:08 2001 @@ -28,6 +28,12 @@ /* Dynamic DMA mapping stuff. */ +/* The PCI address space does not equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (0) + #include struct pci_dev; @@ -64,6 +70,11 @@ */ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); +/* No highmem on sparc64, plus we have an IOMMU, so mapping pages is easy. */ +#define pci_map_page(dev, page, off, size, dir) \ + pci_map_single(dev, (page_address(page) + (off)), size, dir) +#define pci_unmap_page(dev,addr,sz,dir) pci_unmap_single(dev,addr,sz,dir) + /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the * above pci_map_single interface. Here the scatter gather list @@ -79,13 +90,15 @@ * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); +extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction); /* Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction); +extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nhwents, int direction); /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. @@ -96,7 +109,8 @@ * next point you give the PCI dma address back to the card, the * device again owns the buffer. */ -extern void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); +extern void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, + size_t size, int direction); /* Make physical memory consistent for a set of streaming * mode DMA translations after a transfer. 
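
A rough sketch of the calling sequence for the page-based streaming mappings and the scatter-gather interface declared above; the device-programming steps are only comments, and the example_rx()/example_rx_sg() names are invented for the illustration:

    #include <linux/pci.h>

    /* single page: map, let the device DMA into it, peek at it, unmap */
    static void example_rx(struct pci_dev *pdev, struct page *page)
    {
            dma_addr_t bus = pci_map_page(pdev, page, 0, PAGE_SIZE,
                                          PCI_DMA_FROMDEVICE);

            /* ... device fills the buffer ... */

            pci_dma_sync_single(pdev, bus, PAGE_SIZE, PCI_DMA_FROMDEVICE);
            /* cpu may inspect the data here without tearing down the mapping */

            pci_unmap_page(pdev, bus, PAGE_SIZE, PCI_DMA_FROMDEVICE);
    }

    /* scatter-gather: only the count returned by pci_map_sg() is valid */
    static void example_rx_sg(struct pci_dev *pdev, struct scatterlist *sg,
                              int nents)
    {
            int i, mapped = pci_map_sg(pdev, sg, nents, PCI_DMA_FROMDEVICE);

            for (i = 0; i < mapped; i++) {
                    /* program sg_dma_address(&sg[i]) and sg_dma_len(&sg[i])
                     * into the hardware descriptor ring here */
            }

            /* ... after the transfer has completed ... */
            pci_unmap_sg(pdev, sg, nents, PCI_DMA_FROMDEVICE);
    }
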
@@ -111,7 +125,51 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask); +extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); + +/* PCI IOMMU mapping bypass support. */ + +/* PCI 64-bit addressing works for all slots on all controller + * types on sparc64. However, it requires that the device + * can drive enough of the 64 bits. + */ +#define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) +#define PCI64_ADDR_BASE 0xfffc000000000000 + +/* Usage of the pci_dac_foo interfaces is only valid if this + * test passes. + */ +#define pci_dac_dma_supported(pci_dev, mask) \ + ((((mask) & PCI64_REQUIRED_MASK) == PCI64_REQUIRED_MASK) ? 1 : 0) + +static __inline__ dma64_addr_t +pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) +{ + return (PCI64_ADDR_BASE + + __pa(page_address(page)) + offset); +} + +static __inline__ struct page * +pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + unsigned long paddr = (dma_addr & PAGE_MASK) - PCI64_ADDR_BASE; + + return virt_to_page(__va(paddr)); +} + +static __inline__ unsigned long +pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + return (dma_addr & ~PAGE_MASK); +} + +static __inline__ void +pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) +{ + /* DAC cycle addressing does not make use of the + * PCI controller's streaming cache, so nothing to do. + */ +} /* Return the index of the PCI controller for device PDEV. */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-sparc64/scatterlist.h linux/include/asm-sparc64/scatterlist.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-sparc64/scatterlist.h Tue Dec 21 07:05:52 1999 +++ linux/include/asm-sparc64/scatterlist.h Wed Sep 5 12:42:08 2001 @@ -5,17 +5,23 @@ #include struct scatterlist { - char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ - unsigned int length; + /* This will disappear in 2.5.x */ + char *address; - __u32 dvma_address; /* A place to hang host-specific addresses at. */ - __u32 dvma_length; + /* These two are only valid if ADDRESS member of this + * struct is NULL. + */ + struct page *page; + unsigned int offset; + + unsigned int length; + + dma_addr_t dma_address; + __u32 dma_length; }; -#define sg_dma_address(sg) ((sg)->dvma_address) -#define sg_dma_len(sg) ((sg)->dvma_length) +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->dma_length) #define ISA_DMA_THRESHOLD (~0UL) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/asm-sparc64/types.h linux/include/asm-sparc64/types.h --- /opt/kernel/linux-2.4.10-pre4/include/asm-sparc64/types.h Tue Feb 1 08:37:19 2000 +++ linux/include/asm-sparc64/types.h Wed Sep 5 12:42:08 2001 @@ -45,9 +45,10 @@ #define BITS_PER_LONG 64 -/* Dma addresses are 32-bits wide for now. */ +/* Dma addresses come in generic and 64-bit flavours. 
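
The pci_dac_* interfaces above are meant to be used roughly as follows (sketch only; the all-ones capability mask and the function name are assumptions made for the example):

    #include <linux/pci.h>

    static int example_dac_map(struct pci_dev *pdev, struct page *page)
    {
            dma64_addr_t dac;

            if (!pci_dac_dma_supported(pdev, ~0ULL))
                    return -EIO;    /* fall back to the pci_map_page() path */

            dac = pci_dac_page_to_dma(pdev, page, 0, PCI_DMA_TODEVICE);
            /* ... hand the full 64-bit address to a DAC capable device ... */
            pci_dac_dma_sync_single(pdev, dac, PAGE_SIZE, PCI_DMA_TODEVICE);
            return 0;
    }
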
*/ typedef u32 dma_addr_t; +typedef u64 dma64_addr_t; #endif /* __KERNEL__ */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/bio.h linux/include/linux/bio.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/bio.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/bio.h Fri Sep 7 08:38:57 2001 @@ -0,0 +1,212 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +/* + * transition to 64-bit sector_t, possibly making it an option... + */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + +struct bio_vec { + struct page *bv_page; + unsigned short bv_len; + unsigned short bv_offset; +}; + +typedef struct bio_hash_s { + struct bio_hash_s *next_hash; + struct bio_hash_s **pprev_hash; + unsigned int valid_counter; +} bio_hash_t; + +#define BLKHASHPROF _IOR(0x12,108,sizeof(struct bio_hash_stats)) +#define BLKHASHCLEAR _IO(0x12,109) + +#define MAX_PROFILE_BUCKETS 64 + +struct bio_hash_stats { + unsigned long nr_lookups; + unsigned long nr_hits; + unsigned long nr_inserts; + unsigned long max_bucket_size; + unsigned long bucket_size[MAX_PROFILE_BUCKETS + 1]; + + unsigned long q_nr_back_lookups; + unsigned long q_nr_back_hits; + unsigned long q_nr_back_merges; + unsigned long q_nr_front_lookups; + unsigned long q_nr_front_hits; + unsigned long q_nr_front_merges; +}; + +#define BIO_HASH_PROFILING + +#ifdef BIO_HASH_PROFILING +#define BIO_HASH_PROF_INC(hash, mbr) ((hash)->st.##mbr)++ +#define BIO_HASH_PROF_DEC(hash, mbr) ((hash)->st.##mbr)-- +#else +#define BIO_HASH_PROF_INC(hash, mbr) +#define BIO_HASH_PROC_DEC(hash, mbr) +#endif + +/* + * hash table must be a power of two + */ +typedef struct bio_hash_table_s { + bio_hash_t **table; + unsigned long mask; + unsigned int valid_counter; +#ifdef BIO_HASH_PROFILING + struct bio_hash_stats st; +#endif +} bio_hash_table_t; + +/* + * shamelessly stolen from the list.h implementation + */ +#define hash_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/* + * main unit of I/O for the block layer and lower layers (ie drivers) + */ +struct bio { + kdev_t bi_dev; + sector_t bi_sector; + bio_hash_t bi_hash; + struct bio *bi_next; /* request queue link */ + struct bio_vec bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + atomic_t bi_cnt; /* free when it hits zero */ + void (*bi_end_io)(struct bio *bio); + void *bi_private; + struct request *bi_req; /* linked to this request */ +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * bio flags + */ +#define BIO_UPTODATE 1 /* ok after I/O completion */ +#define BIO_READ 2 /* 
read request */ +#define BIO_WRITE 4 /* write request */ +#define BIO_RW_AHEAD 8 /* read/write ahead */ +#define BIO_BARRIER 16 /* barrier I/O */ +#define BIO_RW_BLOCK 32 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 64 /* out-out-bounds error */ +#define BIO_POOL 128 /* from bio pool, not slab cache */ + +/* + * if you change any of the above, make sure this is still correct!! + */ +#define BIO_RW_MASK (BIO_READ + BIO_WRITE + BIO_RW_AHEAD + BIO_BARRIER) + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec(bio) (&(bio)->bi_io_vec) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) bio_iovec((bio))->bv_len +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) +#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + bio_offset((bio))) +#define bio_barrier(bio) ((bio)->bi_flags & BIO_BARRIER) + +/* + * queues that have highmem support enabled may still need to revert to + * PIO transfers occasionally and thus map high pages temporarily. For + * permanent PIO fall back, user is probably better off disabling highmem + * I/O completely on that queue (see ide-dma for example) + */ +#define bio_kmap(bio) (kmap(bio_page((bio))) + bio_offset((bio))) +#define bio_kunmap(bio) kunmap(bio_page((bio))) + +#define BIO_CONTIG(bio, nxt) (bio_to_phys((bio)) + bio_size((bio)) == bio_to_phys((nxt))) +#define BIO_PHYS_4G(b1, b2) ((bio_to_phys((b1)) | 0xffffffff) == ((bio_to_phys((b2)) + bio_size((b2)) - 1) | 0xffffffff)) + +typedef void (bi_end_io_t) (struct bio *); + +#define bio_endio(bio, ok) do { \ + if (ok) \ + (bio)->bi_flags |= BIO_UPTODATE;\ + else \ + (bio)->bi_flags &= ~BIO_UPTODATE; \ + (bio)->bi_end_io((bio)); \ + } while (0) + +#define bio_io_error(bio) bio_endio((bio), 0) + +/* + * get a reference to a bio, so it won't disappear. the intended use is + * something like: + * + * bio_get(bio); + * submit_bio(rw, bio); + * if (bio->bi_flags ...) + * do_something + * __bio_put(bio); + * + * without the bio_get(), it could potentially complete I/O before submit_bio + * returns. and then bio would be freed memory when if (bio->bi_flags ...) 
+ * runs + */ +#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) + +extern struct bio *bio_alloc(int); +extern void __bio_put(struct bio *); + +/* + * the hash stuff is pretty closely tied to the request queue (needed for + * locking etc anyway, and it's in no way an attempt at a generic hash) + */ +struct request_queue; + +extern int bio_hash_init(bio_hash_table_t *, int); +extern void bio_hash_cleanup(bio_hash_table_t *); +extern inline void bio_hash_remove(struct request_queue *, struct bio *); +extern inline void bio_hash_add(struct request_queue *, struct bio *); +extern inline struct bio *bio_hash_find(struct request_queue *, sector_t); +extern inline struct bio *__bio_hash_find(bio_hash_table_t *, sector_t); +extern inline int bio_hash_add_unique(struct request_queue *, struct bio *); +extern inline int __bio_hash_add_unique(bio_hash_table_t *, struct bio *); +extern inline void __bio_hash_remove(bio_hash_t *entry); + +/* + * increment valid_counter, and make sure that wraps go to 1 and not 0 + */ +#define __bio_hash_inval(htable) do { \ + if (!++(htable)->valid_counter) \ + (htable)->valid_counter = 1; \ + } while (0) + +#endif /* __LINUX_BIO_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/blk.h linux/include/linux/blk.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/blk.h Wed Aug 15 23:21:32 2001 +++ linux/include/linux/blk.h Fri Sep 7 08:59:36 2001 @@ -7,13 +7,6 @@ #include /* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; - -/* * Initialization functions. */ extern int isp16_init(void); @@ -85,13 +78,25 @@ * code duplication in drivers. */ -static inline void blkdev_dequeue_request(struct request * req) +static inline void blkdev_dequeue_request(struct request *req) { - list_del(&req->queue); + request_queue_t *q = req->q; + + if (q && !spin_is_locked(&q->queue_lock)) + BUG(); + + /* + * make sure noone gets a pointer to this req by doing a hash lookup + */ + if (req->bio) + req->bio->bi_req = req->biotail->bi_req = NULL; + + list_del(&req->queuelist); } -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); +int end_that_request_first(request_queue_t *, struct request *, int uptodate); +extern inline int __end_that_request_first(struct request *, int uptodate); +void end_that_request_last(struct request *); #if defined(MAJOR_NR) || defined(IDE_DRIVER) @@ -336,12 +341,16 @@ #if !defined(IDE_DRIVER) #ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define CURRENT elv_next_request(&blk_dev[MAJOR_NR].request_queue) +#endif +#ifndef QUEUE +#define QUEUE (&blk_dev[MAJOR_NR].request_queue) #endif #ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define QUEUE_EMPTY blk_queue_empty(QUEUE) #endif + #ifndef DEVICE_NAME #define DEVICE_NAME "unknown" #endif @@ -365,16 +374,14 @@ #endif #define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ + if (QUEUE_EMPTY) { \ CLEAR_INTR; \ - return; \ + return; \ } \ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } + if (!CURRENT->bio) \ + panic(DEVICE_NAME ": no bio"); \ #endif /* !defined(IDE_DRIVER) */ @@ -383,18 +390,31 @@ #if ! 
SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) -static inline void end_request(int uptodate) { - struct request *req = CURRENT; +static inline void __end_request(struct request *req, int uptodate) +{ + request_queue_t *q = req->q; - if (end_that_request_first(req, uptodate, DEVICE_NAME)) - return; + if (q && !spin_is_locked(&q->queue_lock)) + BUG(); + if (!__end_that_request_first(req, uptodate)) { #ifndef DEVICE_NO_RANDOM - add_blkdev_randomness(MAJOR(req->rq_dev)); + add_blkdev_randomness(MAJOR(req->rq_dev)); #endif - DEVICE_OFF(req->rq_dev); - blkdev_dequeue_request(req); - end_that_request_last(req); + DEVICE_OFF(req->rq_dev); + blkdev_dequeue_request(req); + end_that_request_last(req); + } +} + +static inline void end_request(int uptodate) +{ + request_queue_t *q = CURRENT->q; + unsigned long flags; + + spin_lock_irqsave(&q->queue_lock, flags); + __end_request(CURRENT, uptodate); + spin_unlock_irqrestore(&q->queue_lock, flags); } #endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/blkdev.h Wed Aug 15 23:21:30 2001 +++ linux/include/linux/blkdev.h Fri Sep 7 08:58:41 2001 @@ -12,55 +12,46 @@ struct elevator_s; typedef struct elevator_s elevator_t; -/* - * Ok, this is an expanded form so that we can use the same - * request for paging requests. - */ struct request { - struct list_head queue; + struct list_head queuelist; /* looking for ->queue? you must _not_ + * access it directly, use + * blkdev_dequeue_request! */ int elevator_sequence; - struct list_head table; - - volatile int rq_status; /* should split this into a few status bits */ -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 -#define RQ_SCSI_BUSY 0xffff -#define RQ_SCSI_DONE 0xfffe -#define RQ_SCSI_DISCONNECTING 0xffe0 + int rq_status; /* should split this into a few status bits */ kdev_t rq_dev; int cmd; /* READ or WRITE */ int errors; - unsigned long sector; + sector_t sector; unsigned long nr_sectors; unsigned long hard_sector, hard_nr_sectors; - unsigned int nr_segments; - unsigned int nr_hw_segments; - unsigned long current_nr_sectors; - void * special; - char * buffer; - struct completion * waiting; - struct buffer_head * bh; - struct buffer_head * bhtail; + unsigned short nr_segments; + unsigned short nr_hw_segments; + unsigned short current_nr_sectors; + unsigned short hard_cur_sectors; + void *special; + char *buffer; + struct completion *waiting; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); typedef void (unplug_device_fn) (void *q); +enum blk_queue_state { + Queue_down, + Queue_up, +}; + /* * Default nr free requests per queue, ll_rw_blk will scale it down * according to available RAM at init time @@ -87,7 +78,7 @@ 
merge_request_fn * front_merge_fn; merge_requests_fn * merge_requests_fn; make_request_fn * make_request_fn; - plug_device_fn * plug_device_fn; + /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. @@ -95,33 +86,112 @@ void * queuedata; /* - * This is used to remove the plug when tq_disk runs. + * queue needs bounce pages for pages above this limit (phys addr) */ - struct tq_struct plug_tq; + struct page *bounce_limit; + int bounce_gfp; /* - * Boolean that indicates whether this queue is plugged or not. + * This is used to remove the plug when tq_disk runs. */ - char plugged; + struct tq_struct plug_tq; /* - * Boolean that indicates whether current_request is active or - * not. + * various queue flags, see QUEUE_* below */ - char head_active; + unsigned long queue_flags; /* - * Is meant to protect the queue in the future instead of - * io_request_lock + * protects queue structures from reentrancy */ spinlock_t queue_lock; /* * Tasks wait here for free request */ - wait_queue_head_t wait_for_request; + wait_queue_head_t wait_for_request[2]; + + /* + * queue settings + */ + unsigned short max_sectors; + unsigned short max_segments; + unsigned short hardsect_size; + + /* + * queue state + */ + enum blk_queue_state queue_state; + wait_queue_head_t queue_wait; + + /* + * bio hash table + */ + bio_hash_table_t queue_hash; }; +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + +#define QUEUE_FLAG_PLUGGED 0 /* queue is plugged */ +#define QUEUE_FLAG_HEADACTIVE 1 /* has active head (going away) */ + +#define blk_queue_flag(q, flag) test_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_set_flag(q, flag) set_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_clear_flag(q, flag) clear_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) + +#define blk_queue_plugged(q) blk_queue_flag(q, PLUGGED) +#define blk_queue_headlive(q) blk_queue_flag(q, HEADACTIVE) + +#define blk_mark_plugged(q) blk_set_flag(q, PLUGGED) +#define blk_mark_headactive(q) blk_set_flag(q, HEADACTIVE) + +#define blk_set_unplugged(q) test_and_clear_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) +#define blk_set_plugged(q) test_and_set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + +#define blk_queue_empty(q) list_empty(&(q)->queue_head) + +#ifdef CONFIG_HIGHMEM +extern struct bio *create_bounce(struct bio *, int); +extern inline struct bio *blk_queue_bounce(request_queue_t *q, struct bio *bio) +{ + if (bio_page(bio) <= q->bounce_limit) + return bio; + + return create_bounce(bio, q->bounce_gfp); +} +#else +#define blk_queue_bounce(q, bio) (bio) +#endif + +extern unsigned long blk_max_low_pfn, blk_max_pfn; + +static inline struct request *elv_next_request(request_queue_t *q) +{ + return q->elevator.elevator_next_req_fn(q); +} + +static inline void elv_add_request_fn(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + /* + * insert into queue pending list, merge hash, and possible latency + * list + */ + list_add(&rq->queuelist, insert_here); +} + +static inline struct request *elv_next_request_fn(request_queue_t *q) +{ + return list_entry(q->queue_head.next, struct request, queuelist); +} + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn * PAGE_SIZE) +#define BLK_BOUNCE_ANY (blk_max_pfn * PAGE_SIZE) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -146,59 +216,66 @@ extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern void 
grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void grok_partitions(kdev_t dev, long size); +extern int wipe_partitions(kdev_t dev); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); -extern void generic_make_request(int rw, struct buffer_head * bh); -extern request_queue_t *blk_get_queue(kdev_t dev); -extern inline request_queue_t *__blk_get_queue(kdev_t dev); +extern void generic_make_request(struct bio *bio); +extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); +extern void blk_wake_queue(request_queue_t *); +extern void blk_attempt_remerge(request_queue_t *, struct request *); /* * Access functions for manipulating queue properties */ -extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern int blk_init_queue(request_queue_t *, request_fn_proc *, char *); extern void blk_cleanup_queue(request_queue_t *); extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void blk_queue_bounce_limit(request_queue_t *, unsigned long long); +extern void blk_queue_max_sectors(request_queue_t *q, unsigned short); +extern void blk_queue_max_segments(request_queue_t *q, unsigned short); +extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); extern void generic_unplug_device(void *); extern int * blk_size[MAX_BLKDEV]; extern int * blksize_size[MAX_BLKDEV]; -extern int * hardsect_size[MAX_BLKDEV]; - extern int * max_readahead[MAX_BLKDEV]; -extern int * max_sectors[MAX_BLKDEV]; - -extern int * max_segments[MAX_BLKDEV]; - #define MAX_SEGMENTS 128 #define MAX_SECTORS 255 -#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) - /* read-ahead in pages.. */ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) #define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) #define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) -#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) -#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queuelist.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queuelist.prev) extern void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io); +static inline void blk_clear(int major) +{ + blk_size[major] = NULL; +#if 0 + blk_size_in_bytes[major] = NULL; +#endif + blksize_size[major] = NULL; + max_readahead[major] = NULL; + read_ahead[major] = 0; +} + static inline int get_hardsect_size(kdev_t dev) { - extern int *hardsect_size[]; - if (hardsect_size[MAJOR(dev)] != NULL) - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - else - return 512; + request_queue_t *q = blk_get_queue(dev); + + return q ? 
q->hardsect_size : 512; } #define blk_finished_io(nsects) do { } while (0) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/bootmem.h linux/include/linux/bootmem.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/bootmem.h Wed Aug 15 23:22:13 2001 +++ linux/include/linux/bootmem.h Fri Sep 7 08:58:51 2001 @@ -18,6 +18,11 @@ extern unsigned long min_low_pfn; /* + * highest page + */ +extern unsigned long max_pfn; + +/* * node_bootmem_map is a map pointer - the bits represent all physical * memory pages (including holes) on the node. */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/cdrom.h linux/include/linux/cdrom.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/cdrom.h Wed Aug 15 23:21:30 2001 +++ linux/include/linux/cdrom.h Fri Sep 7 08:58:40 2001 @@ -577,6 +577,8 @@ struct dvd_manufact manufact; } dvd_struct; +#define CDROM_MAX_CDROMS 256 + /* * DVD authentication ioctl */ @@ -733,6 +735,7 @@ int number; /* generic driver updates this */ /* specifications */ kdev_t dev; /* device number */ + int nr; /* cdrom entry */ int mask; /* mask of capability: disables them */ int speed; /* maximum speed for reading data */ int capacity; /* number of discs in jukebox */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/devfs_fs_kernel.h linux/include/linux/devfs_fs_kernel.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/devfs_fs_kernel.h Wed Aug 15 23:21:30 2001 +++ linux/include/linux/devfs_fs_kernel.h Fri Sep 7 08:58:40 2001 @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/elevator.h linux/include/linux/elevator.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/elevator.h Fri Feb 16 01:58:34 2001 +++ linux/include/linux/elevator.h Wed Sep 5 14:20:10 2001 @@ -5,13 +5,20 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, + struct list_head *, struct bio *); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); typedef void (elevator_merge_req_fn) (struct request *, struct request *); +typedef struct request *(elevator_next_req_fn) (request_queue_t *); + +typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, struct list_head *); + +typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); +typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *); + struct elevator_s { int read_latency; @@ -21,31 +28,44 @@ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; elevator_merge_req_fn *elevator_merge_req_fn; - unsigned int queue_ID; + elevator_next_req_fn *elevator_next_req_fn; + elevator_add_req_fn *elevator_add_req_fn; + + elevator_init_fn *elevator_init_fn; + elevator_exit_fn *elevator_exit_fn; + + /* + * per-elevator private data + */ + void *elevator_data; + + char queue_name[16]; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); void elevator_noop_merge_req(struct request *, struct request *); -int elevator_linus_merge(request_queue_t *, struct request **, struct 
list_head *, struct buffer_head *, int, int); +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); void elevator_linus_merge_req(struct request *, struct request *); +int elv_linus_init(request_queue_t *, elevator_t *); +void elv_linus_exit(request_queue_t *, elevator_t *); +/* + * use the /proc/iosched interface, all the below is history -> + */ typedef struct blkelv_ioctl_arg_s { int queue_ID; int read_latency; int write_latency; int max_bomb_segments; } blkelv_ioctl_arg_t; - #define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t)) #define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t)) -extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *); -extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *); - -extern void elevator_init(elevator_t *, elevator_t); +extern int elevator_init(request_queue_t *, elevator_t *, elevator_t, char *); +extern void elevator_exit(request_queue_t *, elevator_t *); /* * Return values from elevator merger @@ -81,6 +101,24 @@ return latency; } +/* + * will change once we move to a more complex data structure than a simple + * list for pending requests + */ +#define elv_queue_empty(q) list_empty(&(q)->queue_head) + +/* + * elevator private data + */ +struct elv_linus_data { + unsigned long flags; +}; + +#define ELV_DAT(e) ((struct elv_linus_data *)(e)->elevator_data) + +#define ELV_LINUS_BACK_MERGE 1 +#define ELV_LINUS_FRONT_MERGE 2 + #define ELEVATOR_NOOP \ ((elevator_t) { \ 0, /* read_latency */ \ @@ -89,6 +127,10 @@ elevator_noop_merge, /* elevator_merge_fn */ \ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_noop_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #define ELEVATOR_LINUS \ @@ -99,6 +141,10 @@ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/fs.h linux/include/linux/fs.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/fs.h Wed Sep 5 12:41:18 2001 +++ linux/include/linux/fs.h Fri Sep 7 08:58:40 2001 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -231,32 +232,31 @@ struct buffer_head { /* First cache line: */ struct buffer_head *b_next; /* Hash queue list */ - unsigned long b_blocknr; /* block number */ + sector_t b_blocknr; /* block number */ unsigned short b_size; /* block size */ unsigned short b_list; /* List that this buffer appears */ kdev_t b_dev; /* device (B_FREE = free) */ atomic_t b_count; /* users using this block */ - kdev_t b_rdev; /* Real device */ unsigned long b_state; /* buffer state bitmap (see above) */ unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ struct buffer_head *b_next_free;/* lru/free list linkage */ struct buffer_head *b_prev_free;/* doubly linked list of buffers */ struct buffer_head *b_this_page;/* circular list of buffers in one page */ - struct buffer_head *b_reqnext; /* request queue */ - struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ char * b_data; /* pointer to data block */ struct page *b_page; /* the page this bh is mapped to */ - void (*b_end_io)(struct buffer_head *bh, int 
uptodate); /* I/O completion */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completio +n */ void *b_private; /* reserved for b_end_io */ - unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; struct inode * b_inode; struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ + + struct bio *b_bio; /* allocated on I/O to/from buffer */ }; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); @@ -1133,10 +1133,24 @@ static inline void buffer_IO_error(struct buffer_head * bh) { mark_buffer_clean(bh); + /* - * b_end_io has to clear the BH_Uptodate bitflag in the error case! + * b_end_io has to clear the BH_Uptodate bitflag in the read error + * case, however buffer contents are not necessarily bad if a + * write fails */ - bh->b_end_io(bh, 0); + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); +} + +extern inline int bio_rw(struct bio *bio) +{ + if (bio->bi_flags & BIO_READ) + return READ; + else if (bio->bi_flags & BIO_WRITE) + return WRITE; + + BUG(); + return -1; /* ahem */ } extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); @@ -1298,10 +1312,11 @@ extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void file_moveto(struct file *new, struct file *old); -extern struct buffer_head * get_hash_table(kdev_t, int, int); -extern struct buffer_head * getblk(kdev_t, int, int); +extern struct buffer_head * get_hash_table(kdev_t, sector_t, int); +extern struct buffer_head * getblk(kdev_t, sector_t, int); extern void ll_rw_block(int, int, struct buffer_head * bh[]); extern void submit_bh(int, struct buffer_head *); +extern void submit_bio(int, struct bio *); extern int is_read_only(kdev_t); extern void __brelse(struct buffer_head *); static inline void brelse(struct buffer_head *buf) @@ -1319,7 +1334,7 @@ extern struct buffer_head * bread(kdev_t, int, int); extern void wakeup_bdflush(void); -extern int brw_page(int, struct page *, kdev_t, int [], int); +extern int brw_page(int, struct page *, kdev_t, sector_t [], int); typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); @@ -1333,7 +1348,7 @@ unsigned long *); extern int block_sync_page(struct page *); -int generic_block_bmap(struct address_space *, long, get_block_t *); +sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/genhd.h linux/include/linux/genhd.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/genhd.h Wed Aug 15 23:21:30 2001 +++ linux/include/linux/genhd.h Fri Sep 7 08:58:40 2001 @@ -74,6 +74,47 @@ devfs_handle_t *de_arr; /* one per physical disc */ char *flags; /* one per physical disc */ }; + +extern struct gendisk *blk_gendisk[MAX_BLKDEV]; /* in ll_rw_blk.c */ +extern struct gendisk *gendisk_head; /* in partitions/check.c */ + +static inline void +add_gendisk(struct gendisk *g) { + if (!blk_gendisk[g->major]) { + g->next = gendisk_head; + gendisk_head = g; + blk_gendisk[g->major] = g; + } +} + +static inline void +del_gendisk(struct gendisk *g) { + struct gendisk ** gp; + + blk_gendisk[g->major] = NULL; + for (gp = &gendisk_head; *gp; gp = &((*gp)->next)) + if (*gp == g) + break; + if (*gp) + *gp = (*gp)->next; +} + +static inline struct gendisk * +get_gendisk(kdev_t dev) { + return 
blk_gendisk[MAJOR(dev)]; +} + +static inline unsigned long +get_start_sect(kdev_t dev) { + struct gendisk *g = get_gendisk(dev); + return g ? g->part[MINOR(dev)].start_sect : 0; +} + +static inline unsigned long +get_nr_sects(kdev_t dev) { + struct gendisk *g = get_gendisk(dev); + return g ? g->part[MINOR(dev)].nr_sects : 0; +} #endif /* __KERNEL__ */ #ifdef CONFIG_SOLARIS_X86_PARTITION @@ -237,35 +278,10 @@ extern void devfs_register_partitions (struct gendisk *dev, int minor, int unregister); - - -/* - * FIXME: this should use genhd->minor_shift, but that is slow to look up. - */ static inline unsigned int disk_index (kdev_t dev) { - int major = MAJOR(dev); - int minor = MINOR(dev); - unsigned int index; - - switch (major) { - case DAC960_MAJOR+0: - index = (minor & 0x00f8) >> 3; - break; - case SCSI_DISK0_MAJOR: - index = (minor & 0x00f0) >> 4; - break; - case IDE0_MAJOR: /* same as HD_MAJOR */ - case XT_DISK_MAJOR: - index = (minor & 0x0040) >> 6; - break; - case IDE1_MAJOR: - index = ((minor & 0x0040) >> 6) + 2; - break; - default: - return 0; - } - return index; + struct gendisk *g = get_gendisk(dev); + return g ? (MINOR(dev) >> g->minor_shift) : 0; } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/highmem.h linux/include/linux/highmem.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/highmem.h Wed Sep 5 12:41:18 2001 +++ linux/include/linux/highmem.h Fri Sep 7 08:58:46 2001 @@ -2,6 +2,7 @@ #define _LINUX_HIGHMEM_H #include +#include #include #ifdef CONFIG_HIGHMEM @@ -13,7 +14,7 @@ /* declarations for linux/mm/highmem.c */ FASTCALL(unsigned int nr_free_highpages(void)); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); +extern struct bio *create_bounce(struct bio * bio_orig, int gfp_mask); static inline char *bh_kmap(struct buffer_head *bh) @@ -26,6 +27,44 @@ kunmap(bh->b_page); } +/* + * remember to add offset! and never ever reenable interrupts between a + * bio_kmap_irq and bio_kunmap_irq!! + */ +static inline char *bio_kmap_irq(struct bio *bio, unsigned long *flags) +{ + unsigned long addr; + + __save_flags(*flags); + + /* + * could be low + */ + if (!PageHighMem(bio_page(bio))) + return bio_data(bio); + + /* + * it's a highmem page + */ + __cli(); + addr = (unsigned long) kmap_atomic(bio_page(bio), KM_BIO_IRQ); + + if (addr & ~PAGE_MASK) + BUG(); + + return (char *) addr + bio_offset(bio); +} + +static inline void bio_kunmap_irq(char *buffer, unsigned long *flags) +{ + unsigned long ptr = (unsigned long) buffer & PAGE_MASK; + + kunmap_atomic((void *) ptr, KM_BIO_IRQ); + __restore_flags(*flags); +} + + + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -39,6 +78,9 @@ #define bh_kmap(bh) ((bh)->b_data) #define bh_kunmap(bh) do { } while (0) + +#define bio_kmap_irq(bio, flags) (bio_data(bio)) +#define bio_kunmap_irq(buf, flags) do { } while (0) #endif /* CONFIG_HIGHMEM */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/ide.h linux/include/linux/ide.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/ide.h Wed Aug 15 23:22:22 2001 +++ linux/include/linux/ide.h Thu Sep 6 13:26:45 2001 @@ -149,6 +149,21 @@ #define DATA_READY (DRQ_STAT) /* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. 
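
To illustrate the bio_kmap_irq()/bio_kunmap_irq() rules spelled out above (the offset is already added for you, and for a highmem page interrupts stay disabled until the matching unmap), a PIO-style copy out of a possibly-highmem bio could look like this sketch; example_pio_out() and the destination buffer are invented for the example:

    #include <linux/bio.h>
    #include <linux/highmem.h>
    #include <linux/string.h>

    static void example_pio_out(struct bio *bio, void *dest)
    {
            unsigned long flags;
            char *data = bio_kmap_irq(bio, &flags);

            /* for highmem pages, interrupts are off until the unmap below */
            memcpy(dest, data, bio_size(bio));
            bio_kunmap_irq(data, &flags);
    }
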
Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. + */ +#define PRD_BYTES 8 +#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) + +/* * Some more useful definitions */ #define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */ @@ -304,9 +319,9 @@ special_t special; /* special action flags */ byte keep_settings; /* restore settings after drive reset */ byte using_dma; /* disk is using dma for read/write */ - byte retry_pio; /* retrying dma capable host in pio */ - byte state; /* retry state */ byte waiting_for_dma; /* dma currently in progress */ + byte retry_pio; /* retrying in pio mode */ + byte state; /* retry state */ byte unmask; /* flag: okay to unmask other irqs */ byte slow; /* flag: slow data port */ byte bswap; /* flag: byte swap data */ @@ -481,7 +496,7 @@ ide_maskproc_t *maskproc; /* special host masking for drive selection */ ide_quirkproc_t *quirkproc; /* check host's drive quirk list */ ide_rw_proc_t *rwproc; /* adjust timing based upon rq->cmd direction */ - ide_ideproc_t *ideproc; /* CPU-polled transfer routine */ + ide_ideproc_t *ideproc; /* CPU-polled transfer routine */ ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */ unsigned int *dmatable_cpu; /* dma physical region descriptor table (cpu view) */ dma_addr_t dmatable_dma; /* dma physical region descriptor table (dma view) */ @@ -507,6 +522,7 @@ unsigned reset : 1; /* reset after probe */ unsigned autodma : 1; /* automatically try to enable DMA at boot */ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ + unsigned highmem : 1; /* can do full 32-bit dma */ byte channel; /* for dual-port chips: 0=primary, 1=secondary */ #ifdef CONFIG_BLK_DEV_IDEPCI struct pci_dev *pci_dev; /* for pci chipsets */ @@ -541,10 +557,12 @@ */ typedef int (ide_expiry_t)(ide_drive_t *); +#define IDE_BUSY 0 +#define IDE_SLEEP 1 + typedef struct hwgroup_s { ide_handler_t *handler;/* irq handler, if active */ - volatile int busy; /* BOOL: protects all fields below */ - int sleeping; /* BOOL: wake us up on timer expiry */ + unsigned long flags; /* BUSY, SLEEPING */ ide_drive_t *drive; /* current drive */ ide_hwif_t *hwif; /* ptr to current hwif in linked-list */ struct request *rq; /* current request */ @@ -787,6 +805,11 @@ unsigned long current_capacity (ide_drive_t *drive); /* + * Revalidate (read partition tables) + */ +void ide_revalidate_drive (ide_drive_t *drive); + +/* * Start a reset operation for an IDE interface. * The caller should return immediately after invoking this. */ @@ -814,6 +837,21 @@ } ide_action_t; /* + * temporarily mapping a (possible) highmem bio for PIO transfer + */ +#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9) + +extern inline void *ide_map_buffer(struct request *rq, unsigned long *flags) +{ + return bio_kmap_irq(rq->bio, flags) + ide_rq_offset(rq); +} + +extern inline void ide_unmap_buffer(char *buffer, unsigned long *flags) +{ + bio_kunmap_irq(buffer, flags); +} + +/* * This function issues a special IDE device request * onto the request queue. 
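
A sketch of how an IDE PIO path might use the ide_map_buffer()/ide_unmap_buffer() helpers above for a single sector, assuming the stock ide_input_data() PIO routine and SECTOR_WORDS from ide.h; the function name is illustrative only:

    #include <linux/ide.h>

    static void example_input_sector(ide_drive_t *drive, struct request *rq)
    {
            unsigned long flags;
            char *buf = ide_map_buffer(rq, &flags);

            ide_input_data(drive, buf, SECTOR_WORDS);   /* one 512 byte sector */
            ide_unmap_buffer(buf, &flags);
    }
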
* @@ -960,5 +998,8 @@ #endif void hwif_unregister (ide_hwif_t *hwif); + +#define DRIVE_LOCK(drive) ((drive)->queue.queue_lock) +extern spinlock_t ide_lock; #endif /* _IDE_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/iobuf.h linux/include/linux/iobuf.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/iobuf.h Wed Aug 15 23:21:30 2001 +++ linux/include/linux/iobuf.h Fri Sep 7 08:58:46 2001 @@ -26,7 +26,6 @@ #define KIO_MAX_ATOMIC_IO 512 /* in kb */ #define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1) -#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) /* The main kiobuf struct used for all our IO! */ @@ -48,8 +47,6 @@ /* Always embed enough struct pages for atomic IO */ struct page * map_array[KIO_STATIC_PAGES]; - struct buffer_head * bh[KIO_MAX_SECTORS]; - unsigned long blocks[KIO_MAX_SECTORS]; /* Dynamic state for IO completion: */ atomic_t io_count; /* IOs still in progress */ @@ -81,6 +78,9 @@ /* fs/buffer.c */ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size); + kdev_t dev, sector_t [], int size); + +/* fs/bio.c */ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, unsigned long block); #endif /* __LINUX_IOBUF_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/list.h linux/include/linux/list.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/list.h Sat Feb 17 01:06:17 2001 +++ linux/include/linux/list.h Wed Sep 5 13:06:36 2001 @@ -3,6 +3,8 @@ #if defined(__KERNEL__) || defined(_LVM_H_INCLUDE) +#include + /* * Simple doubly linked list implementation. * @@ -90,6 +92,7 @@ static __inline__ void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); + entry->prev = entry->next = 0; } /** @@ -146,8 +149,14 @@ * @pos: the &struct list_head to use as a loop counter. * @head: the head for your list. */ +#if 0 +#define list_for_each(pos, head) \ + for (pos = (head)->next, prefetch(pos->next); pos != (head); \ + pos = pos->next, prefetch(pos->next)) +#else #define list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) +#endif #endif /* __KERNEL__ || _LVM_H_INCLUDE */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/loop.h linux/include/linux/loop.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/loop.h Thu Jul 26 22:55:24 2001 +++ linux/include/linux/loop.h Fri Aug 3 12:04:42 2001 @@ -49,8 +49,8 @@ int old_gfp_mask; spinlock_t lo_lock; - struct buffer_head *lo_bh; - struct buffer_head *lo_bhtail; + struct bio *lo_bio; + struct bio *lo_biotail; int lo_state; struct semaphore lo_sem; struct semaphore lo_ctl_mutex; @@ -77,6 +77,7 @@ */ #define LO_FLAGS_DO_BMAP 1 #define LO_FLAGS_READ_ONLY 2 +#define LO_FLAGS_BH_REMAP 4 /* * Note that this structure gets the wrong offsets when directly used diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/nbd.h linux/include/linux/nbd.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/nbd.h Tue May 1 23:20:25 2001 +++ linux/include/linux/nbd.h Thu Sep 6 13:30:33 2001 @@ -37,24 +37,25 @@ static void nbd_end_request(struct request *req) { - struct buffer_head *bh; + struct bio *bio; unsigned nsect; unsigned long flags; int uptodate = (req->errors == 0) ? 
1 : 0; + request_queue_t *q = req->q; #ifdef PARANOIA requests_out++; #endif - spin_lock_irqsave(&io_request_lock, flags); - while((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + spin_lock_irqsave(&q->queue_lock, flags); + while((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio_endio(bio, uptodate); } blkdev_release_request(req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } #define MAX_NBD 128 diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/pci.h linux/include/linux/pci.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/pci.h Wed Aug 15 23:21:44 2001 +++ linux/include/linux/pci.h Thu Sep 6 13:25:55 2001 @@ -314,6 +314,8 @@ #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 +#define PCI_MAX_DMA32 (0xffffffff) + #define DEVICE_COUNT_COMPATIBLE 4 #define DEVICE_COUNT_IRQ 2 #define DEVICE_COUNT_DMA 2 @@ -353,7 +355,7 @@ struct pci_driver *driver; /* which driver has allocated this device */ void *driver_data; /* data private to the driver */ - dma_addr_t dma_mask; /* Mask of the bits of bus address this + u64 dma_mask; /* Mask of the bits of bus address this device implements. Normally this is 0xffffffff. You only need to change this if your device has broken DMA @@ -559,7 +561,8 @@ int pci_enable_device(struct pci_dev *dev); void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); -int pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); +int pci_set_dma_mask(struct pci_dev *dev, u64 mask); +int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_assign_resource(struct pci_dev *dev, int i); /* Power management related routines */ @@ -641,7 +644,8 @@ static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } static inline void pci_disable_device(struct pci_dev *dev) { } static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; } -static inline int pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) { return -EIO; } +static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; } +static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; } static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;} static inline int pci_register_driver(struct pci_driver *drv) { return 0;} static inline void pci_unregister_driver(struct pci_driver *drv) { } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/prefetch.h linux/include/linux/prefetch.h --- /opt/kernel/linux-2.4.10-pre4/include/linux/prefetch.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/prefetch.h Wed Sep 5 13:06:36 2001 @@ -0,0 +1,60 @@ +/* + * Generic cache management functions. Everything is arch-specific, + * but this header exists to make sure the defines/functions can be + * used in a generic way. + * + * 2000-11-13 Arjan van de Ven + * + */ + +#ifndef _LINUX_PREFETCH_H +#define _LINUX_PREFETCH_H + +#include +#include + +/* + prefetch(x) attempts to pre-emptively get the memory pointed to + by address "x" into the CPU L1 cache. + prefetch(x) should not cause any kind of exception, prefetch(0) is + specifically ok. + + prefetch() should be defined by the architecture, if not, the + #define below provides a no-op define. 
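
The prefetch() hint from this header is typically dropped into pointer-chasing loops so the next node is already in flight while the current one is processed; a hypothetical list walk (struct example and its payload field are invented for the sketch):

    #include <linux/list.h>
    #include <linux/prefetch.h>

    struct example {
            struct list_head list;
            int payload;
    };

    static int example_sum(struct list_head *head)
    {
            struct list_head *pos;
            int sum = 0;

            for (pos = head->next; pos != head; pos = pos->next) {
                    prefetch(pos->next);    /* start fetching the next node early */
                    sum += list_entry(pos, struct example, list)->payload;
            }
            return sum;
    }
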
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/pci.h linux/include/linux/pci.h
--- /opt/kernel/linux-2.4.10-pre4/include/linux/pci.h	Wed Aug 15 23:21:44 2001
+++ linux/include/linux/pci.h	Thu Sep 6 13:25:55 2001
@@ -314,6 +314,8 @@
 #define PCI_DMA_FROMDEVICE	2
 #define PCI_DMA_NONE		3

+#define PCI_MAX_DMA32		(0xffffffff)
+
 #define DEVICE_COUNT_COMPATIBLE	4
 #define DEVICE_COUNT_IRQ	2
 #define DEVICE_COUNT_DMA	2
@@ -353,7 +355,7 @@
 	struct pci_driver *driver;	/* which driver has allocated this device */
 	void		*driver_data;	/* data private to the driver */
-	dma_addr_t	dma_mask;	/* Mask of the bits of bus address this
+	u64		dma_mask;	/* Mask of the bits of bus address this
 					   device implements.  Normally this is
 					   0xffffffff.  You only need to change
 					   this if your device has broken DMA
@@ -559,7 +561,8 @@
 int pci_enable_device(struct pci_dev *dev);
 void pci_disable_device(struct pci_dev *dev);
 void pci_set_master(struct pci_dev *dev);
-int pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask);
+int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask);
 int pci_assign_resource(struct pci_dev *dev, int i);

 /* Power management related routines */
@@ -641,7 +644,8 @@
 static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
 static inline void pci_disable_device(struct pci_dev *dev) { }
 static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; }
-static inline int pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) { return -EIO; }
+static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
 static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;}
 static inline int pci_register_driver(struct pci_driver *drv) { return 0;}
 static inline void pci_unregister_driver(struct pci_driver *drv) { }
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/prefetch.h linux/include/linux/prefetch.h
--- /opt/kernel/linux-2.4.10-pre4/include/linux/prefetch.h	Thu Jan 1 01:00:00 1970
+++ linux/include/linux/prefetch.h	Wed Sep 5 13:06:36 2001
@@ -0,0 +1,60 @@
+/*
+ *  Generic cache management functions. Everything is arch-specific,
+ *  but this header exists to make sure the defines/functions can be
+ *  used in a generic way.
+ *
+ *  2000-11-13  Arjan van de Ven
+ *
+ */

+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H

+#include
+#include

+/*
+	prefetch(x) attempts to pre-emptively get the memory pointed to
+	by address "x" into the CPU L1 cache.
+	prefetch(x) should not cause any kind of exception, prefetch(0) is
+	specifically ok.

+	prefetch() should be defined by the architecture, if not, the
+	#define below provides a no-op define.

+	There are 3 prefetch() macros:

+	prefetch(x)		- prefetches the cacheline at "x" for read
+	prefetchw(x)		- prefetches the cacheline at "x" for write
+	spin_lock_prefetch(x)	- prefetches the spinlock *x for taking

+	there is also PREFETCH_STRIDE which is the architecture-preferred
+	"lookahead" size for prefetching streamed operations.

+*/

+/*
+ *	These cannot be do{}while(0) macros. See the mental gymnastics in
+ *	the loop macro.
+ */

+#ifndef ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x) {;}
+#endif

+#ifndef ARCH_HAS_PREFETCHW
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x) {;}
+#endif

+#ifndef ARCH_HAS_SPINLOCK_PREFETCH
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define spin_lock_prefetch(x) prefetchw(x)
+#endif

+#ifndef PREFETCH_STRIDE
+#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
+#endif

+#endif
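[ Editor's note, not part of the patch: the header above only supplies no-op fallbacks; the point of prefetch()/prefetchw() is to overlap memory latency with useful work in streaming or pointer-chasing loops, with PREFETCH_STRIDE as the suggested lookahead for the streaming case. The sketch below is illustrative only; sum_bytes(), buf and len are invented names, and prefetching past the end of the buffer is fine because the header guarantees prefetch() never faults.

	/* Illustrative only: prefetch a stride ahead of the byte being
	 * consumed, so the next cachelines are (hopefully) already in L1
	 * by the time the loop reaches them.
	 */
	#include <linux/prefetch.h>

	static unsigned long sum_bytes(const unsigned char *buf, unsigned long len)
	{
		unsigned long i, sum = 0;

		for (i = 0; i < len; i++) {
			prefetch(buf + i + PREFETCH_STRIDE);
			sum += buf[i];
		}
		return sum;
	}

  prefetchw() is the same idea for data about to be written, and spin_lock_prefetch() simply prefetches the lock word for ownership before spin_lock() is reached. ]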
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/raid/md.h linux/include/linux/raid/md.h
--- /opt/kernel/linux-2.4.10-pre4/include/linux/raid/md.h	Wed Aug 15 23:21:32 2001
+++ linux/include/linux/raid/md.h	Fri Sep 7 08:59:56 2001
@@ -78,7 +78,6 @@
 extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
 extern void md_recover_arrays (void);
 extern int md_check_ordering (mddev_t *mddev);
-extern struct gendisk * find_gendisk (kdev_t dev);
 extern int md_notify_reboot(struct notifier_block *this,
 					unsigned long code, void *x);
 extern int md_error (mddev_t *mddev, kdev_t rdev);
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/include/linux/slab.h linux/include/linux/slab.h
--- /opt/kernel/linux-2.4.10-pre4/include/linux/slab.h	Wed Sep 5 12:41:18 2001
+++ linux/include/linux/slab.h	Fri Sep 7 08:58:40 2001
@@ -76,6 +76,8 @@
 extern kmem_cache_t	*bh_cachep;
 extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*sigact_cachep;
+extern kmem_cache_t	*bio_cachep;
+extern kmem_cache_t	*biovec_cachep;

 #endif	/* __KERNEL__ */
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/kernel/ksyms.c linux/kernel/ksyms.c
--- /opt/kernel/linux-2.4.10-pre4/kernel/ksyms.c	Wed Sep 5 12:41:18 2001
+++ linux/kernel/ksyms.c	Wed Sep 5 11:13:24 2001
@@ -121,6 +121,8 @@
 EXPORT_SYMBOL(kunmap_high);
 EXPORT_SYMBOL(highmem_start_page);
 EXPORT_SYMBOL(create_bounce);
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
 #endif

 /* filesystem internal functions */
@@ -287,7 +289,6 @@
 EXPORT_SYMBOL(block_read);
 EXPORT_SYMBOL(block_write);
 EXPORT_SYMBOL(blksize_size);
-EXPORT_SYMBOL(hardsect_size);
 EXPORT_SYMBOL(blk_size);
 EXPORT_SYMBOL(blk_dev);
 EXPORT_SYMBOL(is_read_only);
@@ -305,9 +306,10 @@
 EXPORT_SYMBOL(tq_disk);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(refile_buffer);
-EXPORT_SYMBOL(max_sectors);
 EXPORT_SYMBOL(max_readahead);
 EXPORT_SYMBOL(file_moveto);
+EXPORT_SYMBOL(wipe_partitions);
+EXPORT_SYMBOL(blk_gendisk);

 /* tty routines */
 EXPORT_SYMBOL(tty_hangup);
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/kernel/sched.c linux/kernel/sched.c
--- /opt/kernel/linux-2.4.10-pre4/kernel/sched.c	Wed Sep 5 12:41:18 2001
+++ linux/kernel/sched.c	Wed Sep 5 11:13:24 2001
@@ -765,12 +765,13 @@

 void complete(struct completion *x)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&x->wait.lock, flags);
-	x->done++;
-	__wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
-	spin_unlock_irqrestore(&x->wait.lock, flags);
+	if (x) {
+		unsigned long flags;
+		spin_lock_irqsave(&x->wait.lock, flags);
+		x->done++;
+		__wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
+		spin_unlock_irqrestore(&x->wait.lock, flags);
+	}
 }

 void wait_for_completion(struct completion *x)
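[ Editor's note, not part of the patch: the sched.c hunk makes complete(NULL) a safe no-op, so code paths that only sometimes have a waiter can call complete() unconditionally instead of testing the pointer at every completion site. A hedged sketch of that calling convention follows; struct my_work and the two helper functions are invented for illustration, and locking/ordering around the submit step is omitted.

	/* Sketch only: an object whose owner may or may not be waiting
	 * for it to finish.  With the patched complete(), the completion
	 * side does not need to check for a NULL waiter.
	 */
	#include <linux/completion.h>

	struct my_work {
		struct completion *waiter;	/* NULL if nobody waits */
		/* ... */
	};

	static void my_work_done(struct my_work *w)
	{
		complete(w->waiter);		/* no-op when w->waiter == NULL */
	}

	static void my_work_wait(struct my_work *w)
	{
		struct completion done;

		init_completion(&done);
		w->waiter = &done;
		/* ... submit w to whoever will call my_work_done() ... */
		wait_for_completion(&done);
	}
]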
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/mm/highmem.c linux/mm/highmem.c
--- /opt/kernel/linux-2.4.10-pre4/mm/highmem.c	Wed Sep 5 12:41:18 2001
+++ linux/mm/highmem.c	Fri Sep 7 09:00:25 2001
@@ -22,6 +22,8 @@
 #include
 #include

+#include
+
 /*
  * Virtual_count is not a pure "count".
  *   0 means that it is not mapped, and has not been mapped
@@ -200,49 +202,69 @@
 static LIST_HEAD(emergency_bhs);

 /*
- * Simple bounce buffer support for highmem pages.
- * This will be moved to the block layer in 2.5.
+ * Simple bounce buffer support for highmem pages. Depending on the
+ * queue gfp mask set, *to may or may not be a highmem page. kmap it
+ * always, it will do the Right Thing
 */
-
-static inline void copy_from_high_bh (struct buffer_head *to,
-			 struct buffer_head *from)
+static inline void copy_from_high_bio(struct bio *to, struct bio *from)
 {
-	struct page *p_from;
-	char *vfrom;
+	if (bio_offset(to))
+		BUG();
+	if (bio_size(to) != bio_size(from))
+		BUG();

-	p_from = from->b_page;
+	memcpy(kmap(bio_page(to)), kmap(bio_page(from)) + bio_offset(from), bio_size(to));

-	vfrom = kmap_atomic(p_from, KM_BOUNCE_WRITE);
-	memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
-	kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
+	kunmap(bio_page(from));
+	kunmap(bio_page(to));
 }

-static inline void copy_to_high_bh_irq (struct buffer_head *to,
-			 struct buffer_head *from)
+static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 {
-	struct page *p_to;
-	char *vto;
+	char *vto, *vfrom;
 	unsigned long flags;

-	p_to = to->b_page;
+	if (bio_offset(from))
+		BUG();
+	if (bio_size(to) != bio_size(from))
+		BUG();
+
 	__save_flags(flags);
 	__cli();
-	vto = kmap_atomic(p_to, KM_BOUNCE_READ);
-	memcpy(vto + bh_offset(to), from->b_data, to->b_size);
-	kunmap_atomic(vto, KM_BOUNCE_READ);
+	vto = __kmap_atomic(bio_page(to), KM_BOUNCE_READ);
+	vfrom = __kmap_atomic(bio_page(from), KM_BOUNCE_READ);
+	memcpy(vto + bio_offset(to), vfrom, bio_size(to));
+	__kunmap_atomic(vfrom, KM_BOUNCE_READ);
+	__kunmap_atomic(vto, KM_BOUNCE_READ);
 	__restore_flags(flags);
 }

-static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
+static __init int init_emergency_pool(void)
 {
-	struct page *page;
-	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
-	unsigned long flags;
+	spin_lock_irq(&emergency_lock);
+	while (nr_emergency_pages < POOL_SIZE) {
+		struct page * page = alloc_page(GFP_ATOMIC);
+		if (!page) {
+			printk("couldn't refill highmem emergency pages");
+			break;
+		}
+		list_add(&page->list, &emergency_pages);
+		nr_emergency_pages++;
+	}
+	spin_unlock_irq(&emergency_lock);
+	printk("allocated %d pages reserved for the highmem bounces\n", nr_emergency_pages);
+	return 0;
+}

-	bh_orig->b_end_io(bh_orig, uptodate);
+__initcall(init_emergency_pool);

-	page = bh->b_page;
+static inline void bounce_end_io (struct bio *bio)
+{
+	struct bio *bio_orig = bio->bi_private;
+	struct page *page = bio_page(bio);
+	unsigned long flags;

+	bio_endio(bio_orig, bio->bi_flags & BIO_UPTODATE);
 	spin_lock_irqsave(&emergency_lock, flags);
 	if (nr_emergency_pages >= POOL_SIZE)
 		__free_page(page);
@@ -254,74 +276,32 @@
 		list_add(&page->list, &emergency_pages);
 		nr_emergency_pages++;
 	}
-
-	if (nr_emergency_bhs >= POOL_SIZE) {
-#ifdef HIGHMEM_DEBUG
-		/* Don't clobber the constructed slab cache */
-		init_waitqueue_head(&bh->b_wait);
-#endif
-		kmem_cache_free(bh_cachep, bh);
-	} else {
-		/*
-		 * Ditto in the bh case, here we abuse b_inode_buffers:
-		 */
-		list_add(&bh->b_inode_buffers, &emergency_bhs);
-		nr_emergency_bhs++;
-	}
 	spin_unlock_irqrestore(&emergency_lock, flags);
+	__bio_put(bio);
 }

-static __init int init_emergency_pool(void)
+static void bounce_end_io_write (struct bio *bio)
 {
-	spin_lock_irq(&emergency_lock);
-	while (nr_emergency_pages < POOL_SIZE) {
-		struct page * page = alloc_page(GFP_ATOMIC);
-		if (!page) {
-			printk("couldn't refill highmem emergency pages");
-			break;
-		}
-		list_add(&page->list, &emergency_pages);
-		nr_emergency_pages++;
-	}
-	while (nr_emergency_bhs < POOL_SIZE) {
-		struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
-		if (!bh) {
-			printk("couldn't refill highmem emergency bhs");
-			break;
-		}
-		list_add(&bh->b_inode_buffers, &emergency_bhs);
-		nr_emergency_bhs++;
-	}
-	spin_unlock_irq(&emergency_lock);
-	printk("allocated %d pages and %d bhs reserved for the highmem bounces\n",
-	       nr_emergency_pages, nr_emergency_bhs);
-
-	return 0;
+	bounce_end_io(bio);
 }

-__initcall(init_emergency_pool);
-
-static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
+static void bounce_end_io_read (struct bio *bio)
 {
-	bounce_end_io(bh, uptodate);
-}
+	struct bio *bio_orig = bio->bi_private;

-static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
-{
-	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+	if (bio->bi_flags & BIO_UPTODATE)
+		copy_to_high_bio_irq(bio_orig, bio);

-	if (uptodate)
-		copy_to_high_bh_irq(bh_orig, bh);
-	bounce_end_io(bh, uptodate);
+	bounce_end_io(bio);
 }

-struct page *alloc_bounce_page (void)
+struct page *alloc_bounce_page(int gfp_mask)
 {
 	struct list_head *tmp;
 	struct page *page;

 repeat_alloc:
-	page = alloc_page(GFP_NOHIGHIO);
+	page = alloc_page(gfp_mask);
 	if (page)
 		return page;
 	/*
@@ -353,91 +333,35 @@
 	goto repeat_alloc;
 }

-struct buffer_head *alloc_bounce_bh (void)
+struct bio *create_bounce(struct bio *bio_orig, int gfp_mask)
 {
-	struct list_head *tmp;
-	struct buffer_head *bh;
+	struct page *page;
+	struct bio *bio;

-repeat_alloc:
-	bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO);
-	if (bh)
-		return bh;
-	/*
-	 * No luck. First, kick the VM so it doesnt idle around while
-	 * we are using up our emergency rations.
-	 */
-	wakeup_bdflush();
+	bio = bio_alloc(GFP_NOHIGHIO);

 	/*
-	 * Try to allocate from the emergency pool.
+	 * wasteful for 1kB fs, but machines with lots of ram are less likely
+	 * to have 1kB fs for anything that needs to go fast. so all things
+	 * considered, it should be ok.
 	 */
-	tmp = &emergency_bhs;
-	spin_lock_irq(&emergency_lock);
-	if (!list_empty(tmp)) {
-		bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers);
-		list_del(tmp->next);
-		nr_emergency_bhs--;
-	}
-	spin_unlock_irq(&emergency_lock);
-	if (bh)
-		return bh;
+	page = alloc_bounce_page(gfp_mask);

-	/* we need to wait I/O completion */
-	run_task_queue(&tq_disk);
+	bio->bi_dev = bio_orig->bi_dev;
+	bio->bi_sector = bio_orig->bi_sector;
+	bio->bi_flags |= bio_orig->bi_flags & BIO_RW_MASK;

-	current->policy |= SCHED_YIELD;
-	__set_current_state(TASK_RUNNING);
-	schedule();
-	goto repeat_alloc;
-}
+	bio->bi_io_vec.bv_page = page;
+	bio->bi_io_vec.bv_len = bio_size(bio_orig);
+	bio->bi_io_vec.bv_offset = 0;

-struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
-{
-	struct page *page;
-	struct buffer_head *bh;
+	bio->bi_private = bio_orig;

-	if (!PageHighMem(bh_orig->b_page))
-		return bh_orig;
-
-	bh = alloc_bounce_bh();
-	/*
-	 * This is wasteful for 1k buffers, but this is a stopgap measure
-	 * and we are being ineffective anyway. This approach simplifies
-	 * things immensly. On boxes with more than 4GB RAM this should
-	 * not be an issue anyway.
-	 */
-	page = alloc_bounce_page();
-
-	set_bh_page(bh, page, 0);
-
-	bh->b_next = NULL;
-	bh->b_blocknr = bh_orig->b_blocknr;
-	bh->b_size = bh_orig->b_size;
-	bh->b_list = -1;
-	bh->b_dev = bh_orig->b_dev;
-	bh->b_count = bh_orig->b_count;
-	bh->b_rdev = bh_orig->b_rdev;
-	bh->b_state = bh_orig->b_state;
-#ifdef HIGHMEM_DEBUG
-	bh->b_flushtime = jiffies;
-	bh->b_next_free = NULL;
-	bh->b_prev_free = NULL;
-	/* bh->b_this_page */
-	bh->b_reqnext = NULL;
-	bh->b_pprev = NULL;
-#endif
-	/* bh->b_page */
-	if (rw == WRITE) {
-		bh->b_end_io = bounce_end_io_write;
-		copy_from_high_bh(bh, bh_orig);
+	if (bio->bi_flags & BIO_WRITE) {
+		bio->bi_end_io = bounce_end_io_write;
+		copy_from_high_bio(bio, bio_orig);
 	} else
-		bh->b_end_io = bounce_end_io_read;
-	bh->b_private = (void *)bh_orig;
-	bh->b_rsector = bh_orig->b_rsector;
-#ifdef HIGHMEM_DEBUG
-	memset(&bh->b_wait, -1, sizeof(bh->b_wait));
-#endif
+		bio->bi_end_io = bounce_end_io_read;

-	return bh;
+	return bio;
 }
-
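[ Editor's note, not part of the patch: with this change create_bounce() takes the original bio plus a gfp mask and returns a bounce bio whose single io_vec points at a low-memory page, with bi_end_io wired to bounce_end_io_write (data already copied out of the highmem page) or bounce_end_io_read (data copied back on completion), and the original bio stashed in bi_private. A hedged sketch of where a caller might use it follows; the surrounding submit function, the PageHighMem() test as the bounce trigger, and submit_to_driver() are assumptions for illustration, not taken from this patch.

	/* Sketch only: bounce a highmem bio before handing it to a driver
	 * that cannot DMA to high memory.  create_bounce() and GFP_NOHIGHIO
	 * are the interfaces used by this patch; everything else here is a
	 * hypothetical stand-in.  Assumes <linux/highmem.h> and the bio
	 * definitions are included.
	 */
	extern void submit_to_driver(struct bio *);	/* hypothetical */

	static void submit_possibly_bounced(struct bio *bio)
	{
		/* assumption: caller-side policy decides when to bounce */
		if (PageHighMem(bio_page(bio)))
			bio = create_bounce(bio, GFP_NOHIGHIO);

		submit_to_driver(bio);
	}

  When the driver completes the bounce bio, bounce_end_io() above ends the original via bi_private and returns the bounce page to the emergency pool. ]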
diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.10-pre4/mm/page_io.c linux/mm/page_io.c
--- /opt/kernel/linux-2.4.10-pre4/mm/page_io.c	Wed Apr 25 23:46:21 2001
+++ linux/mm/page_io.c	Fri Aug 3 12:04:42 2001
@@ -36,7 +36,7 @@
 static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
 {
 	unsigned long offset;
-	int zones[PAGE_SIZE/512];
+	sector_t zones[PAGE_SIZE/512];
 	int zones_used;
 	kdev_t dev = 0;
 	int block_size;