diff -urpN -X /home/fletch/.diff.exclude 00-virgin/Documentation/filesystems/proc.txt 90-mjb1/Documentation/filesystems/proc.txt --- 00-virgin/Documentation/filesystems/proc.txt Mon Jan 13 21:09:08 2003 +++ 90-mjb1/Documentation/filesystems/proc.txt Mon Jan 13 23:20:20 2003 @@ -37,6 +37,7 @@ Table of Contents 2.8 /proc/sys/net/ipv4 - IPV4 settings 2.9 Appletalk 2.10 IPX + 2.11 /proc/sys/sched - scheduler tunables ------------------------------------------------------------------------------ Preface @@ -1662,6 +1663,92 @@ IPX. The /proc/net/ipx_route table holds a list of IPX routes. For each route it gives the destination network, the router node (or Directly) and the network address of the router (or Connected) for internal networks. + +2.11 /proc/sys/sched - scheduler tunables +----------------------------------------- + +Useful knobs for tuning the scheduler live in /proc/sys/sched. + +child_penalty +------------- + +Percentage of the parent's sleep_avg that children inherit. sleep_avg is +a running average of the time a process spends sleeping. Tasks with high +sleep_avg values are considered interactive and given a higher dynamic +priority and a larger timeslice. You typically want this some value just +under 100. + +exit_weight +----------- + +When a CPU hog task exits, its parent's sleep_avg is reduced by a factor of +exit_weight against the exiting task's sleep_avg. + +interactive_delta +----------------- + +If a task is "interactive" it is reinserted into the active array after it +has expired its timeslice, instead of being inserted into the expired array. +How "interactive" a task must be in order to be deemed interactive is a +function of its nice value. This interactive limit is scaled linearly by nice +value and is offset by the interactive_delta. + +max_sleep_avg +------------- + +max_sleep_avg is the largest value (in ms) stored for a task's running sleep +average. The larger this value, the longer a task needs to sleep to be +considered interactive (maximum interactive bonus is a function of +max_sleep_avg). + +max_timeslice +------------- + +Maximum timeslice, in milliseconds. This is the value given to tasks of the +highest dynamic priority. + +min_timeslice +------------- + +Minimum timeslice, in milliseconds. This is the value given to tasks of the +lowest dynamic priority. Every task gets at least this slice of the processor +per array switch. + +parent_penalty +-------------- + +Percentage of the parent's sleep_avg that it retains across a fork(). +sleep_avg is a running average of the time a process spends sleeping. Tasks +with high sleep_avg values are considered interactive and given a higher +dynamic priority and a larger timeslice. Normally, this value is 100 and thus +task's retain their sleep_avg on fork. If you want to punish interactive +tasks for forking, set this below 100. + +prio_bonus_ratio +---------------- + +Middle percentage of the priority range that tasks can receive as a dynamic +priority. The default value of 25% ensures that nice values at the +extremes are still enforced. For example, nice +19 interactive tasks will +never be able to preempt a nice 0 CPU hog. Setting this higher will increase +the size of the priority range the tasks can receive as a bonus. Setting +this lower will decrease this range, making the interactivity bonus less +apparent and user nice values more applicable. + +starvation_limit +---------------- + +Sufficiently interactive tasks are reinserted into the active array when they +run out of timeslice. Normally, tasks are inserted into the expired array. +Reinserting interactive tasks into the active array allows them to remain +runnable, which is important to interactive performance. This could starve +expired tasks, however, since the interactive task could prevent the array +switch. To prevent starving the tasks on the expired array for too long. the +starvation_limit is the longest (in ms) we will let the expired array starve +at the expense of reinserting interactive tasks back into active. Higher +values here give more preferance to running interactive tasks, at the expense +of expired tasks. Lower values provide more fair scheduling behavior, at the +expense of interactivity. The units are in milliseconds. ------------------------------------------------------------------------------ Summary diff -urpN -X /home/fletch/.diff.exclude 00-virgin/Documentation/i386/gdb-serial.txt 90-mjb1/Documentation/i386/gdb-serial.txt --- 00-virgin/Documentation/i386/gdb-serial.txt Wed Dec 31 16:00:00 1969 +++ 90-mjb1/Documentation/i386/gdb-serial.txt Mon Jan 13 23:20:30 2003 @@ -0,0 +1,386 @@ +Version +======= + +This version of the gdbstub package was developed and tested on +kernel version 2.3.48. It will not install on a 2.2 kernel. It may +not work on earlier versions of 2.3 kernels. It is possible that +it will continue to work on later versions of 2.3 and then +versions of 2.4 (I hope). + + +Debugging Setup +=============== + +Designate one machine as the "development" machine. This is the +machine on which you run your compiles and which has your source +code for the kernel. Designate a second machine as the "target" +machine. This is the machine that will run your experimental +kernel. + +The two machines will be connected together via a serial line out +one or the other of the COM ports of the PC. You will need a modem +eliminator and the appropriate cables. + +On the DEVELOPMENT machine you need to apply the patch for the gdb +hooks. You have probably already done that if you are reading this +file. + +On your DEVELOPMENT machine, go to your kernel source directory and +do "make menuconfig". Go down to the kernel hacking menu item and +open it up. Enable the kernel gdb stub code by selecting that item. + +Save and exit the menuconfig program. Then do "make clean" and +"make bzImage" (or whatever target you want to make). This gets +the kernel compiled with the "-g" option set -- necessary for +debugging. + +You have just built the kernel on your DEVELOPMENT machine that you +intend to run on our TARGET machine. + +To install this new kernel, use the following installation procedure. +Remember, you are on the DEVELOPMENT machine patching the kernel source +for the kernel that you intend to run on the TARGET machine. + +Copy this kernel to your target machine using your usual procedures. +I usually arrange to copy development:/usr/src/linux/arch/i386/boot/zImage +to /vmlinuz on the TARGET machine via a LAN based NFS access. That is, +I run the cp command on the target and copy from the development machine +via the LAN. Run Lilo on the new kernel on the target machine so that it +will boot! Then boot the kernel on the target machine. + +There is an utility program named "gdbstart" in the +development:/usr/src/linux/arch/i386/kernel directory. +You should copy this program over to your target machine, probably into +/sbin. This utility program is run on the target machine to +activate the kernel hooks for the debugger. It is invoked as follows: + + gdbstart [-s speed] [-t tty-dev] + defaults: /dev/ttyS0 with speed unmodified by gdbstart + +Don't run the program just yet. We'll get to that in a bit. + +Decide on which tty port you want the machines to communicate, then +cable them up back-to-back using the null modem. COM1 is /dev/ttyS0 +and COM2 is /dev/ttyS1. + +On the DEVELOPMENT machine, create a file called .gdbinit in the +directory /usr/src/linux. An example .gdbinit file looks like this: + +define rmt +set remotebaud 38400 +target remote /dev/ttyS0 +end + +Assuming that you added my gdbinit stuff to your .gdbinit, edit .gdbinit +and find the section that looks like this: + + define rmt + set remotebaud 38400 + target remote /dev/ttyS0 + end + +Change the "target" definition so that it specifies the tty port that +you intend to use. Change the "remotebaud" definition to match the +data rate that you are going to use for the com line. + +On the TARGET machine I find it helpful to create shell script file +named "debug" in the root home directory with the following contents: + + gdbstart -s 38400 -t /dev/ttyS0 < + EOF + +This runs the gdbstart program and gives it the carriage return that +it prompts for. This sets the data rate from the target machine's side. + +You are now ready to try it out. + +On your TARGET machine, freshly rebooted with your gdbstub-equipped +kernel, type "debug" in the root home directory. The system will appear +to hang with some messages on the screen from the debug stub. What +it is doing is waiting for contact from the development machine. + +On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". +When gdb gets the symbols loaded and prompts you, enter "rmt" (that's +the macro from the .gdbinit file that you just edited). If everything +is working correctly you should see gdb print out a few lines indicating +that a breakpoint has been taken. It will actually show a line of +code in the target kernel inside the gdbstub activation code. + +The gdb interaction should look something like this: + + linux-dev:/usr/src/linux# gdb vmlinux + GDB is free software and you are welcome to distribute copies of it + under certain conditions; type "show copying" to see the conditions. + There is absolutely no warranty for GDB; type "show warranty" for details. + GDB 4.15.1 (i486-slackware-linux), + Copyright 1995 Free Software Foundation, Inc... + (gdb) rmt + breakpoint () at i386-stub.c:750 + 750 } + (gdb) + + +You can now use whatever gdb commands you like to set breakpoints. +Enter "continue" to start your target machine executing again. At this +point the target system will run at full speed until it encounters +your breakpoint or gets a segment violation in the kernel, or whatever. + + +Triggering gdbstub at Kernel Boot Time +====================================== + +The gdbstub patch now has the ability for gdb to connect to the kernel during +bootup (as opposed to waiting for the system to come all the way up and then +running the gdbstart program on the target machine). This new functionality was +added by Scott Foehner at SGI. + +To force a kernel that has been compiled with gdbstub to pause during the boot +process and wait for a connection from gdb, the paramter "gdb" should be passed +to the kernel. This can be done by typing "gdb" after the name of the kernel +on the LILO command line. The patch defaults to use ttyS1 at a baud rate of +38400. These parameters can be changed by using "gdbttyS=" and +"gdbbaud=" on the command line. + +Example: + +LILO boot: linux gdb gdbttyS=1 gdbbaud=38400 + +Note that this command is entered on the TARGET machine as it is booting +the kernel that was compiled on the DEVELOPMENT machine. + +An alternate approach is to place a line in the /etc/lilo.conf file on +your TARGET machine. Under the heading for the kernel that you intend +to boot, place a line that looks like this: + + append = "gdb gdbttyS=1 gdbbaud=38400" + +This will cause the kernel to enter the gdbstub automatically at boot +time. + +BE SURE to run "lilo" after changing the /etc/lilo.conf file. + + +The "gdbstart" Program +===================== + +This utility program is used to set up the com port and data rate +for the connection from the target system to the development system. +Its usage has been described above. + +This version of the patch uses the same tty ioctl for kernel versions +2.0.30 onwards. Thus, the gdbstart utility does not need to be re-compiled +to install the patch in a later version of the kernel. The ioctl added +to the kernel for this purpose is far enough "off the end" of existing +ioctls (as of 2.1.120) that it should not interfere with any new kernel +tty ioctls for quite some time (famous last words). + +The source for the gdbstart program resides in the arch/i386/kernel directory. + + +Debugging hints +=============== + +You can break into the target machine at any time from the development +machine by typing ^C. If the target machine has interrupts enabled +this will stop it in the kernel and enter the debugger. + +There is unfortunately no way of breaking into the kernel if it is +in a loop with interrupts disabled, so if this happens to you then +you need to place exploratory breakpoints or printk's into the kernel +to find out where it is looping. + +There is a copy of an e-mail in the kgdb distribution directory which +describes how to create an NMI on an ISA bus machine using a paper +clip. I have a sophisticated version of this made by wiring a push +button switch into a PC104/ISA bus adapter card. The adapter card +nicely furnishes wire wrap pins for all the ISA bus signals. + +When you are done debugging the kernel on the target machine it is +a good idea to leave it in a running state. This makes reboots +faster, bypassing the fsck. So do a gdb "continue" as the last gdb +command if this is possible. To terminate gdb itself on the development +machine and leave the target machine running, type ^Z to suspend gdb +and then kill it with "kill %1" or something similar. + +If gdbstub Does Not Work +======================== + +If it doesn't work, you will have to troubleshoot it. Do the easy things +first like double checking your cabling and data rates. You might +try some non-kernel based programs to see if the back-to-back connection +works properly. Just something simple like cat /etc/hosts >/dev/ttyS0 +on one machine and cat /dev/ttyS0 on the other will tell you if you +can send data from one machine to the other. There is no point in tearing +out your hair in the kernel if the line doesn't work. + +All of the real action takes place in the file +/usr/src/linux/arch/i386/kernel/gdbstub.c. That is the code on the target +machine that interacts with gdb on the development machine. In gdb you can +turn on a debug switch with the following command: + + set remotedebug + +This will print out the protocol messages that gdb is exchanging with +the target machine. + +Another place to look is /usr/src/linux/drivers/char/gdbserial.c +That is the code that talks to the serial port on the target side. +There might be a problem there. + +If you are really desperate you can use printk debugging in the +gdbstub code in the target kernel until you get it working. In particular, +there is a global variable in /usr/src/linux/arch/i386/kernel/gdbstub.c +named "remote_debug". Compile your kernel with this set to 1, rather +than 0 and the debug stub will print out lots of stuff as it does +what it does. + + +Debugging Loadable Modules +========================== + +This technique comes courtesy of Edouard Parmelan + + +When you run gdb, enter the command + +source gdbinit-modules + +This will read in a file of gdb macros that was installed in your +kernel source directory with kgdb was installed. This file implements +the following commands: + +mod-list + Lists the loaded modules in the form + +mod-print-symbols + Prints all the symbols in the indicated module. + +mod-add-symbols + Loads the symbols from the object file and associates them + with the indicated module. + +After you have loaded the module that you want to debug, use the command +mod-list to find the of your module. Then use that +address in the mod-add-symbols command to load your module's symbols. +From that point onward you can debug your module as if it were a part +of the kernel. + +The file gdbinit-modules also contains a command named mod-add-lis as +an example of how to construct a command of your own to load your +favorite module. The idea is to "can" the pathname of the module +in the command so you don't have to type so much. + +Threads +======= + +Each process in a target machine is seen as a gdb thread. gdb thread related +commands (info threads, thread n) can be used. + +ia-32 hardware breakpoints +========================== + +gdb stub contains support for hardware breakpoints using debugging features +of ia-32(x86) processors. These breakpoints do not need code modification. +They use debugging registers. 4 hardware breakpoints are available in ia-32 +processors. + +Each hardware breakpoint can be of one of the following three types. +1. Execution breakpoint - An Execution breakpoint is triggered when code at the + breakpoint address is executed. + + As limited number of hardware breakpoints are available, it is advisable + to use software breakpoints ( break command ) instead of execution + hardware breakpoints, unless modification of code is to be avoided. + +2. Write breakpoint - A write breakpoint is triggered when memory location at the + breakpoint address is written. + + A write or can be placed for data of variable length. Length of a write + breakpoint indicates length of the datatype to be watched. Length is 1 + for 1 byte data , 2 for 2 byte data, 3 for 4 byte data. + +3. Access breakpoint - An access breakpoint is triggered when memory location at + the breakpoint address is either read or written. + + Access breakpoints also have lengths similar to write breakpoints. + +IO breakpoints in ia-32 are not supported. + +Since gdb stub at present does not use the protocol used by gdb for hardware +breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros +for hardware breakpoints are described below. + +hwebrk - Places an execution breakpoint + hwebrk breakpointno address +hwwbrk - Places a write breakpoint + hwwbrk breakpointno length address +hwabrk - Places an access breakpoint + hwabrk breakpointno length address +hwrmbrk - Removes a breakpoint + hwrmbrk breakpointno +exinfo - Tells whether a software or hardware breakpoint has occured. + Prints number of the hardware breakpoint if a hardware breakpoint has + occured. + +Arguments required by these commands are as follows +breakpointno - 0 to 3 +length - 1 to 3 +address - Memory location in hex digits ( without 0x ) e.g c015e9bc + +MP support +========== + +When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb client, +all the processors are forced to enter the debugger. Current thread +corresponds to the thread running on the processor where breakpoint occured. +Threads running on other processor(s) appear similar to other non running +threads in the 'info threads' output. + +ia-32 hardware debugging registers on all processors are set to same values. +Hence any hardware breakpoints may occur on any processor. + +gdb troubleshooting +=================== + +1. gdb hangs +Kill it. restart gdb. Connect to target machine. + +2. gdb cannot connect to target machine (after killing a gdb and restarting +another) +If the target machine was not inside debugger when you killed gdb, gdb cannot +connect because the target machine won't respond. +In this case echo "Ctrl+C"(ascii 3) in the serial line. +e.g. echo -e "\003" > /dev/ttyS1 +This forces that target machine into debugger after which you can connect. + +3. gdb cannot connect even after echoing Ctrl+C into serial line +Try changing serial line settings min to 1 and time to 0 +e.g. stty min 1 time 0 < /dev/ttyS1 +Try echoing again + +check serial line speed and set it to correct value if required +e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + +Final Items +=========== + +I picked up this code from Dave Grothe and enhanced it. + +If you make some really cool modification to this stuff, or if you +fix a bug, please let me know. + +Amit S. Kale + + +(First kgdb by David Grothe ) + +(modified by Tigran Aivazian ) + Putting gdbstub into the kernel config menu. + +(modified by Scott Foehner ) + Hooks for entering gdbstub at boot time. + +(modified by Amit S. Kale ) + Threads, ia-32 hw debugging, mp support, console support, + nmi watchdog handling. diff -urpN -X /home/fletch/.diff.exclude 00-virgin/Documentation/sysrq.txt 90-mjb1/Documentation/sysrq.txt --- 00-virgin/Documentation/sysrq.txt Thu Jan 2 22:04:57 2003 +++ 90-mjb1/Documentation/sysrq.txt Mon Jan 13 23:20:30 2003 @@ -73,6 +73,8 @@ On other - If you know of the key combos 'l' - Send a SIGKILL to all processes, INCLUDING init. (Your system will be non-functional after this.) +'g' - Enter the kernel debugger (if configured and supported). + 'h' - Will display help ( actually any other key than those listed above will display help. but 'h' is easy to remember :-) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/Makefile 90-mjb1/Makefile --- 00-virgin/Makefile Mon Jan 13 23:16:24 2003 +++ 90-mjb1/Makefile Mon Jan 13 23:24:22 2003 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 5 SUBLEVEL = 58 -EXTRAVERSION = +EXTRAVERSION = -mjb1 # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -47,7 +47,7 @@ TOPDIR := $(CURDIR) HOSTCC = gcc HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer +HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 HOSTCXXFLAGS = -O2 CROSS_COMPILE = @@ -260,8 +260,8 @@ ifdef CONFIG_MODULES export EXPORT_FLAGS := -DEXPORT_SYMTAB endif -ifndef CONFIG_FRAME_POINTER -CFLAGS += -fomit-frame-pointer +ifdef CONFIG_X86_REMOTE_DEBUG +CFLAGS += -g endif # diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/Kconfig 90-mjb1/arch/i386/Kconfig --- 00-virgin/arch/i386/Kconfig Mon Jan 13 23:16:24 2003 +++ 90-mjb1/arch/i386/Kconfig Mon Jan 13 23:23:44 2003 @@ -328,11 +328,6 @@ config X86_ALIGNMENT_16 depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 default y -config X86_TSC - bool - depends on MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 - default y - config X86_GOOD_APIC bool depends on MK7 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 @@ -486,6 +481,11 @@ config HAVE_ARCH_BOOTMEM_NODE depends on NUMA default y +config X86_TSC + bool + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8) && !X86_NUMAQ + default y + config X86_MCE bool "Machine Check Exception" ---help--- @@ -660,6 +660,44 @@ config HIGHMEM64G endchoice +choice + help + On i386, a process can only virtually address 4GB of memory. This + lets you select how much of that virtual space you would like to + devoted to userspace, and how much to the kernel. + + Some userspace programs would like to address as much as possible and + have few demands of the kernel other than it get out of the way. These + users may opt to use the 3.5GB option to give their userspace program + as much room as possible. Due to alignment issues imposed by PAE, + the "3.5GB" option is unavailable if "64GB" high memory support is + enabled. + + Other users (especially those who use PAE) may be running out of + ZONE_NORMAL memory. Those users may benefit from increasing the + kernel's virtual address space size by taking it away from userspace, + which may not need all of its space. An indicator that this is + happening is when /proc/Meminfo's "LowFree:" is a small percentage of + "LowTotal:" while "HighFree:" is very large. + + If unsure, say "3GB" + prompt "User address space size" + default 1GB + +config 05GB + bool "3.5 GB" + depends on !HIGHMEM64G + +config 1GB + bool "3 GB" + +config 2GB + bool "2 GB" + +config 3GB + bool "1 GB" +endchoice + config HIGHMEM bool depends on HIGHMEM64G || HIGHMEM4G @@ -679,6 +717,16 @@ config HIGHPTE low memory. Setting this option will put user-space page table entries in high memory. +config 4K_STACK + bool "Use smaller 4k per-task stacks" + help + This option will shrink the kernel's per-task stack from 8k to + 4k. This will greatly increase your chance of overflowing it. + But, if you use the per-cpu interrupt stacks as well, your chances + go way down. Also try the CONFIG_X86_STACK_CHECK overflow + detection. It is much more reliable than the currently in-kernel + version. + config MATH_EMULATION bool "Math emulation" ---help--- @@ -738,6 +786,25 @@ config MTRR See for more information. +choice + help + This is unrelated to your processor's speed. This variable alters + how often the system is asked to generate timer interrupts. A larger + value can lead to a more responsive system, but also causes extra + overhead from the increased number of context switches. + + If in doubt, leave it at the default of 1000. + + prompt "Kernel HZ" + default 1000HZ + +config 100HZ + bool "100 Hz" + +config 1000HZ + bool "1000 Hz" +endchoice + config HAVE_DEC_LOCK bool depends on (SMP || PREEMPT) && X86_CMPXCHG @@ -1555,6 +1622,17 @@ config DEBUG_SLAB allocation as well as poisoning memory on free to catch use of freed memory. +config X86_REMOTE_DEBUG + bool "KGDB: Remote (serial) kernel debugging with gdb" + +config KGDB_THREAD + bool "KGDB: Thread analysis" + depends on X86_REMOTE_DEBUG + +config GDB_CONSOLE + bool "KGDB: Console messages through gdb" + depends on X86_REMOTE_DEBUG + config DEBUG_IOVIRT bool "Memory mapped I/O debugging" depends on DEBUG_KERNEL @@ -1580,6 +1658,26 @@ config MAGIC_SYSRQ keys are documented in . Don't say Y unless you really know what this hack does. +config EARLY_PRINTK + bool "Early console support" + default n + depends on DEBUG_KERNEL + help + Write kernel log output directly into the VGA buffer or serial port. + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server.You should normally N here, + unless you want to debug such a crash. + + Syntax: earlyprintk=vga + earlyprintk=serial[,ttySn[,baudrate]] + Append ,keep to not disable it when the real console takes over. + Only vga or serial at a time, not both. + Currently only ttyS0 and ttyS1 are supported. + Interaction with the standard serial driver is not very good. + The VGA output is eventually overwritten by the real console. + config DEBUG_SPINLOCK bool "Spinlock debugging" depends on DEBUG_KERNEL @@ -1610,12 +1708,33 @@ config DEBUG_SPINLOCK_SLEEP noisy if they are called with a spinlock held. config FRAME_POINTER - bool "Compile the kernel with frame pointers" + bool + default y if X86_REMOTE_DEBUG + default n if !X86_REMOTE_DEBUG help If you say Y here the resulting kernel image will be slightly larger and slower, but it will give very useful debugging information. If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame pointers. + +config X86_STACK_CHECK + bool "Detect stack overflows" + depends on FRAME_POINTER + help + Say Y here to have the kernel attempt to detect when the per-task + kernel stack overflows. This is much more robust checking than + the above overflow check, which will only occasionally detect + an overflow. The level of guarantee here is much greater. + + Some older versions of gcc don't handle the -p option correctly. + Kernprof is affected by the same problem, which is described here: + http://oss.sgi.com/projects/kernprof/faq.html#Q9 + + Basically, if you get oopses in __free_pages_ok during boot when + you have this turned on, you need to fix gcc. The Redhat 2.96 + version and gcc-3.x seem to work. + + If not debugging a stack overflow problem, say N config X86_EXTRA_IRQS bool diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/Makefile 90-mjb1/arch/i386/Makefile --- 00-virgin/arch/i386/Makefile Mon Jan 13 23:16:24 2003 +++ 90-mjb1/arch/i386/Makefile Mon Jan 13 23:23:41 2003 @@ -69,9 +69,17 @@ mcore-$(CONFIG_X86_NUMAQ) := mach-defaul mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-i386/mach-bigsmp mcore-$(CONFIG_X86_BIGSMP) := mach-default +#Summit subarch support +mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit +mcore-$(CONFIG_X86_SUMMIT) := mach-default + # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default +ifdef CONFIG_X86_STACK_CHECK +CFLAGS += -p +endif + HEAD := arch/i386/kernel/head.o arch/i386/kernel/init_task.o libs-y += arch/i386/lib/ @@ -85,6 +93,7 @@ drivers-$(CONFIG_OPROFILE) += arch/i386 CFLAGS += $(mflags-y) AFLAGS += $(mflags-y) +AFLAGS_vmlinux.lds.o += -imacros $(TOPDIR)/include/asm-i386/page.h boot := arch/i386/boot diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/boot/compressed/misc.c 90-mjb1/arch/i386/boot/compressed/misc.c --- 00-virgin/arch/i386/boot/compressed/misc.c Thu Jan 2 22:04:58 2003 +++ 90-mjb1/arch/i386/boot/compressed/misc.c Mon Jan 13 23:23:41 2003 @@ -377,3 +377,7 @@ asmlinkage int decompress_kernel(struct if (high_loaded) close_output_buffer_if_we_run_high(mv); return high_loaded; } + +/* We don't actually check for stack overflows this early. */ +__asm__(".globl mcount ; mcount: ret\n"); + diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/Makefile 90-mjb1/arch/i386/kernel/Makefile --- 00-virgin/arch/i386/kernel/Makefile Thu Jan 2 22:04:58 2003 +++ 90-mjb1/arch/i386/kernel/Makefile Mon Jan 13 23:20:30 2003 @@ -17,6 +17,7 @@ obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_X86_REMOTE_DEBUG) += gdbstub.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_SLEEP) += acpi_wakeup.o @@ -31,6 +32,14 @@ obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_EDD) += edd.o obj-$(CONFIG_MODULES) += module.o obj-y += sysenter.o + +ifdef CONFIG_X86_REMOTE_DEBUG +GDBSTART=gdbstart +GDBCLEAN= -rm -f gdbstart /sbin/gdbstart +else +GDBSTART= +GDBCLEAN= +endif EXTRA_AFLAGS := -traditional diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/acpi.c 90-mjb1/arch/i386/kernel/acpi.c --- 00-virgin/arch/i386/kernel/acpi.c Mon Dec 16 21:50:37 2002 +++ 90-mjb1/arch/i386/kernel/acpi.c Mon Jan 13 23:16:48 2003 @@ -44,6 +44,8 @@ #include #include +#include +#include #define PREFIX "ACPI: " @@ -126,6 +128,8 @@ acpi_parse_madt ( printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n", madt->lapic_address); + acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); + return 0; } @@ -430,8 +434,10 @@ acpi_boot_init ( #endif /*CONFIG_X86_IO_APIC*/ #ifdef CONFIG_X86_LOCAL_APIC - if (acpi_lapic && acpi_ioapic) + if (acpi_lapic && acpi_ioapic) { smp_found_config = 1; + clustered_apic_check(); + } #endif return 0; diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/entry.S 90-mjb1/arch/i386/kernel/entry.S --- 00-virgin/arch/i386/kernel/entry.S Mon Jan 13 21:09:08 2003 +++ 90-mjb1/arch/i386/kernel/entry.S Mon Jan 13 23:23:41 2003 @@ -165,7 +165,7 @@ do_lcall: pushl %eax popfl - andl $-8192, %ebp # GET_THREAD_INFO + GET_THREAD_INFO_WITH_ESP(%ebp) # GET_THREAD_INFO movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp @@ -228,7 +228,7 @@ need_resched: jz restore_all movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) sti - call schedule + call user_schedule movl $0,TI_PRE_COUNT(%ebp) cli jmp need_resched @@ -310,7 +310,7 @@ work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: - call schedule + call user_schedule cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -401,7 +401,45 @@ vector=vector+1 ALIGN common_interrupt: SAVE_ALL + + + GET_THREAD_INFO(%ebx) + movl TI_IRQ_STACK(%ebx),%ecx + movl TI_TASK(%ebx),%edx + movl %esp,%eax + leal (THREAD_SIZE-4)(%ecx),%esi # %ecx+THREAD_SIZE is next stack + # -4 keeps us in the right one + testl %ecx,%ecx # is there a valid irq_stack? + + # switch to the irq stack +#ifdef CONFIG_X86_HAVE_CMOV + cmovnz %esi,%esp +#else + jz 1f + mov %esi,%esp +1: +#endif + + # update the task pointer in the irq stack + GET_THREAD_INFO(%esi) + movl %edx,TI_TASK(%esi) + + # update the preempt count in the irq stack + movl TI_PRE_COUNT(%ebx),%ecx + movl %ecx,TI_PRE_COUNT(%esi) + call do_IRQ + + movl %eax,%esp # potentially restore non-irq stack + + # copy flags from the irq stack back into the task's thread_info + # %esi is saved over the do_IRQ call and contains the irq stack + # thread_info pointer + # %ebx contains the original thread_info pointer + movl TI_FLAGS(%esi),%eax + movl $0,TI_FLAGS(%esi) + LOCK orl %eax,TI_FLAGS(%ebx) + jmp ret_from_intr #define BUILD_INTERRUPT(name, nr) \ @@ -514,6 +552,31 @@ ENTRY(double_fault) pushl $do_double_fault jmp error_code +#ifdef CONFIG_KGDB_THREAD +ENTRY(kern_schedule) + pushl %ebp + movl %esp, %ebp + pushl %ss + pushl %ebp + pushfl + pushl %cs + pushl 4(%ebp) + pushl %eax + pushl %es + pushl %ds + pushl %eax + pushl (%ebp) + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + call kern_do_schedule + movl %ebp, %esp + pop %ebp + ret +#endif + ENTRY(invalid_TSS) pushl $do_invalid_TSS jmp error_code @@ -549,6 +612,61 @@ ENTRY(spurious_interrupt_bug) pushl $0 pushl $do_spurious_interrupt_bug jmp error_code + + +#ifdef CONFIG_X86_STACK_CHECK +.data + .globl stack_overflowed +stack_overflowed: + .long 0 +.text + +ENTRY(mcount) + push %eax + movl $(THREAD_SIZE - 1),%eax + andl %esp,%eax + cmpl $STACK_WARN,%eax /* more than half the stack is used*/ + jle 1f +2: + popl %eax + ret +1: + lock; btsl $0,stack_overflowed + jc 2b + + # switch to overflow stack + movl %esp,%eax + movl $(stack_overflow_stack + THREAD_SIZE - 4),%esp + + pushf + cli + pushl %eax + + # push eip then esp of error for stack_overflow_panic + pushl 4(%eax) + pushl %eax + + # update the task pointer and cpu in the overflow stack's thread_info. + GET_THREAD_INFO_WITH_ESP(%eax) + movl TI_TASK(%eax),%ebx + movl %ebx,stack_overflow_stack+TI_TASK + movl TI_CPU(%eax),%ebx + movl %ebx,stack_overflow_stack+TI_CPU + + call stack_overflow + + # pop off call arguments + addl $8,%esp + + popl %eax + popf + movl %eax,%esp + popl %eax + movl $0,stack_overflowed + ret + +#warning stack check enabled +#endif .data ENTRY(sys_call_table) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/gdbstart.c 90-mjb1/arch/i386/kernel/gdbstart.c --- 00-virgin/arch/i386/kernel/gdbstart.c Wed Dec 31 16:00:00 1969 +++ 90-mjb1/arch/i386/kernel/gdbstart.c Mon Jan 13 23:20:30 2003 @@ -0,0 +1,147 @@ +/* + * This program opens a tty file and issues the GDB stub activating + * ioctl on it. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char *tty_name = "/dev/ttyS0" ; /* COM1 port */ +int speed = 9600 ; /* default speed */ +struct termios save_ts ; /* original term struct */ + +void print_usage(void) +{ + printf("gdbstub [-s speed] [-t tty-dev]\n") ; + printf(" defaults: /dev/ttyS0 with speed unmodified by this program\n"); + +} /* print_usage */ + +void tty_err(char *msg) +{ + char buf[100] ; + + strcpy(buf, msg) ; + strcat(buf, ": ") ; + strcat(buf, tty_name) ; + perror(buf) ; + exit(1) ; + +} /* tty_err */ + + +void setup_term(int fd) +{ + struct termios ts ; + int speed_code ; + + if (tcgetattr(fd, &ts) < 0) tty_err("tcgetattr") ; + + save_ts = ts ; + switch (speed) + { + case 4800: + speed_code = B4800 ; + break ; + case 9600: + speed_code = B9600 ; + break ; + case 19200: + speed_code = B19200 ; + break ; + case 38400: + speed_code = B38400 ; + break ; + case 57600: + speed_code = B57600 ; + break ; + case 115200: + speed_code = B115200 ; + break ; + case 230400: + speed_code = B230400 ; + break ; + default: + printf("Invalid speed: %d\n", speed) ; + exit(1) ; + } + + ts.c_cflag = CS8 | CREAD | CLOCAL ; + if (cfsetospeed(&ts, speed_code) < 0) tty_err("cfsetospeed") ; + if (cfsetispeed(&ts, speed_code) < 0) tty_err("cfsetispeed") ; + + if (tcsetattr(fd, TCSANOW, &ts) < 0) tty_err("tcsetattr") ; + +} /* setup_term */ + +int main(int argc, char **argv) +{ + int opt ; + int fil ; + int rslt ; + + while ((opt = getopt(argc, argv, "hs:t:")) > 0) + { + switch (opt) + { + case 's': + speed = atol(optarg) ; + break ; + case 't': + tty_name = optarg ; + break ; + case ':': + printf("Invalid option\n") ; + break ; + case '?': + case 'h': + default: + print_usage() ; + return 1; + } + } + + fil = open(tty_name, O_RDWR) ; + if (fil < 0) + { + perror(tty_name) ; + return 1; + } + + + setup_term(fil) ; + + /* + * When we issue this ioctl, control will not return until + * the debugger running on the remote host machine says "go". + */ + printf("\nAbout to activate GDB stub in the kernel on %s\n", tty_name) ; + printf("Hit CR to continue, kill program to abort -- ") ; + getchar() ; + sync() ; + rslt = ioctl(fil, TIOCGDB, 0) ; + if (rslt < 0) + { + perror("TIOCGDB ioctl") ; + return 1; + } + + printf("\nGDB stub successfully activated\n") ; + + for (;;) + { + pause() ; + } + + if (tcsetattr(fil, TCSANOW, &save_ts) < 0) tty_err("tcsetattr") ; + + exit(0); +} /* main */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/gdbstub.c 90-mjb1/arch/i386/kernel/gdbstub.c --- 00-virgin/arch/i386/kernel/gdbstub.c Wed Dec 31 16:00:00 1969 +++ 90-mjb1/arch/i386/kernel/gdbstub.c Mon Jan 13 23:20:30 2003 @@ -0,0 +1,1208 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (C) 2000-2001 VERITAS Software Corporation. + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: Amit Kale + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Origianl kgdb, compatibility with 2.1.xx kernel by David Grothe + * Integrated into 2.2.5 kernel by Tigran Aivazian + * thread support, + * support for multiple processors, + * support for ia-32(x86) hardware debugging, + * Console support, + * handling nmi watchdog + * Amit S. Kale ( akale@veritas.com ) + * + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#ifdef CONFIG_GDB_CONSOLE +#include +#endif +#include + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int putDebugChar(int); /* write a single character */ +extern int getDebugChar(void); /* read and return a single char */ + +extern int pid_max; + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 1024 + +static char initialized; /* boolean flag. != 0 means we've been initialized */ + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS +}; /* 15 */ + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* */ + +#define BREAKPOINT() asm(" int $3"); + +/* Put the error code here just in case the user cares. */ +int gdb_i386errcode; +/* Likewise, the vector number here (since GDB only gets the signal + number through the usual means, and that's not very specific). */ +int gdb_i386vector = -1; + +static spinlock_t slavecpulocks[KGDB_MAX_NO_CPUS]; +volatile int procindebug[KGDB_MAX_NO_CPUS]; + +#ifdef CONFIG_SMP +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +spinlock_t kgdb_nmispinlock = SPIN_LOCK_UNLOCKED; +#else +unsigned kgdb_spinlock = 0; +unsigned kgdb_nmispinlock = 0; +#endif + +static void +kgdb_usercode(void) +{ +} + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + do { + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + if (!putDebugChar(ch)) + return; + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + + } while ((getDebugChar() & 0x7f) != '+'); + +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static short error; + +static void +regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + gdb_regs[_ESP] = (int) (®s->esp); + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} /* regs_to_gdb_regs */ + +static void +gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; +#if 0 /* can't change these */ + regs->esp = gdb_regs[_ESP]; + regs->xss = gdb_regs[_SS]; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ + +/* Indicate to caller of mem2hex or hex2mem that there has been an + error. */ +static volatile int kgdb_memerr = 0; +volatile int kgdb_memerr_expected = 0; +static volatile int kgdb_memerr_cnt = 0; +static int garbage_loc = -1; + +int +get_char(char *addr) +{ + return *addr; +} + +void +set_char(char *addr, int val) +{ + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set kgdb_memerr in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + kgdb_memerr_expected = 1; + kgdb_memerr = 0; + } + for (i = 0; i < count; i++) { + + ch = get_char(mem++); + + if (may_fault && kgdb_memerr) { + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + kgdb_memerr_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + kgdb_memerr_expected = 1; + kgdb_memerr = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch); + + if (may_fault && kgdb_memerr) { + return (mem); + } + } + if (may_fault) + kgdb_memerr_expected = 0; + return (mem); +} + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToInt(char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + + *intValue = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#ifdef CONFIG_KGDB_THREAD +static int +stubhex(int ch) +{ + if (ch >= 'a' && ch <= 'f') + return ch - 'a' + 10; + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'A' && ch <= 'F') + return ch - 'A' + 10; + return -1; +} + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif + +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +#ifdef CONFIG_KGDB_THREAD +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif + +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} + +#ifdef CONFIG_KGDB_THREAD +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } +#if 0 + thread = init_tasks[0]; + do { + if (thread->pid == pid) { + return thread; + } + thread = thread->next_task; + } while (thread != init_tasks[0]); +#endif + return NULL; +} +#endif + +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned addr; +} breakinfo[4] = { { +enabled:0}, { +enabled:0}, { +enabled:0}, { +enabled:0}}; + +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned dr7; + + asm volatile ("movl %%db7, %0\n":"=r" (dr7) + :); + do { + unsigned addr0, addr1, addr2, addr3; + asm volatile ("movl %%db0, %0\n" + "movl %%db1, %1\n" + "movl %%db2, %2\n" + "movl %%db3, %3\n":"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3):); + } while (0); + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movl %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movl %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movl %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movl %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movl %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +void +gdb_wait(void *arg) +{ + unsigned flags; + int processor; + + local_irq_save(flags); + processor = smp_processor_id(); + procindebug[processor] = 1; + current->thread.kgdbregs = arg; + spin_lock(slavecpulocks + processor); + correct_hw_break(); + procindebug[processor] = 0; + local_irq_restore(flags); +} + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + int addr, length; + int breakno, breaktype; + char *ptr; + int newPC; + unsigned long flags = ~0UL; + int gdb_regs[NUMREGBYTES / 4]; + int i; + int dr6; + int reboot = 0; +#ifdef CONFIG_KGDB_THREAD + int nothreads; + int maxthreads; + int threadid; + threadref thref; + struct task_struct *thread = NULL; +#endif +#define regs (*linux_regs) + + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + return (0); + } + + if (kgdb_memerr_expected) { + /* + * This fault occured because of the get_char or set_char + * routines. These two routines use either eax of edx to + * indirectly reference the location in memory that they + * are working with. For a page fault, when we return + * the instruction will be retried, so we have to make + * sure that these registers point to valid memory. + */ + kgdb_memerr = 1; /* set mem error flag */ + kgdb_memerr_expected = 0; + kgdb_memerr_cnt++; /* helps in debugging */ + regs.eax = (long) &garbage_loc; /* make valid address */ + regs.edx = (long) &garbage_loc; /* make valid address */ + return (0); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_nmispinlock)) +#else + if (!kgdb_nmispinlock) +#endif + { + + /* Get kgdb spinlock */ +#ifdef CONFIG_SMP + _raw_spin_lock(&kgdb_spinlock); +#else + kgdb_spinlock = 1; +#endif + + local_irq_save(flags); + + /* Disable hardware debugging while we are in kgdb */ + __asm__("movl %0,%%db7": /* no output */ + :"r"(0)); + + for (i = 0; i < NR_CPUS; i++) { + spin_lock_init(&slavecpulocks[i]); + _raw_spin_lock(&slavecpulocks[i]); + } + + if (num_online_cpus() > 1) { + /* Force other cpus in debugger */ + if (smp_call_function(gdb_wait, NULL, 0, 99) != 0) { + return (1); + } + } + + procindebug[smp_processor_id()] = 1; + } + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'g': /* return the value of the CPU registers */ + if (!usethread || usethread == current) { + regs_to_gdb_regs(gdb_regs, ®s); + } else { + memset(gdb_regs, 0, NUMREGBYTES); + if (usethread->thread.kgdbregs) { + kgdb_memerr_expected = 1; + kgdb_memerr = 0; + get_char((char *) usethread->thread. + kgdbregs); + kgdb_memerr_expected = 0; + if (kgdb_memerr) { + gdb_regs[_PC] = + (int) kgdb_usercode; + } else { + regs_to_gdb_regs(gdb_regs, + usethread-> + thread. + kgdbregs); + } + } else { + gdb_regs[_PC] = (int) kgdb_usercode; + } + } + mem2hex((char *) gdb_regs, remcomOutBuffer, NUMREGBYTES, + 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], (char *) gdb_regs, + NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) + if (*(ptr++) == ',') + if (hexToInt(&ptr, &length)) { + ptr = 0; + mem2hex((char *) addr, + remcomOutBuffer, length, + 1); + if (kgdb_memerr) { + strcpy(remcomOutBuffer, + "E03"); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + } + break; + + /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) + if (*(ptr++) == ',') + if (hexToInt(&ptr, &length)) + if (*(ptr++) == ':') { + hex2mem(ptr, + (char *) addr, + length, 1); + + if (kgdb_memerr) { + strcpy + (remcomOutBuffer, + "E03"); + } else { + strcpy + (remcomOutBuffer, + "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + } + break; + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + case 'c': + case 's': +#ifdef CONFIG_SMP + if (spin_is_locked(&kgdb_nmispinlock)) +#else + if (kgdb_nmispinlock) +#endif + { + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* try to read optional parameter, pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + regs.eip = addr; + } + + newPC = regs.eip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno)) { + if (breakinfo[breakno].type == + 0) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + } + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + for (i = 0; i < NR_CPUS; i++) { + _raw_spin_unlock(&slavecpulocks[i]); + } + + procindebug[smp_processor_id()] = 0; + /* Release kgdb spinlock */ +#ifdef CONFIG_SMP + _raw_spin_unlock(&kgdb_spinlock); +#else + kgdb_spinlock = 0; +#endif + if (flags != ~0UL) + local_irq_restore(flags); + return (0); + + /* kill the program */ + case 'k': + break; + + /* query */ + case 'q': + switch (remcomInBuffer[1]) { +#ifdef CONFIG_KGDB_THREAD + case 'L': + /* List threads */ + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads + && threadid < pid_max; threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + } + } + if (threadid == pid_max) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; + + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; +#endif + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, err_code, + remcomOutBuffer); + break; + } + break; + +#ifdef CONFIG_KGDB_THREAD + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + usethread = thread; + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; +#endif + + case 'r': + reboot = 1; + strcpy(remcomOutBuffer, "OK"); + break; + case 'Y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break + (breakno & 0x3, breaktype & 0x3, length & 0x3, addr) + == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + if (reboot == 1) { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt)); + __asm__ __volatile__("int3"); + } + } +} + +/* this function is used to set up exception handlers for tracing and + breakpoints */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + */ + linux_debug_hook = handle_exception; + + /* + * In case GDB is started before us, ack any packets (presumably + * "$?#xx") sitting there. */ + putDebugChar('+'); + + initialized = 1; +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ + +void +breakpoint(void) +{ + if (initialized) + BREAKPOINT(); +} + +#ifdef CONFIG_GDB_CONSOLE +char gdbconbuf[BUFMAX]; + +void +gdb_console_write(struct console *co, const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + + if (!gdb_initialized) { + return; + } + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } +} +#endif +static int __init +kgdb_opt_gdb(char *dummy) +{ + gdb_enter = 1; + return 1; +} +static int __init +kgdb_opt_gdbttyS(char *str) +{ + gdb_ttyS = simple_strtoul(str, NULL, 10); + return 1; +} +static int __init +kgdb_opt_gdbbaud(char *str) +{ + gdb_baud = simple_strtoul(str, NULL, 10); + return 1; +} + +/* + * Sequence of these lines has to be maintained because gdb option is a prefix + * of the other two options + */ + +__setup("gdbttyS=", kgdb_opt_gdbttyS); +__setup("gdbbaud=", kgdb_opt_gdbbaud); +__setup("gdb", kgdb_opt_gdb); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/head.S 90-mjb1/arch/i386/kernel/head.S --- 00-virgin/arch/i386/kernel/head.S Thu Jan 2 22:04:58 2003 +++ 90-mjb1/arch/i386/kernel/head.S Mon Jan 13 23:23:34 2003 @@ -16,6 +16,7 @@ #include #include #include +#include #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -309,7 +310,7 @@ rp_sidt: ret ENTRY(stack_start) - .long init_thread_union+8192 + .long init_thread_union+THREAD_SIZE .long __BOOT_DS /* This is the default interrupt "handler" :-) */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/i386_ksyms.c 90-mjb1/arch/i386/kernel/i386_ksyms.c --- 00-virgin/arch/i386/kernel/i386_ksyms.c Mon Jan 13 21:09:20 2003 +++ 90-mjb1/arch/i386/kernel/i386_ksyms.c Mon Jan 13 23:23:41 2003 @@ -146,6 +146,20 @@ EXPORT_SYMBOL(smp_num_siblings); EXPORT_SYMBOL(cpu_sibling_map); #endif +#ifdef CONFIG_X86_REMOTE_DEBUG +void __this_fixmap_does_not_exist(void) +{ + BUG(); +} +EXPORT_SYMBOL(__this_fixmap_does_not_exist); + +void __br_lock_usage_bug(void) +{ + BUG(); +} +EXPORT_SYMBOL(__br_lock_usage_bug); +#endif + #ifdef CONFIG_SMP EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(cpu_online_map); @@ -213,4 +227,9 @@ EXPORT_SYMBOL(kmap_atomic_to_page); #ifdef CONFIG_EDD_MODULE EXPORT_SYMBOL(edd); EXPORT_SYMBOL(eddnr); +#endif + +#ifdef CONFIG_X86_STACK_CHECK +extern void mcount(void); +EXPORT_SYMBOL(mcount); #endif diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/init_task.c 90-mjb1/arch/i386/kernel/init_task.c --- 00-virgin/arch/i386/kernel/init_task.c Sun Nov 17 20:29:56 2002 +++ 90-mjb1/arch/i386/kernel/init_task.c Mon Jan 13 23:23:41 2003 @@ -13,6 +13,14 @@ static struct files_struct init_files = static struct signal_struct init_signals = INIT_SIGNALS(init_signals); struct mm_struct init_mm = INIT_MM(init_mm); +union thread_union init_irq_union + __attribute__((__section__(".data.init_task"))); + +#ifdef CONFIG_X86_STACK_CHECK +union thread_union stack_overflow_stack + __attribute__((__section__(".data.init_task"))); +#endif + /* * Initial thread structure. * diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/io_apic.c 90-mjb1/arch/i386/kernel/io_apic.c --- 00-virgin/arch/i386/kernel/io_apic.c Mon Dec 23 23:01:44 2002 +++ 90-mjb1/arch/i386/kernel/io_apic.c Mon Jan 13 23:16:53 2003 @@ -278,7 +278,7 @@ static inline void balance_irq(int irq) new_cpu = move(entry->cpu, allowed_mask, now, random_number); if (entry->cpu != new_cpu) { entry->cpu = new_cpu; - set_ioapic_affinity(irq, 1 << new_cpu); + set_ioapic_affinity(irq, cpu_to_logical_apicid(new_cpu)); } } } @@ -719,8 +719,8 @@ void __init setup_IO_APIC_irqs(void) */ memset(&entry,0,sizeof(entry)); - entry.delivery_mode = dest_LowestPrio; - entry.dest_mode = INT_DELIVERY_MODE; + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; entry.mask = 0; /* enable IRQ */ entry.dest.logical.logical_dest = TARGET_CPUS; @@ -799,10 +799,10 @@ void __init setup_ExtINT_IRQ0_pin(unsign * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; entry.mask = 0; /* unmask IRQ now */ entry.dest.logical.logical_dest = TARGET_CPUS; - entry.delivery_mode = dest_LowestPrio; + entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -1763,7 +1763,7 @@ late_initcall(io_apic_bug_finalize); #ifdef CONFIG_ACPI_BOOT -#define IO_APIC_MAX_ID 15 +#define IO_APIC_MAX_ID APIC_BROADCAST_ID int __init io_apic_get_unique_id (int ioapic, int apic_id) { @@ -1880,8 +1880,8 @@ int io_apic_set_pci_routing (int ioapic, memset(&entry,0,sizeof(entry)); - entry.delivery_mode = dest_LowestPrio; - entry.dest_mode = INT_DELIVERY_MODE; + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; entry.dest.logical.logical_dest = TARGET_CPUS; entry.mask = 1; /* Disabled (masked) */ entry.trigger = 1; /* Level sensitive */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/irq.c 90-mjb1/arch/i386/kernel/irq.c --- 00-virgin/arch/i386/kernel/irq.c Sun Nov 17 20:29:22 2002 +++ 90-mjb1/arch/i386/kernel/irq.c Mon Jan 13 23:23:37 2003 @@ -311,7 +311,8 @@ void enable_irq(unsigned int irq) * SMP cross-CPU interrupts have their own specific * handlers). */ -asmlinkage unsigned int do_IRQ(struct pt_regs regs) +struct pt_regs *do_IRQ(struct pt_regs *regs) __attribute__((regparm(1))); +struct pt_regs *do_IRQ(struct pt_regs *regs) { /* * We ack quickly, we don't want the irq controller @@ -323,7 +324,7 @@ asmlinkage unsigned int do_IRQ(struct pt * 0 return value means that this irq is already being * handled by some other CPU. (or is disabled) */ - int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */ + int irq = regs->orig_eax & 0xff; /* high bits used in ret_from_ code */ int cpu = smp_processor_id(); irq_desc_t *desc = irq_desc + irq; struct irqaction * action; @@ -388,7 +389,7 @@ asmlinkage unsigned int do_IRQ(struct pt */ for (;;) { spin_unlock(&desc->lock); - handle_IRQ_event(irq, ®s, action); + handle_IRQ_event(irq, regs, action); spin_lock(&desc->lock); if (likely(!(desc->status & IRQ_PENDING))) @@ -407,7 +408,7 @@ out: irq_exit(); - return 1; + return regs; } /** diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/mpparse.c 90-mjb1/arch/i386/kernel/mpparse.c --- 00-virgin/arch/i386/kernel/mpparse.c Mon Jan 13 21:09:08 2003 +++ 90-mjb1/arch/i386/kernel/mpparse.c Mon Jan 13 23:16:48 2003 @@ -72,8 +72,8 @@ static unsigned int __initdata num_proce /* Bitmask of physically existing CPUs */ unsigned long phys_cpu_present_map; -int summit_x86 = 0; -u8 raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +int x86_summit = 0; +u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; /* * Intel MP BIOS table parsing routines: @@ -186,7 +186,7 @@ void __init MP_processor_info (struct mp ver = 0x10; } apic_version[m->mpc_apicid] = ver; - raw_phys_apicid[num_processors - 1] = m->mpc_apicid; + bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } static void __init MP_bus_info (struct mpc_config_bus *m) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/nmi.c 90-mjb1/arch/i386/kernel/nmi.c --- 00-virgin/arch/i386/kernel/nmi.c Thu Jan 2 22:04:58 2003 +++ 90-mjb1/arch/i386/kernel/nmi.c Mon Jan 13 23:20:30 2003 @@ -20,11 +20,26 @@ #include #include #include +#include #include #include #include +#ifdef CONFIG_X86_REMOTE_DEBUG +extern gdb_debug_hook * linux_debug_hook; +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ + { \ + if (linux_debug_hook != (gdb_debug_hook *) NULL && !user_mode(regs)) \ + { \ + (*linux_debug_hook)(trapnr, signr, error_code, regs) ; \ + after; \ + } \ + } +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + unsigned int nmi_watchdog = NMI_NONE; static unsigned int nmi_hz = HZ; unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ @@ -363,12 +378,59 @@ void nmi_watchdog_tick (struct pt_regs * sum = irq_stat[cpu].apic_timer_irqs; if (last_irq_sums[cpu] == sum) { +#ifdef CONFIG_X86_REMOTE_DEBUG +#ifdef CONFIG_SMP + if (spin_is_locked(&kgdb_spinlock)) +#else + if (kgdb_spinlock) +#endif + { + /* We are inside kgdb, this isn't a stuck cpu */ + alert_counter[cpu] = 0; + } else { +#ifdef CONFIG_SMP + if (spin_is_locked(&kgdb_nmispinlock)) +#else + if (kgdb_nmispinlock) +#endif + { + if (!procindebug[cpu]) { + procindebug[cpu] = 1; + current->thread.kgdbregs = regs; + while (1) { + /* nothing */ + } + } + return; + } + } +#endif /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; if (alert_counter[cpu] == 5*nmi_hz) { +#ifdef CONFIG_X86_REMOTE_DEBUG +#ifdef CONFIG_SMP + if (spin_trylock(&kgdb_nmispinlock)) +#else + kgdb_nmispinlock = 1; +#endif + { + procindebug[cpu] = 1; + CHK_REMOTE_DEBUG(2,SIGBUS,0,regs,) + } +#ifdef CONFIG_SMP + else { + procindebug[cpu] = 1; + current->thread.kgdbregs = regs; + while (1) { + /* nothing */ + } + } +#endif +#endif spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/process.c 90-mjb1/arch/i386/kernel/process.c --- 00-virgin/arch/i386/kernel/process.c Thu Jan 9 19:15:56 2003 +++ 90-mjb1/arch/i386/kernel/process.c Mon Jan 13 23:23:41 2003 @@ -159,7 +159,25 @@ static int __init idle_setup (char *str) __setup("idle=", idle_setup); -void show_regs(struct pt_regs * regs) +void stack_overflow(unsigned long esp, unsigned long eip) +{ + int panicing = ((esp&(THREAD_SIZE-1)) <= STACK_PANIC); + + printk( "esp: 0x%lx masked: 0x%lx STACK_PANIC:0x%x %d %d\n", + esp, (esp&(THREAD_SIZE-1)), STACK_PANIC, (((esp&(THREAD_SIZE-1)) <= STACK_PANIC)), panicing ); + + if (panicing) + print_symbol("stack overflow from %s\n", eip); + else + print_symbol("excessive stack use from %s\n", eip); + printk("esp: %p\n", (void*)esp); + show_trace((void*)esp); + + if (panicing) + panic("stack overflow\n"); +} + +asmlinkage void show_regs(struct pt_regs * regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -432,6 +450,7 @@ void __switch_to(struct task_struct *pre /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ + next_p->thread_info->irq_stack = prev_p->thread_info->irq_stack; unlazy_fpu(prev_p); /* diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/smp.c 90-mjb1/arch/i386/kernel/smp.c --- 00-virgin/arch/i386/kernel/smp.c Thu Jan 9 19:15:56 2003 +++ 90-mjb1/arch/i386/kernel/smp.c Mon Jan 13 23:20:30 2003 @@ -24,6 +24,7 @@ #include #include #include +#include /* * Some notes on x86 processor bugs affecting SMP operation: @@ -515,10 +516,17 @@ int smp_call_function (void (*func) (voi { struct call_data_struct data; int cpus = num_online_cpus()-1; + int count = 0; + int gdb; - if (!cpus) + if (cpus <= 0) return 0; + gdb = 0; + if (wait == 99) { + wait = 0; + gdb = 1; + } data.func = func; data.info = info; atomic_set(&data.started, 0); @@ -533,12 +541,27 @@ int smp_call_function (void (*func) (voi send_IPI_allbutself(CALL_FUNCTION_VECTOR); /* Wait for response */ - while (atomic_read(&data.started) != cpus) + while (atomic_read(&data.started) != cpus) { + if (gdb) { + if (count++ == 2000000) { + printk("%s: timeout\n", __FUNCTION__); + break; + } + if (count == 1000000) { + printk("looks bad\n"); + printk("cpus=%d, started=%d\n", cpus, + atomic_read(&data.started)); + } + if (count > 1000000) + udelay(1); + } barrier(); + } if (wait) while (atomic_read(&data.finished) != cpus) barrier(); + spin_unlock(&call_lock); return 0; @@ -580,9 +603,9 @@ asmlinkage void smp_reschedule_interrupt ack_APIC_irq(); } -asmlinkage void smp_call_function_interrupt(void) +asmlinkage void smp_call_function_interrupt(struct pt_regs regs) { - void (*func) (void *info) = call_data->func; + void (*func) (void *info, struct pt_regs *) = (void (*)(void *, struct pt_regs*))call_data->func; void *info = call_data->info; int wait = call_data->wait; @@ -597,7 +620,7 @@ asmlinkage void smp_call_function_interr * At this point the info structure may be out of scope unless wait==1 */ irq_enter(); - (*func)(info); + (*func)(info, ®s); irq_exit(); if (wait) { diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/smpboot.c 90-mjb1/arch/i386/kernel/smpboot.c --- 00-virgin/arch/i386/kernel/smpboot.c Thu Jan 9 19:15:56 2003 +++ 90-mjb1/arch/i386/kernel/smpboot.c Mon Jan 13 23:23:37 2003 @@ -71,6 +71,11 @@ static unsigned long smp_commenced_mask; /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +/* Per CPU interrupt stacks */ +extern union thread_union init_irq_union; +union thread_union *irq_stacks[NR_CPUS] __cacheline_aligned = + { &init_irq_union, }; + /* Set when the idlers are all forked */ int smp_threads_ready; @@ -772,6 +777,28 @@ wakeup_secondary_cpu(int phys_apicid, un } #endif /* WAKE_SECONDARY_VIA_INIT */ +static void __init setup_irq_stack(struct task_struct *p, int cpu) +{ + unsigned long stk; + + stk = __get_free_pages(GFP_KERNEL, THREAD_ORDER); + if (!stk) + panic("I can't seem to allocate my irq stack. Oh well, giving up."); + + irq_stacks[cpu] = (void *)stk; + memset(irq_stacks[cpu], 0, THREAD_SIZE); + irq_stacks[cpu]->thread_info.cpu = cpu; + irq_stacks[cpu]->thread_info.preempt_count = 1; + /* interrupts are not preemptable */ + p->thread_info->irq_stack = &irq_stacks[cpu]->thread_info; + + /* If we want to make the irq stack more than one unit + * deep, we can chain then off of the irq_stack pointer + * here. + */ +} + + extern unsigned long cpu_initialized; static int __init do_boot_cpu(int apicid) @@ -796,6 +823,8 @@ static int __init do_boot_cpu(int apicid if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); + setup_irq_stack(idle, cpu); + /* * We remove it from the pidhash and the runqueue * once we got the process: @@ -1045,10 +1074,10 @@ static void __init smp_boot_cpus(unsigne /* * Don't even attempt to start the boot CPU! */ - if (apicid == boot_cpu_apicid) + if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID)) continue; - if (!(phys_cpu_present_map & (1 << bit))) + if (!check_apicid_present(bit)) continue; if (max_cpus <= cpucount+1) continue; diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/kernel/traps.c 90-mjb1/arch/i386/kernel/traps.c --- 00-virgin/arch/i386/kernel/traps.c Mon Jan 13 21:09:20 2003 +++ 90-mjb1/arch/i386/kernel/traps.c Mon Jan 13 23:20:30 2003 @@ -50,6 +50,24 @@ #include #include +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif + +#ifdef CONFIG_X86_REMOTE_DEBUG +gdb_debug_hook * linux_debug_hook; +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ + { \ + if (linux_debug_hook != (gdb_debug_hook *) NULL && !user_mode(regs)) \ + { \ + (*linux_debug_hook)(trapnr, signr, error_code, regs) ; \ + after; \ + } \ + } +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + asmlinkage int system_call(void); asmlinkage void lcall7(void); asmlinkage void lcall27(void); @@ -252,6 +270,7 @@ void die(const char * str, struct pt_reg bust_spinlocks(1); handle_BUG(regs); printk("%s: %04lx\n", str, err & 0xffff); + CHK_REMOTE_DEBUG(1,SIGTRAP,err,regs,) show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); @@ -312,6 +331,7 @@ static inline void do_trap(int trapnr, i #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -329,7 +349,9 @@ asmlinkage void do_##name(struct pt_regs #define DO_VM86_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,return)\ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ + return; \ } #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -374,8 +396,10 @@ gp_in_vm86: return; gp_in_kernel: - if (!fixup_exception(regs)) + if (!fixup_exception(regs)) { + CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,); die("general protection fault", regs, error_code); + } } static void mem_parity_error(unsigned char reason, struct pt_regs * regs) @@ -537,8 +561,10 @@ asmlinkage void do_debug(struct pt_regs * allowing programs to debug themselves without the ptrace() * interface. */ +#ifndef CONFIG_X86_REMOTE_DEBUG if ((regs->xcs & 3) == 0) goto clear_TF_reenable; +#endif if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) goto clear_TF; } @@ -550,11 +576,13 @@ asmlinkage void do_debug(struct pt_regs info.si_errno = 0; info.si_code = TRAP_BRKPT; - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ - info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : - (void *)regs->eip; + + /* If this is a kernel mode trap, we need to reset db7 to allow us + * to continue sanely */ + if ((regs->xcs & 3) == 0) + goto clear_dr7; + + info.si_addr = (void *)regs->eip; force_sig_info(SIGTRAP, &info, tsk); /* Disable additional traps. They'll be re-enabled when @@ -564,13 +592,16 @@ clear_dr7: __asm__("movl %0,%%db7" : /* no output */ : "r" (0)); + CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) return; debug_vm86: handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); return; +#ifndef CONFIG_X86_REMOTE_DEBUG clear_TF_reenable: +#endif set_tsk_thread_flag(tsk, TIF_SINGLESTEP); clear_TF: regs->eflags &= ~TF_MASK; diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/mm/discontig.c 90-mjb1/arch/i386/mm/discontig.c --- 00-virgin/arch/i386/mm/discontig.c Sun Nov 17 20:29:47 2002 +++ 90-mjb1/arch/i386/mm/discontig.c Mon Jan 13 23:20:23 2003 @@ -48,6 +48,14 @@ extern unsigned long max_low_pfn; extern unsigned long totalram_pages; extern unsigned long totalhigh_pages; +#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) + +unsigned long node_remap_start_pfn[MAX_NUMNODES]; +unsigned long node_remap_size[MAX_NUMNODES]; +unsigned long node_remap_offset[MAX_NUMNODES]; +void *node_remap_start_vaddr[MAX_NUMNODES]; +void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); + /* * Find the highest page frame number we have available for the node */ @@ -65,12 +73,13 @@ static void __init find_max_pfn_node(int */ static void __init allocate_pgdat(int nid) { - unsigned long node_datasz; - - node_datasz = PFN_UP(sizeof(struct pglist_data)); - NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); - min_low_pfn += node_datasz; - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + if (nid) + NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; + else { + NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); + min_low_pfn += PFN_UP(sizeof(pg_data_t)); + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + } } /* @@ -113,14 +122,6 @@ static void __init register_bootmem_low_ } } -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - -unsigned long node_remap_start_pfn[MAX_NUMNODES]; -unsigned long node_remap_size[MAX_NUMNODES]; -unsigned long node_remap_offset[MAX_NUMNODES]; -void *node_remap_start_vaddr[MAX_NUMNODES]; -extern void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - void __init remap_numa_kva(void) { void *vaddr; @@ -145,7 +146,7 @@ static unsigned long calculate_numa_rema for (nid = 1; nid < numnodes; nid++) { /* calculate the size of the mem_map needed in bytes */ size = (node_end_pfn[nid] - node_start_pfn[nid] + 1) - * sizeof(struct page); + * sizeof(struct page) + sizeof(pg_data_t); /* convert size to large (pmd size) pages, rounding up */ size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; /* now the roundup is correct, convert to PAGE_SIZE pages */ @@ -195,9 +196,9 @@ unsigned long __init setup_memory(void) printk("Low memory ends at vaddr %08lx\n", (ulong) pfn_to_kaddr(max_low_pfn)); for (nid = 0; nid < numnodes; nid++) { - allocate_pgdat(nid); node_remap_start_vaddr[nid] = pfn_to_kaddr( highstart_pfn - node_remap_offset[nid]); + allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], (ulong) pfn_to_kaddr(highstart_pfn @@ -251,13 +252,6 @@ unsigned long __init setup_memory(void) */ find_smp_config(); - /*insert other nodes into pgdat_list*/ - for (nid = 1; nid < numnodes; nid++){ - NODE_DATA(nid)->pgdat_next = pgdat_list; - pgdat_list = NODE_DATA(nid); - } - - #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (system_max_low_pfn << PAGE_SHIFT)) { @@ -282,6 +276,18 @@ void __init zone_sizes_init(void) { int nid; + /* + * Insert nodes into pgdat_list backward so they appear in order. + * Clobber node 0's links and NULL out pgdat_list before starting. + */ + pgdat_list = NULL; + for (nid = numnodes - 1; nid >= 0; nid--) { + if (nid) + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->pgdat_next = pgdat_list; + pgdat_list = NODE_DATA(nid); + } + for (nid = 0; nid < numnodes; nid++) { unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; unsigned int max_dma; @@ -312,13 +318,18 @@ void __init zone_sizes_init(void) * normal bootmem allocator, but other nodes come from the * remapped KVA area - mbligh */ - if (nid) - free_area_init_node(nid, NODE_DATA(nid), - node_remap_start_vaddr[nid], zones_size, - start, 0); - else + if (!nid) free_area_init_node(nid, NODE_DATA(nid), 0, zones_size, start, 0); + else { + unsigned long lmem_map; + lmem_map = (unsigned long)node_remap_start_vaddr[nid]; + lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; + lmem_map &= PAGE_MASK; + free_area_init_node(nid, NODE_DATA(nid), + (struct page *)lmem_map, zones_size, + start, 0); + } } return; } diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/mm/fault.c 90-mjb1/arch/i386/mm/fault.c --- 00-virgin/arch/i386/mm/fault.c Mon Jan 13 21:09:20 2003 +++ 90-mjb1/arch/i386/mm/fault.c Mon Jan 13 23:20:30 2003 @@ -2,6 +2,11 @@ * linux/arch/i386/mm/fault.c * * Copyright (C) 1995 Linus Torvalds + * + * Change History + * + * Tigran Aivazian Remote debugging support. + * */ #include @@ -20,6 +25,9 @@ #include #include /* For unblank_screen() */ #include +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif #include #include @@ -193,6 +201,15 @@ asmlinkage void do_page_fault(struct pt_ if (in_atomic() || !mm) goto no_context; +#ifdef CONFIG_X86_REMOTE_DEBUG + if (kgdb_memerr_expected) { + if (linux_debug_hook != (gdb_debug_hook *) NULL) { + (*linux_debug_hook)(14, SIGSEGV, error_code, regs) ; + return; /* return w/modified regs */ + } + } +#endif + down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -291,6 +308,19 @@ bad_area: force_sig_info(SIGSEGV, &info, tsk); return; } + +#ifdef CONFIG_X86_REMOTE_DEBUG + if (kgdb_memerr_expected) { + if (linux_debug_hook != (gdb_debug_hook *) NULL) { + (*linux_debug_hook)(14, SIGSEGV, error_code, regs); + return; /* Return with modified registers */ + } + } else { + if (linux_debug_hook != (gdb_debug_hook *) NULL) { + (*linux_debug_hook)(14, SIGSEGV, error_code, regs); + } + } +#endif #ifdef CONFIG_X86_F00F_BUG /* diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/i386/vmlinux.lds.S 90-mjb1/arch/i386/vmlinux.lds.S --- 00-virgin/arch/i386/vmlinux.lds.S Mon Jan 13 21:09:20 2003 +++ 90-mjb1/arch/i386/vmlinux.lds.S Mon Jan 13 23:20:10 2003 @@ -7,7 +7,7 @@ ENTRY(_start) jiffies = jiffies_64; SECTIONS { - . = 0xC0000000 + 0x100000; + . = __PAGE_OFFSET + 0x100000; /* read-only */ _text = .; /* Text and read-only data */ .text : { diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/sparc64/kernel/rtrap.S 90-mjb1/arch/sparc64/kernel/rtrap.S --- 00-virgin/arch/sparc64/kernel/rtrap.S Sun Nov 17 20:29:45 2002 +++ 90-mjb1/arch/sparc64/kernel/rtrap.S Mon Jan 13 23:20:30 2003 @@ -33,7 +33,7 @@ __handle_softirq: ba,a,pt %xcc, __handle_softirq_continue nop __handle_preemption: - call schedule + call user_schedule wrpr %g0, RTRAP_PSTATE, %pstate ba,pt %xcc, __handle_preemption_continue wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate @@ -48,7 +48,7 @@ __handle_user_windows: be,pt %xcc, 1f nop - call schedule + call user_schedule wrpr %g0, RTRAP_PSTATE, %pstate wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate ldx [%g6 + TI_FLAGS], %l0 @@ -92,7 +92,7 @@ __handle_perfctrs: be,pt %xcc, 1f nop - call schedule + call user_schedule wrpr %g0, RTRAP_PSTATE, %pstate wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate ldx [%g6 + TI_FLAGS], %l0 @@ -271,7 +271,7 @@ to_kernel: brnz %l5, kern_fpucheck sethi %hi(PREEMPT_ACTIVE), %l6 stw %l6, [%g6 + TI_PRE_COUNT] - call schedule + call user_schedule nop ba,pt %xcc, rtrap stw %g0, [%g6 + TI_PRE_COUNT] diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/x86_64/kernel/early_printk.c 90-mjb1/arch/x86_64/kernel/early_printk.c --- 00-virgin/arch/x86_64/kernel/early_printk.c Sun Nov 17 20:29:50 2002 +++ 90-mjb1/arch/x86_64/kernel/early_printk.c Wed Dec 31 16:00:00 1969 @@ -1,218 +0,0 @@ -#include -#include -#include -#include -#include - -/* Simple VGA output */ - -#define VGABASE 0xffffffff800b8000UL - -#define MAX_YPOS 25 -#define MAX_XPOS 80 - -static int current_ypos = 1, current_xpos = 0; - -static void early_vga_write(struct console *con, const char *str, unsigned n) -{ - char c; - int i, k, j; - - while ((c = *str++) != '\0' && n-- > 0) { - if (current_ypos >= MAX_YPOS) { - /* scroll 1 line up */ - for(k = 1, j = 0; k < MAX_YPOS; k++, j++) { - for(i = 0; i < MAX_XPOS; i++) { - writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), - VGABASE + 2*(MAX_XPOS*j + i)); - } - } - for(i = 0; i < MAX_XPOS; i++) { - writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); - } - current_ypos = MAX_YPOS-1; - } - if (c == '\n') { - current_xpos = 0; - current_ypos++; - } else if (c != '\r') { - writew(((0x7 << 8) | (unsigned short) c), - VGABASE + 2*(MAX_XPOS*current_ypos + current_xpos++)); - if (current_xpos >= MAX_XPOS) { - current_xpos = 0; - current_ypos++; - } - } - } -} - -static struct console early_vga_console = { - .name = "earlyvga", - .write = early_vga_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; - -/* Serial functions losely based on a similar package from Klaus P. Gerlicher */ - -int early_serial_base = 0x3f8; /* ttyS0 */ - -#define XMTRDY 0x20 - -#define DLAB 0x80 - -#define TXR 0 /* Transmit register (WRITE) */ -#define RXR 0 /* Receive register (READ) */ -#define IER 1 /* Interrupt Enable */ -#define IIR 2 /* Interrupt ID */ -#define FCR 2 /* FIFO control */ -#define LCR 3 /* Line control */ -#define MCR 4 /* Modem control */ -#define LSR 5 /* Line Status */ -#define MSR 6 /* Modem Status */ -#define DLL 0 /* Divisor Latch Low */ -#define DLH 1 /* Divisor latch High */ - -static int early_serial_putc(unsigned char ch) -{ - unsigned timeout = 0xffff; - while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) - rep_nop(); - outb(ch, early_serial_base + TXR); - return timeout ? 0 : -1; -} - -static void early_serial_write(struct console *con, const char *s, unsigned n) -{ - while (*s && n-- > 0) { - early_serial_putc(*s); - if (*s == '\n') - early_serial_putc('\r'); - s++; - } -} - -static __init void early_serial_init(char *opt) -{ - unsigned char c; - unsigned divisor, baud = 38400; - char *s, *e; - - if (*opt == ',') - ++opt; - - s = strsep(&opt, ","); - if (s != NULL) { - unsigned port; - if (!strncmp(s,"0x",2)) - early_serial_base = simple_strtoul(s, &e, 16); - else { - static int bases[] = { 0x3f8, 0x2f8 }; - if (!strncmp(s,"ttyS",4)) - s+=4; - port = simple_strtoul(s, &e, 10); - if (port > 1 || s == e) - port = 0; - early_serial_base = bases[port]; - } - } - - outb(0x3, early_serial_base + LCR); /* 8n1 */ - outb(0, early_serial_base + IER); /* no interrupt */ - outb(0, early_serial_base + FCR); /* no fifo */ - outb(0x3, early_serial_base + MCR); /* DTR + RTS */ - - s = strsep(&opt, ","); - if (s != NULL) { - baud = simple_strtoul(s, &e, 0); - if (baud == 0 || s == e) - baud = 38400; - } - - divisor = 115200 / baud; - c = inb(early_serial_base + LCR); - outb(c | DLAB, early_serial_base + LCR); - outb(divisor & 0xff, early_serial_base + DLL); - outb((divisor >> 8) & 0xff, early_serial_base + DLH); - outb(c & ~DLAB, early_serial_base + LCR); -} - -static struct console early_serial_console = { - .name = "earlyser", - .write = early_serial_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; - -/* Direct interface for emergencies */ -struct console *early_console = &early_vga_console; -static int early_console_initialized = 0; - -void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; - va_list ap; - va_start(ap,fmt); - n = vsnprintf(buf,512,fmt,ap); - early_console->write(early_console,buf,n); - va_end(ap); -} - -static int keep_early; - -int __init setup_early_printk(char *opt) -{ - char *space; - char buf[256]; - - if (early_console_initialized) - return -1; - - strncpy(buf,opt,256); - buf[255] = 0; - space = strchr(buf, ' '); - if (space) - *space = 0; - - if (strstr(buf,"keep")) - keep_early = 1; - - if (!strncmp(buf, "serial", 6)) { - early_serial_init(buf + 6); - early_console = &early_serial_console; - } else if (!strncmp(buf, "ttyS", 4)) { - early_serial_init(buf); - early_console = &early_serial_console; - } else if (!strncmp(buf, "vga", 3)) { - early_console = &early_vga_console; - } else { - early_console = NULL; - return -1; - } - early_console_initialized = 1; - register_console(early_console); - return 0; -} - -void __init disable_early_printk(void) -{ - if (!early_console_initialized || !early_console) - return; - if (!keep_early) { - printk("disabling early console...\n"); - unregister_console(early_console); - early_console_initialized = 0; - } else { - printk("keeping early console.\n"); - } -} - -/* syntax: earlyprintk=vga - earlyprintk=serial[,ttySn[,baudrate]] - Append ,keep to not disable it when the real console takes over. - Only vga or serial at a time, not both. - Currently only ttyS0 and ttyS1 are supported. - Interaction with the standard serial driver is not very good. - The VGA output is eventually overwritten by the real console. */ -__setup("earlyprintk=", setup_early_printk); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/x86_64/kernel/entry.S 90-mjb1/arch/x86_64/kernel/entry.S --- 00-virgin/arch/x86_64/kernel/entry.S Mon Dec 23 23:01:49 2002 +++ 90-mjb1/arch/x86_64/kernel/entry.S Mon Jan 13 23:20:30 2003 @@ -187,7 +187,7 @@ sysret_careful: jnc sysret_signal sti pushq %rdi - call schedule + call user_schedule popq %rdi jmp sysret_check @@ -256,7 +256,7 @@ int_careful: jnc int_very_careful sti pushq %rdi - call schedule + call user_schedule popq %rdi jmp int_with_check @@ -420,7 +420,7 @@ retint_careful: jnc retint_signal sti pushq %rdi - call schedule + call user_schedule popq %rdi GET_THREAD_INFO(%rcx) cli @@ -454,7 +454,7 @@ retint_kernel: jc retint_restore_args movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx) sti - call schedule + call user_schedule cli GET_THREAD_INFO(%rcx) movl $0,threadinfo_preempt_count(%rcx) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/arch/x86_64/kernel/head64.c 90-mjb1/arch/x86_64/kernel/head64.c --- 00-virgin/arch/x86_64/kernel/head64.c Sun Nov 17 20:29:32 2002 +++ 90-mjb1/arch/x86_64/kernel/head64.c Mon Jan 13 23:20:05 2003 @@ -70,7 +70,7 @@ static void __init setup_boot_cpu_data(v boot_cpu_data.x86_mask = eax & 0xf; } -extern void start_kernel(void), pda_init(int), setup_early_printk(char *); +extern void start_kernel(void), pda_init(int); extern int disable_apic; void __init x86_64_start_kernel(char * real_mode_data) @@ -80,9 +80,6 @@ void __init x86_64_start_kernel(char * r clear_bss(); pda_init(0); copy_bootdata(real_mode_data); - s = strstr(saved_command_line, "earlyprintk="); - if (s != NULL) - setup_early_printk(s+12); #ifdef CONFIG_X86_IO_APIC if (strstr(saved_command_line, "disableapic")) disable_apic = 1; diff -urpN -X /home/fletch/.diff.exclude 00-virgin/drivers/char/Makefile 90-mjb1/drivers/char/Makefile --- 00-virgin/drivers/char/Makefile Mon Jan 13 23:16:25 2003 +++ 90-mjb1/drivers/char/Makefile Mon Jan 13 23:20:30 2003 @@ -32,6 +32,7 @@ obj-$(CONFIG_COMPUTONE) += ip2.o ip2main obj-$(CONFIG_RISCOM8) += riscom8.o obj-$(CONFIG_ISI) += isicom.o obj-$(CONFIG_ESPSERIAL) += esp.o +obj-$(CONFIG_X86_REMOTE_DEBUG) += gdbserial.o obj-$(CONFIG_SYNCLINK) += synclink.o obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o obj-$(CONFIG_N_HDLC) += n_hdlc.o diff -urpN -X /home/fletch/.diff.exclude 00-virgin/drivers/char/gdbserial.c 90-mjb1/drivers/char/gdbserial.c --- 00-virgin/drivers/char/gdbserial.c Wed Dec 31 16:00:00 1969 +++ 90-mjb1/drivers/char/gdbserial.c Mon Jan 13 23:20:30 2003 @@ -0,0 +1,274 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * + * Modified by Scott Foehner (sfoehner@engr.sgi.com) to allow connect + * on boot-up + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#undef PRNT /* define for debug printing */ + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +extern void set_debug_traps(void); /* GDB routine */ +extern int gdb_serial_setup(int ttyS, int baud, int *port, int *irq); +extern void shutdown_for_gdb(struct async_struct *info); + /* in serial.c */ + +int gdb_irq; +int gdb_port; +int gdb_ttyS = 1; /* Default: ttyS1 */ +int gdb_baud = 38400; +int gdb_enter = 0; /* Default: do not do gdb_hook on boot */ +int gdb_initialized = 0; + +static int initialized = -1; + +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(void) +{ + if (inb(gdb_port + UART_LSR) & UART_LSR_DR) + return (inb(gdb_port + UART_RX)); + + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + */ +static int +read_char(void) +{ + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + int chr; + + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + return (chr); + } + + return (read_data_bfr()); /* read from hardware */ + +} /* read_char */ + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(int chr) +{ + while (!(inb(gdb_port + UART_LSR) & UART_LSR_THRE)) ; + + outb(chr, gdb_port + UART_TX); + +} /* write_char */ + +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static void +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + int chr; + int iir; + + do { + chr = read_data_bfr(); + iir = inb(gdb_port + UART_IIR); +#ifdef PRNT + printk("gdb_interrupt: chr=%02x '%c' after read iir=%02x\n", + chr, chr > ' ' && chr < 0x7F ? chr : ' ', iir); +#endif + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + breakpoint(); + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { /* buffer overflow, clear it */ + gdb_buf_in_inx = 0; + atomic_set(&gdb_buf_in_cnt, 0); + gdb_buf_out_inx = 0; + break; + } + + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + atomic_inc(&gdb_buf_in_cnt); + } + while (iir & UART_IIR_RDI); + +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +extern int serial8250_init(void); + +int +gdb_hook(void) +{ + int retval; + +#ifdef CONFIG_SMP + if (NR_CPUS > KGDB_MAX_NO_CPUS) { + printk + ("kgdb: too manu cpus. Cannot enable debugger with more than 8 cpus\n"); + return (-1); + } +#endif + + /* + * Call first time just to get the ser ptr + */ + + serial8250_init(); + + if (gdb_serial_setup(gdb_ttyS, gdb_baud, &gdb_port, &gdb_irq)) { + printk("gdb_serial_setup() error"); + return (-1); + } + + retval = request_irq(gdb_irq, + gdb_interrupt, SA_INTERRUPT, "GDB-stub", NULL); + if (retval == 0) + initialized = 1; + else { + initialized = 0; + printk("gdb_hook: request_irq(irq=%d) failed: %d\n", gdb_irq, + retval); + } + + /* + * Call GDB routine to setup the exception vectors for the debugger + */ + set_debug_traps(); + + /* + * Call the breakpoint() routine in GDB to start the debugging + * session. + */ + printk("Waiting for connection from remote gdb... "); + breakpoint(); + gdb_null(); + + printk("Connected.\n"); + + gdb_initialized = 1; + return (0); + +} /* gdb_hook_interrupt2 */ + +/* + * getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. + */ +int +getDebugChar(void) +{ + volatile int chr; + +#ifdef PRNT + printk("getDebugChar: "); +#endif + + while ((chr = read_char()) < 0) + touch_nmi_watchdog(); + +#ifdef PRNT + printk("%c\n", chr > ' ' && chr < 0x7F ? chr : ' '); +#endif + return (chr); + +} /* getDebugChar */ + +/* + * putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. + */ +void +putDebugChar(int chr) +{ +#ifdef PRNT + printk("putDebugChar: chr=%02x '%c'\n", chr, + chr > ' ' && chr < 0x7F ? chr : ' '); +#endif + + write_char(chr); /* this routine will wait */ + +} /* putDebugChar */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/drivers/char/sysrq.c 90-mjb1/drivers/char/sysrq.c --- 00-virgin/drivers/char/sysrq.c Thu Jan 2 22:05:04 2003 +++ 90-mjb1/drivers/char/sysrq.c Mon Jan 13 23:20:30 2003 @@ -107,6 +107,18 @@ static struct sysrq_key_op sysrq_reboot_ .action_msg = "Resetting", }; +#ifdef CONFIG_X86_REMOTE_DEBUG +static void sysrq_handle_gdb(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) { + int gdb_hook(void); + gdb_hook(); +} +static struct sysrq_key_op sysrq_gdb_op = { + handler: sysrq_handle_gdb, + help_msg: "Gdb", + action_msg: "Entering debugger", +}; +#endif /* SYNC SYSRQ HANDLERS BLOCK */ @@ -357,7 +369,11 @@ static struct sysrq_key_op *sysrq_key_ta /* d */ NULL, /* e */ &sysrq_term_op, /* f */ NULL, +#ifdef CONFIG_X86_REMOTE_DEBUG +/* g */ &sysrq_gdb_op, +#else /* CONFIG_X86_REMOTE_DEBUG */ /* g */ NULL, +#endif /* CONFIG_X86_REMOTE_DEBUG */ /* h */ NULL, /* i */ &sysrq_kill_op, /* j */ NULL, diff -urpN -X /home/fletch/.diff.exclude 00-virgin/drivers/char/tty_io.c 90-mjb1/drivers/char/tty_io.c --- 00-virgin/drivers/char/tty_io.c Thu Jan 2 22:05:04 2003 +++ 90-mjb1/drivers/char/tty_io.c Mon Jan 13 23:20:30 2003 @@ -91,6 +91,9 @@ #include #include #include +#ifdef CONFIG_GDB_CONSOLE +#include +#endif #include #include @@ -2203,6 +2206,9 @@ void __init console_init(void) #endif #ifdef CONFIG_VT con_init(); +#endif +#ifdef CONFIG_GDB_CONSOLE + gdb_console_init(); #endif #ifdef CONFIG_AU1000_SERIAL_CONSOLE au1000_serial_console_init(); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/drivers/serial/8250.c 90-mjb1/drivers/serial/8250.c --- 00-virgin/drivers/serial/8250.c Mon Jan 13 21:09:12 2003 +++ 90-mjb1/drivers/serial/8250.c Mon Jan 13 23:20:30 2003 @@ -2031,9 +2031,116 @@ void serial8250_get_irq_map(unsigned int } } -static int __init serial8250_init(void) +#ifdef CONFIG_X86_REMOTE_DEBUG +/* + * Takes: + * ttyS - integer specifying which serial port to use for debugging + * baud - baud rate of specified serial port + * Returns: + * port for use by the gdb serial driver + */ +int gdb_serial_setup(int ttyS, int baud, int *port, int *irq) +{ + struct uart_8250_port *up; + unsigned cval; + int bits = 8; + int parity = 'n'; + int cflag = CREAD | HUPCL | CLOCAL; + int quot = 0; + + /* + * Now construct a cflag setting. + */ + switch(baud) { + case 1200: + cflag |= B1200; + break; + case 2400: + cflag |= B2400; + break; + case 4800: + cflag |= B4800; + break; + case 19200: + cflag |= B19200; + break; + case 38400: + cflag |= B38400; + break; + case 57600: + cflag |= B57600; + break; + case 115200: + cflag |= B115200; + break; + case 9600: + default: + cflag |= B9600; + break; + } + switch(bits) { + case 7: + cflag |= CS7; + break; + default: + case 8: + cflag |= CS8; + break; + } + switch(parity) { + case 'o': case 'O': + cflag |= PARODD; + break; + case 'e': case 'E': + cflag |= PARENB; + break; + } + + /* + * Divisor, bytesize and parity + */ + + up = &serial8250_ports[ttyS]; +// ser->flags &= ~ASYNC_BOOT_AUTOCONF; + quot = ( 1843200 / 16 ) / baud; + cval = cflag & (CSIZE | CSTOPB); + cval >>= 4; + if (cflag & PARENB) + cval |= UART_LCR_PARITY; + if (!(cflag & PARODD)) + cval |= UART_LCR_EPAR; + + /* + * Disable UART interrupts, set DTR and RTS high + * and set speed. + */ + cval = 0x3; + serial_outp(up, UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ + serial_outp(up, UART_DLL, quot & 0xff); /* LS of divisor */ + serial_outp(up, UART_DLM, quot >> 8); /* MS of divisor */ + serial_outp(up, UART_LCR, cval); /* reset DLAB */ + serial_outp(up, UART_IER, UART_IER_RDI); /* turn on interrupts*/ + serial_outp(up, UART_MCR, UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS); + + /* + * If we read 0xff from the LSR, there is no UART here. + */ + if (serial_inp(up, UART_LSR) == 0xff) + return 1; + *port = up->port.iobase; + *irq = up->port.irq; +// serial8250_shutdown(&up->port); + return 0; +} +#endif + +int serial8250_init(void) { int ret, i; + static int didit = 0; + + if (didit++) + return 0; printk(KERN_INFO "Serial: 8250/16550 driver $Revision: 1.90 $ " "IRQ sharing %sabled\n", share_irqs ? "en" : "dis"); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/drivers/serial/core.c 90-mjb1/drivers/serial/core.c --- 00-virgin/drivers/serial/core.c Mon Jan 13 21:09:12 2003 +++ 90-mjb1/drivers/serial/core.c Mon Jan 13 23:20:30 2003 @@ -36,6 +36,10 @@ #include #include /* for serial_state and serial_icounter_struct */ +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif + #include #include @@ -1040,6 +1044,17 @@ uart_ioctl(struct tty_struct *tty, struc (unsigned int *)arg); break; +#ifdef CONFIG_X86_REMOTE_DEBUG + case TIOCGDB: + ret = -ENOTTY; + if (capable(CAP_SYS_ADMIN)) { + gdb_ttyS = minor(tty->device) & 0x03F; + gdb_baud = tty_get_baud_rate(tty); + ret = gdb_hook(); + } + break; +#endif + case TIOCMBIS: case TIOCMBIC: case TIOCMSET: @@ -1115,6 +1130,30 @@ uart_ioctl(struct tty_struct *tty, struc } return ret; } + + /* + * ------------------------------------------------------------ + * Serial GDB driver (most in gdbserial.c) + * ------------------------------------------------------------ + */ + +#ifdef CONFIG_X86_REMOTE_DEBUG +#ifdef CONFIG_GDB_CONSOLE +static struct console gdbcons = { + name: "gdb", + write: gdb_console_write, + flags: CON_PRINTBUFFER | CON_ENABLED, + index: -1, +}; +#endif + +#ifdef CONFIG_GDB_CONSOLE +void __init gdb_console_init(void) +{ + register_console(&gdbcons); +} +#endif +#endif /* CONFIG_X86_REMOTE_DEBUG */ static void uart_set_termios(struct tty_struct *tty, struct termios *old_termios) { diff -urpN -X /home/fletch/.diff.exclude 00-virgin/fs/dcache.c 90-mjb1/fs/dcache.c --- 00-virgin/fs/dcache.c Sun Dec 1 10:00:00 2002 +++ 90-mjb1/fs/dcache.c Mon Jan 13 23:17:11 2003 @@ -24,6 +24,7 @@ #include #include #include +#include #include #define DCACHE_PARANOIA 1 @@ -55,6 +56,15 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; +static void d_callback(void *arg) +{ + struct dentry * dentry = (struct dentry *)arg; + + if (dname_external(dentry)) + kfree(dentry->d_name.name); + kmem_cache_free(dentry_cache, dentry); +} + /* * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry * inside dcache_lock. @@ -63,9 +73,7 @@ static void d_free(struct dentry *dentry { if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - if (dname_external(dentry)) - kfree(dentry->d_name.name); - kmem_cache_free(dentry_cache, dentry); + call_rcu(&dentry->d_rcu, d_callback, dentry); } /* @@ -126,9 +134,13 @@ repeat: if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) return; - /* dput on a free dentry? */ - if (!list_empty(&dentry->d_lru)) - BUG(); + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + return; + } + /* * AV: ->d_delete() is _NOT_ allowed to block now. */ @@ -139,8 +151,12 @@ repeat: /* Unreachable? Get rid of it */ if (d_unhashed(dentry)) goto kill_it; - list_add(&dentry->d_lru, &dentry_unused); - dentry_stat.nr_unused++; + if (list_empty(&dentry->d_lru)) { + dentry->d_vfs_flags &= ~DCACHE_REFERENCED; + list_add(&dentry->d_lru, &dentry_unused); + dentry_stat.nr_unused++; + } + spin_unlock(&dentry->d_lock); dentry->d_vfs_flags |= DCACHE_REFERENCED; spin_unlock(&dcache_lock); return; @@ -150,7 +166,12 @@ unhash_it: kill_it: { struct dentry *parent; - list_del(&dentry->d_child); + if (!list_empty(&dentry->d_lru)) { + list_del(&dentry->d_lru); + dentry_stat.nr_unused--; + } + list_del(&dentry->d_child); + spin_unlock(&dentry->d_lock); dentry_stat.nr_dentry--; /* For d_free, below */ /* drops the lock, at that point nobody can reach this dentry */ dentry_iput(dentry); @@ -222,6 +243,7 @@ int d_invalidate(struct dentry * dentry) static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); + dentry->d_vfs_flags |= DCACHE_REFERENCED; if (atomic_read(&dentry->d_count) == 1) { dentry_stat.nr_unused--; list_del_init(&dentry->d_lru); @@ -289,8 +311,8 @@ restart: struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); if (!atomic_read(&dentry->d_count)) { __dget_locked(dentry); + __d_drop(dentry); spin_unlock(&dcache_lock); - d_drop(dentry); dput(dentry); goto restart; } @@ -310,6 +332,7 @@ static inline void prune_one_dentry(stru __d_drop(dentry); list_del(&dentry->d_child); + spin_unlock(&dentry->d_lock); dentry_stat.nr_dentry--; /* For d_free, below */ dentry_iput(dentry); parent = dentry->d_parent; @@ -343,18 +366,20 @@ static void prune_dcache(int count) if (tmp == &dentry_unused) break; list_del_init(tmp); + dentry_stat.nr_unused--; dentry = list_entry(tmp, struct dentry, d_lru); + spin_lock(&dentry->d_lock); /* If the dentry was recently referenced, don't free it. */ if (dentry->d_vfs_flags & DCACHE_REFERENCED) { dentry->d_vfs_flags &= ~DCACHE_REFERENCED; - list_add(&dentry->d_lru, &dentry_unused); + if (!atomic_read(&dentry->d_count)) { + list_add(&dentry->d_lru, &dentry_unused); + dentry_stat.nr_unused++; + } + spin_unlock(&dentry->d_lock); continue; } - dentry_stat.nr_unused--; - - /* Unused dentry with a count? */ - BUG_ON(atomic_read(&dentry->d_count)); prune_one_dentry(dentry); } spin_unlock(&dcache_lock); @@ -414,10 +439,13 @@ repeat: dentry = list_entry(tmp, struct dentry, d_lru); if (dentry->d_sb != sb) continue; - if (atomic_read(&dentry->d_count)) - continue; dentry_stat.nr_unused--; list_del_init(tmp); + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count)) { + spin_unlock(&dentry->d_lock); + continue; + } prune_one_dentry(dentry); goto repeat; } @@ -497,8 +525,8 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + list_del_init(&dentry->d_lru); if (!atomic_read(&dentry->d_count)) { - list_del(&dentry->d_lru); list_add(&dentry->d_lru, dentry_unused.prev); found++; } @@ -561,8 +589,8 @@ void shrink_dcache_anon(struct list_head spin_lock(&dcache_lock); list_for_each(lp, head) { struct dentry *this = list_entry(lp, struct dentry, d_hash); + list_del(&this->d_lru); if (!atomic_read(&this->d_count)) { - list_del(&this->d_lru); list_add_tail(&this->d_lru, &dentry_unused); found++; } @@ -648,7 +676,8 @@ struct dentry * d_alloc(struct dentry * str[name->len] = 0; atomic_set(&dentry->d_count, 1); - dentry->d_vfs_flags = 0; + dentry->d_vfs_flags = DCACHE_UNHASHED; + dentry->d_lock = SPIN_LOCK_UNLOCKED; dentry->d_flags = 0; dentry->d_inode = NULL; dentry->d_parent = NULL; @@ -785,12 +814,15 @@ struct dentry * d_alloc_anon(struct inod res = tmp; tmp = NULL; if (res) { + spin_lock(&res->d_lock); res->d_sb = inode->i_sb; res->d_parent = res; res->d_inode = inode; res->d_flags |= DCACHE_DISCONNECTED; + res->d_vfs_flags &= ~DCACHE_UNHASHED; list_add(&res->d_alias, &inode->i_dentry); list_add(&res->d_hash, &inode->i_sb->s_anon); + spin_unlock(&res->d_lock); } inode = NULL; /* don't drop reference */ } @@ -859,30 +891,16 @@ struct dentry *d_splice_alias(struct ino struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { - struct dentry * dentry; - spin_lock(&dcache_lock); - dentry = __d_lookup(parent,name); - if (dentry) - __dget_locked(dentry); - spin_unlock(&dcache_lock); - return dentry; -} - -struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) -{ - unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; struct list_head *head = d_hash(parent,hash); struct list_head *tmp; + struct dentry *found = NULL; - tmp = head->next; - for (;;) { + rcu_read_lock(); + __list_for_each_rcu(tmp, head) { struct dentry * dentry = list_entry(tmp, struct dentry, d_hash); - if (tmp == head) - break; - tmp = tmp->next; if (dentry->d_name.hash != hash) continue; if (dentry->d_parent != parent) @@ -896,9 +914,14 @@ struct dentry * __d_lookup(struct dentry if (memcmp(dentry->d_name.name, str, len)) continue; } - return dentry; - } - return NULL; + spin_lock(&dentry->d_lock); + if (!d_unhashed(dentry)) + found = dget(dentry); + spin_unlock(&dentry->d_lock); + break; + } + rcu_read_unlock(); + return found; } /** @@ -937,7 +960,7 @@ int d_validate(struct dentry *dentry, st lhp = base = d_hash(dparent, dentry->d_name.hash); while ((lhp = lhp->next) != base) { if (dentry == list_entry(lhp, struct dentry, d_hash)) { - __dget_locked(dentry); + dget(dentry); spin_unlock(&dcache_lock); return 1; } @@ -974,17 +997,18 @@ void d_delete(struct dentry * dentry) * Are we the only user? */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) == 1) { + spin_unlock(&dentry->d_lock); dentry_iput(dentry); return; } - spin_unlock(&dcache_lock); - /* - * If not, just drop the dentry and let dput - * pick up the tab.. - */ - d_drop(dentry); + if (!d_unhashed(dentry)) + __d_drop(dentry); + + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); } /** @@ -997,9 +1021,10 @@ void d_delete(struct dentry * dentry) void d_rehash(struct dentry * entry) { struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!d_unhashed(entry)) BUG(); spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); + if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG(); + entry->d_vfs_flags &= ~DCACHE_UNHASHED; + list_add_rcu(&entry->d_hash, list); spin_unlock(&dcache_lock); } diff -urpN -X /home/fletch/.diff.exclude 00-virgin/fs/exec.c 90-mjb1/fs/exec.c --- 00-virgin/fs/exec.c Mon Jan 13 21:09:13 2003 +++ 90-mjb1/fs/exec.c Mon Jan 13 23:20:18 2003 @@ -1031,6 +1031,8 @@ int do_execve(char * filename, char ** a int retval; int i; + sched_balance_exec(); + file = open_exec(filename); retval = PTR_ERR(file); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/fs/namei.c 90-mjb1/fs/namei.c --- 00-virgin/fs/namei.c Mon Jan 13 21:09:27 2003 +++ 90-mjb1/fs/namei.c Mon Jan 13 23:17:09 2003 @@ -286,27 +286,6 @@ static struct dentry * cached_lookup(str return dentry; } -/*for fastwalking*/ -static inline void unlock_nd(struct nameidata *nd) -{ - struct vfsmount *mnt = nd->old_mnt; - struct dentry *dentry = nd->old_dentry; - mntget(nd->mnt); - dget_locked(nd->dentry); - nd->old_mnt = NULL; - nd->old_dentry = NULL; - spin_unlock(&dcache_lock); - dput(dentry); - mntput(mnt); -} - -static inline void lock_nd(struct nameidata *nd) -{ - spin_lock(&dcache_lock); - nd->old_mnt = nd->mnt; - nd->old_dentry = nd->dentry; -} - /* * Short-cut version of permission(), for calling by * path_walk(), when dcache lock is held. Combines parts @@ -451,11 +430,18 @@ static int follow_mount(struct vfsmount { int res = 0; while (d_mountpoint(*dentry)) { - struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); - if (!mounted) + struct vfsmount *mounted; + spin_lock(&dcache_lock); + mounted = lookup_mnt(*mnt, *dentry); + if (!mounted) { + spin_unlock(&dcache_lock); break; - *mnt = mounted; - *dentry = mounted->mnt_root; + } + *mnt = mntget(mounted); + spin_unlock(&dcache_lock); + dput(*dentry); + mntput(mounted->mnt_parent); + *dentry = dget(mounted->mnt_root); res = 1; } return res; @@ -488,17 +474,32 @@ static inline void follow_dotdot(struct { while(1) { struct vfsmount *parent; + struct dentry *old = *dentry; + + read_lock(¤t->fs->lock); if (*dentry == current->fs->root && - *mnt == current->fs->rootmnt) + *mnt == current->fs->rootmnt) { + read_unlock(¤t->fs->lock); break; + } + read_unlock(¤t->fs->lock); + spin_lock(&dcache_lock); if (*dentry != (*mnt)->mnt_root) { - *dentry = (*dentry)->d_parent; + *dentry = dget((*dentry)->d_parent); + spin_unlock(&dcache_lock); + dput(old); break; } - parent=(*mnt)->mnt_parent; - if (parent == *mnt) + parent = (*mnt)->mnt_parent; + if (parent == *mnt) { + spin_unlock(&dcache_lock); break; - *dentry=(*mnt)->mnt_mountpoint; + } + mntget(parent); + *dentry = dget((*mnt)->mnt_mountpoint); + spin_unlock(&dcache_lock); + dput(old); + mntput(*mnt); *mnt = parent; } follow_mount(mnt, dentry); @@ -515,14 +516,13 @@ struct path { * It _is_ time-critical. */ static int do_lookup(struct nameidata *nd, struct qstr *name, - struct path *path, struct path *cached_path, - int flags) + struct path *path, int flags) { struct vfsmount *mnt = nd->mnt; - struct dentry *dentry = __d_lookup(nd->dentry, name); + struct dentry *dentry = d_lookup(nd->dentry, name); if (!dentry) - goto dcache_miss; + goto need_lookup; if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; done: @@ -530,36 +530,21 @@ done: path->dentry = dentry; return 0; -dcache_miss: - unlock_nd(nd); - need_lookup: dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE); if (IS_ERR(dentry)) goto fail; - mntget(mnt); -relock: - dput(cached_path->dentry); - mntput(cached_path->mnt); - cached_path->mnt = mnt; - cached_path->dentry = dentry; - lock_nd(nd); goto done; need_revalidate: - mntget(mnt); - dget_locked(dentry); - unlock_nd(nd); if (dentry->d_op->d_revalidate(dentry, flags)) - goto relock; + goto done; if (d_invalidate(dentry)) - goto relock; + goto done; dput(dentry); - mntput(mnt); goto need_lookup; fail: - lock_nd(nd); return PTR_ERR(dentry); } @@ -573,7 +558,7 @@ fail: */ int link_path_walk(const char * name, struct nameidata *nd) { - struct path next, pinned = {NULL, NULL}; + struct path next; struct inode *inode; int err; unsigned int lookup_flags = nd->flags; @@ -594,10 +579,8 @@ int link_path_walk(const char * name, st unsigned int c; err = exec_permission_lite(inode); - if (err == -EAGAIN) { - unlock_nd(nd); + if (err == -EAGAIN) { err = permission(inode, MAY_EXEC); - lock_nd(nd); } if (err) break; @@ -648,7 +631,7 @@ int link_path_walk(const char * name, st break; } /* This does the actual lookups.. */ - err = do_lookup(nd, &this, &next, &pinned, LOOKUP_CONTINUE); + err = do_lookup(nd, &this, &next, LOOKUP_CONTINUE); if (err) break; /* Check mountpoints.. */ @@ -657,21 +640,16 @@ int link_path_walk(const char * name, st err = -ENOENT; inode = next.dentry->d_inode; if (!inode) - break; + goto out_dput; err = -ENOTDIR; if (!inode->i_op) - break; + goto out_dput; if (inode->i_op->follow_link) { - mntget(next.mnt); - dget_locked(next.dentry); - unlock_nd(nd); err = do_follow_link(next.dentry, nd); dput(next.dentry); - mntput(next.mnt); if (err) goto return_err; - lock_nd(nd); err = -ENOENT; inode = nd->dentry->d_inode; if (!inode) @@ -680,6 +658,7 @@ int link_path_walk(const char * name, st if (!inode->i_op) break; } else { + dput(nd->dentry); nd->mnt = next.mnt; nd->dentry = next.dentry; } @@ -711,24 +690,20 @@ last_component: if (err < 0) break; } - err = do_lookup(nd, &this, &next, &pinned, 0); + err = do_lookup(nd, &this, &next, 0); if (err) break; follow_mount(&next.mnt, &next.dentry); inode = next.dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) && inode && inode->i_op && inode->i_op->follow_link) { - mntget(next.mnt); - dget_locked(next.dentry); - unlock_nd(nd); err = do_follow_link(next.dentry, nd); dput(next.dentry); - mntput(next.mnt); if (err) goto return_err; inode = nd->dentry->d_inode; - lock_nd(nd); } else { + dput(nd->dentry); nd->mnt = next.mnt; nd->dentry = next.dentry; } @@ -751,23 +726,19 @@ lookup_parent: else if (this.len == 2 && this.name[1] == '.') nd->last_type = LAST_DOTDOT; return_base: - unlock_nd(nd); - dput(pinned.dentry); - mntput(pinned.mnt); return 0; +out_dput: + dput(next.dentry); + break; } - unlock_nd(nd); path_release(nd); return_err: - dput(pinned.dentry); - mntput(pinned.mnt); return err; } int path_walk(const char * name, struct nameidata *nd) { current->total_link_count = 0; - lock_nd(nd); return link_path_walk(name, nd); } @@ -855,28 +826,24 @@ int path_lookup(const char *name, unsign { nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; + + read_lock(¤t->fs->lock); if (*name=='/') { - read_lock(¤t->fs->lock); if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { nd->mnt = mntget(current->fs->altrootmnt); nd->dentry = dget(current->fs->altroot); read_unlock(¤t->fs->lock); if (__emul_lookup_dentry(name,nd)) return 0; - } else { - read_unlock(¤t->fs->lock); } - spin_lock(&dcache_lock); - nd->mnt = current->fs->rootmnt; - nd->dentry = current->fs->root; + nd->mnt = mntget(current->fs->rootmnt); + nd->dentry = dget(current->fs->root); } else{ - spin_lock(&dcache_lock); - nd->mnt = current->fs->pwdmnt; - nd->dentry = current->fs->pwd; + nd->mnt = mntget(current->fs->pwdmnt); + nd->dentry = dget(current->fs->pwd); } - nd->old_mnt = NULL; - nd->old_dentry = NULL; + read_unlock(¤t->fs->lock); current->total_link_count = 0; return link_path_walk(name, nd); } @@ -2117,7 +2084,6 @@ __vfs_follow_link(struct nameidata *nd, /* weird __emul_prefix() stuff did it */ goto out; } - lock_nd(nd); res = link_path_walk(link, nd); out: if (current->link_count || res || nd->last_type!=LAST_NORM) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/fs/namespace.c 90-mjb1/fs/namespace.c --- 00-virgin/fs/namespace.c Fri Dec 13 23:18:10 2002 +++ 90-mjb1/fs/namespace.c Mon Jan 13 23:17:09 2003 @@ -892,12 +892,10 @@ void set_fs_root(struct fs_struct *fs, s struct dentry *old_root; struct vfsmount *old_rootmnt; write_lock(&fs->lock); - spin_lock(&dcache_lock); old_root = fs->root; old_rootmnt = fs->rootmnt; fs->rootmnt = mntget(mnt); fs->root = dget(dentry); - spin_unlock(&dcache_lock); write_unlock(&fs->lock); if (old_root) { dput(old_root); @@ -916,12 +914,10 @@ void set_fs_pwd(struct fs_struct *fs, st struct vfsmount *old_pwdmnt; write_lock(&fs->lock); - spin_lock(&dcache_lock); old_pwd = fs->pwd; old_pwdmnt = fs->pwdmnt; fs->pwdmnt = mntget(mnt); fs->pwd = dget(dentry); - spin_unlock(&dcache_lock); write_unlock(&fs->lock); if (old_pwd) { diff -urpN -X /home/fletch/.diff.exclude 00-virgin/fs/proc/proc_misc.c 90-mjb1/fs/proc/proc_misc.c --- 00-virgin/fs/proc/proc_misc.c Thu Jan 2 22:05:14 2003 +++ 90-mjb1/fs/proc/proc_misc.c Mon Jan 13 23:20:25 2003 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,40 @@ static int uptime_read_proc(char *page, return proc_calc_metrics(page, start, off, count, eof, len); } +struct vmalloc_info { + unsigned long used; + unsigned long largest_chunk; +}; + +static struct vmalloc_info get_vmalloc_info(void) +{ + unsigned long prev_end = VMALLOC_START; + struct vm_struct* vma; + struct vmalloc_info vmi; + vmi.used = 0; + + read_lock(&vmlist_lock); + + if(!vmlist) + vmi.largest_chunk = (VMALLOC_END-VMALLOC_START); + else + vmi.largest_chunk = 0; + + for (vma = vmlist; vma; vma = vma->next) { + unsigned long free_area_size = + (unsigned long)vma->addr - prev_end; + vmi.used += vma->size; + if (vmi.largest_chunk < free_area_size ) + vmi.largest_chunk = free_area_size; + prev_end = vma->size + (unsigned long)vma->addr; + } + if(VMALLOC_END-prev_end > vmi.largest_chunk) + vmi.largest_chunk = VMALLOC_END-prev_end; + + read_unlock(&vmlist_lock); + return vmi; +} + extern atomic_t vm_committed_space; static int meminfo_read_proc(char *page, char **start, off_t off, @@ -140,6 +175,8 @@ static int meminfo_read_proc(char *page, struct page_state ps; unsigned long inactive; unsigned long active; + unsigned long vmtot; + struct vmalloc_info vmi; get_page_state(&ps); get_zone_counts(&active, &inactive); @@ -152,6 +189,11 @@ static int meminfo_read_proc(char *page, si_swapinfo(&i); committed = atomic_read(&vm_committed_space); + vmtot = (VMALLOC_END-VMALLOC_START)>>10; + vmi = get_vmalloc_info(); + vmi.used >>= 10; + vmi.largest_chunk >>= 10; + /* * Tagged format, for easy grepping and expansion. */ @@ -175,7 +217,10 @@ static int meminfo_read_proc(char *page, "Slab: %8lu kB\n" "Committed_AS: %8u kB\n" "PageTables: %8lu kB\n" - "ReverseMaps: %8lu\n", + "ReverseMaps: %8lu\n" + "VmallocTotal: %8lu kB\n" + "VmallocUsed: %8lu kB\n" + "VmallocChunk: %8lu kB\n", K(i.totalram), K(i.freeram), K(i.bufferram), @@ -195,7 +240,10 @@ static int meminfo_read_proc(char *page, K(ps.nr_slab), K(committed), K(ps.nr_page_table_pages), - ps.nr_reverse_maps + ps.nr_reverse_maps, + vmtot, + vmi.used, + vmi.largest_chunk ); len += hugetlb_report_meminfo(page + len); @@ -297,6 +345,71 @@ static struct file_operations proc_modul }; #endif +#ifdef CONFIG_NUMA +#define K(x) ((x) << (PAGE_SHIFT - 10)) +static int show_meminfo_numa (struct seq_file *m, void *v) +{ + int *d = v; + int nid = *d; + struct sysinfo i; + si_meminfo_node(&i, nid); + seq_printf(m, "\n" + "Node %d MemTotal: %8lu kB\n" + "Node %d MemFree: %8lu kB\n" + "Node %d MemUsed: %8lu kB\n" + "Node %d HighTotal: %8lu kB\n" + "Node %d HighFree: %8lu kB\n" + "Node %d LowTotal: %8lu kB\n" + "Node %d LowFree: %8lu kB\n", + nid, K(i.totalram), + nid, K(i.freeram), + nid, K(i.totalram-i.freeram), + nid, K(i.totalhigh), + nid, K(i.freehigh), + nid, K(i.totalram-i.totalhigh), + nid, K(i.freeram-i.freehigh)); + + return 0; +} +#undef K + +extern struct seq_operations meminfo_numa_op; +static int meminfo_numa_open(struct inode *inode, struct file *file) +{ + return seq_open(file,&meminfo_numa_op); +} + +static struct file_operations proc_meminfo_numa_operations = { + open: meminfo_numa_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; + +static void *meminfo_numa_start(struct seq_file *m, loff_t *pos) +{ + return *pos < numnodes ? pos : NULL; +} + +static void *meminfo_numa_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return meminfo_numa_start(m, pos); +} + +static void meminfo_numa_stop(struct seq_file *m, void *v) +{ +} + +struct seq_operations meminfo_numa_op = { + .start = meminfo_numa_start, + .next = meminfo_numa_next, + .stop = meminfo_numa_stop, + .show = show_meminfo_numa, +}; + +#endif + extern struct seq_operations slabinfo_op; extern ssize_t slabinfo_write(struct file *, const char *, size_t, loff_t *); static int slabinfo_open(struct inode *inode, struct file *file) @@ -593,6 +706,9 @@ void __init proc_misc_init(void) create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); +#endif +#ifdef CONFIG_NUMA + create_seq_entry("meminfo.numa",0,&proc_meminfo_numa_operations); #endif proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/apicdef.h 90-mjb1/include/asm-i386/apicdef.h --- 00-virgin/include/asm-i386/apicdef.h Sun Nov 17 20:29:46 2002 +++ 90-mjb1/include/asm-i386/apicdef.h Mon Jan 13 23:16:53 2003 @@ -11,8 +11,13 @@ #define APIC_DEFAULT_PHYS_BASE 0xfee00000 #define APIC_ID 0x20 -#define APIC_ID_MASK (0x0F<<24) -#define GET_APIC_ID(x) (((x)>>24)&0x0F) +#ifdef CONFIG_X86_SUMMIT + #define APIC_ID_MASK (0xFF<<24) + #define GET_APIC_ID(x) (((x)>>24)&0xFF) +#else + #define APIC_ID_MASK (0x0F<<24) + #define GET_APIC_ID(x) (((x)>>24)&0x0F) +#endif #define APIC_LVR 0x30 #define APIC_LVR_MASK 0xFF00FF #define GET_APIC_VERSION(x) ((x)&0xFF) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/bug.h 90-mjb1/include/asm-i386/bug.h --- 00-virgin/include/asm-i386/bug.h Mon Jan 13 23:16:26 2003 +++ 90-mjb1/include/asm-i386/bug.h Mon Jan 13 23:22:43 2003 @@ -10,6 +10,11 @@ * undefined" opcode for parsing in the trap handler. */ +#ifdef CONFIG_X86_REMOTE_DEBUG +#define BUG() do { \ + asm ("int $0x3"); \ +} while (0) +#else #if 1 /* Set to zero for a slightly smaller kernel */ #define BUG() \ __asm__ __volatile__( "ud2\n" \ @@ -18,6 +23,7 @@ : : "i" (__LINE__), "i" (__FILE__)) #else #define BUG() __asm__ __volatile__("ud2\n") +#endif #endif #define PAGE_BUG(page) do { \ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/early_printk.h 90-mjb1/include/asm-i386/early_printk.h --- 00-virgin/include/asm-i386/early_printk.h Wed Dec 31 16:00:00 1969 +++ 90-mjb1/include/asm-i386/early_printk.h Mon Jan 13 23:20:05 2003 @@ -0,0 +1,8 @@ +#ifndef __EARLY_PRINTK_H_I386_ +#define __EARLY_PRINTK_H_i386_ + +#define VGABASE 0xB8000 +#define SERIAL_BASES { 0x3f8, 0x2f8 } +#define SERIAL_BASES_LEN 2 + +#endif diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/ioctls.h 90-mjb1/include/asm-i386/ioctls.h --- 00-virgin/include/asm-i386/ioctls.h Sun Nov 17 20:29:22 2002 +++ 90-mjb1/include/asm-i386/ioctls.h Mon Jan 13 23:20:30 2003 @@ -68,6 +68,7 @@ #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ #define FIOQSIZE 0x5460 +#define TIOCGDB 0x547F /* enable GDB stub mode on this tty */ /* Used for packet mode */ #define TIOCPKT_DATA 0 diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/mach-default/mach_apic.h 90-mjb1/include/asm-i386/mach-default/mach_apic.h --- 00-virgin/include/asm-i386/mach-default/mach_apic.h Thu Jan 9 19:16:11 2003 +++ 90-mjb1/include/asm-i386/mach-default/mach_apic.h Mon Jan 13 23:16:50 2003 @@ -12,8 +12,12 @@ #define no_balance_irq (0) #define esr_disable (0) +#define INT_DELIVERY_MODE dest_LowestPrio +#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ + #define APIC_BROADCAST_ID 0x0F #define check_apicid_used(bitmap, apicid) (bitmap & (1 << apicid)) +#define check_apicid_present(bit) (phys_cpu_present_map & (1 << bit)) static inline int apic_id_registered(void) { @@ -57,6 +61,12 @@ static inline int multi_timer_check(int static inline int apicid_to_node(int logical_apicid) { return 0; +} + +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ + return 1 << cpu; } static inline int cpu_present_to_apicid(int mps_cpu) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/mach-default/mach_mpparse.h 90-mjb1/include/asm-i386/mach-default/mach_mpparse.h --- 00-virgin/include/asm-i386/mach-default/mach_mpparse.h Mon Dec 23 23:01:56 2002 +++ 90-mjb1/include/asm-i386/mach-default/mach_mpparse.h Mon Jan 13 23:16:48 2003 @@ -17,4 +17,10 @@ static inline void mps_oem_check(struct { } +/* Hook from generic ACPI tables.c */ +static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +} + + #endif /* __ASM_MACH_MPPARSE_H */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/mach-numaq/mach_apic.h 90-mjb1/include/asm-i386/mach-numaq/mach_apic.h --- 00-virgin/include/asm-i386/mach-numaq/mach_apic.h Thu Jan 9 19:16:11 2003 +++ 90-mjb1/include/asm-i386/mach-numaq/mach_apic.h Mon Jan 13 23:16:50 2003 @@ -8,8 +8,12 @@ #define no_balance_irq (1) #define esr_disable (1) +#define INT_DELIVERY_MODE dest_LowestPrio +#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ + #define APIC_BROADCAST_ID 0x0F #define check_apicid_used(bitmap, apicid) ((bitmap) & (1 << (apicid))) +#define check_apicid_present(bit) (phys_cpu_present_map & (1 << bit)) static inline int apic_id_registered(void) { @@ -40,6 +44,13 @@ static inline ulong ioapic_phys_id_map(u { /* We don't have a good way to do this yet - hack */ return 0xf; +} + +/* Mapping from cpu number to logical apicid */ +extern volatile u8 cpu_2_logical_apicid[]; +static inline int cpu_to_logical_apicid(int cpu) +{ + return (int)cpu_2_logical_apicid[cpu]; } static inline int cpu_present_to_apicid(int mps_cpu) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/mach-numaq/mach_mpparse.h 90-mjb1/include/asm-i386/mach-numaq/mach_mpparse.h --- 00-virgin/include/asm-i386/mach-numaq/mach_mpparse.h Mon Dec 23 23:01:56 2002 +++ 90-mjb1/include/asm-i386/mach-numaq/mach_mpparse.h Mon Jan 13 23:16:48 2003 @@ -34,4 +34,9 @@ static inline void mps_oem_check(struct mpc->mpc_oemsize); } +/* Hook from generic ACPI tables.c */ +static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +} + #endif /* __ASM_MACH_MPPARSE_H */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/mach-summit/mach_apic.h 90-mjb1/include/asm-i386/mach-summit/mach_apic.h --- 00-virgin/include/asm-i386/mach-summit/mach_apic.h Thu Jan 9 19:16:11 2003 +++ 90-mjb1/include/asm-i386/mach-summit/mach_apic.h Mon Jan 13 23:16:55 2003 @@ -4,6 +4,7 @@ extern int x86_summit; #define esr_disable (1) +#define no_balance_irq (0) #define XAPIC_DEST_CPUS_MASK 0x0Fu #define XAPIC_DEST_CLUSTER_MASK 0xF0u @@ -11,22 +12,43 @@ extern int x86_summit; #define xapic_phys_to_log_apicid(phys_apic) ( (1ul << ((phys_apic) & 0x3)) |\ ((phys_apic) & XAPIC_DEST_CLUSTER_MASK) ) -static inline unsigned long calculate_ldr(unsigned long old) +#define APIC_DFR_VALUE (x86_summit ? APIC_DFR_CLUSTER : APIC_DFR_FLAT) +#define TARGET_CPUS (x86_summit ? XAPIC_DEST_CPUS_MASK : cpu_online_map) + +#define INT_DELIVERY_MODE dest_Fixed +#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ + +#define APIC_BROADCAST_ID (x86_summit ? 0xFF : 0x0F) +#define check_apicid_used(bitmap, apicid) (0) + +/* we don't use the phys_cpu_present_map to indicate apicid presence */ +#define check_apicid_present(bit) (1) + +extern u8 bios_cpu_apicid[]; + +static inline void init_apic_ldr(void) { - unsigned long id; + unsigned long val, id; if (x86_summit) id = xapic_phys_to_log_apicid(hard_smp_processor_id()); else id = 1UL << smp_processor_id(); - return ((old & ~APIC_LDR_MASK) | SET_APIC_LOGICAL_ID(id)); + apic_write_around(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write_around(APIC_LDR, val); } -#define APIC_DFR_VALUE (x86_summit ? APIC_DFR_CLUSTER : APIC_DFR_FLAT) -#define TARGET_CPUS (x86_summit ? XAPIC_DEST_CPUS_MASK : cpu_online_map) +static inline int multi_timer_check(int apic, int irq) +{ + return 0; +} -#define APIC_BROADCAST_ID (x86_summit ? 0xFF : 0x0F) -#define check_apicid_used(bitmap, apicid) (0) +static inline int apic_id_registered(void) +{ + return 1; +} static inline void clustered_apic_check(void) { @@ -39,10 +61,17 @@ static inline int apicid_to_node(int log return (logical_apicid >> 5); /* 2 clusterids per CEC */ } +/* Mapping from cpu number to logical apicid */ +extern volatile u8 cpu_2_logical_apicid[]; +static inline int cpu_to_logical_apicid(int cpu) +{ + return (int)cpu_2_logical_apicid[cpu]; +} + static inline int cpu_present_to_apicid(int mps_cpu) { if (x86_summit) - return (int) raw_phys_apicid[mps_cpu]; + return (int) bios_cpu_apicid[mps_cpu]; else return mps_cpu; } @@ -53,12 +82,22 @@ static inline ulong ioapic_phys_id_map(u return (x86_summit ? 0x0F : phys_map); } -static inline unsigned long apicid_to_phys_cpu_present(int apicid) +static inline unsigned long apicid_to_cpu_present(int apicid) { if (x86_summit) - return (1ul << (((apicid >> 4) << 2) | (apicid & 0x3))); + return 1; else return (1ul << apicid); +} + +static inline int mpc_apic_id(struct mpc_config_processor *m, int quad) +{ + printk("Processor #%d %ld:%ld APIC version %d\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver); + return (m->mpc_apicid); } static inline void setup_portio_remap(void) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/mach-summit/mach_mpparse.h 90-mjb1/include/asm-i386/mach-summit/mach_mpparse.h --- 00-virgin/include/asm-i386/mach-summit/mach_mpparse.h Mon Dec 23 23:01:56 2002 +++ 90-mjb1/include/asm-i386/mach-summit/mach_mpparse.h Mon Jan 13 23:16:53 2003 @@ -15,8 +15,14 @@ static inline void mpc_oem_pci_bus(struc static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) { - if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(str, "VIGIL SMP", 9)) + if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(productid, "VIGIL SMP", 9)) x86_summit = 1; } +/* Hook from generic ACPI tables.c */ +static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "SERVIGIL", 8)) + x86_summit = 1; +} #endif /* __ASM_MACH_MPPARSE_H */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/mpspec.h 90-mjb1/include/asm-i386/mpspec.h --- 00-virgin/include/asm-i386/mpspec.h Fri Dec 13 23:18:12 2002 +++ 90-mjb1/include/asm-i386/mpspec.h Mon Jan 13 23:16:53 2003 @@ -16,11 +16,11 @@ /* * a maximum of 16 APICs with the current APIC ID architecture. */ -#ifdef CONFIG_X86_NUMA +#if defined(CONFIG_X86_NUMAQ) || defined (CONFIG_X86_SUMMIT) #define MAX_APICS 256 -#else /* !CONFIG_X86_NUMA */ +#else #define MAX_APICS 16 -#endif /* CONFIG_X86_NUMA */ +#endif #define MAX_MPC_ENTRY 1024 diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/page.h 90-mjb1/include/asm-i386/page.h --- 00-virgin/include/asm-i386/page.h Mon Jan 13 23:16:26 2003 +++ 90-mjb1/include/asm-i386/page.h Mon Jan 13 23:23:34 2003 @@ -3,7 +3,11 @@ /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) +#ifndef __ASSEMBLY__ +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#else +#define PAGE_SIZE (1 << PAGE_SHIFT) +#endif #define PAGE_MASK (~(PAGE_SIZE-1)) #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) @@ -89,7 +93,16 @@ typedef struct { unsigned long pgprot; } * and CONFIG_HIGHMEM64G options in the kernel configuration. */ -#define __PAGE_OFFSET (0xC0000000) +#include +#ifdef CONFIG_05GB +#define __PAGE_OFFSET (0xE0000000) +#elif defined(CONFIG_1GB) +#define __PAGE_OFFSET (0xC0000000) +#elif defined(CONFIG_2GB) +#define __PAGE_OFFSET (0x80000000) +#elif defined(CONFIG_3GB) +#define __PAGE_OFFSET (0x40000000) +#endif /* * This much address space is reserved for vmalloc() and iomap() diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/param.h 90-mjb1/include/asm-i386/param.h --- 00-virgin/include/asm-i386/param.h Sun Nov 17 20:29:26 2002 +++ 90-mjb1/include/asm-i386/param.h Mon Jan 13 23:20:08 2003 @@ -2,10 +2,18 @@ #define _ASMi386_PARAM_H #ifdef __KERNEL__ -# define HZ 1000 /* Internal kernel timer frequency */ -# define USER_HZ 100 /* .. some user interfaces are in "ticks" */ -# define CLOCKS_PER_SEC (USER_HZ) /* like times() */ +#include + +#ifdef CONFIG_1000HZ +# define HZ 1000 /* Internal kernel timer frequency */ +#else +# define HZ 100 #endif + +#define USER_HZ 100 /* .. some user interfaces are in "ticks" */ +#define CLOCKS_PER_SEC (USER_HZ) /* like times() */ + +#endif /* __KERNEL__ */ #ifndef HZ #define HZ 100 diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/processor.h 90-mjb1/include/asm-i386/processor.h --- 00-virgin/include/asm-i386/processor.h Thu Jan 2 22:05:15 2003 +++ 90-mjb1/include/asm-i386/processor.h Mon Jan 13 23:20:30 2003 @@ -279,7 +279,11 @@ extern unsigned int mca_pentium_flag; /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ +#ifdef CONFIG_05GB +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 16)) +#else #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) +#endif /* * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. @@ -393,6 +397,9 @@ struct thread_struct { unsigned int saved_fs, saved_gs; /* IO permissions */ unsigned long *ts_io_bitmap; +#ifdef CONFIG_X86_REMOTE_DEBUG + struct pt_regs *kgdbregs; +#endif }; #define INIT_THREAD { \ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/smp.h 90-mjb1/include/asm-i386/smp.h --- 00-virgin/include/asm-i386/smp.h Thu Jan 9 19:16:11 2003 +++ 90-mjb1/include/asm-i386/smp.h Mon Jan 13 23:16:50 2003 @@ -22,12 +22,6 @@ #endif #endif -#ifdef CONFIG_X86_NUMAQ - #define INT_DELIVERY_MODE 0 /* physical delivery on LOCAL quad */ -#else - #define INT_DELIVERY_MODE 1 /* logical delivery broadcast to all procs */ -#endif - #define BAD_APICID 0xFFu #ifdef CONFIG_SMP #ifndef __ASSEMBLY__ @@ -82,12 +76,6 @@ static inline int num_booting_cpus(void) return hweight32(cpu_callout_map); } -/* Mapping from cpu number to logical apicid */ -extern volatile u8 cpu_2_logical_apicid[]; -static inline int cpu_to_logical_apicid(int cpu) -{ - return (int)cpu_2_logical_apicid[cpu]; -} extern void map_cpu_to_logical_apicid(void); extern void unmap_cpu_to_logical_apicid(int cpu); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-i386/thread_info.h 90-mjb1/include/asm-i386/thread_info.h --- 00-virgin/include/asm-i386/thread_info.h Thu Jan 9 19:16:11 2003 +++ 90-mjb1/include/asm-i386/thread_info.h Mon Jan 13 23:23:44 2003 @@ -9,6 +9,7 @@ #ifdef __KERNEL__ +#include #ifndef __ASSEMBLY__ #include #endif @@ -29,9 +30,11 @@ struct thread_info { __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: + 0 for interrupts: illegal 0-0xBFFFFFFF for user-thead 0-0xFFFFFFFF for kernel-thread */ + struct thread_info *irq_stack; /* pointer to cpu irq stack */ struct restart_block restart_block; __u8 supervisor_stack[0]; @@ -46,7 +49,8 @@ struct thread_info { #define TI_CPU 0x0000000C #define TI_PRE_COUNT 0x00000010 #define TI_ADDR_LIMIT 0x00000014 -#define TI_RESTART_BLOCK 0x0000018 +#define TI_IRQ_STACK 0x00000018 +#define TI_RESTART_BLOCK 0x0000022 #endif @@ -57,48 +61,66 @@ struct thread_info { * * preempt_count needs to be 1 initially, until the scheduler is functional. */ +#ifdef CONFIG_4K_STACK +#define THREAD_ORDER 0 +#define STACK_WARN 0x200 +#define STACK_PANIC 0x100 +#else +#define THREAD_ORDER 1 +#define STACK_WARN ((THREAD_SIZE)>>1) +#define STACK_PANIC 0x100 +#endif +#define INIT_THREAD_SIZE THREAD_SIZE + #ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .irq_stack = &init_irq_union.thread_info, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + } \ } #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) +/* thread information allocation */ +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +#define alloc_thread_info() ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)) +#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) +#define get_thread_info(ti) get_task_struct((ti)->task) +#define put_thread_info(ti) put_task_struct((ti)->task) + /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~8191UL)); + __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~(THREAD_SIZE - 1))); return ti; } -/* thread information allocation */ -#define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_thread_info() ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) -#define get_thread_info(ti) get_task_struct((ti)->task) -#define put_thread_info(ti) put_task_struct((ti)->task) - #else /* !__ASSEMBLY__ */ +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) + /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movl $-8192, reg; \ + movl $-THREAD_SIZE, reg; \ andl %esp, reg -#endif +/* use this one if reg already contains %esp */ +#define GET_THREAD_INFO_WITH_ESP(reg) \ + andl $-THREAD_SIZE, reg +#endif + /* * thread information flags * - these are process state flags that various assembly files may need to access diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/asm-x86_64/early_printk.h 90-mjb1/include/asm-x86_64/early_printk.h --- 00-virgin/include/asm-x86_64/early_printk.h Wed Dec 31 16:00:00 1969 +++ 90-mjb1/include/asm-x86_64/early_printk.h Mon Jan 13 23:20:05 2003 @@ -0,0 +1,8 @@ +#ifdef __EARLY_PRINTK_H_X86_64_ +#define __EARLY_PRINTK_H_X86_64_ + +#define VGABASE 0xffffffff800b8000UL +#define SERIAL_BASES { 0x3f8, 0x2f8 } +#define SERIAL_BASES_LEN 2 + +#endif diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/linux/dcache.h 90-mjb1/include/linux/dcache.h --- 00-virgin/include/linux/dcache.h Mon Jan 13 23:16:27 2003 +++ 90-mjb1/include/linux/dcache.h Mon Jan 13 23:19:35 2003 @@ -7,6 +7,7 @@ #include #include #include +#include #include struct vfsmount; @@ -72,11 +73,13 @@ struct dcookie_struct; struct dentry { atomic_t d_count; + unsigned long d_vfs_flags; /* moved here to be on same cacheline */ + spinlock_t d_lock; /* per dentry lock */ unsigned int d_flags; struct inode * d_inode; /* Where the name belongs to - NULL is negative */ struct dentry * d_parent; /* parent directory */ struct list_head d_hash; /* lookup hash list */ - struct list_head d_lru; /* d_count = 0 LRU list */ + struct list_head d_lru; /* LRU list */ struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ @@ -85,8 +88,8 @@ struct dentry { unsigned long d_time; /* used by d_revalidate */ struct dentry_operations *d_op; struct super_block * d_sb; /* The root of the dentry tree */ - unsigned long d_vfs_flags; void * d_fsdata; /* fs-specific data */ + struct rcu_head d_rcu; struct dcookie_struct * d_cookie; /* cookie, if any */ unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ } ____cacheline_aligned; @@ -139,6 +142,7 @@ d_iput: no no yes */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ +#define DCACHE_UNHASHED 0x0010 extern spinlock_t dcache_lock; extern rwlock_t dparent_lock; @@ -162,7 +166,8 @@ extern rwlock_t dparent_lock; static __inline__ void __d_drop(struct dentry * dentry) { - list_del_init(&dentry->d_hash); + dentry->d_vfs_flags |= DCACHE_UNHASHED; + list_del_rcu(&dentry->d_hash); } static __inline__ void d_drop(struct dentry * dentry) @@ -254,9 +259,8 @@ extern char * d_path(struct dentry *, st static __inline__ struct dentry * dget(struct dentry *dentry) { if (dentry) { - if (!atomic_read(&dentry->d_count)) - BUG(); atomic_inc(&dentry->d_count); + dentry->d_vfs_flags |= DCACHE_REFERENCED; } return dentry; } @@ -272,7 +276,7 @@ extern struct dentry * dget_locked(struc static __inline__ int d_unhashed(struct dentry *dentry) { - return list_empty(&dentry->d_hash); + return (dentry->d_vfs_flags & DCACHE_UNHASHED); } extern void dput(struct dentry *); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/linux/early_printk.h 90-mjb1/include/linux/early_printk.h --- 00-virgin/include/linux/early_printk.h Wed Dec 31 16:00:00 1969 +++ 90-mjb1/include/linux/early_printk.h Mon Jan 13 23:20:05 2003 @@ -0,0 +1,47 @@ +#ifndef __EARLY_PRINTK_H_ +#define __EARLY_PRINTK_H_ + +#ifdef CONFIG_EARLY_PRINTK +#include +#include +#include +#include +#include +#include + +/* Simple VGA output */ + +#define MAX_YPOS 25 +#define MAX_XPOS 80 + +/* Simple serial port output */ + +#define DEFAULT_BAUD 57600 +#define XMTRDY 0x20 + +#define DLAB 0x80 + +#define TXR 0 /* Transmit register (WRITE) */ +#define RXR 0 /* Receive register (READ) */ +#define IER 1 /* Interrupt Enable */ +#define IIR 2 /* Interrupt ID */ +#define FCR 2 /* FIFO control */ +#define LCR 3 /* Line control */ +#define MCR 4 /* Modem control */ +#define LSR 5 /* Line Status */ +#define MSR 6 /* Modem Status */ +#define DLL 0 /* Divisor Latch Low */ +#define DLH 1 /* Divisor latch High */ + + +void early_printk(const char *fmt, ...); +int __init setup_early_printk(char *opt); + +#else + +#define early_printk(...) do {} while(0) +#define setup_early_printk(X) do {} while(0) + +#endif + +#endif diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/linux/gdb.h 90-mjb1/include/linux/gdb.h --- 00-virgin/include/linux/gdb.h Wed Dec 31 16:00:00 1969 +++ 90-mjb1/include/linux/gdb.h Mon Jan 13 23:20:30 2003 @@ -0,0 +1,67 @@ +#ifndef _GDB_H_ +#define _GDB_H_ + +/* + * Copyright (C) 2001 Amit S. Kale + */ + +/* gdb locks */ +#define KGDB_MAX_NO_CPUS NR_CPUS + +extern int gdb_enter; /* 1 = enter debugger on boot */ +extern int gdb_ttyS; +extern int gdb_baud; +extern int gdb_initialized; + +extern int gdb_hook(void); +extern void breakpoint(void); + +typedef int gdb_debug_hook(int trapno, + int signo, + int err_code, + struct pt_regs *regs); +extern gdb_debug_hook *linux_debug_hook; + +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +extern spinlock_t kgdb_nmispinlock; +#else +extern unsigned kgdb_spinlock; +extern unsigned kgdb_nmispinlock; +#endif + +extern volatile int kgdb_memerr_expected; + +struct console; +void gdb_console_write(struct console *co, const char *s, + unsigned count); +void gdb_console_init(void); + +extern volatile int procindebug[KGDB_MAX_NO_CPUS]; + +#define KGDB_ASSERT(message, condition) do { \ + if (!(condition)) { \ + printk("kgdb assertion failed: %s\n", message); \ + asm ("int $0x3"); \ + } \ +} while (0) + +#ifdef CONFIG_KERNEL_ASSERTS +#define KERNEL_ASSERT(message, condition) KGDB_ASSERT(message, condition) +#else +#define KERNEL_ASSERT(message, condition) +#endif + +#define KA_VALID_ERRNO(errno) ((errno) > 0 && (errno) <= EMEDIUMTYPE) + +#define KA_VALID_PTR_ERR(ptr) KA_VALID_ERRNO(-PTR_ERR(ptr)) + +#define KA_VALID_KPTR(ptr) (!(ptr) || \ + ((void *)(ptr) >= (void *)PAGE_OFFSET && \ + (void *)(ptr) < ERR_PTR(-EMEDIUMTYPE))) + +#define KA_VALID_PTRORERR(errptr) (KA_VALID_KPTR(errptr) || KA_VALID_PTR_ERR(errptr)) + +#define KA_HELD_GKL() (current->lock_depth >= 0) + +#endif /* _GDB_H_ */ diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/linux/namei.h 90-mjb1/include/linux/namei.h --- 00-virgin/include/linux/namei.h Sun Nov 17 20:29:30 2002 +++ 90-mjb1/include/linux/namei.h Mon Jan 13 23:17:09 2003 @@ -11,8 +11,6 @@ struct nameidata { struct qstr last; unsigned int flags; int last_type; - struct dentry *old_dentry; - struct vfsmount *old_mnt; }; /* diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/linux/sched.h 90-mjb1/include/linux/sched.h --- 00-virgin/include/linux/sched.h Mon Jan 13 21:09:28 2003 +++ 90-mjb1/include/linux/sched.h Mon Jan 13 23:20:30 2003 @@ -160,10 +160,11 @@ extern void update_one_process(struct ta extern void scheduler_tick(int user_tick, int system); extern unsigned long cache_decay_ticks; - #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); -asmlinkage void schedule(void); +asmlinkage void do_schedule(void); +asmlinkage void kern_schedule(void); +asmlinkage void kern_do_schedule(struct pt_regs); struct namespace; @@ -444,6 +445,20 @@ extern void set_cpus_allowed(task_t *p, # define set_cpus_allowed(p, new_mask) do { } while (0) #endif +#ifdef CONFIG_NUMA +extern void sched_balance_exec(void); +extern void node_nr_running_init(void); +#define nr_running_inc(rq) atomic_inc(rq->node_ptr); \ + rq->nr_running++ +#define nr_running_dec(rq) atomic_dec(rq->node_ptr); \ + rq->nr_running-- +#else +#define sched_balance_exec() {} +#define node_nr_running_init() {} +#define nr_running_inc(rq) rq->nr_running++ +#define nr_running_dec(rq) rq->nr_running-- +#endif + extern void set_user_nice(task_t *p, long nice); extern int task_prio(task_t *p); extern int task_nice(task_t *p); @@ -637,6 +652,12 @@ static inline int thread_group_empty(tas (thread_group_leader(p) && !thread_group_empty(p)) extern void unhash_process(struct task_struct *p); + +#ifdef CONFIG_KGDB_THREAD +#define schedule() kern_schedule() +#else +#define schedule() do_schedule() +#endif /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */ static inline void task_lock(struct task_struct *p) diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/linux/sysctl.h 90-mjb1/include/linux/sysctl.h --- 00-virgin/include/linux/sysctl.h Mon Dec 23 23:01:57 2002 +++ 90-mjb1/include/linux/sysctl.h Mon Jan 13 23:20:20 2003 @@ -66,7 +66,8 @@ enum CTL_DEV=7, /* Devices */ CTL_BUS=8, /* Busses */ CTL_ABI=9, /* Binary emulation */ - CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ + CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ + CTL_SCHED=11, /* scheduler tunables */ }; /* CTL_BUS names: */ @@ -157,6 +158,18 @@ enum VM_LOWER_ZONE_PROTECTION=20,/* Amount of protection of lower zones */ }; +/* Tunable scheduler parameters in /proc/sys/sched/ */ +enum { + SCHED_MIN_TIMESLICE=1, /* minimum process timeslice */ + SCHED_MAX_TIMESLICE=2, /* maximum process timeslice */ + SCHED_CHILD_PENALTY=3, /* penalty on fork to child */ + SCHED_PARENT_PENALTY=4, /* penalty on fork to parent */ + SCHED_EXIT_WEIGHT=5, /* penalty to parent of CPU hog child */ + SCHED_PRIO_BONUS_RATIO=6, /* percent of max prio given as bonus */ + SCHED_INTERACTIVE_DELTA=7, /* delta used to scale interactivity */ + SCHED_MAX_SLEEP_AVG=8, /* maximum sleep avg attainable */ + SCHED_STARVATION_LIMIT=9, /* no re-active if expired is starved */ +}; /* CTL_NET names: */ enum diff -urpN -X /home/fletch/.diff.exclude 00-virgin/include/linux/timex.h 90-mjb1/include/linux/timex.h --- 00-virgin/include/linux/timex.h Sun Nov 17 20:29:21 2002 +++ 90-mjb1/include/linux/timex.h Mon Jan 13 23:20:08 2003 @@ -76,7 +76,7 @@ #elif HZ >= 768 && HZ < 1536 # define SHIFT_HZ 10 #else -# error You lose. +# error Please use a HZ value which is between 12 and 1536 #endif /* diff -urpN -X /home/fletch/.diff.exclude 00-virgin/init/main.c 90-mjb1/init/main.c --- 00-virgin/init/main.c Thu Jan 9 19:16:15 2003 +++ 90-mjb1/init/main.c Mon Jan 13 23:20:30 2003 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,10 @@ #include #endif +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif + /* * Versions of gcc older than that listed below may actually compile * and link okay, but the end product can have subtle run time bugs. @@ -374,6 +379,7 @@ asmlinkage void __init start_kernel(void */ lock_kernel(); printk(linux_banner); + setup_early_printk(&command_line); setup_arch(&command_line); setup_per_cpu_areas(); @@ -445,6 +451,12 @@ asmlinkage void __init start_kernel(void */ init_idle(current, smp_processor_id()); +#ifdef CONFIG_X86_REMOTE_DEBUG + if (gdb_enter) { + gdb_hook(); /* right at boot time */ + } +#endif + /* Do the rest non-__init'ed, we're now alive */ rest_init(); } @@ -495,6 +507,7 @@ static void do_pre_smp_initcalls(void) migration_init(); #endif + node_nr_running_init(); spawn_ksoftirqd(); } diff -urpN -X /home/fletch/.diff.exclude 00-virgin/kernel/Makefile 90-mjb1/kernel/Makefile --- 00-virgin/kernel/Makefile Thu Jan 9 19:16:15 2003 +++ 90-mjb1/kernel/Makefile Mon Jan 13 23:20:05 2003 @@ -22,6 +22,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o obj-$(CONFIG_COMPAT) += compat.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ifneq ($(CONFIG_IA64),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff -urpN -X /home/fletch/.diff.exclude 00-virgin/kernel/early_printk.c 90-mjb1/kernel/early_printk.c --- 00-virgin/kernel/early_printk.c Wed Dec 31 16:00:00 1969 +++ 90-mjb1/kernel/early_printk.c Mon Jan 13 23:20:05 2003 @@ -0,0 +1,209 @@ +#include +#include +#include +#include +#include +#include + +/* Simple VGA output */ + +#define MAX_YPOS 25 +#define MAX_XPOS 80 + +static int current_ypos = 1, current_xpos = 0; + +static void early_vga_write(struct console *con, const char *str, unsigned n) +{ + char c; + int i, k, j; + + while ((c = *str++) != '\0' && n-- > 0) { + if (current_ypos >= MAX_YPOS) { + /* scroll 1 line up */ + for(k = 1, j = 0; k < MAX_YPOS; k++, j++) { + for(i = 0; i < MAX_XPOS; i++) { + writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), + VGABASE + 2*(MAX_XPOS*j + i)); + } + } + for(i = 0; i < MAX_XPOS; i++) { + writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); + } + current_ypos = MAX_YPOS-1; + } + if (c == '\n') { + current_xpos = 0; + current_ypos++; + } else if (c != '\r') { + writew(((0x7 << 8) | (unsigned short) c), + VGABASE + 2*(MAX_XPOS*current_ypos + current_xpos++)); + if (current_xpos >= MAX_XPOS) { + current_xpos = 0; + current_ypos++; + } + } + } +} + +static struct console early_vga_console = { + .name = "earlyvga", + .write = early_vga_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* Serial functions losely based on a similar package from Klaus P. Gerlicher */ + +int early_serial_base; /* ttyS0 */ + +static int early_serial_putc(unsigned char ch) +{ + unsigned timeout = 0xffff; + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + rep_nop(); + outb(ch, early_serial_base + TXR); + return timeout ? 0 : -1; +} + +static void early_serial_write(struct console *con, const char *s, unsigned n) +{ + while (*s && n-- > 0) { + early_serial_putc(*s); + if (*s == '\n') + early_serial_putc('\r'); + s++; + } +} + +static __init void early_serial_init(char *opt) +{ + unsigned char c; + unsigned divisor, baud = DEFAULT_BAUD; + static int bases[] = SERIAL_BASES; + char *s, *e; + + early_serial_base = bases[0]; + + if (*opt == ',') + ++opt; + + s = strsep(&opt, ","); + if (s != NULL) { + unsigned port; + if (!strncmp(s,"0x",2)) + early_serial_base = simple_strtoul(s, &e, 16); + else { + if (!strncmp(s,"ttyS",4)) + s+=4; + port = simple_strtoul(s, &e, 10); + if (port > (SERIAL_BASES_LEN-1) || s == e) + port = 0; + early_serial_base = bases[port]; + } + } + + outb(0x3, early_serial_base + LCR); /* 8n1 */ + outb(0, early_serial_base + IER); /* no interrupt */ + outb(0, early_serial_base + FCR); /* no fifo */ + outb(0x3, early_serial_base + MCR); /* DTR + RTS */ + + s = strsep(&opt, ","); + if (s != NULL) { + baud = simple_strtoul(s, &e, 0); + if (baud == 0 || s == e) + baud = DEFAULT_BAUD; + } + + divisor = 115200 / baud; + c = inb(early_serial_base + LCR); + outb(c | DLAB, early_serial_base + LCR); + outb(divisor & 0xff, early_serial_base + DLL); + outb((divisor >> 8) & 0xff, early_serial_base + DLH); + outb(c & ~DLAB, early_serial_base + LCR); +} + +static struct console early_serial_console = { + .name = "earlyser", + .write = early_serial_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* Direct interface for emergencies */ +struct console *early_console = &early_vga_console; +static int early_console_initialized = 0; + +void early_printk(const char *fmt, ...) +{ + char buf[512]; + int n; + va_list ap; + va_start(ap,fmt); + n = vsnprintf(buf,512,fmt,ap); + early_console->write(early_console,buf,n); + va_end(ap); +} + +static int keep_early; + +int __init setup_early_printk(char *opt) +{ + char *space, *s; + char buf[256]; + + s = strstr(opt, "earlyprintk="); + if (s == NULL) + return -1; + opt = s+12; + + if (early_console_initialized) + return -1; + + strncpy(buf,opt,256); + buf[255] = 0; + space = strchr(buf, ' '); + if (space) + *space = 0; + + if (strstr(buf,"keep")) + keep_early = 1; + + if (!strncmp(buf, "serial", 6)) { + early_serial_init(buf + 6); + early_console = &early_serial_console; + } else if (!strncmp(buf, "ttyS", 4)) { + early_serial_init(buf); + early_console = &early_serial_console; + } else if (!strncmp(buf, "vga", 3)) { + early_console = &early_vga_console; + } else { + early_console = NULL; + return -1; + } + early_console_initialized = 1; + register_console(early_console); + early_printk( "early printk console registered\n" ); + return 0; +} + +void __init disable_early_printk(void) +{ + if (!early_console_initialized || !early_console) + return; + if (!keep_early) { + printk("disabling early console...\n"); + unregister_console(early_console); + early_console_initialized = 0; + } else { + printk("keeping early console.\n"); + } +} + +/* syntax: earlyprintk=vga + earlyprintk=serial[,ttySn[,baudrate]] + Append ,keep to not disable it when the real console takes over. + Only vga or serial at a time, not both. + Currently only ttyS0 and ttyS1 are supported. + Interaction with the standard serial driver is not very good. + The VGA output is eventually overwritten by the real console. */ +__setup("earlyprintk=", setup_early_printk); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/kernel/ksyms.c 90-mjb1/kernel/ksyms.c --- 00-virgin/kernel/ksyms.c Mon Jan 13 23:16:27 2003 +++ 90-mjb1/kernel/ksyms.c Mon Jan 13 23:20:30 2003 @@ -469,7 +469,10 @@ EXPORT_SYMBOL(sleep_on); EXPORT_SYMBOL(sleep_on_timeout); EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); -EXPORT_SYMBOL(schedule); +EXPORT_SYMBOL(do_schedule); +#ifdef CONFIG_KGDB_THREAD +EXPORT_SYMBOL(kern_schedule); +#endif #ifdef CONFIG_PREEMPT EXPORT_SYMBOL(preempt_schedule); #endif diff -urpN -X /home/fletch/.diff.exclude 00-virgin/kernel/sched.c 90-mjb1/kernel/sched.c --- 00-virgin/kernel/sched.c Mon Jan 13 21:09:28 2003 +++ 90-mjb1/kernel/sched.c Mon Jan 13 23:20:30 2003 @@ -57,16 +57,29 @@ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs, * maximum timeslice is 300 msecs. Timeslices get refilled after * they expire. + * + * They are configurable via /proc/sys/sched */ -#define MIN_TIMESLICE ( 10 * HZ / 1000) -#define MAX_TIMESLICE (300 * HZ / 1000) -#define CHILD_PENALTY 95 -#define PARENT_PENALTY 100 -#define EXIT_WEIGHT 3 -#define PRIO_BONUS_RATIO 25 -#define INTERACTIVE_DELTA 2 -#define MAX_SLEEP_AVG (2*HZ) -#define STARVATION_LIMIT (2*HZ) + +int min_timeslice = (10 * HZ) / 1000; +int max_timeslice = (300 * HZ) / 1000; +int child_penalty = 95; +int parent_penalty = 100; +int exit_weight = 3; +int prio_bonus_ratio = 25; +int interactive_delta = 2; +int max_sleep_avg = 2 * HZ; +int starvation_limit = 2 * HZ; + +#define MIN_TIMESLICE (min_timeslice) +#define MAX_TIMESLICE (max_timeslice) +#define CHILD_PENALTY (child_penalty) +#define PARENT_PENALTY (parent_penalty) +#define EXIT_WEIGHT (exit_weight) +#define PRIO_BONUS_RATIO (prio_bonus_ratio) +#define INTERACTIVE_DELTA (interactive_delta) +#define MAX_SLEEP_AVG (max_sleep_avg) +#define STARVATION_LIMIT (starvation_limit) /* * If a task is 'interactive' then we reinsert it in the active @@ -153,6 +166,7 @@ struct runqueue { task_t *curr, *idle; prio_array_t *active, *expired, arrays[2]; int prev_nr_running[NR_CPUS]; + atomic_t * node_ptr; task_t *migration_thread; struct list_head migration_queue; @@ -294,7 +308,7 @@ static inline void activate_task(task_t p->prio = effective_prio(p); } enqueue_task(p, array); - rq->nr_running++; + nr_running_inc(rq); } /* @@ -302,7 +316,7 @@ static inline void activate_task(task_t */ static inline void deactivate_task(struct task_struct *p, runqueue_t *rq) { - rq->nr_running--; + nr_running_dec(rq); if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; dequeue_task(p, p->array); @@ -652,9 +666,9 @@ static inline unsigned int double_lock_b } /* - * find_busiest_queue - find the busiest runqueue. + * find_busiest_in_mask - find the busiest runqueue among the cpus in cpumask */ -static inline runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu, int idle, int *imbalance) +static inline runqueue_t *find_busiest_in_mask(runqueue_t *this_rq, int this_cpu, int idle, int *imbalance, unsigned long cpumask) { int nr_running, load, max_load, i; runqueue_t *busiest, *rq_src; @@ -689,7 +703,7 @@ static inline runqueue_t *find_busiest_q busiest = NULL; max_load = 1; for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) + if (!cpu_online(i) || !((1UL << i) & cpumask) ) continue; rq_src = cpu_rq(i); @@ -730,15 +744,25 @@ out: } /* + * find_busiest_queue - find the busiest runqueue. + */ +static inline runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu, int idle, int *imbalance) +{ + unsigned long cpumask = __node_to_cpu_mask(__cpu_to_node(this_cpu)); + return find_busiest_in_mask(this_rq, this_cpu, idle, imbalance, + cpumask); +} + +/* * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. */ static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu) { dequeue_task(p, src_array); - src_rq->nr_running--; + nr_running_dec(src_rq); set_task_cpu(p, this_cpu); - this_rq->nr_running++; + nr_running_inc(this_rq); enqueue_task(p, this_rq->active); /* * Note that idle threads have a prio of MAX_PRIO, for this test @@ -966,7 +990,7 @@ void scheduling_functions_start_here(voi /* * schedule() is the main scheduler function. */ -asmlinkage void schedule(void) +asmlinkage void do_schedule(void) { task_t *prev, *next; runqueue_t *rq; @@ -1200,6 +1224,22 @@ void complete_all(struct completion *x) spin_unlock_irqrestore(&x->wait.lock, flags); } +asmlinkage void user_schedule(void) +{ +#ifdef CONFIG_KGDB_THREAD + current->thread.kgdbregs = NULL; +#endif + do_schedule(); +} + +#ifdef CONFIG_KGDB_THREAD +asmlinkage void kern_do_schedule(struct pt_regs regs) +{ + current->thread.kgdbregs = ®s; + do_schedule(); +} +#endif + void wait_for_completion(struct completion *x) { might_sleep(); @@ -2170,6 +2210,86 @@ __init int migration_init(void) #endif +#if CONFIG_NUMA +static atomic_t node_nr_running[MAX_NUMNODES] ____cacheline_maxaligned_in_smp = {[0 ...MAX_NUMNODES-1] = ATOMIC_INIT(0)}; + +__init void node_nr_running_init(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) { + cpu_rq(i)->node_ptr = &node_nr_running[__cpu_to_node(i)]; + } + return; +} + +/* + * If dest_cpu is allowed for this process, migrate the task to it. + * This is accomplished by forcing the cpu_allowed mask to only + * allow dest_cpu, which will force the cpu onto dest_cpu. Then + * the cpu_allowed mask is restored. + */ +static void sched_migrate_task(task_t *p, int dest_cpu) +{ + unsigned long old_mask; + + old_mask = p->cpus_allowed; + if (!(old_mask & (1UL << dest_cpu))) + return; + /* force the process onto the specified CPU */ + set_cpus_allowed(p, 1UL << dest_cpu); + + /* restore the cpus allowed mask */ + set_cpus_allowed(p, old_mask); +} + +/* + * Find the least loaded CPU. Slightly favor the current CPU by + * setting its runqueue length as the minimum to start. + */ +static int sched_best_cpu(struct task_struct *p) +{ + int i, minload, load, best_cpu, node = 0; + unsigned long cpumask; + + best_cpu = task_cpu(p); + if (cpu_rq(best_cpu)->nr_running <= 2) + return best_cpu; + + minload = 10000000; + for (i = 0; i < numnodes; i++) { + load = atomic_read(&node_nr_running[i]); + if (load < minload) { + minload = load; + node = i; + } + } + + minload = 10000000; + cpumask = __node_to_cpu_mask(node); + for (i = 0; i < NR_CPUS; ++i) { + if (!(cpumask & (1UL << i))) + continue; + if (cpu_rq(i)->nr_running < minload) { + best_cpu = i; + minload = cpu_rq(i)->nr_running; + } + } + return best_cpu; +} + +void sched_balance_exec(void) +{ + int new_cpu; + + if (numnodes > 1) { + new_cpu = sched_best_cpu(current); + if (new_cpu != smp_processor_id()) + sched_migrate_task(current, new_cpu); + } +} +#endif /* CONFIG_NUMA */ + #if CONFIG_SMP || CONFIG_PREEMPT /* * The 'big kernel lock' @@ -2231,6 +2351,10 @@ void __init sched_init(void) spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->migration_queue); atomic_set(&rq->nr_iowait, 0); +#if CONFIG_NUMA + rq->node_ptr = &node_nr_running[0]; +#endif /* CONFIG_NUMA */ + for (j = 0; j < 2; j++) { array = rq->arrays + j; diff -urpN -X /home/fletch/.diff.exclude 00-virgin/kernel/sysctl.c 90-mjb1/kernel/sysctl.c --- 00-virgin/kernel/sysctl.c Mon Dec 16 21:50:51 2002 +++ 90-mjb1/kernel/sysctl.c Mon Jan 13 23:20:20 2003 @@ -55,6 +55,15 @@ extern char core_pattern[]; extern int cad_pid; extern int pid_max; extern int sysctl_lower_zone_protection; +extern int min_timeslice; +extern int max_timeslice; +extern int child_penalty; +extern int parent_penalty; +extern int exit_weight; +extern int prio_bonus_ratio; +extern int interactive_delta; +extern int max_sleep_avg; +extern int starvation_limit; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -112,6 +121,7 @@ static struct ctl_table_header root_tabl static ctl_table kern_table[]; static ctl_table vm_table[]; +static ctl_table sched_table[]; #ifdef CONFIG_NET extern ctl_table net_table[]; #endif @@ -156,6 +166,7 @@ static ctl_table root_table[] = { {CTL_FS, "fs", NULL, 0, 0555, fs_table}, {CTL_DEBUG, "debug", NULL, 0, 0555, debug_table}, {CTL_DEV, "dev", NULL, 0, 0555, dev_table}, + {CTL_SCHED, "sched", NULL, 0, 0555, sched_table}, {0} }; @@ -358,7 +369,38 @@ static ctl_table debug_table[] = { static ctl_table dev_table[] = { {0} -}; +}; + +static ctl_table sched_table[] = { + {SCHED_MAX_TIMESLICE, "max_timeslice", &max_timeslice, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, + {SCHED_MIN_TIMESLICE, "min_timeslice", &min_timeslice, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, + {SCHED_CHILD_PENALTY, "child_penalty", &child_penalty, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_PARENT_PENALTY, "parent_penalty", &parent_penalty, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_EXIT_WEIGHT, "exit_weight", &exit_weight, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_PRIO_BONUS_RATIO, "prio_bonus_ratio", &prio_bonus_ratio, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_INTERACTIVE_DELTA, "interactive_delta", &interactive_delta, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_MAX_SLEEP_AVG, "max_sleep_avg", &max_sleep_avg, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, + {SCHED_STARVATION_LIMIT, "starvation_limit", &starvation_limit, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {0} +}; extern void init_irq_proc (void); diff -urpN -X /home/fletch/.diff.exclude 00-virgin/mm/memory.c 90-mjb1/mm/memory.c --- 00-virgin/mm/memory.c Mon Jan 13 21:09:28 2003 +++ 90-mjb1/mm/memory.c Mon Jan 13 23:20:10 2003 @@ -101,8 +101,7 @@ static inline void free_one_pmd(struct m static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir) { - int j; - pmd_t * pmd; + pmd_t * pmd, * md, * emd; if (pgd_none(*dir)) return; @@ -113,8 +112,21 @@ static inline void free_one_pgd(struct m } pmd = pmd_offset(dir, 0); pgd_clear(dir); - for (j = 0; j < PTRS_PER_PMD ; j++) - free_one_pmd(tlb, pmd+j); + /* + * Beware if changing the loop below. It once used int j, + * for (j = 0; j < PTRS_PER_PMD; j++) + * free_one_pmd(pmd+j); + * but some older i386 compilers (e.g. egcs-2.91.66, gcc-2.95.3) + * terminated the loop with a _signed_ address comparison + * using "jle", when configured for HIGHMEM64GB (X86_PAE). + * If also configured for 3GB of kernel virtual address space, + * if page at physical 0x3ffff000 virtual 0x7ffff000 is used as + * a pmd, when that mm exits the loop goes on to free "entries" + * found at 0x80000000 onwards. The loop below compiles instead + * to be terminated by unsigned address comparison using "jb". + */ + for (md = pmd, emd = pmd + PTRS_PER_PMD; md < emd; md++) + free_one_pmd(tlb,md); pmd_free_tlb(tlb, pmd); }