Index: linux-2.6.14/Documentation/vserver/debug.txt =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/Documentation/vserver/debug.txt 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,106 @@ + +debug_cvirt: + + 2 4 "vx_map_tgid: %p/%llx: %d -> %d" + "vx_rmap_tgid: %p/%llx: %d -> %d" + +debug_dlim: + + 0 1 "ALLOC (%p,#%d)%c inode (%d)" + "FREE (%p,#%d)%c inode" + 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)" + "FREE (%p,#%d)%c %lld bytes" + 2 4 "ADJUST: %lld,%lld on %d,%d [mult=%d]" + 3 8 "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d" + "ext3_has_free_blocks(%p): free=%u, root=%u" + "rcu_free_dl_info(%p)" + 4 10 "alloc_dl_info(%p,%d) = %p" + "dealloc_dl_info(%p)" + "get_dl_info(%p[#%d.%d])" + "put_dl_info(%p[#%d.%d])" + 5 20 "alloc_dl_info(%p,%d)*" + 6 40 "__hash_dl_info: %p[#%d]" + "__unhash_dl_info: %p[#%d]" + 7 80 "locate_dl_info(%p,#%d) = %p" + +debug_net: + + 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d" + 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d" + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d" + 4 10 "ip_route_connect(%p) %p,%p;%lx" + 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx" + 6 40 "sk,egf: %p [#%d] (from %d)" + "sk,egn: %p [#%d] (from %d)" + "sk,req: %p [#%d] (from %d)" + "sk: %p [#%d] (from %d)" + "tw: %p [#%d] (from %d)" + 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d" + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d" + +debug_nid: + + 0 1 "__lookup_nx_info(#%u): %p[#%u]" + "alloc_nx_info(%d) = %p" + "create_nx_info(%d) (dynamic rejected)" + "create_nx_info(%d) = %p (already there)" + "create_nx_info(%d) = %p (new)" + "dealloc_nx_info(%p)" + 1 2 "alloc_nx_info(%d)*" + "create_nx_info(%d)*" + 2 4 "get_nx_info(%p[#%d.%d])" + "put_nx_info(%p[#%d.%d])" + 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p" + "clr_nx_info(%p[#%d.%d])" + "init_nx_info(%p[#%d.%d])" + "release_nx_info(%p[#%d.%d.%d]) %p" + "set_nx_info(%p[#%d.%d])" + 4 10 "__hash_nx_info: %p[#%d]" + "__nx_dynamic_id: 
[#%d]" + "__unhash_nx_info: %p[#%d]" + 5 20 "moved task %p into nxi:%p[#%d]" + "nx_migrate_task(%p,%p[#%d.%d.%d])" + "task_get_nx_info(%p)" + +debug_switch: + + 0 1 "vc: VCMD_%02d_%d[%d], %d,%p,%d" + 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld)" + 4 10 "%s: (%s %s) returned %s with %d" + +debug_xid: + + 0 1 "__lookup_vx_info(#%u): %p[#%u]" + "alloc_vx_info(%d) = %p" + "alloc_vx_info(%d)*" + "create_vx_info(%d) (dynamic rejected)" + "create_vx_info(%d) = %p (already there)" + "create_vx_info(%d) = %p (new)" + "dealloc_vx_info(%p)" + 1 2 "create_vx_info(%d)*" + 2 4 "get_vx_info(%p[#%d.%d])" + "put_vx_info(%p[#%d.%d])" + 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p" + "clr_vx_info(%p[#%d.%d])" + "init_vx_info(%p[#%d.%d])" + "release_vx_info(%p[#%d.%d.%d]) %p" + "set_vx_info(%p[#%d.%d])" + 4 10 "__hash_vx_info: %p[#%d]" + "__unhash_vx_info: %p[#%d]" + "__vx_dynamic_id: [#%d]" + 5 20 "moved task %p into vxi:%p[#%d]" + "task_get_vx_info(%p)" + "vx_migrate_task(%p,%p[#%d.%d])" + 6 40 "vx_set_init(%p[#%d],%p[#%d,%d,%d])" + 7 80 "vx_parse_xid(»%s«): %d:#%d" + "vx_propagate_xid(%p[#%lu.%d]): %d,%d" + + +debug_limit: + + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s" + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d" + + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s" + "vx_acc_pages[%5d,%s,%2d]: %5d += %5d" + "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d" Index: linux-2.6.14/Makefile =================================================================== --- linux-2.6.14.orig/Makefile 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/Makefile 2005-10-31 11:05:45.000000000 -0600 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 14 -EXTRAVERSION = +EXTRAVERSION = -vs2.1.0-rc1 NAME=Affluent Albatross # *DOCUMENTATION* Index: linux-2.6.14/arch/alpha/Kconfig =================================================================== --- linux-2.6.14.orig/arch/alpha/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/alpha/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -609,6 +609,8 @@ source 
"arch/alpha/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/alpha/kernel/asm-offsets.c =================================================================== --- linux-2.6.14.orig/arch/alpha/kernel/asm-offsets.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/alpha/kernel/asm-offsets.c 2005-10-31 11:05:45.000000000 -0600 @@ -35,6 +35,7 @@ DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); DEFINE(SIGCHLD, SIGCHLD); BLANK(); Index: linux-2.6.14/arch/alpha/kernel/entry.S =================================================================== --- linux-2.6.14.orig/arch/alpha/kernel/entry.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/alpha/kernel/entry.S 2005-10-31 11:05:45.000000000 -0600 @@ -645,7 +645,7 @@ stq $2, 152($sp) /* HAE */ /* Shuffle FLAGS to the front; add CLONE_VM. */ - ldi $1, CLONE_VM|CLONE_UNTRACED + ldi $1, CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; or $18, $1, $16 bsr $26, sys_clone @@ -874,22 +874,15 @@ .globl sys_getxpid .ent sys_getxpid sys_getxpid: + lda $sp, -16($sp) + stq $26, 0($sp) .prologue 0 - ldq $2, TI_TASK($8) - /* See linux/kernel/timer.c sys_getppid for discussion - about this loop. 
*/ - ldq $3, TASK_REAL_PARENT($2) -1: ldl $1, TASK_TGID($3) -#ifdef CONFIG_SMP - mov $3, $4 - mb - ldq $3, TASK_REAL_PARENT($2) - cmpeq $3, $4, $4 - beq $4, 1b -#endif - stq $1, 80($sp) - ldl $0, TASK_TGID($2) + lda $16, 96($sp) + jsr $26, do_getxpid + ldq $26, 0($sp) + + lda $sp, 16($sp) ret .end sys_getxpid Index: linux-2.6.14/arch/alpha/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/alpha/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/alpha/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -289,6 +289,8 @@ read_unlock(&tasklist_lock); if (!child) goto out_notsk; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out; if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); Index: linux-2.6.14/arch/alpha/kernel/systbls.S =================================================================== --- linux-2.6.14.orig/arch/alpha/kernel/systbls.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/alpha/kernel/systbls.S 2005-10-31 11:05:45.000000000 -0600 @@ -447,7 +447,7 @@ .quad sys_stat64 /* 425 */ .quad sys_lstat64 .quad sys_fstat64 - .quad sys_ni_syscall /* sys_vserver */ + .quad sys_vserver /* sys_vserver */ .quad sys_ni_syscall /* sys_mbind */ .quad sys_ni_syscall /* sys_get_mempolicy */ .quad sys_ni_syscall /* sys_set_mempolicy */ Index: linux-2.6.14/arch/alpha/mm/init.c =================================================================== --- linux-2.6.14.orig/arch/alpha/mm/init.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/alpha/mm/init.c 2005-10-31 11:05:45.000000000 -0600 @@ -20,6 +20,7 @@ #include #include /* max_low_pfn */ #include +#include #include #include Index: linux-2.6.14/arch/arm/Kconfig =================================================================== --- linux-2.6.14.orig/arch/arm/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -739,6 +739,8 @@ source
"arch/arm/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/arm/kernel/calls.S =================================================================== --- linux-2.6.14.orig/arch/arm/kernel/calls.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm/kernel/calls.S 2005-10-31 11:05:45.000000000 -0600 @@ -327,7 +327,7 @@ /* 310 */ .long sys_request_key .long sys_keyctl .long sys_semtimedop -/* vserver */ .long sys_ni_syscall + .long sys_vserver .long sys_ioprio_set /* 315 */ .long sys_ioprio_get .long sys_inotify_init Index: linux-2.6.14/arch/arm/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/arm/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -436,7 +436,8 @@ regs.ARM_pc = (unsigned long)kernel_thread_helper; regs.ARM_cpsr = SVC_MODE; - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); Index: linux-2.6.14/arch/arm/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/arm/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -809,6 +809,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/arm26/Kconfig =================================================================== --- linux-2.6.14.orig/arch/arm26/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm26/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -232,6 +232,8 @@ source "arch/arm26/Kconfig.debug" +source "kernel/vserver/Kconfig" +
source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/arm26/kernel/calls.S =================================================================== --- linux-2.6.14.orig/arch/arm26/kernel/calls.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm26/kernel/calls.S 2005-10-31 11:05:45.000000000 -0600 @@ -257,6 +257,11 @@ .long sys_lremovexattr .long sys_fremovexattr .long sys_tkill + + .rept 313 - (. - __syscall_start) / 4 + .long sys_ni_syscall + .endr + .long sys_vserver /* 313 */ __syscall_end: .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 Index: linux-2.6.14/arch/arm26/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/arm26/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm26/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -369,7 +369,8 @@ regs.ARM_r3 = (unsigned long)do_exit; regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26; - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); Index: linux-2.6.14/arch/arm26/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/arm26/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/arm26/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -692,6 +692,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/cris/Kconfig =================================================================== --- linux-2.6.14.orig/arch/cris/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/cris/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -177,6 +177,8 @@ source "arch/cris/Kconfig.debug" +source
"kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/cris/arch-v10/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/cris/arch-v10/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/cris/arch-v10/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -104,7 +104,8 @@ regs.dccr = 1 << I_DCCR_BITNR; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* setup the child's kernel stack with a pt_regs and switch_stack on it. Index: linux-2.6.14/arch/cris/arch-v32/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/cris/arch-v32/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/cris/arch-v32/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -121,7 +121,8 @@ regs.ccs = 1 << (I_CCS_BITNR + CCS_SHIFT); /* Create the new process.
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* Index: linux-2.6.14/arch/frv/kernel/kernel_thread.S =================================================================== --- linux-2.6.14.orig/arch/frv/kernel/kernel_thread.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/frv/kernel/kernel_thread.S 2005-10-31 11:05:45.000000000 -0600 @@ -13,6 +13,8 @@ #include #define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_KTHREAD 0x10000000 /* kernel thread */ +#define CLONE_KT (CLONE_VM | CLONE_KTHREAD) /* kernel thread flags */ #define KERN_ERR "<3>" .section .rodata @@ -37,7 +39,7 @@ # start by forking the current process, but with shared VM setlos.p #__NR_clone,gr7 ; syscall number - ori gr10,#CLONE_VM,gr8 ; first syscall arg [clone_flags] + ori gr10,#CLONE_KT,gr8 ; first syscall arg [clone_flags] sethi.p #0xe4e4,gr9 ; second syscall arg [newsp] setlo #0xe4e4,gr9 setlos.p #0,gr10 ; third syscall arg [parent_tidptr] Index: linux-2.6.14/arch/h8300/Kconfig =================================================================== --- linux-2.6.14.orig/arch/h8300/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/h8300/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -191,6 +191,8 @@ source "arch/h8300/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/h8300/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/h8300/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/h8300/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -130,7 +130,7 @@ fs = get_fs(); set_fs (KERNEL_DS); - clone_arg = flags | CLONE_VM; + clone_arg = flags | CLONE_VM | CLONE_KTHREAD; __asm__("mov.l sp,er3\n\t" "sub.l er2,er2\n\t" "mov.l %2,er1\n\t" Index:
linux-2.6.14/arch/h8300/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/h8300/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/h8300/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -81,6 +81,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/i386/Kconfig =================================================================== --- linux-2.6.14.orig/arch/i386/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -561,6 +561,14 @@ depends on X86_VISWS default y +config HZ + int "Timer Frequency (100-10000)" + range 100 10000 + default "250" + help + This allows you to specify the frequency at which the + kernel timer interrupt will occur. + config X86_TSC bool depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ @@ -746,6 +754,46 @@ endchoice +choice + prompt "Memory Split User Space" + default SPLIT_3GB + help + A different Userspace/Kernel split allows you to + utilize up to almost 3GB of RAM without the requirement + for HIGHMEM. It also increases the available lowmem. + +config SPLIT_3GB + bool "3.0GB/1.0GB Kernel (Default)" + help + This is the default split of 3GB userspace to 1GB kernel + space, which will result in about 860MB of lowmem. + +config SPLIT_25GB + bool "2.5GB/1.5GB Kernel" + help + This split provides 2.5GB userspace and 1.5GB kernel + space, which will result in about 1370MB of lowmem. + +config SPLIT_2GB + bool "2.0GB/2.0GB Kernel" + help + This split provides 2GB userspace and 2GB kernel + space, which will result in about 1880MB of lowmem.
+ +config SPLIT_15GB + bool "1.5GB/2.5GB Kernel" + help + This split provides 1.5GB userspace and 2.5GB kernel + space, which will result in about 2390MB of lowmem. + +config SPLIT_1GB + bool "1.0GB/3.0GB Kernel" + help + This split provides 1GB userspace and 3GB kernel + space, which will result in about 2900MB of lowmem. + +endchoice + config HIGHMEM bool depends on HIGHMEM64G || HIGHMEM4G @@ -1299,6 +1347,8 @@ source "arch/i386/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/i386/boot/compressed/misc.c =================================================================== --- linux-2.6.14.orig/arch/i386/boot/compressed/misc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/boot/compressed/misc.c 2005-10-31 11:05:45.000000000 -0600 @@ -309,7 +309,7 @@ #else if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); #endif - output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ + output_data = (char *)PHYSICAL_START; /* Normally Points to 1M */ free_mem_end_ptr = (long)real_mode; } @@ -334,8 +334,8 @@ low_buffer_size = low_buffer_end - LOW_BUFFER_START; high_loaded = 1; free_mem_end_ptr = (long)high_buffer_start; - if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { - high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size); + if ((PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { + high_buffer_start = (uch *)(PHYSICAL_START + low_buffer_size); mv->hcount = 0; /* say: we need not to move high_buffer */ } else mv->hcount = -1; Index: linux-2.6.14/arch/i386/kernel/cpu/proc.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/cpu/proc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/cpu/proc.c 2005-10-31 11:05:45.000000000 -0600 @@ -129,8 +129,8 @@ seq_printf(m, " %s", x86_cap_flags[i]); seq_printf(m,
"\nbogomips\t: %lu.%02lu\n\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); + HZ*(c->loops_per_jiffy >> 3)/62500, + (HZ*(c->loops_per_jiffy >> 3)/625) % 100); return 0; } Index: linux-2.6.14/arch/i386/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -357,7 +357,8 @@ regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); Index: linux-2.6.14/arch/i386/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -383,6 +383,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/i386/kernel/setup.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/setup.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/setup.c 2005-10-31 11:05:45.000000000 -0600 @@ -1176,8 +1176,8 @@ * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area.
*/ - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); + reserve_bootmem(PHYSICAL_START, (PFN_PHYS(min_low_pfn) + + bootmap_size + PAGE_SIZE-1) - (PHYSICAL_START)); /* * reserve physical page 0 - it's a special BIOS page on many boxes, Index: linux-2.6.14/arch/i386/kernel/smpboot.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/smpboot.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/smpboot.c 2005-10-31 11:05:45.000000000 -0600 @@ -1207,8 +1207,8 @@ printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, - bogosum/(500000/HZ), - (bogosum/(5000/HZ))%100); + HZ*(bogosum >> 3)/62500, + (HZ*(bogosum >> 3)/625) % 100); Dprintk("Before bogocount - setting activated=1.\n"); Index: linux-2.6.14/arch/i386/kernel/sys_i386.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/sys_i386.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/sys_i386.c 2005-10-31 11:05:45.000000000 -0600 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -217,7 +218,7 @@ if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -225,6 +226,7 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -233,15 +235,16 @@ down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= 
__copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); Index: linux-2.6.14/arch/i386/kernel/syscall_table.S =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/syscall_table.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/syscall_table.S 2005-10-31 11:05:45.000000000 -0600 @@ -273,7 +273,7 @@ .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver .long sys_mbind .long sys_get_mempolicy .long sys_set_mempolicy Index: linux-2.6.14/arch/i386/kernel/traps.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/traps.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/i386/kernel/traps.c 2005-10-31 11:05:45.000000000 -0600 @@ -53,6 +53,7 @@ #include #include +#include #include "mach_traps.h" @@ -307,6 +308,8 @@ }; static int die_counter; + vxh_throw_oops(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irq(&die.lock); @@ -333,8 +336,9 @@ #endif if (nl) printk("\n"); - notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); + notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); show_registers(regs); + vxh_dump_history(); } else printk(KERN_ERR 
"Recursive die() failure, output suppressed\n"); Index: linux-2.6.14/arch/ia64/Kconfig =================================================================== --- linux-2.6.14.orig/arch/ia64/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -408,6 +408,8 @@ source "arch/ia64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/ia64/ia32/binfmt_elf32.c =================================================================== --- linux-2.6.14.orig/arch/ia64/ia32/binfmt_elf32.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/ia32/binfmt_elf32.c 2005-10-31 11:05:45.000000000 -0600 @@ -199,7 +199,7 @@ int ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i, ret; @@ -236,7 +236,8 @@ kmem_cache_free(vm_area_cachep, mpnt); return ret; } - current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(current->mm, current->mm->total_vm - vma_pages(mpnt)); + current->mm->stack_vm = current->mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { Index: linux-2.6.14/arch/ia64/ia32/ia32_entry.S =================================================================== --- linux-2.6.14.orig/arch/ia64/ia32/ia32_entry.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/ia32/ia32_entry.S 2005-10-31 11:05:45.000000000 -0600 @@ -483,7 +483,7 @@ data8 sys_tgkill /* 270 */ data8 compat_sys_utimes data8 sys32_fadvise64_64 - data8 sys_ni_syscall + data8 sys32_vserver data8 sys_ni_syscall data8 sys_ni_syscall /* 275 */ data8 sys_ni_syscall Index: linux-2.6.14/arch/ia64/kernel/asm-offsets.c =================================================================== --- linux-2.6.14.orig/arch/ia64/kernel/asm-offsets.c 2005-10-27 19:02:08.000000000 -0500 +++ 
linux-2.6.14/arch/ia64/kernel/asm-offsets.c 2005-10-31 11:05:45.000000000 -0600 @@ -192,6 +192,7 @@ /* for assembly files which can't include sched.h: */ DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); DEFINE(IA64_CLONE_VM, CLONE_VM); + DEFINE(IA64_CLONE_KTHREAD, CLONE_KTHREAD); BLANK(); DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, Index: linux-2.6.14/arch/ia64/kernel/entry.S =================================================================== --- linux-2.6.14.orig/arch/ia64/kernel/entry.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/kernel/entry.S 2005-10-31 11:05:45.000000000 -0600 @@ -1589,7 +1589,7 @@ data8 sys_mq_notify data8 sys_mq_getsetattr data8 sys_ni_syscall // reserved for kexec_load - data8 sys_ni_syscall // reserved for vserver + data8 sys_vserver data8 sys_waitid // 1270 data8 sys_add_key data8 sys_request_key Index: linux-2.6.14/arch/ia64/kernel/perfmon.c =================================================================== --- linux-2.6.14.orig/arch/ia64/kernel/perfmon.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/kernel/perfmon.c 2005-10-31 11:05:45.000000000 -0600 @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include @@ -2351,7 +2353,7 @@ */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; + vx_vmpages_add(mm, size >> PAGE_SHIFT); vm_stat_account(vma); up_write(&task->mm->mmap_sem); Index: linux-2.6.14/arch/ia64/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/ia64/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -683,7 +683,8 @@ regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR); regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET; regs.sw.pr = (1 << PRED_KERNEL_STACK); - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED |
CLONE_KTHREAD, + 0, &regs.pt, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); Index: linux-2.6.14/arch/ia64/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/ia64/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -1456,6 +1456,9 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; Index: linux-2.6.14/arch/ia64/mm/fault.c =================================================================== --- linux-2.6.14.orig/arch/ia64/mm/fault.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ia64/mm/fault.c 2005-10-31 11:05:45.000000000 -0600 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -35,12 +36,17 @@ grow = PAGE_SIZE >> PAGE_SHIFT; if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur - || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur)) + || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > + current->signal->rlim[RLIMIT_AS].rlim_cur)) + return -ENOMEM; + if (!vx_vmpages_avail(vma->vm_mm, grow) || + ((vma->vm_flags & VM_LOCKED) && + !vx_vmlocked_avail(vma->vm_mm, grow))) return -ENOMEM; vma->vm_end += PAGE_SIZE; - vma->vm_mm->total_vm += grow; + vx_vmpages_add(vma->vm_mm, grow); if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; + vx_vmlocked_add(vma->vm_mm, grow); __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow); return 0; } Index: linux-2.6.14/arch/m32r/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/m32r/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/m32r/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -202,8 +202,8 @@ regs.psw = M32R_PSW_BIE; /* Ok,
create the new process. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, - NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* Index: linux-2.6.14/arch/m68k/Kconfig =================================================================== --- linux-2.6.14.orig/arch/m68k/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/m68k/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -672,6 +672,8 @@ source "arch/m68k/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/m68k/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/m68k/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/m68k/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -155,7 +155,8 @@ { register long retval __asm__ ("d0"); - register long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED; + register long clone_arg __asm__ ("d1") = + flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; retval = __NR_clone; __asm__ __volatile__ Index: linux-2.6.14/arch/m68k/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/m68k/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/m68k/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -143,10 +143,13 @@ if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (unlikely(!child)) { - ret = -ESRCH; + + ret = -ESRCH; + if (unlikely(!child)) goto out; - } + + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; /* you may not mess with init */ if (unlikely(pid == 1)) { Index: linux-2.6.14/arch/m68knommu/Kconfig =================================================================== --- linux-2.6.14.orig/arch/m68knommu/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/m68knommu/Kconfig 2005-10-31
11:05:45.000000000 -0600 @@ -635,6 +635,8 @@ source "arch/m68knommu/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/m68knommu/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/m68knommu/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/m68knommu/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -115,7 +115,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { int retval; - long clone_arg = flags | CLONE_VM; + long clone_arg = flags | CLONE_VM | CLONE_KTHREAD; mm_segment_t fs; fs = get_fs(); Index: linux-2.6.14/arch/m68knommu/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/m68knommu/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/m68knommu/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -125,6 +125,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/mips/Kconfig =================================================================== --- linux-2.6.14.orig/arch/mips/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -1732,6 +1732,8 @@ source "arch/mips/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/mips/kernel/linux32.c =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/linux32.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/linux32.c 2005-10-31 11:05:45.000000000 -0600 @@ -1199,7 +1199,7 @@ int ret = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (copy_to_user(name, 
vx_new_utsname(), sizeof *name)) ret = -EFAULT; up_read(&uts_sem); Index: linux-2.6.14/arch/mips/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -208,7 +208,8 @@ #endif /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } struct mips_frame_info { Index: linux-2.6.14/arch/mips/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -78,6 +78,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/mips/kernel/scall32-o32.S =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/scall32-o32.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/scall32-o32.S 2005-10-31 11:05:45.000000000 -0600 @@ -617,7 +617,7 @@ sys sys_mq_timedreceive 5 sys sys_mq_notify 2 /* 4275 */ sys sys_mq_getsetattr 3 - sys sys_ni_syscall 0 /* sys_vserver */ + sys sys_vserver 3 sys sys_waitid 4 sys sys_ni_syscall 0 /* available, was setaltroot */ sys sys_add_key 5 Index: linux-2.6.14/arch/mips/kernel/scall64-64.S =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/scall64-64.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/scall64-64.S 2005-10-31 11:05:45.000000000 -0600 @@ -443,7 +443,7 @@ PTR sys_mq_timedreceive PTR sys_mq_notify PTR sys_mq_getsetattr
/* 5235 */ - PTR sys_ni_syscall /* sys_vserver */ + PTR sys_vserver PTR sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key Index: linux-2.6.14/arch/mips/kernel/scall64-n32.S =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/scall64-n32.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/scall64-n32.S 2005-10-31 11:05:45.000000000 -0600 @@ -357,7 +357,7 @@ PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* 6240, sys_vserver */ + PTR sys32_vserver /* 6240 */ PTR sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key Index: linux-2.6.14/arch/mips/kernel/scall64-o32.S =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/scall64-o32.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/scall64-o32.S 2005-10-31 11:05:45.000000000 -0600 @@ -479,7 +479,7 @@ PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify /* 4275 */ PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* sys_vserver */ + PTR sys32_vserver PTR sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key /* 4280 */ Index: linux-2.6.14/arch/mips/kernel/syscall.c =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/syscall.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/syscall.c 2005-10-31 11:05:45.000000000 -0600 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -210,7 +211,7 @@ */ asmlinkage int sys_uname(struct old_utsname * name) { - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) return 0; return -EFAULT; } @@ -221,21 +222,23 @@ asmlinkage int sys_olduname(struct oldold_utsname * name) { int error; + struct new_utsname *ptr; if (!name) return 
-EFAULT; if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) return -EFAULT; - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); error = error ? 
-EFAULT : 0; @@ -261,10 +264,10 @@ return -EFAULT; down_write(&uts_sem); - strncpy(system_utsname.nodename, nodename, len); + strncpy(vx_new_uts(nodename), nodename, len); nodename[__NEW_UTS_LEN] = '\0'; - strlcpy(system_utsname.nodename, nodename, - sizeof(system_utsname.nodename)); + strlcpy(vx_new_uts(nodename), nodename, + sizeof(vx_new_uts(nodename))); up_write(&uts_sem); return 0; } Index: linux-2.6.14/arch/mips/kernel/sysirix.c =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/sysirix.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/mips/kernel/sysirix.c 2005-10-31 11:05:45.000000000 -0600 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/arch/parisc/Kconfig =================================================================== --- linux-2.6.14.orig/arch/parisc/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/parisc/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -204,6 +204,8 @@ source "arch/parisc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/parisc/kernel/entry.S =================================================================== --- linux-2.6.14.orig/arch/parisc/kernel/entry.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/parisc/kernel/entry.S 2005-10-31 11:05:45.000000000 -0600 @@ -753,6 +753,7 @@ #define CLONE_VM 0x100 /* Must agree with */ #define CLONE_UNTRACED 0x00800000 +#define CLONE_KTHREAD 0x10000000 .export __kernel_thread, code .import do_fork Index: linux-2.6.14/arch/parisc/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/parisc/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/parisc/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -197,7 +197,7 @@ * kernel_thread can become a #define. 
*/ - return __kernel_thread(fn, arg, flags); + return __kernel_thread(fn, arg, flags | CLONE_KTHREAD); } EXPORT_SYMBOL(kernel_thread); Index: linux-2.6.14/arch/parisc/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/parisc/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/parisc/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -111,6 +111,9 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; Index: linux-2.6.14/arch/parisc/kernel/sys_parisc32.c =================================================================== --- linux-2.6.14.orig/arch/parisc/kernel/sys_parisc32.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/parisc/kernel/sys_parisc32.c 2005-10-31 11:05:45.000000000 -0600 @@ -657,6 +657,7 @@ do { seq = read_seqbegin(&xtime_lock); + /* FIXME requires vx virtualization */ val.uptime = jiffies / HZ; val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); Index: linux-2.6.14/arch/parisc/kernel/syscall_table.S =================================================================== --- linux-2.6.14.orig/arch/parisc/kernel/syscall_table.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/parisc/kernel/syscall_table.S 2005-10-31 11:05:45.000000000 -0600 @@ -368,5 +368,6 @@ ENTRY_COMP(mbind) /* 260 */ ENTRY_COMP(get_mempolicy) ENTRY_COMP(set_mempolicy) + ENTRY_DIFF(vserver) /* Nothing yet */ Index: linux-2.6.14/arch/ppc/Kconfig =================================================================== --- linux-2.6.14.orig/arch/ppc/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -1372,6 +1372,8 @@ source "arch/ppc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: 
linux-2.6.14/arch/ppc/boot/include/serial.h =================================================================== --- linux-2.6.14.orig/arch/ppc/boot/include/serial.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc/boot/include/serial.h 2005-10-31 11:05:45.000000000 -0600 @@ -36,8 +36,8 @@ int count; u8 *iomem_base; u16 iomem_reg_shift; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ + unsigned int close_delay; + unsigned int closing_wait; /* time to wait before closing */ unsigned long icount; int io_type; void *info; Index: linux-2.6.14/arch/ppc/kernel/asm-offsets.c =================================================================== --- linux-2.6.14.orig/arch/ppc/kernel/asm-offsets.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc/kernel/asm-offsets.c 2005-10-31 11:05:45.000000000 -0600 @@ -121,6 +121,7 @@ DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); /* About the CPU features table */ Index: linux-2.6.14/arch/ppc/kernel/misc.S =================================================================== --- linux-2.6.14.orig/arch/ppc/kernel/misc.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc/kernel/misc.S 2005-10-31 11:05:45.000000000 -0600 @@ -1135,7 +1135,7 @@ mr r30,r3 /* function */ mr r31,r4 /* argument */ ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,CLONE_UNTRACED>>16 + oris r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ li r0,__NR_clone sc @@ -1434,7 +1434,7 @@ .long ppc_fadvise64_64 .long sys_ni_syscall /* 255 - rtas (used on ppc64) */ .long sys_debug_setcontext - .long sys_ni_syscall /* 257 reserved for vserver */ + .long sys_vserver .long sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ .long sys_ni_syscall /* 259 reserved for new sys_mbind */ .long 
sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ Index: linux-2.6.14/arch/ppc/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/ppc/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -266,6 +266,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/ppc/kernel/syscalls.c =================================================================== --- linux-2.6.14.orig/arch/ppc/kernel/syscalls.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc/kernel/syscalls.c 2005-10-31 11:05:45.000000000 -0600 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -225,7 +226,7 @@ int err = -EFAULT; down_read(&uts_sem); - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) err = 0; up_read(&uts_sem); return err; @@ -234,6 +235,7 @@ int sys_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -241,15 +243,16 @@ return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= 
__copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); Index: linux-2.6.14/arch/ppc64/Kconfig =================================================================== --- linux-2.6.14.orig/arch/ppc64/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc64/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -465,6 +465,8 @@ source "arch/ppc64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" config KEYS_COMPAT Index: linux-2.6.14/arch/ppc64/kernel/asm-offsets.c =================================================================== --- linux-2.6.14.orig/arch/ppc64/kernel/asm-offsets.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc64/kernel/asm-offsets.c 2005-10-31 11:05:45.000000000 -0600 @@ -163,6 +163,7 @@ DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); /* About the CPU features table */ DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec)); Index: linux-2.6.14/arch/ppc64/kernel/misc.S =================================================================== --- linux-2.6.14.orig/arch/ppc64/kernel/misc.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc64/kernel/misc.S 2005-10-31 11:05:45.000000000 -0600 @@ -659,7 +659,7 @@ mr r29,r3 mr r30,r4 ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,(CLONE_UNTRACED>>16) + oris r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ li r0,__NR_clone sc @@ -1209,7 +1209,7 @@ .llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ .llong .ppc_rtas /* 255 */ .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 
reserved for vserver */ + .llong .sys32_vserver .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ .llong .compat_sys_mbind .llong .compat_sys_get_mempolicy /* 260 */ @@ -1490,7 +1490,7 @@ .llong .sys_ni_syscall /* 32bit only fadvise64_64 */ .llong .ppc_rtas /* 255 */ .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 reserved for vserver */ + .llong .sys_vserver .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ .llong .sys_mbind .llong .sys_get_mempolicy /* 260 */ Index: linux-2.6.14/arch/ppc64/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/ppc64/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc64/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -79,6 +79,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/ppc64/kernel/sys_ppc32.c =================================================================== --- linux-2.6.14.orig/arch/ppc64/kernel/sys_ppc32.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/ppc64/kernel/sys_ppc32.c 2005-10-31 11:05:45.000000000 -0600 @@ -1037,7 +1037,7 @@ int err = 0; down_read(&uts_sem); - if (copy_to_user(name, &system_utsname, sizeof(*name))) + if (copy_to_user(name, vx_new_utsname(), sizeof(*name))) err = -EFAULT; up_read(&uts_sem); if (!err && personality(current->personality) == PER_LINUX32) { @@ -1052,20 +1052,22 @@ asmlinkage int sys32_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = 
__copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); if (personality(current->personality) == PER_LINUX32) { /* change "ppc64" to "ppc" */ Index: linux-2.6.14/arch/s390/Kconfig =================================================================== --- linux-2.6.14.orig/arch/s390/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/s390/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -485,6 +485,8 @@ source "arch/s390/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/s390/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/s390/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/s390/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -180,7 +180,7 @@ regs.orig_gpr2 = -1; /* Ok, create the new process.. 
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, 0, &regs, 0, NULL, NULL); } Index: linux-2.6.14/arch/s390/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/s390/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/s390/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -738,9 +738,11 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = do_ptrace(child, request, addr, data); - +out_tsk: put_task_struct(child); out: unlock_kernel(); Index: linux-2.6.14/arch/s390/kernel/syscalls.S =================================================================== --- linux-2.6.14.orig/arch/s390/kernel/syscalls.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/s390/kernel/syscalls.S 2005-10-31 11:05:45.000000000 -0600 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys32_vserver) SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) Index: linux-2.6.14/arch/sh/Kconfig =================================================================== --- linux-2.6.14.orig/arch/sh/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sh/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -798,6 +798,8 @@ source "arch/sh/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/sh/kernel/kgdb_stub.c =================================================================== ---
linux-2.6.14.orig/arch/sh/kernel/kgdb_stub.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sh/kernel/kgdb_stub.c 2005-10-31 11:05:45.000000000 -0600 @@ -412,7 +412,7 @@ if (pid == PID_MAX) pid = 0; /* First check via PID */ - thread = find_task_by_pid(pid); + thread = find_task_by_real_pid(pid); if (thread) return thread; Index: linux-2.6.14/arch/sh/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/sh/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sh/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -177,7 +177,8 @@ regs.sr = (1 << 30); /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* Index: linux-2.6.14/arch/sh/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/sh/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sh/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -108,6 +108,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/sh64/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/sh64/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sh64/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -643,7 +643,7 @@ static __inline__ _syscall2(int,clone,unsigned long,flags,unsigned long,newsp) static __inline__ _syscall1(int,exit,int,ret) - reply = clone(flags | CLONE_VM, 0); + reply = clone(flags | CLONE_VM | CLONE_KTHREAD, 0); if (!reply) { /* Child */ reply = exit(fn(arg)); Index: linux-2.6.14/arch/sparc/Kconfig
=================================================================== --- linux-2.6.14.orig/arch/sparc/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -276,6 +276,8 @@ source "arch/sparc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/sparc/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/sparc/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -707,7 +707,8 @@ /* Notreached by child. */ "1: mov %%o0, %0\n\t" : "=r" (retval) : - "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), + "i" (__NR_clone), "r" (flags | + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD), "i" (__NR_exit), "r" (fn), "r" (arg) : "g1", "g2", "g3", "o0", "o1", "memory", "cc"); return retval; Index: linux-2.6.14/arch/sparc/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/sparc/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -322,6 +322,10 @@ pt_error_return(regs, ESRCH); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { Index: linux-2.6.14/arch/sparc/kernel/sys_sparc.c =================================================================== --- linux-2.6.14.orig/arch/sparc/kernel/sys_sparc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc/kernel/sys_sparc.c 2005-10-31 11:05:45.000000000 -0600 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -470,13 +471,13 @@ down_read(&uts_sem); - nlen = 
strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if (copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: Index: linux-2.6.14/arch/sparc/kernel/systbls.S =================================================================== --- linux-2.6.14.orig/arch/sparc/kernel/systbls.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc/kernel/systbls.S 2005-10-31 11:05:45.000000000 -0600 @@ -72,7 +72,7 @@ /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl /*255*/ .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .long sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl Index: linux-2.6.14/arch/sparc64/Kconfig =================================================================== --- linux-2.6.14.orig/arch/sparc64/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc64/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -381,6 +381,8 @@ source "arch/sparc64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/sparc64/kernel/binfmt_aout32.c =================================================================== --- linux-2.6.14.orig/arch/sparc64/kernel/binfmt_aout32.c 2005-10-27 19:02:08.000000000 -0500 +++ 
linux-2.6.14/arch/sparc64/kernel/binfmt_aout32.c 2005-10-31 11:05:45.000000000 -0600 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/arch/sparc64/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/sparc64/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc64/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -701,7 +701,8 @@ /* Notreached by child. */ "1:" : "=r" (retval) : - "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), + "i" (__NR_clone), "r" (flags | + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD), "i" (__NR_exit), "r" (fn), "r" (arg) : "g1", "g2", "g3", "o0", "o1", "memory", "cc"); return retval; Index: linux-2.6.14/arch/sparc64/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/sparc64/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc64/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -233,6 +233,10 @@ pt_error_return(regs, ESRCH); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { Index: linux-2.6.14/arch/sparc64/kernel/sys_sparc.c =================================================================== --- linux-2.6.14.orig/arch/sparc64/kernel/sys_sparc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc64/kernel/sys_sparc.c 2005-10-31 11:05:45.000000000 -0600 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -476,13 +477,13 @@ down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if 
(copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: Index: linux-2.6.14/arch/sparc64/kernel/systbls.S =================================================================== --- linux-2.6.14.orig/arch/sparc64/kernel/systbls.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc64/kernel/systbls.S 2005-10-31 11:05:45.000000000 -0600 @@ -73,7 +73,7 @@ /*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl .word sys_ni_syscall, sys32_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys32_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl @@ -139,7 +139,7 @@ /*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl .word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_nis_syscall, sys_add_key, sys_request_key, sys_keyctl Index: 
linux-2.6.14/arch/sparc64/solaris/fs.c =================================================================== --- linux-2.6.14.orig/arch/sparc64/solaris/fs.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/sparc64/solaris/fs.c 2005-10-31 11:05:45.000000000 -0600 @@ -362,7 +362,7 @@ int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if (mnt->mnt_flags & MNT_NOSUID) i |= 2; if (!sysv_valid_dev(inode->i_sb->s_dev)) return -EOVERFLOW; @@ -398,7 +398,7 @@ int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if (mnt->mnt_flags & MNT_NOSUID) i |= 2; if (!sysv_valid_dev(inode->i_sb->s_dev)) return -EOVERFLOW; Index: linux-2.6.14/arch/um/Kconfig =================================================================== --- linux-2.6.14.orig/arch/um/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/um/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -293,6 +293,8 @@ source "fs/Kconfig" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/um/kernel/process_kern.c =================================================================== --- linux-2.6.14.orig/arch/um/kernel/process_kern.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/um/kernel/process_kern.c 2005-10-31 11:05:45.000000000 -0600 @@ -23,6 +23,8 @@ #include "linux/proc_fs.h" #include "linux/ptrace.h" #include "linux/random.h" +#include "linux/vs_cvirt.h" + #include "asm/unistd.h" #include "asm/mman.h" #include "asm/segment.h" @@ -97,7 +99,7 @@ current->thread.request.u.thread.proc = fn; current->thread.request.u.thread.arg = arg; - pid = do_fork(CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD | flags, 0, + pid = do_fork(CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD | flags, 0, &current->thread.regs, 0, NULL, NULL); if(pid < 0) panic("do_fork failed in kernel_thread, errno = %d", pid); Index: linux-2.6.14/arch/um/kernel/ptrace.c
=================================================================== --- linux-2.6.14.orig/arch/um/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/um/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -72,6 +72,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/um/kernel/syscall_kern.c =================================================================== --- linux-2.6.14.orig/arch/um/kernel/syscall_kern.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/um/kernel/syscall_kern.c 2005-10-31 11:05:45.000000000 -0600 @@ -15,6 +15,8 @@ #include "linux/unistd.h" #include "linux/slab.h" #include "linux/utime.h" +#include + #include "asm/mman.h" #include "asm/uaccess.h" #include "kern_util.h" @@ -110,7 +112,7 @@ if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -118,6 +120,7 @@ long sys_olduname(struct oldold_utsname * name) { long error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -126,19 +129,20 @@ down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname, + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname, __OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename, + error |= __copy_to_user(&name->nodename,ptr->nodename, __OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release, + error |= __copy_to_user(&name->release,ptr->release, __OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version, + error |= 
__copy_to_user(&name->version,ptr->version, __OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine, + error |= __copy_to_user(&name->machine,ptr->machine, __OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); Index: linux-2.6.14/arch/v850/Kconfig =================================================================== --- linux-2.6.14.orig/arch/v850/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/v850/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -311,6 +311,8 @@ source "arch/v850/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/v850/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/v850/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/v850/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -77,7 +77,7 @@ /* Clone this thread. Note that we don't pass the clone syscall's second argument -- it's ignored for calls from kernel mode (the child's SP is always set to the top of the kernel stack). 
*/ - arg0 = flags | CLONE_VM; + arg0 = flags | CLONE_VM | CLONE_KTHREAD; syscall = __NR_clone; asm volatile ("trap " SYSCALL_SHORT_TRAP : "=r" (ret), "=r" (syscall) Index: linux-2.6.14/arch/v850/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/v850/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/v850/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -139,6 +139,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; rval = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/x86_64/Kconfig =================================================================== --- linux-2.6.14.orig/arch/x86_64/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -536,6 +536,8 @@ source "arch/x86_64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" Index: linux-2.6.14/arch/x86_64/ia32/ia32_aout.c =================================================================== --- linux-2.6.14.orig/arch/x86_64/ia32/ia32_aout.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/ia32/ia32_aout.c 2005-10-31 11:05:45.000000000 -0600 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/arch/x86_64/ia32/ia32_binfmt.c =================================================================== --- linux-2.6.14.orig/arch/x86_64/ia32/ia32_binfmt.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/ia32/ia32_binfmt.c 2005-10-31 11:05:45.000000000 -0600 @@ -337,7 +337,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i, ret; @@ -374,7 +374,8 @@ 
kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { Index: linux-2.6.14/arch/x86_64/ia32/ia32entry.S =================================================================== --- linux-2.6.14.orig/arch/x86_64/ia32/ia32entry.S 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/ia32/ia32entry.S 2005-10-31 11:05:45.000000000 -0600 @@ -622,7 +622,7 @@ .quad sys_tgkill /* 270 */ .quad compat_sys_utimes .quad sys32_fadvise64_64 - .quad quiet_ni_syscall /* sys_vserver */ + .quad sys32_vserver .quad sys_mbind .quad compat_sys_get_mempolicy /* 275 */ .quad sys_set_mempolicy Index: linux-2.6.14/arch/x86_64/ia32/sys_ia32.c =================================================================== --- linux-2.6.14.orig/arch/x86_64/ia32/sys_ia32.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/ia32/sys_ia32.c 2005-10-31 11:05:45.000000000 -0600 @@ -860,6 +860,7 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -868,13 +869,14 @@ down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); __put_user(0,name->sysname+__OLD_UTS_LEN); - __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); __put_user(0,name->nodename+__OLD_UTS_LEN); - __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); __put_user(0,name->release+__OLD_UTS_LEN); - __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); __put_user(0,name->version+__OLD_UTS_LEN); { char 
*arch = "x86_64"; @@ -897,7 +899,7 @@ if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); Index: linux-2.6.14/arch/x86_64/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/x86_64/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -53,7 +53,8 @@ asmlinkage extern void ret_from_fork(void); -unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; +unsigned long kernel_thread_flags = + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; static atomic_t hlt_counter = ATOMIC_INIT(0); Index: linux-2.6.14/arch/x86_64/kernel/ptrace.c =================================================================== --- linux-2.6.14.orig/arch/x86_64/kernel/ptrace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/kernel/ptrace.c 2005-10-31 11:05:45.000000000 -0600 @@ -342,6 +342,8 @@ read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ Index: linux-2.6.14/arch/x86_64/kernel/sys_x86_64.c =================================================================== --- linux-2.6.14.orig/arch/x86_64/kernel/sys_x86_64.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/x86_64/kernel/sys_x86_64.c 2005-10-31 11:05:45.000000000 -0600 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -148,7 +149,7 @@ { int err; down_read(&uts_sem); - err = copy_to_user(name, &system_utsname, sizeof (*name)); + err = copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= 
copy_to_user(&name->machine, "i686", 5); Index: linux-2.6.14/arch/xtensa/kernel/process.c =================================================================== --- linux-2.6.14.orig/arch/xtensa/kernel/process.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/arch/xtensa/kernel/process.c 2005-10-31 11:05:45.000000000 -0600 @@ -203,7 +203,7 @@ :"=r" (retval) :"i" (__NR_clone), "i" (__NR_exit), "r" (arg), "r" (fn), - "r" (flags | CLONE_VM) + "r" (flags | CLONE_VM | CLONE_KTHREAD) : "a2", "a3", "a4", "a5", "a6" ); return retval; } Index: linux-2.6.14/drivers/acpi/osl.c =================================================================== --- linux-2.6.14.orig/drivers/acpi/osl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/acpi/osl.c 2005-10-31 11:05:45.000000000 -0600 @@ -834,7 +834,7 @@ // TODO: A better timeout algorithm? { int i = 0; - static const int quantum_ms = 1000 / HZ; + static const int quantum_ms = (HZ>1000)?1:(1000/HZ); ret = down_trylock(sem); for (i = timeout; (i > 0 && ret < 0); i -= quantum_ms) { Index: linux-2.6.14/drivers/block/Kconfig =================================================================== --- linux-2.6.14.orig/drivers/block/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/block/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -315,6 +315,12 @@ instead, which can be configured to be on-disk compatible with the cryptoloop device. +config BLK_DEV_VROOT + tristate "Virtual Root device support" + ---help--- + Saying Y here will allow you to use quota/fs ioctls on a shared + partition within a virtual server without compromising security. 
+ config BLK_DEV_NBD tristate "Network block device support" depends on NET Index: linux-2.6.14/drivers/block/Makefile =================================================================== --- linux-2.6.14.orig/drivers/block/Makefile 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/block/Makefile 2005-10-31 11:05:45.000000000 -0600 @@ -44,4 +44,5 @@ obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o Index: linux-2.6.14/drivers/block/floppy.c =================================================================== --- linux-2.6.14.orig/drivers/block/floppy.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/block/floppy.c 2005-10-31 11:05:45.000000000 -0600 @@ -4086,7 +4086,7 @@ FD_SILENT_DCL_CLEAR; } else { default_drive_params[i].params.select_delay = - 2 * HZ / 100; + SEL_DLY; default_drive_params[i].params.flags &= ~FD_SILENT_DCL_CLEAR; } Index: linux-2.6.14/drivers/block/vroot.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/drivers/block/vroot.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,282 @@ +/* + * linux/drivers/block/vroot.c + * + * written by Herbert Pötzl, 9/11/2002 + * ported to 2.6.10 by Herbert Pötzl, 30/12/2004 + * + * based on the loop.c code by Theodore Ts'o. + * + * Copyright (C) 2002-2005 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +static int max_vroot = 8; + +static struct vroot_device *vroot_dev; +static struct gendisk **disks; + + +static int vroot_set_dev( + struct vroot_device *vr, + struct file *vr_file, + struct block_device *bdev, + unsigned int arg) +{ + struct block_device *real_bdev; + struct file *file; + struct inode *inode; + int error; + + error = -EBUSY; + if (vr->vr_state != Vr_unbound) + goto out; + + error = -EBADF; + file = fget(arg); + if (!file) + goto out; + + error = -EINVAL; + inode = file->f_dentry->d_inode; + + + if (S_ISBLK(inode->i_mode)) { + real_bdev = inode->i_bdev; + vr->vr_device = real_bdev; + __iget(real_bdev->bd_inode); + } else + goto out_fput; + + vxdprintk(VXD_CBIT(misc, 0), + "vroot[%d]_set_dev: dev=%p[%d,%d]\n", + vr->vr_number, real_bdev, + imajor(real_bdev->bd_inode), iminor(real_bdev->bd_inode)); + + vr->vr_state = Vr_bound; + error = 0; + + out_fput: + fput(file); + out: + return error; +} + +static int vroot_clr_dev( + struct vroot_device *vr, + struct file *vr_file, + struct block_device *bdev) +{ + struct block_device *real_bdev; + + if (vr->vr_state != Vr_bound) + return -ENXIO; + if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */ + return -EBUSY; + + real_bdev = vr->vr_device; + + vxdprintk(VXD_CBIT(misc, 0), + "vroot[%d]_clr_dev: dev=%p[%d,%d]\n", + vr->vr_number, real_bdev, + imajor(real_bdev->bd_inode), iminor(real_bdev->bd_inode)); + + bdput(real_bdev); + vr->vr_state = Vr_unbound; + vr->vr_device = NULL; + return 0; +} + + +static int vr_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + struct vroot_device *vr = inode->i_bdev->bd_disk->private_data; + int err; + + down(&vr->vr_ctl_mutex); + switch (cmd) { + case VROOT_SET_DEV: + err = vroot_set_dev(vr, file, inode->i_bdev, arg); + break; + case VROOT_CLR_DEV: + err = vroot_clr_dev(vr, file, inode->i_bdev); + break; + default: + err 
= -EINVAL; + break; + } + up(&vr->vr_ctl_mutex); + return err; +} + +static int vr_open(struct inode *inode, struct file *file) +{ + struct vroot_device *vr = inode->i_bdev->bd_disk->private_data; + + down(&vr->vr_ctl_mutex); + vr->vr_refcnt++; + up(&vr->vr_ctl_mutex); + return 0; +} + +static int vr_release(struct inode *inode, struct file *file) +{ + struct vroot_device *vr = inode->i_bdev->bd_disk->private_data; + + down(&vr->vr_ctl_mutex); + --vr->vr_refcnt; + up(&vr->vr_ctl_mutex); + return 0; +} + +static struct block_device_operations vr_fops = { + .owner = THIS_MODULE, + .open = vr_open, + .release = vr_release, + .ioctl = vr_ioctl, +}; + +struct block_device *vroot_get_real_bdev(struct block_device *bdev) +{ + struct inode *inode = bdev->bd_inode; + struct vroot_device *vr; + struct block_device *real_bdev; + int minor = iminor(inode); + + vr = &vroot_dev[minor]; + real_bdev = vr->vr_device; + + vxdprintk(VXD_CBIT(misc, 0), + "vroot[%d]_get_real_bdev: dev=%p[%p,%d,%d]\n", + vr->vr_number, real_bdev, real_bdev->bd_inode, + imajor(real_bdev->bd_inode), iminor(real_bdev->bd_inode)); + + if (vr->vr_state != Vr_bound) + return ERR_PTR(-ENXIO); + + __iget(real_bdev->bd_inode); + return real_bdev; +} + +/* + * And now the modules code and kernel interface. 
+ */ + +module_param(max_vroot, int, 0); + +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR); + +MODULE_AUTHOR ("Herbert Pötzl"); +MODULE_DESCRIPTION ("Virtual Root Device Mapper"); + + +int __init vroot_init(void) +{ + int i; + + if (max_vroot < 1 || max_vroot > 256) { + max_vroot = MAX_VROOT_DEFAULT; + printk(KERN_WARNING "vroot: invalid max_vroot " + "(must be between 1 and 256), " + "using default (%d)\n", max_vroot); + } + + if (register_blkdev(VROOT_MAJOR, "vroot")) + return -EIO; + + vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL); + if (!vroot_dev) + goto out_mem1; + memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device)); + + disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL); + if (!disks) + goto out_mem2; + + for (i = 0; i < max_vroot; i++) { + disks[i] = alloc_disk(1); + if (!disks[i]) + goto out_mem3; + } + + devfs_mk_dir("vroot"); + + for (i = 0; i < max_vroot; i++) { + struct vroot_device *vr = &vroot_dev[i]; + struct gendisk *disk = disks[i]; + + memset(vr, 0, sizeof(*vr)); + init_MUTEX(&vr->vr_ctl_mutex); + vr->vr_number = i; + disk->major = VROOT_MAJOR; + disk->first_minor = i; + disk->fops = &vr_fops; + sprintf(disk->disk_name, "vroot%d", i); + sprintf(disk->devfs_name, "vroot/%d", i); + disk->private_data = vr; + } + + for (i = 0; i < max_vroot; i++) + add_disk(disks[i]); + printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot); + return 0; + +out_mem3: + while (i--) + put_disk(disks[i]); + kfree(disks); +out_mem2: + kfree(vroot_dev); +out_mem1: + unregister_blkdev(VROOT_MAJOR, "vroot"); + printk(KERN_ERR "vroot: ran out of memory\n"); + return -ENOMEM; +} + +void vroot_exit(void) +{ + int i; + + for (i = 0; i < max_vroot; i++) { + del_gendisk(disks[i]); + put_disk(disks[i]); + } + devfs_remove("vroot"); + if (unregister_blkdev(VROOT_MAJOR, "vroot")) + printk(KERN_WARNING "vroot: cannot unregister 
blkdev\n"); + + kfree(disks); + kfree(vroot_dev); +} + +module_init(vroot_init); +module_exit(vroot_exit); + +#ifndef MODULE + +static int __init max_vroot_setup(char *str) +{ + max_vroot = simple_strtol(str, NULL, 0); + return 1; +} + +__setup("max_vroot=", max_vroot_setup); + +#endif + Index: linux-2.6.14/drivers/char/cyclades.c =================================================================== --- linux-2.6.14.orig/drivers/char/cyclades.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/cyclades.c 2005-10-31 11:05:45.000000000 -0600 @@ -4054,7 +4054,7 @@ break; #endif /* CONFIG_CYZ_INTR */ case CYSETWAIT: - info->closing_wait = (unsigned short)arg * HZ/100; + info->closing_wait = (unsigned int)arg * HZ/100; ret_val = 0; break; case CYGETWAIT: Index: linux-2.6.14/drivers/char/dtlk.c =================================================================== --- linux-2.6.14.orig/drivers/char/dtlk.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/dtlk.c 2005-10-31 11:05:45.000000000 -0600 @@ -198,7 +198,7 @@ up to 250 usec for the RDY bit to go nonzero. 
*/ for (retries = 0; - retries < loops_per_jiffy / (4000/HZ); + retries < HZ*(loops_per_jiffy >> 3)/500; retries++) if (inb_p(dtlk_port_tts) & TTS_WRITABLE) @@ -445,7 +445,7 @@ LOOK dtlk_write_bytes("\0012I\r", 4); buffer[b++] = 0; - __delay(50 * loops_per_jiffy / (1000/HZ)); + __delay(50 * (loops_per_jiffy >> 3) * HZ / 125); outb_p(0xff, dtlk_port_lpc); buffer[b++] = 0; LOOK Index: linux-2.6.14/drivers/char/isicom.c =================================================================== --- linux-2.6.14.orig/drivers/char/isicom.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/isicom.c 2005-10-31 11:05:45.000000000 -0600 @@ -194,7 +194,7 @@ int close_delay; unsigned short channel; unsigned short status; - unsigned short closing_wait; + unsigned int closing_wait; struct isi_board * card; struct tty_struct * tty; wait_queue_head_t close_wait; Index: linux-2.6.14/drivers/char/moxa.c =================================================================== --- linux-2.6.14.orig/drivers/char/moxa.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/moxa.c 2005-10-31 11:05:45.000000000 -0600 @@ -148,7 +148,7 @@ int type; int port; int close_delay; - unsigned short closing_wait; + unsigned int closing_wait; int count; int blocked_open; long event; /* long req'd for set_bit --RR */ Index: linux-2.6.14/drivers/char/mxser.c =================================================================== --- linux-2.6.14.orig/drivers/char/mxser.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/mxser.c 2005-10-31 11:05:45.000000000 -0600 @@ -315,7 +315,7 @@ int custom_divisor; int x_char; /* xon/xoff character */ int close_delay; - unsigned short closing_wait; + unsigned int closing_wait; int IER; /* Interrupt Enable Register */ int MCR; /* Modem control register */ unsigned long event; Index: linux-2.6.14/drivers/char/pcmcia/synclink_cs.c =================================================================== --- 
linux-2.6.14.orig/drivers/char/pcmcia/synclink_cs.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/pcmcia/synclink_cs.c 2005-10-31 11:05:45.000000000 -0600 @@ -145,8 +145,8 @@ int flags; int count; /* count of opens */ int line; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ + unsigned int close_delay; + unsigned int closing_wait; /* time to wait before closing */ struct mgsl_icount icount; Index: linux-2.6.14/drivers/char/random.c =================================================================== --- linux-2.6.14.orig/drivers/char/random.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/random.c 2005-10-31 11:05:45.000000000 -0600 @@ -1174,7 +1174,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table fake_table; + ctl_table fake_table = {0}; unsigned char buf[64], tmp_uuid[16], *uuid; uuid = table->data; Index: linux-2.6.14/drivers/char/riscom8.h =================================================================== --- linux-2.6.14.orig/drivers/char/riscom8.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/riscom8.h 2005-10-31 11:05:45.000000000 -0600 @@ -85,7 +85,7 @@ struct work_struct tqueue_hangup; short wakeup_chars; short break_length; - unsigned short closing_wait; + unsigned int closing_wait; unsigned char mark_mask; unsigned char IER; unsigned char MSVR; Index: linux-2.6.14/drivers/char/specialix_io8.h =================================================================== --- linux-2.6.14.orig/drivers/char/specialix_io8.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/specialix_io8.h 2005-10-31 11:05:45.000000000 -0600 @@ -126,7 +126,7 @@ struct work_struct tqueue_hangup; short wakeup_chars; short break_length; - unsigned short closing_wait; + unsigned int closing_wait; unsigned char mark_mask; unsigned char IER; unsigned char MSVR; Index: 
linux-2.6.14/drivers/char/synclink.c =================================================================== --- linux-2.6.14.orig/drivers/char/synclink.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/synclink.c 2005-10-31 11:05:45.000000000 -0600 @@ -189,8 +189,8 @@ int count; /* count of opens */ int line; int hw_version; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ + unsigned int close_delay; + unsigned int closing_wait; /* time to wait before closing */ struct mgsl_icount icount; Index: linux-2.6.14/drivers/char/synclinkmp.c =================================================================== --- linux-2.6.14.orig/drivers/char/synclinkmp.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/synclinkmp.c 2005-10-31 11:05:45.000000000 -0600 @@ -156,8 +156,8 @@ int flags; int count; /* count of opens */ int line; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ + unsigned int close_delay; + unsigned int closing_wait; /* time to wait before closing */ struct mgsl_icount icount; Index: linux-2.6.14/drivers/char/sysrq.c =================================================================== --- linux-2.6.14.orig/drivers/char/sysrq.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/sysrq.c 2005-10-31 11:05:45.000000000 -0600 @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -271,6 +272,21 @@ .enable_mask = SYSRQ_ENABLE_RTNICE, }; + +#ifdef CONFIG_VSERVER_DEBUG +static void sysrq_handle_vxinfo(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + dump_vx_info_inactive((key == 'x')?0:1); +} +static struct sysrq_key_op sysrq_showvxinfo_op = { + .handler = sysrq_handle_vxinfo, + .help_msg = "conteXt", + .action_msg = "Show Context Info", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; +#endif + /* Key Operations table and lock */ static DEFINE_SPINLOCK(sysrq_key_table_lock); #define SYSRQ_KEY_TABLE_LENGTH 36 @@ 
-323,7 +339,11 @@ /* u */ &sysrq_mountro_op, /* v */ NULL, /* May be assigned at init time by SMP VOYAGER */ /* w */ NULL, +#ifdef CONFIG_VSERVER_DEBUG +/* x */ &sysrq_showvxinfo_op, +#else /* x */ NULL, +#endif /* y */ NULL, /* z */ NULL }; @@ -335,6 +355,8 @@ retval = key - '0'; } else if ((key >= 'a') && (key <= 'z')) { retval = key + 10 - 'a'; + } else if ((key >= 'A') && (key <= 'Z')) { + retval = key + 10 - 'A'; } else { retval = -1; } Index: linux-2.6.14/drivers/char/tty_io.c =================================================================== --- linux-2.6.14.orig/drivers/char/tty_io.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/char/tty_io.c 2005-10-31 11:05:45.000000000 -0600 @@ -103,6 +103,7 @@ #include #include #include +#include #include @@ -2155,13 +2156,16 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { + pid_t pgrp; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; - return put_user(real_tty->pgrp, p); + + pgrp = vx_map_pid(real_tty->pgrp); + return put_user(pgrp, p); } static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -2179,6 +2183,8 @@ return -ENOTTY; if (get_user(pgrp, p)) return -EFAULT; + + pgrp = vx_rmap_pid(pgrp); if (pgrp < 0) return -EINVAL; if (session_of_pgrp(pgrp) != current->signal->session) Index: linux-2.6.14/drivers/mtd/devices/blkmtd.c =================================================================== --- linux-2.6.14.orig/drivers/mtd/devices/blkmtd.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/mtd/devices/blkmtd.c 2005-10-31 11:05:45.000000000 -0600 @@ -617,7 +617,7 @@ } -extern dev_t __init name_to_dev_t(const char *line); +extern dev_t __init name_to_dev_t(char *line); static struct blkmtd_dev *add_device(char *devname, int readonly, int erase_size) { Index: linux-2.6.14/drivers/net/wan/z85230.h 
=================================================================== --- linux-2.6.14.orig/drivers/net/wan/z85230.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/net/wan/z85230.h 2005-10-31 11:05:45.000000000 -0600 @@ -348,7 +348,7 @@ int xmit_fifo_size; /* Transmit FIFO info */ int close_delay; /* Do we wait for drain on close ? */ - unsigned short closing_wait; + unsigned int closing_wait; /* We need to know the current clock divisor * to read the bps rate the chip has currently Index: linux-2.6.14/drivers/sbus/char/aurora.h =================================================================== --- linux-2.6.14.orig/drivers/sbus/char/aurora.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/sbus/char/aurora.h 2005-10-31 11:05:45.000000000 -0600 @@ -258,7 +258,7 @@ struct tq_struct tqueue_hangup; short wakeup_chars; short break_length; - unsigned short closing_wait; + unsigned int closing_wait; unsigned char mark_mask; unsigned char SRER; unsigned char MSVR; Index: linux-2.6.14/drivers/serial/68328serial.h =================================================================== --- linux-2.6.14.orig/drivers/serial/68328serial.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/serial/68328serial.h 2005-10-31 11:05:45.000000000 -0600 @@ -22,11 +22,11 @@ int xmit_fifo_size; int custom_divisor; int baud_base; - unsigned short close_delay; + unsigned int close_delay; char reserved_char[2]; int hub6; /* FIXME: We don't have AT&T Hub6 boards! */ - unsigned short closing_wait; /* time to wait before closing */ - unsigned short closing_wait2; /* no longer used... */ + unsigned int closing_wait; /* time to wait before closing */ + unsigned int closing_wait2; /* no longer used... 
*/ int reserved[4]; }; @@ -148,8 +148,8 @@ int custom_divisor; int x_char; /* xon/xoff character */ int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; + unsigned int closing_wait; + unsigned int closing_wait2; unsigned long event; unsigned long last_active; int line; Index: linux-2.6.14/drivers/serial/68360serial.c =================================================================== --- linux-2.6.14.orig/drivers/serial/68360serial.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/serial/68360serial.c 2005-10-31 11:05:45.000000000 -0600 @@ -159,8 +159,8 @@ int count; u8 *iomem_base; u16 iomem_reg_shift; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ + unsigned int close_delay; + unsigned int closing_wait; /* time to wait before closing */ struct async_icount_24 icount; int io_type; struct async_struct *info; @@ -244,8 +244,8 @@ int line; int x_char; /* xon/xoff character */ int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; + unsigned int closing_wait; + unsigned int closing_wait2; unsigned long event; unsigned long last_active; int blocked_open; /* # of blocked opens */ Index: linux-2.6.14/drivers/serial/crisv10.h =================================================================== --- linux-2.6.14.orig/drivers/serial/crisv10.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/serial/crisv10.h 2005-10-31 11:05:45.000000000 -0600 @@ -78,8 +78,8 @@ int ignore_status_mask; int x_char; /* xon/xoff character */ int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; + unsigned int closing_wait; + unsigned int closing_wait2; unsigned long event; unsigned long last_active; int line; Index: linux-2.6.14/drivers/serial/mcfserial.h =================================================================== --- linux-2.6.14.orig/drivers/serial/mcfserial.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/serial/mcfserial.h 
2005-10-31 11:05:45.000000000 -0600 @@ -52,8 +52,8 @@ int x_char; /* xon/xoff character */ int baud_base; int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; + unsigned int closing_wait; + unsigned int closing_wait2; unsigned long event; int line; int count; /* # of fd on device */ Index: linux-2.6.14/drivers/tc/zs.h =================================================================== --- linux-2.6.14.orig/drivers/tc/zs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/drivers/tc/zs.h 2005-10-31 11:05:45.000000000 -0600 @@ -24,11 +24,11 @@ int xmit_fifo_size; int custom_divisor; int baud_base; - unsigned short close_delay; + unsigned int close_delay; char reserved_char[2]; int hub6; - unsigned short closing_wait; /* time to wait before closing */ - unsigned short closing_wait2; /* no longer used... */ + unsigned int closing_wait; /* time to wait before closing */ + unsigned int closing_wait2; /* no longer used... */ int reserved[4]; }; @@ -128,8 +128,8 @@ int custom_divisor; int x_char; /* XON/XOFF character. */ int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; + unsigned int closing_wait; + unsigned int closing_wait2; unsigned long event; unsigned long last_active; int line; Index: linux-2.6.14/fs/attr.c =================================================================== --- linux-2.6.14.orig/fs/attr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/attr.c 2005-10-31 11:05:45.000000000 -0600 @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include /* Taken over from the old code... 
*/ @@ -56,6 +59,28 @@ if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) goto error; } + + /* Check for evil vserver activity */ + if (vx_check(0, VX_ADMIN)) + goto fine; + + if (IS_BARRIER(inode)) { + vxwprintk(1, "xid=%d messing with the barrier.", + vx_current_xid()); + goto error; + } + switch (inode->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + vxwprintk(1, "xid=%d messing with the procfs.", + vx_current_xid()); + goto error; + case DEVPTS_SUPER_MAGIC: + if (vx_check(inode->i_xid, VX_IDENT)) + goto fine; + vxwprintk(1, "xid=%d messing with the devpts.", + vx_current_xid()); + goto error; + } fine: retval = 0; error: @@ -87,6 +112,8 @@ inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((ia_valid & ATTR_XID) && IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; if (ia_valid & ATTR_ATIME) inode->i_atime = timespec_trunc(attr->ia_atime, inode->i_sb->s_time_gran); @@ -164,7 +191,8 @@ error = security_inode_setattr(dentry, attr); if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) error = DQUOT_TRANSFER(inode, attr) ? 
-EDQUOT : 0; if (!error) error = inode_setattr(inode, attr); Index: linux-2.6.14/fs/binfmt_aout.c =================================================================== --- linux-2.6.14.orig/fs/binfmt_aout.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/binfmt_aout.c 2005-10-31 11:05:45.000000000 -0600 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/fs/binfmt_elf.c =================================================================== --- linux-2.6.14.orig/fs/binfmt_elf.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/binfmt_elf.c 2005-10-31 11:05:45.000000000 -0600 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/fs/binfmt_flat.c =================================================================== --- linux-2.6.14.orig/fs/binfmt_flat.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/binfmt_flat.c 2005-10-31 11:05:45.000000000 -0600 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/fs/binfmt_som.c =================================================================== --- linux-2.6.14.orig/fs/binfmt_som.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/binfmt_som.c 2005-10-31 11:05:45.000000000 -0600 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/fs/buffer.c =================================================================== --- linux-2.6.14.orig/fs/buffer.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/buffer.c 2005-10-31 11:05:45.000000000 -0600 @@ -173,7 +173,7 @@ int fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); + DQUOT_SYNC(sb->s_dqh); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); @@ -222,7 +222,7 @@ smp_wmb(); sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); + DQUOT_SYNC(sb->s_dqh); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) Index: linux-2.6.14/fs/devpts/inode.c 
=================================================================== --- linux-2.6.14.orig/fs/devpts/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/devpts/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -19,7 +19,17 @@ #include #include -#define DEVPTS_SUPER_MAGIC 0x1cd1 +static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + int ret = -EACCES; + + if (vx_check(inode->i_xid, VX_IDENT)) + ret = generic_permission(inode, mask, NULL); + return ret; +} +static struct inode_operations devpts_file_inode_operations = { + .permission = devpts_permission, +}; static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -69,6 +79,24 @@ return 0; } +static int devpts_filter(struct dentry *de) +{ + return vx_check(de->d_inode->i_xid, VX_IDENT); +} + +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + return dcache_readdir_filter(filp, dirent, filldir, devpts_filter); +} + +static struct file_operations devpts_dir_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .readdir = devpts_readdir, +}; + static struct super_operations devpts_sops = { .statfs = simple_statfs, .remount_fs = devpts_remount, @@ -95,8 +123,9 @@ inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = &devpts_dir_operations; inode->i_nlink = 2; + inode->i_xid = vx_current_xid(); devpts_root = s->s_root = d_alloc_root(inode); if (s->s_root) @@ -155,6 +184,7 @@ inode->i_gid = config.setgid ? 
config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); + inode->i_xid = vx_current_xid(); inode->u.generic_ip = tty; dentry = get_node(number); Index: linux-2.6.14/fs/dquot.c =================================================================== --- linux-2.6.14.orig/fs/dquot.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/dquot.c 2005-10-31 11:05:45.000000000 -0600 @@ -73,10 +73,12 @@ #include #include #include +#include #include #include #include #include +#include #include @@ -183,7 +185,7 @@ /* * Dquot List Management: * The quota code uses three lists for dquot management: the inuse_list, - * free_dquots, and dquot_hash[] array. A single dquot structure may be + * free_dquots, and hash->dqh_hash[] array. A single dquot structure may be * on all three lists, depending on its current state. * * All dquots are placed to the end of inuse_list when first created, and this @@ -196,12 +198,13 @@ * dquot is invalidated it's completely released from memory. * * Dquots with a specific identity (device, type and id) are placed on - * one of the dquot_hash[] hash chains. The provides an efficient search + * one of the hash->dqh_hash[] hash chains. This provides an efficient search * mechanism to locate a specific dquot.
*/ static LIST_HEAD(inuse_list); static LIST_HEAD(free_dquots); + static unsigned int dq_hash_bits, dq_hash_mask; static struct hlist_head *dquot_hash; @@ -210,36 +213,44 @@ static void dqput(struct dquot *dquot); static inline unsigned int -hashfn(const struct super_block *sb, unsigned int id, int type) +hashfn(struct dqhash *hash, unsigned int id, int type) { unsigned long tmp; - tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); + tmp = (((unsigned long)hash >> L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask; } /* * Following list functions expect dq_list_lock to be held */ -static inline void insert_dquot_hash(struct dquot *dquot) +static inline void insert_dquot_hash(struct dqhash *hash, struct dquot *dquot) { - struct hlist_head *head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type); + struct hlist_head *head = dquot_hash + + hashfn(hash, dquot->dq_id, dquot->dq_type); + /* struct hlist_head *head = hash->dqh_hash + + hashfn(dquot->dq_dqh, dquot->dq_id, dquot->dq_type); */ hlist_add_head(&dquot->dq_hash, head); + dquot->dq_dqh = dqhget(hash); } static inline void remove_dquot_hash(struct dquot *dquot) { hlist_del_init(&dquot->dq_hash); + dqhput(dquot->dq_dqh); + dquot->dq_dqh = NULL; } -static inline struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, unsigned int id, int type) +static inline struct dquot *find_dquot(struct dqhash *hash, + unsigned int hashent, unsigned int id, int type) { struct hlist_node *node; struct dquot *dquot; - hlist_for_each (node, dquot_hash+hashent) { + /* hlist_for_each (node, hash->dqh_hash + hashent) { */ + hlist_for_each (node, dquot_hash + hashent) { dquot = hlist_entry(node, struct dquot, dq_hash); - if (dquot->dq_sb == sb && dquot->dq_id == id && dquot->dq_type == type) + if (dquot->dq_dqh == hash && dquot->dq_id == id && dquot->dq_type == type) return dquot; } return NODQUOT; @@ -283,13 +294,13 @@ up(&dquot->dq_lock); } 
-#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot)) +#define mark_dquot_dirty(dquot) ((dquot)->dq_dqh->dqh_qop->mark_dirty(dquot)) int dquot_mark_dquot_dirty(struct dquot *dquot) { spin_lock(&dq_list_lock); if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) - list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> + list_add(&dquot->dq_dirty, &dqh_dqopt(dquot->dq_dqh)-> info[dquot->dq_type].dqi_dirty_list); spin_unlock(&dq_list_lock); return 0; @@ -304,9 +315,9 @@ return 1; } -void mark_info_dirty(struct super_block *sb, int type) +void mark_info_dirty(struct dqhash *hash, int type) { - set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags); + set_bit(DQF_INFO_DIRTY_B, &dqh_dqopt(hash)->info[type].dqi_flags); } EXPORT_SYMBOL(mark_info_dirty); @@ -317,7 +328,7 @@ int dquot_acquire(struct dquot *dquot) { int ret = 0, ret2 = 0; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh); down(&dquot->dq_lock); down(&dqopt->dqio_sem); @@ -331,7 +342,7 @@ ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); /* Write the info if needed */ if (info_dirty(&dqopt->info[dquot->dq_type])) - ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type); + ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type); if (ret < 0) goto out_iolock; if (ret2 < 0) { @@ -352,7 +363,7 @@ int dquot_commit(struct dquot *dquot) { int ret = 0, ret2 = 0; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh); down(&dqopt->dqio_sem); spin_lock(&dq_list_lock); @@ -366,7 +377,7 @@ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); if (info_dirty(&dqopt->info[dquot->dq_type])) - ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type); + ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type); if (ret >= 0) ret = ret2; } @@ 
-381,7 +392,7 @@ int dquot_release(struct dquot *dquot) { int ret = 0, ret2 = 0; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh); down(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ @@ -392,7 +403,7 @@ ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); /* Write the info */ if (info_dirty(&dqopt->info[dquot->dq_type])) - ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type); + ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type); if (ret >= 0) ret = ret2; } @@ -407,13 +418,13 @@ * quota is disabled and pointers from inodes removed so there cannot be new * quota users. Also because we hold dqonoff_sem there can be no quota users * for this sb+type at all. */ -static void invalidate_dquots(struct super_block *sb, int type) +static void invalidate_dquots(struct dqhash *hash, int type) { struct dquot *dquot, *tmp; spin_lock(&dq_list_lock); list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { - if (dquot->dq_sb != sb) + if (dquot->dq_dqh != hash) continue; if (dquot->dq_type != type) continue; @@ -430,18 +441,94 @@ spin_unlock(&dq_list_lock); } -int vfs_quota_sync(struct super_block *sb, int type) + +/* Dquota Hash Management Functions */ + +static LIST_HEAD(dqhash_list); + +struct dqhash *new_dqhash(struct super_block *sb, unsigned int id) +{ + struct dqhash *hash; + int err; + + err = -ENOMEM; + hash = kmalloc(sizeof(struct dqhash), GFP_USER); + if (!hash) + goto out; + + memset(hash, 0, sizeof(struct dqhash)); + hash->dqh_id = id; + atomic_set(&hash->dqh_count, 1); + + INIT_LIST_HEAD(&hash->dqh_list); + + sema_init(&hash->dqh_dqopt.dqio_sem, 1); + sema_init(&hash->dqh_dqopt.dqonoff_sem, 1); + init_rwsem(&hash->dqh_dqopt.dqptr_sem); + hash->dqh_qop = sb->s_qop; + hash->dqh_qcop = sb->s_qcop; + hash->dqh_sb = sb; + + lock_kernel(); + list_add(&hash->dqh_list, &dqhash_list); + unlock_kernel(); + 
vxdprintk(VXD_CBIT(misc, 0), + "new_dqhash: %p [#0x%08x]", hash, hash->dqh_id); + return hash; + + // kfree(hash); +out: + return ERR_PTR(err); +} + +void destroy_dqhash(struct dqhash *hash) +{ + int cnt; + + vxdprintk(VXD_CBIT(misc, 0), + "destroy_dqhash: %p [#0x%08x] c=%d", + hash, hash->dqh_id, atomic_read(&hash->dqh_count)); + lock_kernel(); + list_del_init(&hash->dqh_list); + unlock_kernel(); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) /* should not be required anymore! */ + invalidate_dquots(hash, cnt); + kfree(hash); +} + + +struct dqhash *find_dqhash(unsigned int id) +{ + struct list_head *head; + struct dqhash *hash; + + lock_kernel(); + list_for_each(head, &dqhash_list) { + hash = list_entry(head, struct dqhash, dqh_list); + if (hash->dqh_id == id) + goto dqh_found; + } + unlock_kernel(); + return NULL; + +dqh_found: + unlock_kernel(); + return dqhget(hash); +} + + +int vfs_quota_sync(struct dqhash *hash, int type) { struct list_head *dirty; struct dquot *dquot; - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); int cnt; down(&dqopt->dqonoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!dqh_has_quota_enabled(hash, cnt)) continue; spin_lock(&dq_list_lock); dirty = &dqopt->info[cnt].dqi_dirty_list; @@ -458,7 +545,7 @@ atomic_inc(&dquot->dq_count); dqstats.lookups++; spin_unlock(&dq_list_lock); - sb->dq_op->write_dquot(dquot); + hash->dqh_qop->write_dquot(dquot); dqput(dquot); spin_lock(&dq_list_lock); } @@ -466,9 +553,10 @@ } for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) + if ((cnt == type || type == -1) + && dqh_has_quota_enabled(hash, cnt) && info_dirty(&dqopt->info[cnt])) - sb->dq_op->write_info(sb, cnt); + hash->dqh_qop->write_info(hash, cnt); spin_lock(&dq_list_lock); dqstats.syncs++; spin_unlock(&dq_list_lock); @@ -523,7 +611,7 @@ if (!atomic_read(&dquot->dq_count)) { 
printk("VFS: dqput: trying to free free dquot\n"); printk("VFS: device %s, dquot of %s %d\n", - dquot->dq_sb->s_id, + dquot->dq_dqh->dqh_sb->s_id, quotatypes[dquot->dq_type], dquot->dq_id); BUG(); @@ -545,14 +633,14 @@ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ - dquot->dq_sb->dq_op->write_dquot(dquot); + dquot->dq_dqh->dqh_qop->write_dquot(dquot); goto we_slept; } /* Clear flag in case dquot was inactive (something bad happened) */ clear_dquot_dirty(dquot); if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); - dquot->dq_sb->dq_op->release_dquot(dquot); + dquot->dq_dqh->dqh_qop->release_dquot(dquot); goto we_slept; } atomic_dec(&dquot->dq_count); @@ -565,7 +653,7 @@ spin_unlock(&dq_list_lock); } -static struct dquot *get_empty_dquot(struct super_block *sb, int type) +static struct dquot *get_empty_dquot(int type) { struct dquot *dquot; @@ -579,7 +667,7 @@ INIT_LIST_HEAD(&dquot->dq_inuse); INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); - dquot->dq_sb = sb; + dquot->dq_dqh = NULL; dquot->dq_type = type; atomic_set(&dquot->dq_count, 1); @@ -590,19 +678,19 @@ * Get reference to dquot * MUST be called with either dqptr_sem or dqonoff_sem held */ -static struct dquot *dqget(struct super_block *sb, unsigned int id, int type) +static struct dquot *dqget(struct dqhash *hash, unsigned int id, int type) { - unsigned int hashent = hashfn(sb, id, type); + unsigned int hashent = hashfn(hash, id, type); struct dquot *dquot, *empty = NODQUOT; - if (!sb_has_quota_enabled(sb, type)) + if (!dqh_has_quota_enabled(hash, type)) return NODQUOT; we_slept: spin_lock(&dq_list_lock); - if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) { + if ((dquot = find_dquot(hash, hashent, id, type)) == NODQUOT) { if (empty == NODQUOT) { spin_unlock(&dq_list_lock); - if ((empty = get_empty_dquot(sb, type)) == NODQUOT) + if ((empty = get_empty_dquot(type)) == 
NODQUOT) schedule(); /* Try to wait for a moment... */ goto we_slept; } @@ -611,7 +699,7 @@ /* all dquots go on the inuse_list */ put_inuse(dquot); /* hash it first so it can be found */ - insert_dquot_hash(dquot); + insert_dquot_hash(hash, dquot); dqstats.lookups++; spin_unlock(&dq_list_lock); } else { @@ -628,12 +716,13 @@ * finished or it will be canceled due to dq_count > 1 test */ wait_on_dquot(dquot); /* Read the dquot and instantiate it (everything done only if needed) */ - if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && sb->dq_op->acquire_dquot(dquot) < 0) { + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && + hash->dqh_qop->acquire_dquot(dquot) < 0) { dqput(dquot); return NODQUOT; } #ifdef __DQUOT_PARANOIA - if (!dquot->dq_sb) /* Has somebody invalidated entry under us? */ + if (!dquot->dq_dqh) /* Has somebody invalidated entry under us? */ BUG(); #endif @@ -655,9 +744,10 @@ } /* This routine is guarded by dqonoff_sem semaphore */ -static void add_dquot_ref(struct super_block *sb, int type) +static void add_dquot_ref(struct dqhash *hash, int type) { struct list_head *p; + struct super_block *sb = hash->dqh_sb; restart: file_list_lock(); @@ -667,7 +757,7 @@ if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) { struct dentry *dentry = dget(filp->f_dentry); file_list_unlock(); - sb->dq_op->initialize(inode, type); + hash->dqh_qop->initialize(inode, type); dput(dentry); /* As we may have blocked we had better restart... 
*/ goto restart; @@ -726,16 +816,16 @@ } /* Gather all references from inodes and drop them */ -static void drop_dquot_ref(struct super_block *sb, int type) +static void drop_dquot_ref(struct dqhash *hash, int type) { LIST_HEAD(tofree_head); /* We need to be guarded against prune_icache to reach all the * inodes - otherwise some can be on the local list of prune_icache */ down(&iprune_sem); - down_write(&sb_dqopt(sb)->dqptr_sem); - remove_dquot_ref(sb, type, &tofree_head); - up_write(&sb_dqopt(sb)->dqptr_sem); + down_write(&dqh_dqopt(hash)->dqptr_sem); + remove_dquot_ref(hash, type, &tofree_head); + up_write(&dqh_dqopt(hash)->dqptr_sem); up(&iprune_sem); put_dquot_list(&tofree_head); } @@ -807,7 +897,7 @@ if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags))) return; - tty_write_message(current->signal->tty, dquot->dq_sb->s_id); + tty_write_message(current->signal->tty, dquot->dq_dqh->dqh_sb->s_id); if (warntype == ISOFTWARN || warntype == BSOFTWARN) tty_write_message(current->signal->tty, ": warning, "); else @@ -847,7 +937,7 @@ static inline char ignore_hardlimit(struct dquot *dquot) { - struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; + struct mem_dqinfo *info = &dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type]; return capable(CAP_SYS_RESOURCE) && (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & V1_DQF_RSQUASH)); @@ -879,7 +969,7 @@ (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime == 0) { *warntype = ISOFTWARN; - dquot->dq_dqb.dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; + dquot->dq_dqb.dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace; } return QUOTA_OK; @@ -914,7 +1004,7 @@ dquot->dq_dqb.dqb_btime == 0) { if (!prealloc) { *warntype = BSOFTWARN; - dquot->dq_dqb.dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; + 
dquot->dq_dqb.dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace; } else /* @@ -940,7 +1030,7 @@ * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) return 0; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Having dqptr_sem we know NOQUOTA flags can't be altered... */ if (IS_NOQUOTA(inode)) goto out_err; @@ -956,11 +1046,11 @@ id = inode->i_gid; break; } - inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt); + inode->i_dquot[cnt] = dqget(inode->i_dqh, id, cnt); } } out_err: - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } @@ -972,14 +1062,14 @@ { int cnt; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] != NODQUOT) { dqput(inode->i_dquot[cnt]); inode->i_dquot[cnt] = NODQUOT; } } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return 0; } @@ -1010,9 +1100,9 @@ for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); if (IS_NOQUOTA(inode)) { /* Now we can do reliable test... 
*/ - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); goto out_add; } spin_lock(&dq_data_lock); @@ -1037,7 +1127,7 @@ if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings(inode->i_dquot, warntype); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } @@ -1055,9 +1145,9 @@ return QUOTA_OK; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } spin_lock(&dq_data_lock); @@ -1082,7 +1172,7 @@ if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings((struct dquot **)inode->i_dquot, warntype); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } @@ -1100,10 +1190,10 @@ inode_sub_bytes(inode, number); return QUOTA_OK; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); goto out_sub; } spin_lock(&dq_data_lock); @@ -1118,7 +1208,7 @@ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } @@ -1133,10 +1223,10 @@ * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) return QUOTA_OK; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + 
up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } spin_lock(&dq_data_lock); @@ -1150,7 +1240,7 @@ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } @@ -1165,6 +1255,7 @@ qsize_t space; struct dquot *transfer_from[MAXQUOTAS]; struct dquot *transfer_to[MAXQUOTAS]; + struct dqhash *dqh = inode->i_sb->s_dqh; int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; char warntype[MAXQUOTAS]; @@ -1178,10 +1269,10 @@ transfer_to[cnt] = transfer_from[cnt] = NODQUOT; warntype[cnt] = NOWARN; } - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } /* First build the transfer_to list - here we can block on @@ -1192,12 +1283,12 @@ case USRQUOTA: if (!chuid) continue; - transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_uid, cnt); + transfer_to[cnt] = dqget(dqh, iattr->ia_uid, cnt); break; case GRPQUOTA: if (!chgid) continue; - transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_gid, cnt); + transfer_to[cnt] = dqget(dqh, iattr->ia_gid, cnt); break; } } @@ -1252,20 +1343,20 @@ if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT) dqput(transfer_to[cnt]); } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } /* * Write info of quota file to disk */ -int dquot_commit_info(struct super_block *sb, int type) +int dquot_commit_info(struct dqhash *hash, int type) { int ret; - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); down(&dqopt->dqio_sem); - 
ret = dqopt->ops[type]->write_file_info(sb, type); + ret = dqopt->ops[type]->write_file_info(hash, type); up(&dqopt->dqio_sem); return ret; } @@ -1315,10 +1406,10 @@ /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ -int vfs_quota_off(struct super_block *sb, int type) +int vfs_quota_off(struct dqhash *hash, int type) { int cnt; - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); struct inode *toputinode[MAXQUOTAS]; struct vfsmount *toputmnt[MAXQUOTAS]; @@ -1329,21 +1420,21 @@ toputmnt[cnt] = NULL; if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!dqh_has_quota_enabled(hash, cnt)) continue; reset_enable_flags(dqopt, cnt); /* Note: these are blocking operations */ - drop_dquot_ref(sb, cnt); - invalidate_dquots(sb, cnt); + drop_dquot_ref(hash, cnt); + invalidate_dquots(hash, cnt); /* * Now all dquots should be invalidated, all writes done so we should be only * users of the info. No locks needed. */ if (info_dirty(&dqopt->info[cnt])) - sb->dq_op->write_info(sb, cnt); + hash->dqh_qop->write_info(hash, cnt); if (dqopt->ops[cnt]->free_file_info) - dqopt->ops[cnt]->free_file_info(sb, cnt); + dqopt->ops[cnt]->free_file_info(hash, cnt); put_quota_format(dqopt->info[cnt].dqi_format); toputinode[cnt] = dqopt->files[cnt]; @@ -1361,9 +1452,9 @@ * The reference to vfsmnt we are still holding protects us from * umount (we don't have it only when quotas are turned on/off for * journal replay but in that case we are guarded by the fs anyway). */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (hash->dqh_sb->s_op->sync_fs) + hash->dqh_sb->s_op->sync_fs(hash->dqh_sb, 1); + sync_blockdev(hash->dqh_sb->s_bdev); /* Now the quota files are just ordinary files and we can set the * inode flags back. Moreover we discard the pagecache so that * userspace sees the writes we did bypassing the pagecache. 
We @@ -1374,7 +1465,7 @@ down(&dqopt->dqonoff_sem); /* If quota was reenabled in the meantime, we have * nothing to do */ - if (!sb_has_quota_enabled(sb, cnt)) { + if (!dqh_has_quota_enabled(hash, cnt)) { down(&toputinode[cnt]->i_sem); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); @@ -1389,8 +1480,8 @@ if (toputmnt[cnt]) mntput(toputmnt[cnt]); } - if (sb->s_bdev) - invalidate_bdev(sb->s_bdev, 0); + if (hash->dqh_sb->s_bdev) + invalidate_bdev(hash->dqh_sb->s_bdev, 0); return 0; } @@ -1403,7 +1494,8 @@ { struct quota_format_type *fmt = find_quota_format(format_id); struct super_block *sb = inode->i_sb; - struct quota_info *dqopt = sb_dqopt(sb); + struct dqhash *hash = inode->i_dqh; + struct quota_info *dqopt = dqh_dqopt(hash); int error; int oldflags = -1; @@ -1429,7 +1521,7 @@ invalidate_bdev(sb->s_bdev, 0); down(&inode->i_sem); down(&dqopt->dqonoff_sem); - if (sb_has_quota_enabled(sb, type)) { + if (dqh_has_quota_enabled(hash, type)) { error = -EBUSY; goto out_lock; } @@ -1440,21 +1532,21 @@ oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA); inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; up_write(&dqopt->dqptr_sem); - sb->dq_op->drop(inode); + hash->dqh_qop->drop(inode); error = -EIO; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) goto out_lock; error = -EINVAL; - if (!fmt->qf_ops->check_quota_file(sb, type)) + if (!fmt->qf_ops->check_quota_file(hash, type)) goto out_file_init; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); down(&dqopt->dqio_sem); - if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) { + if ((error = dqopt->ops[type]->read_file_info(hash, type)) < 0) { up(&dqopt->dqio_sem); goto out_file_init; } @@ -1462,7 +1554,7 @@ up(&inode->i_sem); set_enable_flags(dqopt, type); - add_dquot_ref(sb, type); + add_dquot_ref(hash, type); up(&dqopt->dqonoff_sem); return 0; @@ -1488,7 +1580,7 @@ } /* Actual function called from 
quotactl() */ -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) +int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path) { struct nameidata nd; int error; @@ -1500,12 +1592,12 @@ if (error) goto out_path; /* Quota file not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) + if (nd.mnt->mnt_sb != hash->dqh_sb) error = -EXDEV; else { error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); if (!error) - sb_dqopt(sb)->mnt[type] = mntget(nd.mnt); + dqh_dqopt(hash)->mnt[type] = mntget(nd.mnt); } out_path: path_release(&nd); @@ -1516,13 +1608,13 @@ * This function is used when filesystem needs to initialize quotas * during mount time. */ -int vfs_quota_on_mount(struct super_block *sb, char *qf_name, +int vfs_quota_on_mount(struct dqhash *hash, char *qf_name, int format_id, int type) { struct dentry *dentry; int error; - dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name)); + dentry = lookup_one_len(qf_name, hash->dqh_sb->s_root, strlen(qf_name)); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1552,18 +1644,18 @@ spin_unlock(&dq_data_lock); } -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) +int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di) { struct dquot *dquot; - down(&sb_dqopt(sb)->dqonoff_sem); - if (!(dquot = dqget(sb, id, type))) { - up(&sb_dqopt(sb)->dqonoff_sem); + down(&dqh_dqopt(hash)->dqonoff_sem); + if (!(dquot = dqget(hash, id, type))) { + up(&dqh_dqopt(hash)->dqonoff_sem); return -ESRCH; } do_get_dqblk(dquot, di); dqput(dquot); - up(&sb_dqopt(sb)->dqonoff_sem); + up(&dqh_dqopt(hash)->dqonoff_sem); return 0; } @@ -1603,7 +1695,7 @@ clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... 
*/ - dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; + dm->dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace; } if (check_ilim) { if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { @@ -1611,7 +1703,7 @@ clear_bit(DQ_INODES_B, &dquot->dq_flags); } else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */ - dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; + dm->dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace; } if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) clear_bit(DQ_FAKE_B, &dquot->dq_flags); @@ -1621,53 +1713,53 @@ mark_dquot_dirty(dquot); } -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) +int vfs_set_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di) { struct dquot *dquot; - down(&sb_dqopt(sb)->dqonoff_sem); - if (!(dquot = dqget(sb, id, type))) { - up(&sb_dqopt(sb)->dqonoff_sem); + down(&dqh_dqopt(hash)->dqonoff_sem); + if (!(dquot = dqget(hash, id, type))) { + up(&dqh_dqopt(hash)->dqonoff_sem); return -ESRCH; } do_set_dqblk(dquot, di); dqput(dquot); - up(&sb_dqopt(sb)->dqonoff_sem); + up(&dqh_dqopt(hash)->dqonoff_sem); return 0; } /* Generic routine for getting common part of quota file information */ -int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; - down(&sb_dqopt(sb)->dqonoff_sem); - if (!sb_has_quota_enabled(sb, type)) { - up(&sb_dqopt(sb)->dqonoff_sem); + down(&dqh_dqopt(hash)->dqonoff_sem); + if (!dqh_has_quota_enabled(hash, type)) { + up(&dqh_dqopt(hash)->dqonoff_sem); return -ESRCH; } - mi = sb_dqopt(sb)->info + type; + mi = dqh_dqopt(hash)->info + type; spin_lock(&dq_data_lock); ii->dqi_bgrace = mi->dqi_bgrace; ii->dqi_igrace = 
mi->dqi_igrace; ii->dqi_flags = mi->dqi_flags & DQF_MASK; ii->dqi_valid = IIF_ALL; spin_unlock(&dq_data_lock); - up(&sb_dqopt(sb)->dqonoff_sem); + up(&dqh_dqopt(hash)->dqonoff_sem); return 0; } /* Generic routine for setting common part of quota file information */ -int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; - down(&sb_dqopt(sb)->dqonoff_sem); - if (!sb_has_quota_enabled(sb, type)) { - up(&sb_dqopt(sb)->dqonoff_sem); + down(&dqh_dqopt(hash)->dqonoff_sem); + if (!dqh_has_quota_enabled(hash, type)) { + up(&dqh_dqopt(hash)->dqonoff_sem); return -ESRCH; } - mi = sb_dqopt(sb)->info + type; + mi = dqh_dqopt(hash)->info + type; spin_lock(&dq_data_lock); if (ii->dqi_valid & IIF_BGRACE) mi->dqi_bgrace = ii->dqi_bgrace; @@ -1676,10 +1768,10 @@ if (ii->dqi_valid & IIF_FLAGS) mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK); spin_unlock(&dq_data_lock); - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); /* Force write to disk */ - sb->dq_op->write_info(sb, type); - up(&sb_dqopt(sb)->dqonoff_sem); + hash->dqh_qop->write_info(hash, type); + up(&dqh_dqopt(hash)->dqonoff_sem); return 0; } Index: linux-2.6.14/fs/exec.c =================================================================== --- linux-2.6.14.orig/fs/exec.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/exec.c 2005-10-31 11:05:45.000000000 -0600 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -449,7 +450,8 @@ kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { Index: linux-2.6.14/fs/ext2/balloc.c =================================================================== --- linux-2.6.14.orig/fs/ext2/balloc.c 2005-10-27 19:02:08.000000000 -0500 +++ 
linux-2.6.14/fs/ext2/balloc.c 2005-10-31 11:05:45.000000000 -0600 @@ -16,6 +16,7 @@ #include #include #include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -108,6 +109,8 @@ free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); + DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks); + if (free_blocks < count) count = free_blocks; @@ -258,6 +261,7 @@ } error_return: brelse(bitmap_bh); + DLIMIT_FREE_BLOCK(inode, freed); release_blocks(sb, freed); DQUOT_FREE_BLOCK(inode, freed); } @@ -361,6 +365,10 @@ *err = -ENOSPC; goto out_dquot; } + if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) { + *err = -ENOSPC; + goto out_dlimit; + } ext2_debug ("goal=%lu.\n", goal); @@ -508,6 +516,8 @@ *err = 0; out_release: group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + DLIMIT_FREE_BLOCK(inode, es_alloc); +out_dlimit: release_blocks(sb, es_alloc); out_dquot: DQUOT_FREE_BLOCK(inode, dq_alloc); Index: linux-2.6.14/fs/ext2/file.c =================================================================== --- linux-2.6.14.orig/fs/ext2/file.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext2/file.c 2005-10-31 11:05:45.000000000 -0600 @@ -53,6 +53,7 @@ .readv = generic_file_readv, .writev = generic_file_writev, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, }; #ifdef CONFIG_EXT2_FS_XIP Index: linux-2.6.14/fs/ext2/ialloc.c =================================================================== --- linux-2.6.14.orig/fs/ext2/ialloc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext2/ialloc.c 2005-10-31 11:05:45.000000000 -0600 @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -126,6 +128,7 @@ ext2_xattr_delete_inode(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } es = EXT2_SB(sb)->s_es; @@ -465,6 +468,11 @@ if (!inode) return 
ERR_PTR(-ENOMEM); + inode->i_xid = vx_current_fsxid(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto fail_dlim; + } ei = EXT2_I(inode); sbi = EXT2_SB(sb); es = sbi->s_es; @@ -579,7 +587,8 @@ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + ei->i_flags = EXT2_I(dir)->i_flags & + ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); /* dirsync is only applied to directories */ @@ -605,7 +614,7 @@ insert_inode_hash(inode); if (DQUOT_ALLOC_INODE(inode)) { - err = -ENOSPC; + err = -EDQUOT; goto fail_drop; } @@ -626,6 +635,7 @@ DQUOT_FREE_INODE(inode); fail_drop: + DLIMIT_FREE_INODE(inode); DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; @@ -633,6 +643,8 @@ return ERR_PTR(err); fail: + DLIMIT_FREE_INODE(inode); +fail_dlim: make_bad_inode(inode); iput(inode); return ERR_PTR(err); Index: linux-2.6.14/fs/ext2/inode.c =================================================================== --- linux-2.6.14.orig/fs/ext2/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext2/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -31,6 +31,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" #include "xip.h" @@ -66,6 +67,8 @@ ext2_discard_prealloc(inode); } +static void ext2_truncate_nocheck (struct inode * inode); + /* * Called at the last iput() if i_nlink is zero. 
*/ @@ -81,7 +84,7 @@ inode->i_size = 0; if (inode->i_blocks) - ext2_truncate (inode); + ext2_truncate_nocheck(inode); ext2_free_inode (inode); return; @@ -902,7 +905,7 @@ ext2_free_data(inode, p, q); } -void ext2_truncate (struct inode * inode) +static void ext2_truncate_nocheck(struct inode * inode) { __le32 *i_data = EXT2_I(inode)->i_data; int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); @@ -919,8 +922,6 @@ return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext2_discard_prealloc(inode); @@ -1044,17 +1045,28 @@ return ERR_PTR(-EIO); } +void ext2_truncate (struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext2_truncate_nocheck(inode); +} + void ext2_set_inode_flags(struct inode *inode) { unsigned int flags = EXT2_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_IUNLINK|S_BARRIER|S_NOATIME|S_DIRSYNC); if (flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT2_APPEND_FL) inode->i_flags |= S_APPEND; if (flags & EXT2_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT2_BARRIER_FL) + inode->i_flags |= S_BARRIER; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) @@ -1067,6 +1079,8 @@ ino_t ino = inode->i_ino; struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + uid_t uid; + gid_t gid; int n; #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -1077,12 +1091,17 @@ goto bad_inode; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) 
<< 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -1180,8 +1199,8 @@ struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1216,6 +1235,9 @@ raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_INTERN + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(inode->i_size); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -1293,6 +1315,26 @@ return sync_inode(inode, &wbc); } +static void ext2_setattr_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = EXT2_I(inode)->i_flags; + newflags = oldflags & + ~(EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | EXT2_BARRIER_FL); + if (IS_IMMUTABLE(inode)) + newflags |= EXT2_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT2_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT2_BARRIER_FL; + + if (oldflags ^ newflags) { + EXT2_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + } +} + int ext2_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; @@ -1302,11 +1344,15 @@ if (error) return error; if 
((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_XID && iattr->ia_xid != inode->i_xid)) { error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0; if (error) return error; } + + if (iattr->ia_valid & ATTR_ATTR_FLAG) + ext2_setattr_flags(inode); error = inode_setattr(inode, iattr); if (!error && (iattr->ia_valid & ATTR_MODE)) error = ext2_acl_chmod(inode); Index: linux-2.6.14/fs/ext2/ioctl.c =================================================================== --- linux-2.6.14.orig/fs/ext2/ioctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext2/ioctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -29,7 +29,8 @@ case EXT2_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -49,7 +50,9 @@ * * This test looks nicer. 
Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if ((oldflags & EXT2_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } @@ -68,7 +71,8 @@ case EXT2_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *) arg)) return -EFAULT; Index: linux-2.6.14/fs/ext2/namei.c =================================================================== --- linux-2.6.14.orig/fs/ext2/namei.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext2/namei.c 2005-10-31 11:05:45.000000000 -0600 @@ -31,6 +31,7 @@ */ #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -82,6 +83,7 @@ inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + vx_propagate_xid(nd, inode); } if (inode) return d_splice_alias(inode, dentry); Index: linux-2.6.14/fs/ext2/super.c =================================================================== --- linux-2.6.14.orig/fs/ext2/super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext2/super.c 2005-10-31 11:05:45.000000000 -0600 @@ -225,8 +225,8 @@ } #ifdef CONFIG_QUOTA -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); -static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off); +static ssize_t ext2_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off); #endif static struct super_operations ext2_sops = { @@ -284,7 +284,7 @@ Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, 
Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_tagxid }; static match_table_t tokens = { @@ -317,6 +317,7 @@ {Opt_ignore, "noquota"}, {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, + {Opt_tagxid, "tagxid"}, {Opt_err, NULL} }; @@ -376,6 +377,11 @@ case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + set_opt (sbi->s_mount_opt, TAGXID); + break; +#endif case Opt_check: #ifdef CONFIG_EXT2_CHECK set_opt (sbi->s_mount_opt, CHECK); @@ -690,6 +696,8 @@ if (!parse_options ((char *) data, sbi)) goto failed_mount; + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGXID) + sb->s_flags |= MS_TAGXID; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -999,6 +1007,13 @@ goto restore_opts; } + if ((sbi->s_mount_opt & EXT2_MOUNT_TAGXID) && + !(sb->s_flags & MS_TAGXID)) { + printk("EXT2-fs: %s: tagxid not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -1110,10 +1125,11 @@ * acquiring the locks... 
As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); @@ -1154,10 +1170,11 @@ } /* Write to quotafile */ -static ssize_t ext2_quota_write(struct super_block *sb, int type, +static ssize_t ext2_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); Index: linux-2.6.14/fs/ext2/xattr.c =================================================================== --- linux-2.6.14.orig/fs/ext2/xattr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext2/xattr.c 2005-10-31 11:05:45.000000000 -0600 @@ -60,6 +60,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -649,8 +650,15 @@ the inode.
*/ ea_bdebug(new_bh, "reusing block"); + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) { + /* like the DQUOT failure path below: drop new_bh's buffer lock before bailing out */ + unlock_buffer(new_bh); + goto cleanup; + } error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) { + DLIMIT_FREE_BLOCK(inode, 1); unlock_buffer(new_bh); goto cleanup; } @@ -744,6 +752,7 @@ le32_to_cpu(HDR(old_bh)->h_refcount) - 1); if (ce) mb_cache_entry_release(ce); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", @@ -804,6 +813,7 @@ mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); } ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); Index: linux-2.6.14/fs/ext3/balloc.c =================================================================== --- linux-2.6.14.orig/fs/ext3/balloc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/balloc.c 2005-10-31 11:05:45.000000000 -0600 @@ -19,6 +19,7 @@ #include #include #include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -503,8 +504,10 @@ return; } ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) + if (dquot_freed_blocks) { + DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks); DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + } return; } @@ -1115,18 +1118,32 @@ return ret; } -static int ext3_has_free_blocks(struct ext3_sb_info *sbi) +static int ext3_has_free_blocks(struct super_block *sb) { - int free_blocks, root_blocks; + struct ext3_sb_info *sbi = EXT3_SB(sb); + int free_blocks, root_blocks, cond; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): free=%u, root=%u", + sb, free_blocks, root_blocks); + + DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks); + + cond = (free_blocks <
root_blocks + 1 && + !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))); + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d", + sb, free_blocks, root_blocks, + !capable(CAP_SYS_RESOURCE)?'1':'0', + sbi->s_resuid, current->fsuid, cond?0:1); + + return (cond ? 0 : 1); } /* @@ -1137,7 +1154,7 @@ */ int ext3_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) + if (!ext3_has_free_blocks(sb) || (*retries)++ > 3) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); @@ -1192,6 +1209,8 @@ *errp = -EDQUOT; return 0; } + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto out_dlimit; sbi = EXT3_SB(sb); es = EXT3_SB(sb)->s_es; @@ -1208,7 +1227,7 @@ if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) my_rsv = &block_i->rsv_window_node; - if (!ext3_has_free_blocks(sbi)) { + if (!ext3_has_free_blocks(sb)) { *errp = -ENOSPC; goto out; } @@ -1392,6 +1411,9 @@ io_error: *errp = -EIO; out: + if (!performed_allocation) + DLIMIT_FREE_BLOCK(inode, 1); +out_dlimit: if (fatal) { *errp = fatal; ext3_std_error(sb, fatal); Index: linux-2.6.14/fs/ext3/file.c =================================================================== --- linux-2.6.14.orig/fs/ext3/file.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/file.c 2005-10-31 11:05:45.000000000 -0600 @@ -119,6 +119,7 @@ .release = ext3_release_file, .fsync = ext3_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, }; struct inode_operations ext3_file_inode_operations = { Index: linux-2.6.14/fs/ext3/ialloc.c =================================================================== --- linux-2.6.14.orig/fs/ext3/ialloc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/ialloc.c 2005-10-31 11:05:45.000000000 
-0600 @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -127,6 +129,7 @@ ext3_xattr_delete_inode(handle, inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); is_directory = S_ISDIR(inode->i_mode); @@ -443,6 +446,12 @@ inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); + + inode->i_xid = vx_current_fsxid(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_dlimit; + } ei = EXT3_I(inode); sbi = EXT3_SB(sb); @@ -565,7 +574,8 @@ ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + ei->i_flags = EXT3_I(dir)->i_flags & + ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); /* dirsync only applies to directories */ @@ -620,6 +630,8 @@ fail: ext3_std_error(sb, err); out: + DLIMIT_FREE_INODE(inode); +out_dlimit: iput(inode); ret = ERR_PTR(err); really_out: @@ -630,7 +642,12 @@ DQUOT_FREE_INODE(inode); fail_drop: + /* + * serge : not sure whether DQUOT_DROP belongs here or above... ext3 + * had it here + */ DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); Index: linux-2.6.14/fs/ext3/inode.c =================================================================== --- linux-2.6.14.orig/fs/ext3/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -36,6 +36,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -180,6 +181,8 @@ return ext3_journal_restart(handle, blocks_for_truncate(inode)); } +static void ext3_truncate_nocheck (struct inode *inode); + /* * Called at the last iput() if i_nlink is zero. */ @@ -205,7 +208,7 @@ handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) - ext3_truncate(inode); + ext3_truncate_nocheck(inode); /* * Kill off the orphan record which ext3_truncate created. 
* AKPM: I think this can be inside the above `if'. @@ -2067,7 +2070,7 @@ * ext3_truncate() run will find them and release them. */ -void ext3_truncate(struct inode * inode) +void ext3_truncate_nocheck(struct inode * inode) { handle_t *handle; struct ext3_inode_info *ei = EXT3_I(inode); @@ -2088,8 +2091,6 @@ return; if (ext3_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; /* * We have to lock the EOF page here, because lock_page() nests @@ -2408,17 +2409,28 @@ !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)); } +void ext3_truncate(struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext3_truncate_nocheck(inode); +} + void ext3_set_inode_flags(struct inode *inode) { unsigned int flags = EXT3_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_IUNLINK|S_BARRIER|S_NOATIME|S_DIRSYNC); if (flags & EXT3_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT3_APPEND_FL) inode->i_flags |= S_APPEND; if (flags & EXT3_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT3_BARRIER_FL) + inode->i_flags |= S_BARRIER; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT3_DIRSYNC_FL) @@ -2432,6 +2444,8 @@ struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh; int block; + uid_t uid; + gid_t gid; #ifdef CONFIG_EXT3_FS_POSIX_ACL ei->i_acl = EXT3_ACL_NOT_CACHED; @@ -2444,12 +2458,17 @@ bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= 
le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -2576,6 +2595,8 @@ struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2585,29 +2606,32 @@ raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); /* * Fix up interoperability with old kernels. 
Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_INTERN + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -2732,6 +2756,44 @@ return ext3_force_commit(inode->i_sb); } +static int ext3_setattr_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + int err = 0; + + oldflags = EXT3_I(inode)->i_flags; + newflags = oldflags & + ~(EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | EXT3_BARRIER_FL); + if (IS_IMMUTABLE(inode)) + newflags |= EXT3_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT3_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT3_BARRIER_FL; + + if (oldflags ^ newflags) { + handle_t *handle; + struct ext3_iloc iloc; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + EXT3_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); +flags_err: + ext3_journal_stop(handle); + } + return err; +} + /* * ext3_setattr() * @@ -2760,7 +2822,8 @@ return error; if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && 
attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, @@ -2782,6 +2845,8 @@ inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) && IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; error = ext3_mark_inode_dirty(handle, inode); ext3_journal_stop(handle); } @@ -2804,6 +2869,12 @@ ext3_journal_stop(handle); } + if (ia_valid & ATTR_ATTR_FLAG) { + rc = ext3_setattr_flags(inode); + if (!error) + error = rc; + } + rc = inode_setattr(inode, attr); /* If inode_setattr's call to ext3_truncate failed to get a Index: linux-2.6.14/fs/ext3/ioctl.c =================================================================== --- linux-2.6.14.orig/fs/ext3/ioctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/ioctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -35,7 +36,8 @@ unsigned int oldflags; unsigned int jflag; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -58,7 +60,9 @@ * * This test looks nicer. 
Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if ((oldflags & EXT3_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT3_APPEND_FL | + EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } @@ -111,7 +115,8 @@ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(generation, (int __user *) arg)) return -EFAULT; @@ -165,7 +170,8 @@ if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -200,7 +206,8 @@ if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(n_blocks_count, (__u32 __user *)arg)) @@ -221,7 +228,8 @@ if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, @@ -236,6 +244,40 @@ return err; } +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE) + case EXT3_IOC_SETXID: { + handle_t *handle; + struct ext3_iloc iloc; + int xid; + int err; + + /* fixme: if stealth, return -ENOTTY */ + if (!capable(CAP_CONTEXT)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (!(inode->i_sb->s_flags & MS_TAGXID)) + return -ENOSYS; + if (get_user(xid, (int *) arg)) + return -EFAULT; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto setxid_err; + + inode->i_xid = (xid & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + + err =
ext3_mark_iloc_dirty(handle, inode, &iloc); +setxid_err: + /* the handle from ext3_journal_start() must be stopped on every exit path */ + ext3_journal_stop(handle); + return err; + } +#endif default: return -ENOTTY; Index: linux-2.6.14/fs/ext3/namei.c =================================================================== --- linux-2.6.14.orig/fs/ext3/namei.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/namei.c 2005-10-31 11:05:45.000000000 -0600 @@ -36,6 +36,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -1002,6 +1003,7 @@ if (!inode) return ERR_PTR(-EACCES); + vx_propagate_xid(nd, inode); } if (inode) return d_splice_alias(inode, dentry); Index: linux-2.6.14/fs/ext3/super.c =================================================================== --- linux-2.6.14.orig/fs/ext3/super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/super.c 2005-10-31 11:05:45.000000000 -0600 @@ -553,12 +553,12 @@ static int ext3_acquire_dquot(struct dquot *dquot); static int ext3_release_dquot(struct dquot *dquot); static int ext3_mark_dquot_dirty(struct dquot *dquot); -static int ext3_write_info(struct super_block *sb, int type); -static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); -static int ext3_quota_on_mount(struct super_block *sb, int type); -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, +static int ext3_write_info(struct dqhash *hash, int type); +static int ext3_quota_on(struct dqhash *hash, int type, int format_id, char *path); +static int ext3_quota_on_mount(struct dqhash *hash, int type); +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off); -static ssize_t ext3_quota_write(struct super_block *sb, int type, +static ssize_t ext3_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off); static struct dquot_operations ext3_quota_operations = { @@ -625,7 +625,7 @@ Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota,
Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota + Opt_grpquota, Opt_tagxid }; static match_table_t tokens = { @@ -674,6 +674,7 @@ {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_tagxid, "tagxid"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -765,6 +766,11 @@ case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + set_opt (sbi->s_mount_opt, TAGXID); + break; +#endif case Opt_check: #ifdef CONFIG_EXT3_CHECK set_opt (sbi->s_mount_opt, CHECK); @@ -881,7 +887,7 @@ case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if (sb_any_quota_enabled(sb)) { + if (dqh_any_quota_enabled(sb->s_dqh)) { printk(KERN_ERR "EXT3-fs: Cannot change journalled " "quota options when quota turned on.\n"); @@ -919,7 +925,7 @@ case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if (sb_any_quota_enabled(sb)) { + if (dqh_any_quota_enabled(sb->s_dqh)) { printk(KERN_ERR "EXT3-fs: Cannot change " "journalled quota options when " "quota turned on.\n"); @@ -947,7 +953,7 @@ set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: - if (sb_any_quota_enabled(sb)) { + if (dqh_any_quota_enabled(sb->s_dqh)) { printk(KERN_ERR "EXT3-fs: Cannot change quota " "options when quota turned on.\n"); return 0; @@ -1225,7 +1231,7 @@ /* Turn on quotas so that they are updated correctly */ for (i = 0; i < MAXQUOTAS; i++) { if (EXT3_SB(sb)->s_qf_names[i]) { - int ret = ext3_quota_on_mount(sb, i); + int ret = ext3_quota_on_mount(sb->s_dqh, i); if (ret < 0) printk(KERN_ERR "EXT3-fs: Cannot turn on journalled " @@ -1275,8 +1281,8 @@ #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) - vfs_quota_off(sb, i); + if (dqh_dqopt(sb->s_dqh)->files[i]) + vfs_quota_off(sb->s_dqh, i); } #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -1421,6 +1427,9 @@ if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) goto 
failed_mount; + if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGXID) + sb->s_flags |= MS_TAGXID; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -1612,8 +1621,10 @@ sb->s_export_op = &ext3_export_ops; sb->s_xattr = ext3_xattr_handlers; #ifdef CONFIG_QUOTA - sb->s_qcop = &ext3_qctl_operations; - sb->dq_op = &ext3_quota_operations; +// sb->dq_op = &ext3_quota_operations; + sb->s_dqh->dqh_qop = &ext3_quota_operations; +// sb->s_qcop = &ext3_qctl_operations; + sb->s_dqh->dqh_qcop = &ext3_qctl_operations; #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ @@ -2211,6 +2222,12 @@ if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + if ((sbi->s_mount_opt & EXT3_MOUNT_TAGXID) && + !(sb->s_flags & MS_TAGXID)) { + printk("EXT3-fs: %s: tagxid not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); @@ -2364,7 +2381,7 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot) { - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + return dqh_dqopt(dquot->dq_dqh)->files[dquot->dq_type]; } static int ext3_dquot_initialize(struct inode *inode, int type) @@ -2407,7 +2424,7 @@ inode = dquot_to_inode(dquot); handle = ext3_journal_start(inode, - EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); @@ -2423,7 +2440,7 @@ handle_t *handle; handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + EXT3_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); @@ -2439,7 +2456,7 @@ handle_t *handle; handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + EXT3_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_release(dquot); @@ -2452,8 +2469,8 @@ static int ext3_mark_dquot_dirty(struct dquot *dquot) { /* Are we journalling quotas? 
*/ - if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + if (EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] || + EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return ext3_write_dquot(dquot); } else { @@ -2461,8 +2478,9 @@ } } -static int ext3_write_info(struct super_block *sb, int type) +static int ext3_write_info(struct dqhash *hash, int type) { + struct super_block *sb = hash->dqh_sb; int ret, err; handle_t *handle; @@ -2470,7 +2488,7 @@ handle = ext3_journal_start(sb->s_root->d_inode, 2); if (IS_ERR(handle)) return PTR_ERR(handle); - ret = dquot_commit_info(sb, type); + ret = dquot_commit_info(hash, type); err = ext3_journal_stop(handle); if (!ret) ret = err; @@ -2481,18 +2499,20 @@ * Turn on quotas during mount time - we need to find * the quota file and such... */ -static int ext3_quota_on_mount(struct super_block *sb, int type) +static int ext3_quota_on_mount(struct dqhash *hash, int type) { - return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], - EXT3_SB(sb)->s_jquota_fmt, type); + return vfs_quota_on_mount(hash, + EXT3_SB(hash->dqh_sb)->s_qf_names[type], + EXT3_SB(hash->dqh_sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ -static int ext3_quota_on(struct super_block *sb, int type, int format_id, +static int ext3_quota_on(struct dqhash *hash, int type, int format_id, char *path) { + struct super_block *sb = hash->dqh_sb; int err; struct nameidata nd; @@ -2501,7 +2521,7 @@ /* Not journalling quota? */ if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; @@ -2516,17 +2536,18 @@ "EXT3-fs: Quota file not on filesystem root. 
" "Journalled quota will not work.\n"); path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); } /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); @@ -2561,10 +2582,11 @@ /* Write to quotafile (we know the transaction is already started and has * enough credits) */ -static ssize_t ext3_quota_write(struct super_block *sb, int type, +static ssize_t ext3_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); Index: linux-2.6.14/fs/ext3/xattr.c =================================================================== --- linux-2.6.14.orig/fs/ext3/xattr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ext3/xattr.c 2005-10-31 11:05:45.000000000 -0600 @@ -58,6 +58,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -495,6 +496,7 @@ ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); unlock_buffer(bh); ea_bdebug(bh, "refcount now=%d; releasing", @@ -763,11 +765,14 @@ if (new_bh 
== bs->bh) ea_bdebug(new_bh, "keeping"); else { + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto cleanup; /* The old block is released after updating the inode. */ error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; + goto cleanup_dlimit; error = ext3_journal_get_write_access(handle, new_bh); if (error) @@ -843,6 +848,8 @@ cleanup_dquot: DQUOT_FREE_BLOCK(inode, 1); +cleanup_dlimit: + DLIMIT_FREE_BLOCK(inode, 1); goto cleanup; bad_block: Index: linux-2.6.14/fs/fcntl.c =================================================================== --- linux-2.6.14.orig/fs/fcntl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/fcntl.c 2005-10-31 11:05:45.000000000 -0600 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -84,6 +85,8 @@ error = -EMFILE; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; + if (!vx_files_avail(1)) + goto out; error = expand_files(files, newfd); if (error < 0) @@ -125,6 +128,7 @@ FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); + vx_openfd_inc(fd); fd_install(fd, file); } else { spin_unlock(&files->file_lock); @@ -177,6 +181,9 @@ if (tofree) filp_close(tofree, files); + else + vx_openfd_inc(newfd); /* fd was unused */ + err = newfd; out: return err; @@ -477,7 +484,7 @@ read_lock(&tasklist_lock); if (pid > 0) { - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (p) { send_sigio_to_task(p, fown, fd, band); } @@ -512,7 +519,7 @@ read_lock(&tasklist_lock); if (pid > 0) { - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (p) { send_sigurg_to_task(p, fown); } Index: linux-2.6.14/fs/file_table.c =================================================================== --- linux-2.6.14.orig/fs/file_table.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/file_table.c 2005-10-31 11:05:45.000000000 -0600 @@ -18,6 +18,8 @@ #include #include #include +#include +#include /* sysctl tunables... 
*/ struct files_stat_struct files_stat = { @@ -96,6 +98,8 @@ rwlock_init(&f->f_owner.lock); /* f->f_version: 0 */ INIT_LIST_HEAD(&f->f_list); + f->f_xid = vx_current_xid(); + vx_files_inc(f); return f; over: @@ -150,6 +154,8 @@ fops_put(file->f_op); if (file->f_mode & FMODE_WRITE) put_write_access(inode); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file->f_dentry = NULL; file->f_vfsmnt = NULL; @@ -215,6 +221,8 @@ { if (rcuref_dec_and_test(&file->f_count)) { security_file_free(file); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file_free(file); } Index: linux-2.6.14/fs/hfsplus/ioctl.c =================================================================== --- linux-2.6.14.orig/fs/hfsplus/ioctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/hfsplus/ioctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -34,7 +34,8 @@ flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) Index: linux-2.6.14/fs/inode.c =================================================================== --- linux-2.6.14.orig/fs/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -115,6 +115,9 @@ struct address_space * const mapping = &inode->i_data; inode->i_sb = sb; + + /* essential because of inode slab reuse */ + inode->i_xid = 0; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic_set(&inode->i_count, 1); @@ -127,6 +130,7 @@ inode->i_bytes = 0; inode->i_generation = 0; #ifdef CONFIG_QUOTA + inode->i_dqh = dqhget(sb->s_dqh); memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); #endif inode->i_pipe = NULL; @@ -174,6 +178,8 @@ if (inode_has_buffers(inode)) BUG(); security_inode_free(inode); + if (dqhash_valid(inode->i_dqh)) + dqhput(inode->i_dqh); if 
(inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else @@ -235,6 +241,8 @@ inodes_stat.nr_unused--; } +EXPORT_SYMBOL_GPL(__iget); + /** * clear_inode - clear an inode * @inode: inode to clear @@ -1256,12 +1264,13 @@ /* Function back in dquot.c */ int remove_inode_dquot_ref(struct inode *, int, struct list_head *); -void remove_dquot_ref(struct super_block *sb, int type, +void remove_dquot_ref(struct dqhash *hash, int type, struct list_head *tofree_head) { struct inode *inode; + struct super_block *sb = hash->dqh_sb; - if (!sb->dq_op) + if (!hash->dqh_qop) return; /* nothing to do */ spin_lock(&inode_lock); /* This lock is for inodes code */ Index: linux-2.6.14/fs/ioctl.c =================================================================== --- linux-2.6.14.orig/fs/ioctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ioctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -12,10 +12,19 @@ #include #include #include +#include +#include +#include #include #include + +#ifdef CONFIG_VSERVER_LEGACY +extern int vx_proc_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); +#endif + static long do_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -146,6 +155,48 @@ else error = -ENOTTY; break; +#ifdef CONFIG_VSERVER_LEGACY +#ifndef CONFIG_INOXID_NONE + case FIOC_GETXID: { + struct inode *inode = filp->f_dentry->d_inode; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (capable(CAP_CONTEXT)) + error = put_user(inode->i_xid, (int *) arg); + break; + } + case FIOC_SETXID: { + struct inode *inode = filp->f_dentry->d_inode; + int xid; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -ENOSYS; + if (!(inode->i_sb->s_flags & MS_TAGXID)) + break; + error = -EFAULT; + if (get_user(xid, (int *) arg)) + break; + error = 0; + inode->i_xid = (xid & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + 
mark_inode_dirty(inode); + break; + } +#endif + case FIOC_GETXFLG: + case FIOC_SETXFLG: + error = -ENOTTY; + if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC) + error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + break; +#endif default: if (S_ISREG(filp->f_dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); Index: linux-2.6.14/fs/ioprio.c =================================================================== --- linux-2.6.14.orig/fs/ioprio.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ioprio.c 2005-10-31 11:05:45.000000000 -0600 @@ -22,6 +22,7 @@ #include #include #include +#include static int set_task_ioprio(struct task_struct *task, int ioprio) { @@ -93,7 +94,7 @@ if (!who) user = current->user; else - user = find_user(who); + user = find_user(who, vx_current_xid()); if (!user) break; @@ -147,7 +148,7 @@ if (!who) user = current->user; else - user = find_user(who); + user = find_user(who, vx_current_xid()); if (!user) break; Index: linux-2.6.14/fs/jfs/acl.c =================================================================== --- linux-2.6.14.orig/fs/jfs/acl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/acl.c 2005-10-31 11:05:45.000000000 -0600 @@ -229,7 +229,8 @@ return rc; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_XID && iattr->ia_xid != inode->i_xid)) { if (DQUOT_TRANSFER(inode, iattr)) return -EDQUOT; } Index: linux-2.6.14/fs/jfs/file.c =================================================================== --- linux-2.6.14.orig/fs/jfs/file.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/file.c 2005-10-31 11:05:45.000000000 -0600 @@ -111,6 +111,7 @@ .readv = generic_file_readv, .writev = generic_file_writev, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .fsync = jfs_fsync, 
.release = jfs_release, }; Index: linux-2.6.14/fs/jfs/inode.c =================================================================== --- linux-2.6.14.orig/fs/jfs/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -143,6 +144,7 @@ DQUOT_INIT(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } clear_inode(inode); Index: linux-2.6.14/fs/jfs/jfs_dtree.c =================================================================== --- linux-2.6.14.orig/fs/jfs/jfs_dtree.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/jfs_dtree.c 2005-10-31 11:05:45.000000000 -0600 @@ -102,6 +102,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -383,10 +384,10 @@ */ if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage)) goto clean_up; - if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - DQUOT_FREE_BLOCK(ip, sbi->nbperpage); - goto clean_up; - } + if (DLIMIT_ALLOC_BLOCK(ip, sbi->nbperpage)) + goto clean_up_dquot; + if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) + goto clean_up_dlimit; /* * Save the table, we're going to overwrite it with the @@ -479,6 +480,12 @@ return index; + clean_up_dlimit: + DLIMIT_FREE_BLOCK(ip, sbi->nbperpage); + + clean_up_dquot: + DQUOT_FREE_BLOCK(ip, sbi->nbperpage); + clean_up: jfs_ip->next_index--; @@ -930,7 +937,8 @@ static int dtSplitUp(tid_t tid, struct inode *ip, struct dtsplit * split, struct btstack * btstack) { - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + struct super_block *sb = ip->i_sb; + struct jfs_sb_info *sbi = JFS_SBI(sb); int rc = 0; struct metapage *smp; dtpage_t *sp; /* split page */ @@ -952,6 +960,7 @@ struct tlock *tlck; struct lv *lv; int quota_allocation = 0; + int dlimit_allocation = 0; /* get split page */ smp = split->mp; @@ -1033,6 +1042,12 @@ } quota_allocation += n; + 
if (DLIMIT_ALLOC_BLOCK(ip, n)) { + rc = -ENOSPC; + goto extendOut; + } + dlimit_allocation += n; + if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, (s64) n, &nxaddr))) goto extendOut; @@ -1301,6 +1316,9 @@ freeKeyName: kfree(key.name); + /* Rollback dlimit allocation */ + if (rc && dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation */ if (rc && quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1368,6 +1386,12 @@ release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); @@ -1918,6 +1942,12 @@ release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } BT_MARK_DIRTY(rmp, ip); /* @@ -2284,6 +2314,8 @@ xlen = lengthPXD(&fp->header.self); + /* Free dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation. */ DQUOT_FREE_BLOCK(ip, xlen); @@ -2360,6 +2392,8 @@ xlen = lengthPXD(&p->header.self); + /* Free dlimit allocation */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation */ DQUOT_FREE_BLOCK(ip, xlen); Index: linux-2.6.14/fs/jfs/jfs_extent.c =================================================================== --- linux-2.6.14.orig/fs/jfs/jfs_extent.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/jfs_extent.c 2005-10-31 11:05:45.000000000 -0600 @@ -18,6 +18,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_superblock.h" @@ -146,6 +147,13 @@ up(&JFS_IP(ip)->commit_sem); return -EDQUOT; } + /* Allocate blocks to dlimit. 
*/ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + up(&JFS_IP(ip)->commit_sem); + return -ENOSPC; + } /* determine the value of the extent flag */ xflag = (abnr == TRUE) ? XAD_NOTRECORDED : 0; @@ -164,6 +172,7 @@ */ if (rc) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); up(&JFS_IP(ip)->commit_sem); return (rc); @@ -261,6 +270,13 @@ up(&JFS_IP(ip)->commit_sem); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + up(&JFS_IP(ip)->commit_sem); + return -ENOSPC; + } delta = nxlen - xlen; @@ -297,6 +313,7 @@ /* extend the extent */ if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { dbFree(ip, xaddr + xlen, delta); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } @@ -308,6 +325,7 @@ */ if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } Index: linux-2.6.14/fs/jfs/jfs_filsys.h =================================================================== --- linux-2.6.14.orig/fs/jfs/jfs_filsys.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/jfs_filsys.h 2005-10-31 11:05:45.000000000 -0600 @@ -84,6 +84,7 @@ #define JFS_DIR_INDEX 0x00200000 /* Persistant index for */ /* directory entries */ +#define JFS_TAGXID 0x00800000 /* xid tagging */ /* * buffer cache configuration Index: linux-2.6.14/fs/jfs/jfs_imap.c =================================================================== --- linux-2.6.14.orig/fs/jfs/jfs_imap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/jfs_imap.c 2005-10-31 11:05:45.000000000 -0600 @@ -45,6 +45,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" @@ -3072,14 +3073,21 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) { struct 
jfs_inode_info *jfs_ip = JFS_IP(ip); + uid_t uid; + gid_t gid; jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; ip->i_nlink = le32_to_cpu(dip->di_nlink); - ip->i_uid = le32_to_cpu(dip->di_uid); - ip->i_gid = le32_to_cpu(dip->di_gid); + + uid = le32_to_cpu(dip->di_uid); + gid = le32_to_cpu(dip->di_gid); + ip->i_uid = INOXID_UID(XID_TAG(ip), uid, gid); + ip->i_gid = INOXID_GID(XID_TAG(ip), uid, gid); + ip->i_xid = INOXID_XID(XID_TAG(ip), uid, gid, 0); + ip->i_size = le64_to_cpu(dip->di_size); ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); @@ -3130,6 +3138,8 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + uid_t uid; + gid_t gid; dip->di_fileset = cpu_to_le32(jfs_ip->fileset); dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); @@ -3138,8 +3148,11 @@ dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - dip->di_uid = cpu_to_le32(ip->i_uid); - dip->di_gid = cpu_to_le32(ip->i_gid); + + uid = XIDINO_UID(XID_TAG(ip), ip->i_uid, ip->i_xid); + gid = XIDINO_GID(XID_TAG(ip), ip->i_gid, ip->i_xid); + dip->di_uid = cpu_to_le32(uid); + dip->di_gid = cpu_to_le32(gid); /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones Index: linux-2.6.14/fs/jfs/jfs_inode.c =================================================================== --- linux-2.6.14.orig/fs/jfs/jfs_inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/jfs_inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -18,6 +18,8 @@ #include #include +#include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -62,10 +64,17 @@ } else inode->i_gid = current->fsgid; + inode->i_xid = vx_current_fsxid(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + iput(inode); + return NULL; + } + /* * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { + DLIMIT_FREE_INODE(inode); DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; Index: linux-2.6.14/fs/jfs/jfs_xtree.c =================================================================== --- linux-2.6.14.orig/fs/jfs/jfs_xtree.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/jfs_xtree.c 2005-10-31 11:05:45.000000000 -0600 @@ -21,6 +21,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -841,7 +842,12 @@ hint = 0; if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen))) goto out; + if ((rc = DLIMIT_ALLOC_BLOCK(ip, xlen))) { + DQUOT_FREE_BLOCK(ip, xlen); + goto out; + } if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); goto out; } @@ -871,6 +877,7 @@ /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); } return rc; @@ -919,7 +926,6 @@ out: /* unpin the leaf page */ XT_PUTPAGE(mp); - return rc; } @@ -1231,6 +1237,7 @@ struct tlock *tlck; struct xtlock *sxtlck = NULL, *rxtlck = NULL; int quota_allocation = 0; + int dlimit_allocation = 0; smp = split->mp; sp = XT_PAGE(ip, smp); @@ -1243,13 +1250,19 @@ rbn = addressPXD(pxd); /* Allocate blocks to quota. 
*/ - if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { rc = -EDQUOT; goto clean_up; } - quota_allocation += lengthPXD(pxd); + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = -ENOSPC; + goto clean_up; + } + dlimit_allocation += lengthPXD(pxd); + /* * allocate the new right page for the split */ @@ -1451,6 +1464,9 @@ clean_up: + /* Rollback dlimit allocation. */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation. */ if (quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1515,6 +1531,12 @@ release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); @@ -3941,6 +3963,8 @@ else ip->i_size = newsize; + /* update dlimit allocation to reflect freed blocks */ + DLIMIT_FREE_BLOCK(ip, nfreed); /* update quota allocation to reflect freed blocks */ DQUOT_FREE_BLOCK(ip, nfreed); Index: linux-2.6.14/fs/jfs/namei.c =================================================================== --- linux-2.6.14.orig/fs/jfs/namei.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/namei.c 2005-10-31 11:05:45.000000000 -0600 @@ -20,6 +20,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_inode.h" @@ -1386,6 +1387,7 @@ txAbort(tid, 0); goto out3; } + vx_propagate_xid(nd, ip); ip->i_op = &jfs_file_inode_operations; jfs_ip->dev = new_encode_dev(rdev); Index: linux-2.6.14/fs/jfs/super.c =================================================================== --- linux-2.6.14.orig/fs/jfs/super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/super.c 2005-10-31 11:05:45.000000000 -0600 @@ -195,7 +195,7 @@ enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, 
Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_tagxid }; static match_table_t tokens = { @@ -205,6 +205,7 @@ {Opt_resize, "resize=%u"}, {Opt_resize_nosize, "resize"}, {Opt_errors, "errors=%s"}, + {Opt_tagxid, "tagxid"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, {Opt_usrquota, "usrquota"}, @@ -314,6 +315,11 @@ break; #endif +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + *flag |= JFS_TAGXID; + break; +#endif default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -344,6 +350,13 @@ if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } + + if ((flag & JFS_TAGXID) && !(sb->s_flags & MS_TAGXID)) { + printk(KERN_ERR "JFS: %s: tagxid not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } + if (newLVSize) { if (sb->s_flags & MS_RDONLY) { printk(KERN_ERR @@ -415,6 +428,9 @@ #ifdef CONFIG_JFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif + /* map mount option tagxid */ + if (sbi->flag & JFS_TAGXID) + sb->s_flags |= MS_TAGXID; if (newLVSize) { printk(KERN_ERR "resize option for remount only\n"); Index: linux-2.6.14/fs/jfs/xattr.c =================================================================== --- linux-2.6.14.orig/fs/jfs/xattr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/jfs/xattr.c 2005-10-31 11:05:45.000000000 -0600 @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -277,9 +278,16 @@ if (DQUOT_ALLOC_BLOCK(ip, nblocks)) { return -EDQUOT; } + /* Allocate new blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nblocks)) { + DQUOT_FREE_BLOCK(ip, nblocks); + return -ENOSPC; + } rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { + /*Rollback dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, nblocks); /*Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); return rc; @@ -346,6 +354,8 @@ failed: /* Rollback quota allocation. 
*/ + DLIMIT_FREE_BLOCK(ip, nblocks); + /* Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); dbFree(ip, blkno, nblocks); @@ -482,6 +492,7 @@ s64 blkno; int rc; int quota_allocation = 0; + int dlimit_allocation = 0; /* When fsck.jfs clears a bad ea, it doesn't clear the size */ if (ji->ea.flag == 0) @@ -554,9 +565,14 @@ /* Allocate new blocks to quota. */ if (DQUOT_ALLOC_BLOCK(inode, blocks_needed)) return -EDQUOT; - quota_allocation = blocks_needed; + /* Allocate new blocks to dlimit. */ + rc = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, blocks_needed)) + goto clean_up; + dlimit_allocation = blocks_needed; + rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed, &blkno); if (rc) @@ -613,6 +629,9 @@ return ea_size; clean_up: + /* Rollback dlimit allocation */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(inode, dlimit_allocation); /* Rollback quota allocation */ if (quota_allocation) DQUOT_FREE_BLOCK(inode, quota_allocation); @@ -689,8 +708,10 @@ } /* If old blocks exist, they must be removed from quota allocation. */ - if (old_blocks) + if (old_blocks) { + DLIMIT_FREE_BLOCK(inode, old_blocks); DQUOT_FREE_BLOCK(inode, old_blocks); + } inode->i_ctime = CURRENT_TIME; Index: linux-2.6.14/fs/libfs.c =================================================================== --- linux-2.6.14.orig/fs/libfs.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/libfs.c 2005-10-31 11:05:45.000000000 -0600 @@ -122,7 +122,8 @@ * both impossible due to the lock on directory. 
*/ -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +static inline int do_dcache_readdir_filter(struct file * filp, + void * dirent, filldir_t filldir, int (*filter)(struct dentry *dentry)) { struct dentry *dentry = filp->f_dentry; struct dentry *cursor = filp->private_data; @@ -156,6 +157,8 @@ next = list_entry(p, struct dentry, d_child); if (d_unhashed(next) || !next->d_inode) continue; + if (filter && !filter(next)) + continue; spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) @@ -172,6 +175,18 @@ return 0; } +int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, NULL); +} + +int dcache_readdir_filter(struct file * filp, void * dirent, filldir_t filldir, + int (*filter)(struct dentry *)) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, filter); +} + + ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; @@ -620,6 +635,7 @@ EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); +EXPORT_SYMBOL(dcache_readdir_filter); EXPORT_SYMBOL(generic_read_dir); EXPORT_SYMBOL(get_sb_pseudo); EXPORT_SYMBOL(simple_commit_write); Index: linux-2.6.14/fs/locks.c =================================================================== --- linux-2.6.14.orig/fs/locks.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/locks.c 2005-10-31 11:05:45.000000000 -0600 @@ -125,6 +125,7 @@ #include #include #include +#include #include #include @@ -150,12 +151,16 @@ /* Allocate an empty lock structure. */ static struct file_lock *locks_alloc_lock(void) { + if (!vx_locks_avail(1)) + return NULL; return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); } /* Free a lock which is not in use. 
*/ static inline void locks_free_lock(struct file_lock *fl) { + vx_locks_dec(fl); + if (fl == NULL) { BUG(); return; @@ -199,6 +204,7 @@ fl->fl_start = fl->fl_end = 0; fl->fl_ops = NULL; fl->fl_lmops = NULL; + fl->fl_xid = -1; } EXPORT_SYMBOL(locks_init_lock); @@ -236,6 +242,8 @@ fl->fl_ops->fl_copy_lock(new, fl); if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock) fl->fl_lmops->fl_copy_lock(new, fl); + + new->fl_xid = fl->fl_xid; } EXPORT_SYMBOL(locks_copy_lock); @@ -272,6 +280,11 @@ fl->fl_flags = FL_FLOCK; fl->fl_type = type; fl->fl_end = OFFSET_MAX; + + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + fl->fl_xid = filp->f_xid; + vx_locks_inc(fl); *lock = fl; return 0; @@ -449,6 +462,11 @@ if (fl == NULL) return -ENOMEM; + fl->fl_xid = vx_current_xid(); + if (filp) + vxd_assert(filp->f_xid == fl->fl_xid, + "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid); + vx_locks_inc(fl); error = lease_init(filp, type, fl); if (error) return error; @@ -764,7 +782,7 @@ EXPORT_SYMBOL(posix_lock_file); -static int __posix_lock_file(struct inode *inode, struct file_lock *request) +static int __posix_lock_file(struct inode *inode, struct file_lock *request, xid_t xid) { struct file_lock *fl; struct file_lock *new_fl, *new_fl2; @@ -773,12 +791,18 @@ struct file_lock **before; int error, added = 0; + vxd_assert(xid == vx_current_xid(), + "xid(%d) == current(%d)", xid, vx_current_xid()); /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. 
*/ new_fl = locks_alloc_lock(); + new_fl->fl_xid = xid; + vx_locks_inc(new_fl); new_fl2 = locks_alloc_lock(); + new_fl2->fl_xid = xid; + vx_locks_inc(new_fl2); lock_kernel(); if (request->fl_type != F_UNLCK) { @@ -952,7 +976,7 @@ */ int posix_lock_file(struct file *filp, struct file_lock *fl) { - return __posix_lock_file(filp->f_dentry->d_inode, fl); + return __posix_lock_file(filp->f_dentry->d_inode, fl, filp->f_xid); } /** @@ -969,7 +993,8 @@ int error; might_sleep (); for (;;) { - error = __posix_lock_file(filp->f_dentry->d_inode, fl); + error = __posix_lock_file(filp->f_dentry->d_inode, + fl, filp->f_xid); if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1041,7 +1066,7 @@ fl.fl_end = offset + count - 1; for (;;) { - error = __posix_lock_file(inode, &fl); + error = __posix_lock_file(inode, &fl, filp->f_xid); if (error != -EAGAIN) break; if (!(fl.fl_flags & FL_SLEEP)) @@ -1603,6 +1628,11 @@ if (file_lock == NULL) return -ENOLCK; + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This might block, so we do it before checking the inode. */ @@ -1655,7 +1685,7 @@ error = filp->f_op->lock(filp, cmd, file_lock); else { for (;;) { - error = __posix_lock_file(inode, file_lock); + error = __posix_lock_file(inode, file_lock, filp->f_xid); if ((error != -EAGAIN) || (cmd == F_SETLK)) break; error = wait_event_interruptible(file_lock->fl_wait, @@ -1746,6 +1776,11 @@ if (file_lock == NULL) return -ENOLCK; + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This might block, so we do it before checking the inode. 
*/ @@ -1798,7 +1833,7 @@ error = filp->f_op->lock(filp, cmd, file_lock); else { for (;;) { - error = __posix_lock_file(inode, file_lock); + error = __posix_lock_file(inode, file_lock, filp->f_xid); if ((error != -EAGAIN) || (cmd == F_SETLK64)) break; error = wait_event_interruptible(file_lock->fl_wait, @@ -2075,6 +2110,10 @@ list_for_each(tmp, &file_lock_list) { struct list_head *btmp; struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + + if (!vx_check(fl->fl_xid, VX_IDENT|VX_WATCH)) + continue; + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); Index: linux-2.6.14/fs/namei.c =================================================================== --- linux-2.6.14.orig/fs/namei.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/namei.c 2005-10-31 11:05:45.000000000 -0600 @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include #include @@ -221,6 +224,24 @@ return -EACCES; } +static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) { + vxwprintk(1, "xid=%d did hit the barrier.", + vx_current_xid()); + return -EACCES; + } + if (inode->i_xid == 0) + return 0; + if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + return 0; + + vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + vxd_path(nd->dentry, nd->mnt)); + return -EACCES; +} + int permission(struct inode *inode, int mask, struct nameidata *nd) { int retval, submask; @@ -231,7 +252,7 @@ /* * Nobody gets write access to a read-only fs. */ - if (IS_RDONLY(inode) && + if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; @@ -245,6 +266,8 @@ /* Ordinary permission routines do not understand MAY_APPEND. 
*/ submask = mask & ~MAY_APPEND; + if ((retval = xid_permission(inode, mask, nd))) + return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -654,7 +677,8 @@ if (nd->dentry == current->fs->root && nd->mnt == current->fs->rootmnt) { read_unlock(¤t->fs->lock); - break; + /* for sane '/' avoid follow_mount() */ + return; } read_unlock(¤t->fs->lock); spin_lock(&dcache_lock); @@ -691,16 +715,34 @@ { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); + struct inode *inode; if (!dentry) goto need_lookup; if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; + inode = dentry->d_inode; + if (!inode) + goto done; + if (!vx_check(inode->i_xid, VX_WATCH|VX_ADMIN|VX_HOSTID|VX_IDENT)) + goto hidden; + if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *de = PDE(inode); + + if (de && !vx_hide_check(0, de->vx_flags)) + goto hidden; + } done: path->mnt = mnt; path->dentry = dentry; __follow_mount(path); return 0; +hidden: + vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + vxd_path(dentry, mnt)); + dput(dentry); + return -ENOENT; need_lookup: dentry = real_lookup(nd->dentry, name, nd); @@ -1184,7 +1226,8 @@ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). 
*/ -static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) +static inline int may_delete(struct inode *dir, struct dentry *victim, + int isdir, struct nameidata *nd) { int error; @@ -1193,13 +1236,13 @@ BUG_ON(victim->d_parent->d_inode != dir); - error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + error = permission(dir,MAY_WRITE | MAY_EXEC, nd); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXORUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1321,6 +1364,8 @@ return error; } + + int may_open(struct nameidata *nd, int acc_mode, int flag) { struct dentry *dentry = nd->dentry; @@ -1336,6 +1381,11 @@ if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) return -EISDIR; +#ifdef CONFIG_VSERVER_COWBL + if (IS_COW_LINK(inode) && (flag & FMODE_WRITE)) + return -EMLINK; +#endif + error = permission(inode, acc_mode, nd); if (error) return error; @@ -1352,7 +1402,8 @@ return -EACCES; flag &= ~O_TRUNC; - } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) + } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) + && (flag & FMODE_WRITE)) return -EROFS; /* * An append-only file must be opened in append mode for writing. 
@@ -1400,6 +1451,8 @@ return 0; } +int cow_break_link(struct dentry *dentry, const char *pathname); + /* * open_namei() * @@ -1421,6 +1474,11 @@ struct dentry *dir; int count = 0; +#ifdef CONFIG_VSERVER_COWBL + int rflag = flag; + int rmode = mode; +restart: +#endif acc_mode = ACC_MODE(flag); /* Allow the LSM permission hook to distinguish append @@ -1428,6 +1486,7 @@ if (flag & O_APPEND) acc_mode |= MAY_APPEND; + /* Fill in the open() intent data */ nd->intent.open.flags = flag; nd->intent.open.create_mode = mode; @@ -1513,6 +1572,18 @@ goto exit; ok: error = may_open(nd, acc_mode, flag); +#ifdef CONFIG_VSERVER_COWBL + if (error == -EMLINK) { + error = cow_break_link(path.dentry, pathname); + if (error) + goto exit; + path_release(nd); + vxdprintk(VXD_CBIT(misc, 2), "restarting open_namei() ..."); + flag = rflag; + mode = rmode; + goto restart; + } +#endif if (error) goto exit; return 0; @@ -1614,9 +1685,10 @@ } EXPORT_SYMBOL_GPL(lookup_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +int vfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t dev, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1656,7 +1728,6 @@ goto out; dentry = lookup_create(&nd, 0); error = PTR_ERR(dentry); - if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; if (!IS_ERR(dentry)) { @@ -1665,11 +1736,12 @@ error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode, - new_decode_dev(dev)); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + new_decode_dev(dev), &nd); break; case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + 0, &nd); break; case S_IFDIR: error = -EPERM; @@ -1687,9 +1759,10 @@ return error; } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, 
int mode) +int vfs_mkdir(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1728,7 +1801,8 @@ if (!IS_ERR(dentry)) { if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; - error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); + error = vfs_mkdir(nd.dentry->d_inode, dentry, + mode, &nd); dput(dentry); } up(&nd.dentry->d_inode->i_sem); @@ -1768,9 +1842,10 @@ spin_unlock(&dcache_lock); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(dir, dentry, 1, nd); if (error) return error; @@ -1831,7 +1906,7 @@ dentry = lookup_hash(&nd.last, nd.dentry); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); + error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd); dput(dentry); } up(&nd.dentry->d_inode->i_sem); @@ -1842,9 +1917,10 @@ return error; } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +int vfs_unlink(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 0); + int error = may_delete(dir, dentry, 0, nd); if (error) return error; @@ -1906,7 +1982,7 @@ inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry); + error = vfs_unlink(nd.dentry->d_inode, dentry, &nd); exit2: dput(dentry); } @@ -1925,9 +2001,10 @@ goto exit2; } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) +int vfs_symlink(struct inode *dir, struct dentry *dentry, + const char *oldname, int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1967,7 +2044,8 @@ dentry = lookup_create(&nd, 0); error = 
PTR_ERR(dentry); if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); + error = vfs_symlink(nd.dentry->d_inode, dentry, + from, S_IALLUGO, &nd); dput(dentry); } up(&nd.dentry->d_inode->i_sem); @@ -1979,7 +2057,8 @@ return error; } -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +int vfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, struct nameidata *nd) { struct inode *inode = old_dentry->d_inode; int error; @@ -1987,7 +2066,7 @@ if (!inode) return -ENOENT; - error = may_create(dir, new_dentry, NULL); + error = may_create(dir, new_dentry, nd); if (error) return error; @@ -1997,7 +2076,7 @@ /* * A link to an append-only or immutable file cannot be created. */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; @@ -2049,7 +2128,8 @@ new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, + new_dentry, &nd); dput(new_dentry); } up(&nd.dentry->d_inode->i_sem); @@ -2176,14 +2256,14 @@ if (old_dentry->d_inode == new_dentry->d_inode) return 0; - error = may_delete(old_dir, old_dentry, is_dir); + error = may_delete(old_dir, old_dentry, is_dir, NULL); if (error) return error; if (!new_dentry->d_inode) error = may_create(new_dir, new_dentry, NULL); else - error = may_delete(new_dir, new_dentry, is_dir); + error = may_delete(new_dir, new_dentry, is_dir, NULL); if (error) return error; @@ -2260,6 +2340,9 @@ error = -EINVAL; if (old_dentry == trap) goto exit4; + error = -EROFS; + if (MNT_IS_RDONLY(newnd.mnt)) + goto exit4; new_dentry = lookup_hash(&newnd.last, new_dir); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) @@ -2347,6 +2430,128 @@ return __vfs_follow_link(nd, link); } + +#ifdef 
CONFIG_VSERVER_COWBL + +#include + +int cow_break_link(struct dentry *dentry, const char *pathname) +{ + int err = -EMLINK; + int ret, mode, pathlen; + struct nameidata old_nd, dir_nd; + struct dentry *old_dentry, *new_dentry; + struct vfsmount *old_mnt, *new_mnt; + struct file *old_file; + struct file *new_file; + char *to, *path, pad='\251'; + loff_t size; + + vxdprintk(VXD_CBIT(misc, 2), + "cow_break_link(%p,»%s«)", dentry, pathname); + path = kmalloc(PATH_MAX, GFP_KERNEL); + + ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd); + vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret); + old_dentry = old_nd.dentry; + old_mnt = old_nd.mnt; + mode = old_dentry->d_inode->i_mode; + + to = d_path(old_dentry, old_mnt, path, PATH_MAX-2); + pathlen = strlen(to); + vxdprintk(VXD_CBIT(misc, 2), "old path »%s«", to); + + to[pathlen+1] = 0; +retry: + to[pathlen] = pad--; + if (pad <= '\240') + goto out_rel_old; + + vxdprintk(VXD_CBIT(misc, 2), "temp copy »%s«", to); + ret = path_lookup(to, + LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, &dir_nd); + + /* this puppy downs the inode sem */ + new_dentry = lookup_create(&dir_nd, 0); + vxdprintk(VXD_CBIT(misc, 2), + "lookup_create(new): %p", new_dentry); + if (!new_dentry) { + path_release(&dir_nd); + goto retry; + } + + ret = vfs_create(dir_nd.dentry->d_inode, new_dentry, mode, &dir_nd); + vxdprintk(VXD_CBIT(misc, 2), + "vfs_create(new): %d", ret); + if (ret == -EEXIST) { + up(&dir_nd.dentry->d_inode->i_sem); + dput(new_dentry); + path_release(&dir_nd); + goto retry; + } + + new_mnt = dir_nd.mnt; + + dget(old_dentry); + mntget(old_mnt); + /* this one cleans up the dentry in case of failure */ + old_file = dentry_open(old_dentry, old_mnt, O_RDONLY); + vxdprintk(VXD_CBIT(misc, 2), + "dentry_open(old): %p", old_file); + if (!old_file) + goto out_rel_both; + + dget(new_dentry); + mntget(new_mnt); + /* this one cleans up the dentry in case of failure */ + new_file = dentry_open(new_dentry, new_mnt, O_WRONLY); + 
vxdprintk(VXD_CBIT(misc, 2), + "dentry_open(new): %p", new_file); + if (!new_file) + goto out_fput_old; + + vxdprintk(VXD_CBIT(misc, 3), + "file@b %p,%p", new_file, new_file->f_list.next); + size = i_size_read(old_file->f_dentry->d_inode); + ret = vfs_sendfile(new_file, old_file, NULL, size, 0); + vxdprintk(VXD_CBIT(misc, 2), "vfs_sendfile: %d", ret); + vxdprintk(VXD_CBIT(misc, 3), + "file@a %p,%p", new_file, new_file->f_list.next); + + if (ret < 0) + goto out_fput_both; + + ret = vfs_rename(dir_nd.dentry->d_inode, new_dentry, + old_nd.dentry->d_parent->d_inode, old_dentry); + vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret); + if (!ret) + err = 0; + +out_fput_both: + vxdprintk(VXD_CBIT(misc, 3), + "fput(new_file=%p[#%d])", new_file, + atomic_read(&new_file->f_count)); + fput(new_file); + +out_fput_old: + vxdprintk(VXD_CBIT(misc, 3), + "fput(old_file=%p[#%d])", old_file, + atomic_read(&old_file->f_count)); + fput(old_file); + +out_rel_both: + up(&dir_nd.dentry->d_inode->i_sem); + dput(new_dentry); + + path_release(&dir_nd); +out_rel_old: + path_release(&old_nd); + kfree(path); + return err; +} + +#endif + /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) { Index: linux-2.6.14/fs/namespace.c =================================================================== --- linux-2.6.14.orig/fs/namespace.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/namespace.c 2005-10-31 11:05:45.000000000 -0600 @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -161,6 +163,7 @@ mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; mnt->mnt_namespace = current->namespace; + mnt->mnt_xid = old->mnt_xid; /* stick the duplicate mount on the same expiry list * as the original if that was on one */ @@ -215,43 +218,85 @@ seq_escape(m, s, " \t\n\\"); } +static int mnt_is_reachable(struct vfsmount *mnt) +{ + struct vfsmount *root_mnt; + struct dentry *root, *point; + int ret; + + if 
(mnt == mnt->mnt_namespace->root) + return 1; + + spin_lock(&dcache_lock); + root_mnt = current->fs->rootmnt; + root = current->fs->root; + point = root; + + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { + point = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + + ret = (mnt == root_mnt) && is_subdir(point, root); + + spin_unlock(&dcache_lock); + + return ret; +} + static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; int err = 0; static struct proc_fs_info { - int flag; - char *str; + int s_flag; + int mnt_flag; + char *set_str; + char *unset_str; } fs_info[] = { - { MS_SYNCHRONOUS, ",sync" }, - { MS_DIRSYNC, ",dirsync" }, - { MS_MANDLOCK, ",mand" }, - { MS_NOATIME, ",noatime" }, - { MS_NODIRATIME, ",nodiratime" }, - { 0, NULL } - }; - static struct proc_fs_info mnt_info[] = { - { MNT_NOSUID, ",nosuid" }, - { MNT_NODEV, ",nodev" }, - { MNT_NOEXEC, ",noexec" }, - { 0, NULL } + { MS_RDONLY, MNT_RDONLY, "ro", "rw" }, + { MS_SYNCHRONOUS, 0, ",sync", NULL }, + { MS_DIRSYNC, 0, ",dirsync", NULL }, + { MS_MANDLOCK, 0, ",mand", NULL }, + { MS_TAGXID, 0, ",tagxid", NULL }, + { MS_NOATIME, MNT_NOATIME, ",noatime", NULL }, + { MS_NODIRATIME, MNT_NODIRATIME, ",nodiratime", NULL }, + { 0, MNT_NOSUID, ",nosuid", NULL }, + { 0, MNT_NODEV, ",nodev", NULL }, + { 0, MNT_NOEXEC, ",noexec", NULL }, + { 0, 0, NULL, NULL } }; - struct proc_fs_info *fs_infop; + struct proc_fs_info *p; + unsigned long s_flags = mnt->mnt_sb->s_flags; + int mnt_flags = mnt->mnt_flags; - mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); - seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt)) + return 0; + + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "/dev/root / "); + } else { + mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); + seq_putc(m, ' '); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + } mangle(m, mnt->mnt_sb->s_type->name); - seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); - for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_sb->s_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); - } - for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); + seq_putc(m, ' '); + for (p = fs_info; (p->s_flag | p->mnt_flag) ; p++) { + if ((s_flags & p->s_flag) || (mnt_flags & p->mnt_flag)) { + if (p->set_str) + seq_puts(m, p->set_str); + } else { + if (p->unset_str) + seq_puts(m, p->unset_str); + } } + if (mnt->mnt_flags & MNT_XID) + seq_printf(m, ",xid=%d", mnt->mnt_xid); if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); @@ -337,19 +382,12 @@ EXPORT_SYMBOL(may_umount); -static void umount_tree(struct vfsmount *mnt) +static inline void __umount_list(struct list_head *kill) { - struct vfsmount *p; - LIST_HEAD(kill); - - for (p = mnt; p; p = next_mnt(p, mnt)) { - list_del(&p->mnt_list); - list_add(&p->mnt_list, &kill); - p->mnt_namespace = NULL; - } + struct vfsmount *mnt; - while (!list_empty(&kill)) { - mnt = list_entry(kill.next, struct vfsmount, mnt_list); + while (!list_empty(kill)) { + mnt = list_entry(kill->next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); list_del_init(&mnt->mnt_expire); if (mnt->mnt_parent == mnt) { @@ -365,6 +403,34 @@ } } +void umount_tree(struct vfsmount *mnt) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + p->mnt_namespace = NULL; + } + __umount_list(&kill); +} + +void umount_unused(struct vfsmount *mnt, struct fs_struct *fs) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + if (p == fs->rootmnt || p == 
fs->pwdmnt) + continue; + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + p->mnt_namespace = NULL; + } + __umount_list(&kill); +} + static int do_umount(struct vfsmount *mnt, int flags) { struct super_block * sb = mnt->mnt_sb; @@ -424,7 +490,7 @@ down_write(&sb->s_umount); if (!(sb->s_flags & MS_RDONLY)) { lock_kernel(); - DQUOT_OFF(sb); + DQUOT_OFF(sb->s_dqh); retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); unlock_kernel(); } @@ -439,7 +505,7 @@ /* last instance - try to be smart */ spin_unlock(&vfsmount_lock); lock_kernel(); - DQUOT_OFF(sb); + DQUOT_OFF(sb->s_dqh); acct_auto_close(sb); unlock_kernel(); security_sb_umount_close(mnt); @@ -481,7 +547,7 @@ goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) goto dput_and_out; retval = do_umount(nd.mnt, flags); @@ -508,6 +574,8 @@ { if (capable(CAP_SYS_ADMIN)) return 0; + if (vx_ccaps(VXC_SECURE_MOUNT)) + return 0; return -EPERM; #ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) @@ -617,11 +685,13 @@ /* * do loopback mount. 
*/ -static int do_loopback(struct nameidata *nd, char *old_name, int recurse) +static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, + unsigned long flags, int mnt_flags) { struct nameidata old_nd; struct vfsmount *mnt = NULL; int err = mount_is_safe(nd); + int recurse = flags & MS_REC; if (err) return err; if (!old_name || !*old_name) @@ -646,6 +716,10 @@ list_del_init(&mnt->mnt_expire); spin_unlock(&vfsmount_lock); + if (flags & MS_XID) { + mnt->mnt_xid = xid; + mnt->mnt_flags |= MNT_XID; + } err = graft_tree(mnt, nd); if (err) { spin_lock(&vfsmount_lock); @@ -653,6 +727,7 @@ spin_unlock(&vfsmount_lock); } else mntput(mnt); + mnt->mnt_flags = mnt_flags; } up_write(¤t->namespace->sem); @@ -667,12 +742,12 @@ */ static int do_remount(struct nameidata *nd, int flags, int mnt_flags, - void *data) + void *data, xid_t xid) { int err; struct super_block * sb = nd->mnt->mnt_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_REMOUNT)) return -EPERM; if (!check_mnt(nd->mnt)) @@ -681,10 +756,15 @@ if (nd->dentry != nd->mnt->mnt_root) return -EINVAL; + if (vx_ccaps(VXC_SECURE_REMOUNT)) + mnt_flags |= MNT_NODEV; down_write(&sb->s_umount); err = do_remount_sb(sb, flags, data, 0); - if (!err) + if (!err) { nd->mnt->mnt_flags=mnt_flags; + if (flags & MS_XID) + nd->mnt->mnt_xid = xid; + } up_write(&sb->s_umount); if (!err) security_sb_post_remount(nd->mnt, flags, data); @@ -696,7 +776,7 @@ struct nameidata old_nd, parent_nd; struct vfsmount *p; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -768,7 +848,7 @@ return -EINVAL; /* we need capabilities... 
*/ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); @@ -857,7 +937,7 @@ if (atomic_read(&mnt->mnt_sb->s_active) == 1) { /* last instance - try to be smart */ lock_kernel(); - DQUOT_OFF(mnt->mnt_sb); + DQUOT_OFF(mnt->mnt_sb->s_dqh); acct_auto_close(mnt->mnt_sb); unlock_kernel(); } @@ -1017,6 +1097,7 @@ struct nameidata nd; int retval = 0; int mnt_flags = 0; + xid_t xid = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -1032,15 +1113,34 @@ if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; +#ifdef CONFIG_XID_PROPAGATE + retval = vx_parse_xid(data_page, &xid, 1); + if (retval) { + mnt_flags |= MNT_XID; + /* bind and re-mounts get xid flag */ + if (flags & (MS_BIND|MS_REMOUNT)) + flags |= MS_XID; + } +#endif + /* Separate the per-mountpoint flags */ + if (flags & MS_RDONLY) + mnt_flags |= MNT_RDONLY; if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; + if (flags & MS_NOATIME) + mnt_flags |= MNT_NOATIME; + if (flags & MS_NODIRATIME) + mnt_flags |= MNT_NODIRATIME; flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); + if (vx_ccaps(VXC_SECURE_MOUNT)) + mnt_flags |= MNT_NODEV; + /* ... 
and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); if (retval) @@ -1052,9 +1152,9 @@ if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); + data_page, xid); else if (flags & MS_BIND) - retval = do_loopback(&nd, dev_name, flags & MS_REC); + retval = do_loopback(&nd, dev_name, xid, flags, mnt_flags); else if (flags & MS_MOVE) retval = do_move_mount(&nd, dev_name); else @@ -1081,7 +1181,7 @@ if (!(flags & CLONE_NEWNS)) return 0; - if (!capable(CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) { put_namespace(namespace); return -EPERM; } Index: linux-2.6.14/fs/nfs/dir.c =================================================================== --- linux-2.6.14.orig/fs/nfs/dir.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfs/dir.c 2005-10-31 11:05:45.000000000 -0600 @@ -31,6 +31,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -875,6 +876,7 @@ inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); if (!inode) goto out_unlock; + vx_propagate_xid(nd, inode); no_entry: res = d_add_unique(dentry, inode); if (res != NULL) @@ -908,7 +910,8 @@ if (nd->flags & LOOKUP_DIRECTORY) return 0; /* Are we trying to write to a read only partition? 
*/ - if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + if ((IS_RDONLY(dir) || MNT_IS_RDONLY(nd->mnt)) && + (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) return 0; return 1; } Index: linux-2.6.14/fs/nfs/inode.c =================================================================== --- linux-2.6.14.orig/fs/nfs/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfs/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -340,12 +341,16 @@ } server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + if (server->flags & NFS_MOUNT_TAGXID) + sb->s_flags |= MS_TAGXID; + sb->s_maxbytes = fsinfo.maxfilesize; if (sb->s_maxbytes > MAX_LFS_FILESIZE) sb->s_maxbytes = MAX_LFS_FILESIZE; server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + server->client->cl_tagxid = (server->flags & NFS_MOUNT_TAGXID) ? 
1 : 0; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); @@ -398,6 +403,7 @@ clnt->cl_intr = 1; clnt->cl_softrtry = 1; + clnt->cl_tagxid = 1; clnt->cl_chatty = 1; return clnt; @@ -581,6 +587,7 @@ { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_TAGXID, ",tagxid", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -761,8 +768,10 @@ nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + /* maybe fattr->xid someday */ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* * report the blocks in 512byte units @@ -849,6 +858,8 @@ inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) && IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); @@ -1223,6 +1234,9 @@ struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; int data_unstable; + uid_t uid; + gid_t gid; + xid_t xid; spin_lock(&inode->i_lock); @@ -1275,10 +1289,15 @@ nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + /* Have any file permissions changed? 
*/ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) - || inode->i_uid != fattr->uid - || inode->i_gid != fattr->gid) + || inode->i_uid != uid + || inode->i_gid != gid + || inode->i_xid != xid) nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ @@ -1311,6 +1330,9 @@ loff_t cur_isize, new_isize; unsigned int invalid = 0; int data_unstable; + uid_t uid; + gid_t gid; + xid_t xid; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -1393,15 +1415,21 @@ } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || - inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) + inode->i_uid != uid || + inode->i_gid != gid || + inode->i_xid != xid) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_xid = xid; if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* Index: linux-2.6.14/fs/nfs/nfs3xdr.c =================================================================== --- linux-2.6.14.orig/fs/nfs/nfs3xdr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfs/nfs3xdr.c 2005-10-31 11:05:45.000000000 -0600 @@ -22,6 +22,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_XDR @@ -179,7 +180,7 @@ } static inline u32 * -xdr_encode_sattr(u32 *p, struct iattr *attr) +xdr_encode_sattr(u32 *p, struct iattr *attr, int tagxid) { if (attr->ia_valid & ATTR_MODE) { *p++ = xdr_one; @@ -187,15 +188,17 @@ } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_UID) { + if 
(attr->ia_valid & ATTR_UID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_uid); + *p++ = htonl(XIDINO_UID(tagxid, attr->ia_uid, attr->ia_xid)); } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_GID) { + if (attr->ia_valid & ATTR_GID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_gid); + *p++ = htonl(XIDINO_GID(tagxid, attr->ia_gid, attr->ia_xid)); } else { *p++ = xdr_zero; } @@ -280,7 +283,8 @@ nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); *p++ = htonl(args->guard); if (args->guard) p = xdr_encode_time3(p, &args->guardtime); @@ -371,7 +375,8 @@ *p++ = args->verifier[0]; *p++ = args->verifier[1]; } else - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -385,7 +390,8 @@ { p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -398,7 +404,8 @@ { p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); p = xdr_encode_array(p, args->topath, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -413,7 +420,8 @@ p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); *p++ = htonl(args->type); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); if (args->type == NF3CHR || args->type == NF3BLK) 
{ *p++ = htonl(MAJOR(args->rdev)); *p++ = htonl(MINOR(args->rdev)); Index: linux-2.6.14/fs/nfs/nfsroot.c =================================================================== --- linux-2.6.14.orig/fs/nfs/nfsroot.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfs/nfsroot.c 2005-10-31 11:05:45.000000000 -0600 @@ -87,6 +87,7 @@ #include #include #include +#include /* Define this to allow debugging output */ #undef NFSROOT_DEBUG @@ -124,7 +125,7 @@ Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, - Opt_acl, Opt_noacl, + Opt_acl, Opt_noacl, Opt_tagxid, /* Error token */ Opt_err }; @@ -161,6 +162,7 @@ {Opt_tcp, "tcp"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_tagxid, "tagxid"}, {Opt_err, NULL} }; @@ -275,6 +277,11 @@ case Opt_noacl: nfs_data.flags |= NFS_MOUNT_NOACL; break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + nfs_data.flags |= NFS_MOUNT_TAGXID; + break; +#endif default : return 0; } @@ -310,7 +317,7 @@ /* Override them by options set on kernel command-line */ root_nfs_parse(name, buf); - cp = system_utsname.nodename; + cp = vx_new_uts(nodename); if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); return -1; Index: linux-2.6.14/fs/nfsd/auth.c =================================================================== --- linux-2.6.14.orig/fs/nfsd/auth.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfsd/auth.c 2005-10-31 11:05:45.000000000 -0600 @@ -9,6 +9,7 @@ #include #include #include +#include #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) @@ -42,18 +43,20 @@ } if (cred->cr_uid != (uid_t) -1) - current->fsuid = cred->cr_uid; + current->fsuid = INOXID_UID(XID_TAG_NFSD, cred->cr_uid, cred->cr_gid); else current->fsuid = exp->ex_anon_uid; if (cred->cr_gid != (gid_t) -1) - current->fsgid = cred->cr_gid; + current->fsgid = INOXID_GID(XID_TAG_NFSD, 
cred->cr_uid, cred->cr_gid); else current->fsgid = exp->ex_anon_gid; + current->xid = INOXID_XID(XID_TAG_NFSD, cred->cr_uid, cred->cr_gid, 0); + if (!cred->cr_group_info) return -ENOMEM; ret = set_current_groups(cred->cr_group_info); - if ((cred->cr_uid)) { + if (INOXID_UID(XID_TAG_NFSD, cred->cr_uid, cred->cr_gid)) { cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; } else { cap_t(current->cap_effective) |= (CAP_NFSD_MASK & Index: linux-2.6.14/fs/nfsd/nfs3xdr.c =================================================================== --- linux-2.6.14.orig/fs/nfsd/nfs3xdr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfsd/nfs3xdr.c 2005-10-31 11:05:45.000000000 -0600 @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -111,6 +112,8 @@ decode_sattr3(u32 *p, struct iattr *iap) { u32 tmp; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -120,12 +123,15 @@ } if (*p++) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + uid = ntohl(*p++); } if (*p++) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + gid = ntohl(*p++); } + iap->ia_uid = INOXID_UID(XID_TAG_NFSD, uid, gid); + iap->ia_gid = INOXID_GID(XID_TAG_NFSD, uid, gid); + iap->ia_xid = INOXID_XID(XID_TAG_NFSD, uid, gid, 0); if (*p++) { u64 newsize; @@ -166,8 +172,10 @@ *p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]); *p++ = htonl((u32) stat.mode); *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), stat.uid, stat.xid))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), stat.gid, stat.xid))); if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { Index: linux-2.6.14/fs/nfsd/nfs4recover.c =================================================================== --- linux-2.6.14.orig/fs/nfsd/nfs4recover.c 
2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfsd/nfs4recover.c 2005-10-31 11:05:45.000000000 -0600 @@ -155,7 +155,8 @@ dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); goto out_put; } - status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); + status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, + S_IRWXU, &rec_dir); out_put: dput(dentry); out_unlock: @@ -260,7 +261,7 @@ return -EINVAL; } down(&dir->d_inode->i_sem); - status = vfs_unlink(dir->d_inode, dentry); + status = vfs_unlink(dir->d_inode, dentry, NULL); up(&dir->d_inode->i_sem); return status; } @@ -275,7 +276,7 @@ * a kernel from the future.... */ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); down(&dir->d_inode->i_sem); - status = vfs_rmdir(dir->d_inode, dentry); + status = vfs_rmdir(dir->d_inode, dentry, NULL); up(&dir->d_inode->i_sem); return status; } Index: linux-2.6.14/fs/nfsd/nfs4xdr.c =================================================================== --- linux-2.6.14.orig/fs/nfsd/nfs4xdr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfsd/nfs4xdr.c 2005-10-31 11:05:45.000000000 -0600 @@ -57,6 +57,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -1562,14 +1563,18 @@ WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + status = nfsd4_encode_user(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), + stat.uid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + status = nfsd4_encode_group(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), + stat.gid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) Index: linux-2.6.14/fs/nfsd/nfsxdr.c =================================================================== --- linux-2.6.14.orig/fs/nfsd/nfsxdr.c 2005-10-27 19:02:08.000000000 -0500 +++ 
linux-2.6.14/fs/nfsd/nfsxdr.c 2005-10-31 11:05:45.000000000 -0600 @@ -15,6 +15,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -102,6 +103,8 @@ decode_sattr(u32 *p, struct iattr *iap) { u32 tmp, tmp1; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -115,12 +118,15 @@ } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + uid = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + gid = tmp; } + iap->ia_uid = INOXID_UID(XID_TAG_NFSD, uid, gid); + iap->ia_gid = INOXID_GID(XID_TAG_NFSD, uid, gid); + iap->ia_xid = INOXID_XID(XID_TAG_NFSD, uid, gid, 0); if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; iap->ia_size = tmp; @@ -166,8 +172,10 @@ *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat.mode); *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), stat.uid, stat.xid))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), stat.gid, stat.xid))); if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); Index: linux-2.6.14/fs/nfsd/vfs.c =================================================================== --- linux-2.6.14.orig/fs/nfsd/vfs.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/nfsd/vfs.c 2005-10-31 11:05:45.000000000 -0600 @@ -1154,13 +1154,13 @@ err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: - err = vfs_mkdir(dirp, dchild, iap->ia_mode); + err = vfs_mkdir(dirp, dchild, iap->ia_mode, NULL); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev, NULL); break; default: printk("nfsd: bad file type %o in nfsd_create\n", type); @@ -1436,11 +1436,13 @@ else { 
strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + err = vfs_symlink(dentry->d_inode, dnew, + path_alloced, mode, NULL); kfree(path_alloced); } } else - err = vfs_symlink(dentry->d_inode, dnew, path, mode); + err = vfs_symlink(dentry->d_inode, dnew, + path, mode, NULL); if (!err) { if (EX_ISSYNC(fhp->fh_export)) @@ -1498,7 +1500,7 @@ dold = tfhp->fh_dentry; dest = dold->d_inode; - err = vfs_link(dold, dirp, dnew); + err = vfs_link(dold, dirp, dnew, NULL); if (!err) { if (EX_ISSYNC(ffhp->fh_export)) { nfsd_sync_dir(ddir); @@ -1659,9 +1661,9 @@ err = nfserr_perm; } else #endif - err = vfs_unlink(dirp, rdentry); + err = vfs_unlink(dirp, rdentry, NULL); } else { /* It's RMDIR */ - err = vfs_rmdir(dirp, rdentry); + err = vfs_rmdir(dirp, rdentry, NULL); } dput(rdentry); @@ -1773,7 +1775,8 @@ */ if (!(acc & MAY_LOCAL_ACCESS)) if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (EX_RDONLY(exp) || IS_RDONLY(inode)) + if (EX_RDONLY(exp) || IS_RDONLY(inode) + || MNT_IS_RDONLY(exp->ex_mnt)) return nfserr_rofs; if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; Index: linux-2.6.14/fs/open.c =================================================================== --- linux-2.6.14.orig/fs/open.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/open.c 2005-10-31 11:05:45.000000000 -0600 @@ -25,6 +25,9 @@ #include #include #include +#include +#include +#include #include @@ -43,6 +46,8 @@ if (retval == 0 && buf->f_frsize == 0) buf->f_frsize = buf->f_bsize; } + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + vx_vsi_statfs(sb, buf); } return retval; } @@ -241,7 +246,7 @@ goto dput_and_out; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; error = -EPERM; @@ -365,7 +370,7 @@ inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; /* Don't worry, the checks are 
done in inode_change_ok() */ @@ -422,7 +427,7 @@ inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; /* Don't worry, the checks are done in inode_change_ok() */ @@ -504,7 +509,8 @@ if (!res) { res = permission(nd.dentry->d_inode, mode, &nd); /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + if(!res && (mode & S_IWOTH) + && (IS_RDONLY(nd.dentry->d_inode) || MNT_IS_RDONLY(nd.mnt)) && !special_file(nd.dentry->d_inode->i_mode)) res = -EROFS; path_release(&nd); @@ -610,7 +616,7 @@ inode = dentry->d_inode; err = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(file->f_vfsmnt)) goto out_putf; err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -642,7 +648,7 @@ inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; error = -EPERM; @@ -663,7 +669,8 @@ return error; } -static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +static int chown_common(struct dentry *dentry, struct vfsmount *mnt, + uid_t user, gid_t group) { struct inode * inode; int error; @@ -675,7 +682,7 @@ goto out; } error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -683,11 +690,11 @@ newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; - newattrs.ia_uid = user; + newattrs.ia_uid = vx_map_uid(user); } if (group != (gid_t) -1) { newattrs.ia_valid |= ATTR_GID; - newattrs.ia_gid = group; + newattrs.ia_gid = vx_map_gid(group); } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; @@ -705,7 +712,7 @@ error = user_path_walk(filename, &nd); if (!error) { - error = chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, nd.mnt, user, group); 
path_release(&nd); } return error; @@ -718,7 +725,7 @@ error = user_path_walk_link(filename, &nd); if (!error) { - error = chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); } return error; @@ -732,7 +739,7 @@ file = fget(fd); if (file) { - error = chown_common(file->f_dentry, user, group); + error = chown_common(file->f_dentry, file->f_vfsmnt, user, group); fput(file); } return error; @@ -820,7 +827,7 @@ if ((namei_flags+1) & O_ACCMODE) namei_flags++; if (namei_flags & O_TRUNC) - namei_flags |= 2; + namei_flags |= FMODE_WRITE; error = -ENFILE; f = get_empty_filp(); @@ -892,6 +899,7 @@ FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); fdt->next_fd = fd + 1; + vx_openfd_inc(fd); #if 1 /* Sanity check */ if (fdt->fd[fd] != NULL) { @@ -914,6 +922,7 @@ __FD_CLR(fd, fdt->open_fds); if (fd < fdt->next_fd) fdt->next_fd = fd; + vx_openfd_dec(fd); } void fastcall put_unused_fd(unsigned int fd) Index: linux-2.6.14/fs/proc/array.c =================================================================== --- linux-2.6.14.orig/fs/proc/array.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/proc/array.c 2005-10-31 11:05:45.000000000 -0600 @@ -75,6 +75,9 @@ #include #include #include +#include +#include +#include #include #include @@ -135,7 +138,8 @@ "T (stopped)", /* 4 */ "T (tracing stop)", /* 8 */ "Z (zombie)", /* 16 */ - "X (dead)" /* 32 */ + "X (dead)", /* 32 */ + "H (on hold)" /* 64 */ }; static inline const char * get_task_state(struct task_struct *tsk) @@ -144,7 +148,8 @@ TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE | TASK_STOPPED | - TASK_TRACED)) | + TASK_TRACED | + TASK_ONHOLD)) | (tsk->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)); const char **p = &task_state_array[0]; @@ -161,8 +166,13 @@ struct group_info *group_info; int g; struct fdtable *fdt = NULL; + pid_t pid, ptgid, tppid, tgid; read_lock(&tasklist_lock); + tgid = vx_map_tgid(p->tgid); + pid = vx_map_pid(p->pid); + ptgid = 
vx_map_pid(p->group_leader->real_parent->tgid); + tppid = vx_map_pid(p->parent->pid); buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -174,9 +184,8 @@ "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), - p->tgid, - p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, - pid_alive(p) && p->ptrace ? p->parent->pid : 0, + tgid, pid, (pid > 1) ? ptgid : 0, + pid_alive(p) && p->ptrace ? tppid : 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); @@ -296,6 +305,12 @@ int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; +#ifdef CONFIG_VSERVER_LEGACY + struct vx_info *vxi; +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + struct nx_info *nxi; +#endif struct mm_struct *mm = get_task_mm(task); buffer = task_name(task, buffer); @@ -308,6 +323,46 @@ buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); buffer = cpuset_task_status_allowed(task, buffer); + + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto skip; +#ifdef CONFIG_VSERVER_LEGACY + buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task)); + vxi = task_get_vx_info(task); + if (vxi) { + buffer += sprintf (buffer,"ctxflags: %08llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,vxi->vx_initpid); + } else { + buffer += sprintf (buffer,"ctxflags: none\n"); + buffer += sprintf (buffer,"initpid: none\n"); + } + put_vx_info(vxi); +#else + buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task)); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + nxi = task_get_nx_info(task); + if (nxi) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; i<nxi->nbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,nxi->ipv4[i] + ,nxi->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" + ,nxi->v4_bcast); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf
(buffer,"ipv4root_bcast: 0\n"); + } + put_nx_info(nxi); +#endif +skip: #if defined(CONFIG_ARCH_S390) buffer = task_show_regs(task, buffer); #endif @@ -322,7 +377,7 @@ sigset_t sigign, sigcatch; char state; int res; - pid_t ppid, pgid = -1, sid = -1; + pid_t pid, ppid, pgid = -1, sid = -1; int num_threads = 0; struct mm_struct *mm; unsigned long long start_time; @@ -388,7 +443,11 @@ } it_real_value = task->signal->it_real_value; } - ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; + pid = vx_info_map_pid(task->vx_info, pid_alive(task) ? task->pid : 0); + ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info, + task->group_leader->real_parent->tgid); + pgid = vx_info_map_pid(task->vx_info, pgid); + read_unlock(&tasklist_lock); if (!whole || num_threads<2) @@ -409,13 +468,25 @@ /* convert timespec -> nsec*/ start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC + task->start_time.tv_nsec; + /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); + /* fixup start time for virt uptime */ + if (vx_flags(VXF_VIRT_UPTIME, 0)) { + unsigned long long bias = + current->vx_info->cvirt.bias_clock; + + if (start_time > bias) + start_time -= bias; + else + start_time = 0; + } + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", - task->pid, + pid, tcomm, state, ppid, Index: linux-2.6.14/fs/proc/base.c =================================================================== --- linux-2.6.14.orig/fs/proc/base.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/proc/base.c 2005-10-31 11:05:45.000000000 -0600 @@ -70,6 +70,7 @@ #include #include #include +#include #include "internal.h" /* @@ -119,6 +120,8 @@ PROC_TGID_ATTR_EXEC, PROC_TGID_ATTR_FSCREATE, #endif + PROC_TGID_VX_INFO, + PROC_TGID_IP_INFO, #ifdef CONFIG_AUDITSYSCALL PROC_TGID_LOGINUID, #endif @@ -159,6 +162,8 @@ PROC_TID_ATTR_EXEC, 
PROC_TID_ATTR_FSCREATE, #endif + PROC_TID_VX_INFO, + PROC_TID_IP_INFO, #ifdef CONFIG_AUDITSYSCALL PROC_TID_LOGINUID, #endif @@ -214,6 +219,8 @@ #ifdef CONFIG_CPUSETS E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), #endif + E(PROC_TGID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TGID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), #ifdef CONFIG_AUDITSYSCALL @@ -256,6 +263,8 @@ #ifdef CONFIG_CPUSETS E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), #endif + E(PROC_TID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), #ifdef CONFIG_AUDITSYSCALL @@ -1188,7 +1197,7 @@ struct inode *inode = dentry->d_inode; struct pid_entry *p; ino_t ino; - int ret; + int ret, hide; ret = -ENOENT; if (!pid_alive(proc_task(inode))) @@ -1219,11 +1228,20 @@ goto out; } p = ents + i; + hide = vx_flags(VXF_INFO_HIDE, 0); while (p->name) { + if (hide) { + switch (p->type) { + case PROC_TGID_VX_INFO: + case PROC_TGID_IP_INFO: + goto skip; + } + } if (filldir(dirent, p->name, p->len, filp->f_pos, fake_ino(pid, p->type), p->mode >> 12) < 0) goto out; filp->f_pos++; + skip: p++; } } @@ -1297,6 +1315,7 @@ inode->i_uid = task->euid; inode->i_gid = task->egid; } + inode->i_xid = vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -1322,6 +1341,11 @@ { struct inode *inode = dentry->d_inode; struct task_struct *task = proc_task(inode); + + if (!vx_check(vx_task_xid(task), VX_IDENT)) + goto out_drop; + /* discard wrong fakeinit */ + if (pid_alive(task)) { if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { inode->i_uid = task->euid; @@ -1333,6 +1357,7 @@ security_task_to_inode(task, inode); return 1; } +out_drop: d_drop(dentry); return 0; } @@ -1567,6 +1592,9 @@ static struct inode_operations 
proc_tgid_attr_inode_operations; #endif +extern int proc_pid_vx_info(struct task_struct *, char *); +extern int proc_pid_nx_info(struct task_struct *, char *); + static int get_tid_list(int index, unsigned int *tids, struct inode *dir); /* SMP-safe */ @@ -1754,15 +1782,33 @@ inode->i_fop = &proc_loginuid_operations; break; #endif + case PROC_TID_VX_INFO: + case PROC_TGID_VX_INFO: + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto out_noent; + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_vx_info; + break; + case PROC_TID_IP_INFO: + case PROC_TGID_IP_INFO: + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto out_noent; + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_nx_info; + break; default: printk("procfs: impossible type (%d)",p->type); - iput(inode); - return ERR_PTR(-EINVAL); + error = -EINVAL; + goto out_put; } dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); return NULL; +out_noent: + error=-ENOENT; +out_put: + iput(inode); out: return ERR_PTR(error); } @@ -1846,14 +1892,14 @@ int buflen) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -1916,6 +1962,20 @@ } } +#define VXF_FAKE_INIT (VXF_INFO_INIT|VXF_STATE_INIT) + +static inline int proc_pid_visible(struct task_struct *task, int pid) +{ + if ((pid == 1) && + !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT)) + goto visible; + if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto visible; + return 0; +visible: + return 1; +} + /* SMP-safe */ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { @@ -1952,13 +2012,14 @@ if (!task) goto out; - inode = proc_pid_make_inode(dir->i_sb, 
task, PROC_TGID_INO); + /* check for context visibility */ + if (!proc_pid_visible(task, tgid)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + if (!inode) + goto out_drop_task; - if (!inode) { - put_task_struct(task); - goto out; - } inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; @@ -1987,6 +2048,8 @@ goto out; } return NULL; +out_drop_task: + put_task_struct(task); out: return ERR_PTR(-ENOENT); } @@ -2002,6 +2065,8 @@ tid = name_to_int(dentry); if (tid == ~0U) goto out; + if (vx_current_initpid(tid)) + goto out; read_lock(&tasklist_lock); task = find_task_by_pid(tid); @@ -2013,11 +2078,14 @@ if (leader->tgid != task->tgid) goto out_drop_task; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); - + /* check for context visibility */ + if (!proc_pid_visible(task, tid)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); if (!inode) goto out_drop_task; + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; @@ -2057,7 +2125,7 @@ read_lock(&tasklist_lock); p = NULL; if (version) { - p = find_task_by_pid(version); + p = find_task_by_real_pid(version); if (p && !thread_group_leader(p)) p = NULL; } @@ -2069,11 +2137,15 @@ for ( ; p != &init_task; p = next_task(p)) { int tgid = p->pid; + if (!pid_alive(p)) continue; + /* check for context visibility */ + if (!proc_pid_visible(p, tgid)) + continue; if (--index >= 0) continue; - tgids[nr_tgids] = tgid; + tgids[nr_tgids] = vx_map_tgid(tgid); nr_tgids++; if (nr_tgids >= PROC_MAXPIDS) break; @@ -2103,10 +2175,13 @@ if (pid_alive(task)) do { int tid = task->pid; + /* check for context visibility */ + if (!proc_pid_visible(task, tid)) + continue; if (--index >= 0) continue; if (tids != NULL) - tids[nr_tids] = tid; + tids[nr_tids] = vx_map_pid(tid); nr_tids++; if (nr_tids >= PROC_MAXPIDS) 
break; @@ -2182,11 +2257,14 @@ unsigned int nr_tids, i; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct task_struct *task = proc_task(inode); int retval = -ENOENT; ino_t ino; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ - if (!pid_alive(proc_task(inode))) + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out; + if (!pid_alive(task)) goto out; retval = 0; Index: linux-2.6.14/fs/proc/generic.c =================================================================== --- linux-2.6.14.orig/fs/proc/generic.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/proc/generic.c 2005-10-31 11:05:45.000000000 -0600 @@ -19,6 +19,7 @@ #include #include #include +#include #include static ssize_t proc_file_read(struct file *file, char __user *buf, @@ -384,11 +385,15 @@ for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; + if (!vx_hide_check(0, de->vx_flags)) + continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); + /* generic proc entries belong to the host */ + inode->i_xid = 0; break; } } @@ -460,9 +465,12 @@ } do { + if (!vx_hide_check(0, de->vx_flags)) + goto skip; if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; + skip: filp->f_pos++; de = de->next; } while (de); @@ -580,6 +588,7 @@ ent->namelen = len; ent->mode = mode; ent->nlink = nlink; + ent->vx_flags = IATTR_PROC_DEFAULT; out: return ent; } @@ -600,7 +609,8 @@ kfree(ent->data); kfree(ent); ent = NULL; - } + } else + ent->vx_flags = IATTR_PROC_SYMLINK; } else { kfree(ent); ent = NULL; Index: linux-2.6.14/fs/proc/inode.c =================================================================== --- linux-2.6.14.orig/fs/proc/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/proc/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -167,6 +167,8 @@ 
inode->i_uid = de->uid; inode->i_gid = de->gid; } + if (de->vx_flags) + PROC_I(inode)->vx_flags = de->vx_flags; if (de->size) inode->i_size = de->size; if (de->nlink) Index: linux-2.6.14/fs/proc/proc_misc.c =================================================================== --- linux-2.6.14.orig/fs/proc/proc_misc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/proc/proc_misc.c 2005-10-31 11:05:45.000000000 -0600 @@ -52,6 +52,8 @@ #include #include "internal.h" +#include + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* @@ -82,17 +84,32 @@ static int loadavg_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { + unsigned int running, threads; int a, b, c; int len; - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + if (vx_flags(VXF_VIRT_LOAD, 0)) { + struct vx_info *vxi = current->vx_info; + + a = vxi->cvirt.load[0] + (FIXED_1/200); + b = vxi->cvirt.load[1] + (FIXED_1/200); + c = vxi->cvirt.load[2] + (FIXED_1/200); + + running = atomic_read(&vxi->cvirt.nr_running); + threads = atomic_read(&vxi->cvirt.nr_threads); + } else { + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + + running = nr_running(); + threads = nr_threads; + } + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, last_pid); + running, threads, last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -106,6 +123,9 @@ do_posix_clock_monotonic_gettime(&uptime); cputime_to_timespec(idletime, &idle); + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&uptime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), @@ -238,8 +258,9 @@ { int len; - 
strcpy(page, linux_banner); - len = strlen(page); + len = sprintf(page, vx_linux_banner, + vx_new_uts(release), + vx_new_uts(version)); return proc_calc_metrics(page, start, off, count, eof, len); } Index: linux-2.6.14/fs/proc/root.c =================================================================== --- linux-2.6.14.orig/fs/proc/root.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/proc/root.c 2005-10-31 11:05:45.000000000 -0600 @@ -23,6 +23,9 @@ #ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; #endif +struct proc_dir_entry *proc_virtual; + +extern void proc_vx_init(void); static struct super_block *proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -77,6 +80,7 @@ proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); + proc_vx_init(); } static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) Index: linux-2.6.14/fs/quota.c =================================================================== --- linux-2.6.14.orig/fs/quota.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/quota.c 2005-10-31 11:05:45.000000000 -0600 @@ -15,47 +15,50 @@ #include #include #include +#include +#include +#include /* Check validity of generic quotactl commands */ -static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +static int generic_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id) { if (type >= MAXQUOTAS) return -EINVAL; - if (!sb && cmd != Q_SYNC) + if (!hash && cmd != Q_SYNC) return -ENODEV; /* Is operation supported? 
*/ - if (sb && !sb->s_qcop) + if (hash && !hash->dqh_qcop) return -ENOSYS; switch (cmd) { case Q_GETFMT: break; case Q_QUOTAON: - if (!sb->s_qcop->quota_on) + if (!hash->dqh_qcop->quota_on) return -ENOSYS; break; case Q_QUOTAOFF: - if (!sb->s_qcop->quota_off) + if (!hash->dqh_qcop->quota_off) return -ENOSYS; break; case Q_SETINFO: - if (!sb->s_qcop->set_info) + if (!hash->dqh_qcop->set_info) return -ENOSYS; break; case Q_GETINFO: - if (!sb->s_qcop->get_info) + if (!hash->dqh_qcop->get_info) return -ENOSYS; break; case Q_SETQUOTA: - if (!sb->s_qcop->set_dqblk) + if (!hash->dqh_qcop->set_dqblk) return -ENOSYS; break; case Q_GETQUOTA: - if (!sb->s_qcop->get_dqblk) + if (!hash->dqh_qcop->get_dqblk) return -ENOSYS; break; case Q_SYNC: - if (sb && !sb->s_qcop->quota_sync) + if (hash && !hash->dqh_qcop->quota_sync) return -ENOSYS; break; default: @@ -71,7 +74,7 @@ case Q_SETQUOTA: case Q_GETQUOTA: /* This is just informative test so we are satisfied without a lock */ - if (!sb_has_quota_enabled(sb, type)) + if (!dqh_has_quota_enabled(hash, type)) return -ESRCH; } @@ -79,43 +82,43 @@ if (cmd == Q_GETQUOTA) { if (((type == USRQUOTA && current->euid != id) || (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO) - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return -EPERM; return 0; } /* Check validity of XFS Quota Manager commands */ -static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +static int xqm_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id) { if (type >= XQM_MAXQUOTAS) return -EINVAL; - if (!sb) + if (!hash) return -ENODEV; - if (!sb->s_qcop) + if (!hash->dqh_qcop) return -ENOSYS; switch (cmd) { case Q_XQUOTAON: case Q_XQUOTAOFF: case Q_XQUOTARM: - if (!sb->s_qcop->set_xstate) + if (!hash->dqh_qcop->set_xstate) return -ENOSYS; 
break; case Q_XGETQSTAT: - if (!sb->s_qcop->get_xstate) + if (!hash->dqh_qcop->get_xstate) return -ENOSYS; break; case Q_XSETQLIM: - if (!sb->s_qcop->set_xquota) + if (!hash->dqh_qcop->set_xquota) return -ENOSYS; break; case Q_XGETQUOTA: - if (!sb->s_qcop->get_xquota) + if (!hash->dqh_qcop->get_xquota) return -ENOSYS; break; default: @@ -126,57 +129,68 @@ if (cmd == Q_XGETQUOTA) { if (((type == XQM_USRQUOTA && current->euid != id) || (type == XQM_GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_XGETQSTAT) { - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return -EPERM; } return 0; } -static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +static int check_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id) { int error; if (XQM_COMMAND(cmd)) - error = xqm_quotactl_valid(sb, type, cmd, id); + error = xqm_quotactl_valid(hash, type, cmd, id); else - error = generic_quotactl_valid(sb, type, cmd, id); + error = generic_quotactl_valid(hash, type, cmd, id); if (!error) - error = security_quotactl(cmd, type, id, sb); + error = security_quotactl(cmd, type, id, hash); return error; } -static void quota_sync_sb(struct super_block *sb, int type) +static void quota_sync_sb(struct super_block *sb) { - int cnt; - struct inode *discard[MAXQUOTAS]; - - sb->s_qcop->quota_sync(sb, type); /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... 
*/ if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); +} + +static void quota_sync_dqh(struct dqhash *hash, int type) +{ + int cnt; + struct inode *discard[MAXQUOTAS]; + + vxdprintk(VXD_CBIT(quota, 1), + "quota_sync_dqh(%p,%d)", hash, type); + hash->dqh_qcop->quota_sync(hash, type); + + quota_sync_sb(hash->dqh_sb); /* Now when everything is written we can discard the pagecache so * that userspace sees the changes. We need i_sem and so we could * not do it inside dqonoff_sem. Moreover we need to be carefull * about races with quotaoff() (that is the reason why we have own * reference to inode). */ - down(&sb_dqopt(sb)->dqonoff_sem); + down(&dqh_dqopt(hash)->dqonoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { discard[cnt] = NULL; if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!dqh_has_quota_enabled(hash, cnt)) continue; - discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]); + vxdprintk(VXD_CBIT(quota, 0), + "quota_sync_dqh(%p,%d) discard inode %p", + hash, type, dqh_dqopt(hash)->files[cnt]); + discard[cnt] = igrab(dqh_dqopt(hash)->files[cnt]); } - up(&sb_dqopt(sb)->dqonoff_sem); + up(&dqh_dqopt(hash)->dqonoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (discard[cnt]) { down(&discard[cnt]->i_sem); @@ -187,67 +201,59 @@ } } -void sync_dquots(struct super_block *sb, int type) +void sync_dquots_dqh(struct dqhash *hash, int type) { - int cnt, dirty; + vxdprintk(VXD_CBIT(quota, 1), + "sync_dquots_dqh(%p,%d)", hash, type); - if (sb) { - if (sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - return; - } + if (hash->dqh_qcop->quota_sync) + quota_sync_dqh(hash, type); +} - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - /* This test just improves performance so it needn't be reliable... 
*/ - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root && sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; +void sync_dquots(struct dqhash *hash, int type) + +{ + vxdprintk(VXD_CBIT(quota, 1), + "sync_dquots(%p,%d)", hash, type); + + if (hash) { + if (hash->dqh_qcop->quota_sync) + quota_sync_dqh(hash, type); + return; } - spin_unlock(&sb_lock); } /* Copy parameters and call proper function */ -static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr) +static int do_quotactl(struct dqhash *hash, int type, int cmd, qid_t id, void __user *addr) { int ret; + vxdprintk(VXD_CBIT(quota, 3), + "do_quotactl(%p,%d,cmd=%d,id=%d,%p)", hash, type, cmd, id, addr); + switch (cmd) { case Q_QUOTAON: { char *pathname; if (IS_ERR(pathname = getname(addr))) return PTR_ERR(pathname); - ret = sb->s_qcop->quota_on(sb, type, id, pathname); + ret = hash->dqh_qcop->quota_on(hash, type, id, pathname); putname(pathname); return ret; } case Q_QUOTAOFF: - return sb->s_qcop->quota_off(sb, type); + return hash->dqh_qcop->quota_off(hash, type); case Q_GETFMT: { __u32 fmt; - down_read(&sb_dqopt(sb)->dqptr_sem); - if (!sb_has_quota_enabled(sb, type)) { - up_read(&sb_dqopt(sb)->dqptr_sem); + down_read(&dqh_dqopt(hash)->dqptr_sem); + if (!dqh_has_quota_enabled(hash, type)) { + up_read(&dqh_dqopt(hash)->dqptr_sem); return -ESRCH; } - fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; - up_read(&sb_dqopt(sb)->dqptr_sem); + fmt = dqh_dqopt(hash)->info[type].dqi_format->qf_fmt_id; + up_read(&dqh_dqopt(hash)->dqptr_sem); if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; @@ -255,7 +261,7 @@ case Q_GETINFO: { 
struct if_dqinfo info; - if ((ret = sb->s_qcop->get_info(sb, type, &info))) + if ((ret = hash->dqh_qcop->get_info(hash, type, &info))) return ret; if (copy_to_user(addr, &info, sizeof(info))) return -EFAULT; @@ -266,12 +272,12 @@ if (copy_from_user(&info, addr, sizeof(info))) return -EFAULT; - return sb->s_qcop->set_info(sb, type, &info); + return hash->dqh_qcop->set_info(hash, type, &info); } case Q_GETQUOTA: { struct if_dqblk idq; - if ((ret = sb->s_qcop->get_dqblk(sb, type, id, &idq))) + if ((ret = hash->dqh_qcop->get_dqblk(hash, type, id, &idq))) return ret; if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; @@ -282,10 +288,10 @@ if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; - return sb->s_qcop->set_dqblk(sb, type, id, &idq); + return hash->dqh_qcop->set_dqblk(hash, type, id, &idq); } case Q_SYNC: - sync_dquots(sb, type); + sync_dquots_dqh(hash, type); return 0; case Q_XQUOTAON: @@ -295,12 +301,12 @@ if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; - return sb->s_qcop->set_xstate(sb, flags, cmd); + return hash->dqh_qcop->set_xstate(hash, flags, cmd); } case Q_XGETQSTAT: { struct fs_quota_stat fqs; - if ((ret = sb->s_qcop->get_xstate(sb, &fqs))) + if ((ret = hash->dqh_qcop->get_xstate(hash, &fqs))) return ret; if (copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; @@ -311,12 +317,12 @@ if (copy_from_user(&fdq, addr, sizeof(fdq))) return -EFAULT; - return sb->s_qcop->set_xquota(sb, type, id, &fdq); + return hash->dqh_qcop->set_xquota(hash, type, id, &fdq); } case Q_XGETQUOTA: { struct fs_disk_quota fdq; - if ((ret = sb->s_qcop->get_xquota(sb, type, id, &fdq))) + if ((ret = hash->dqh_qcop->get_xquota(hash, type, id, &fdq))) return ret; if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; @@ -329,6 +335,10 @@ return 0; } +#ifdef CONFIG_BLK_DEV_VROOT +extern struct block_device *vroot_get_real_bdev(struct block_device *); +#endif + /* * This is the system call interface. 
This communicates with * the user-level programs. Currently this only supports diskquota @@ -339,6 +349,7 @@ { uint cmds, type; struct super_block *sb = NULL; + struct dqhash *dqh = NULL; struct block_device *bdev; char *tmp; int ret; @@ -354,15 +365,28 @@ putname(tmp); if (IS_ERR(bdev)) return PTR_ERR(bdev); +#ifdef CONFIG_BLK_DEV_VROOT + if (bdev && bdev->bd_inode && + imajor(bdev->bd_inode) == VROOT_MAJOR) { + struct block_device *bdnew = + vroot_get_real_bdev(bdev); + + bdput(bdev); + if (IS_ERR(bdnew)) + return PTR_ERR(bdnew); + bdev = bdnew; + } +#endif sb = get_super(bdev); bdput(bdev); if (!sb) return -ENODEV; } - - ret = check_quotactl_valid(sb, type, cmds, id); + if (sb) + dqh = sb->s_dqh; + ret = check_quotactl_valid(dqh, type, cmds, id); if (ret >= 0) - ret = do_quotactl(sb, type, cmds, id, addr); + ret = do_quotactl(dqh, type, cmds, id, addr); if (sb) drop_super(sb); Index: linux-2.6.14/fs/quota_v1.c =================================================================== --- linux-2.6.14.orig/fs/quota_v1.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/quota_v1.c 2005-10-31 11:05:45.000000000 -0600 @@ -42,12 +42,13 @@ int type = dquot->dq_type; struct v1_disk_dqblk dqblk; - if (!sb_dqopt(dquot->dq_sb)->files[type]) + if (!dqh_dqopt(dquot->dq_dqh)->files[type]) return -EINVAL; /* Set structure to 0s in case read fails/is after end of file */ memset(&dqblk, 0, sizeof(struct v1_disk_dqblk)); - dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); + dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type, + (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 && @@ -66,16 +67,16 @@ v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); if (dquot->dq_id == 0) { - dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; - dqblk.dqb_itime = 
sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace; + dqblk.dqb_btime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_bgrace; + dqblk.dqb_itime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_igrace; } ret = 0; - if (sb_dqopt(dquot->dq_sb)->files[type]) - ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, (char *)&dqblk, - sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); + if (dqh_dqopt(dquot->dq_dqh)->files[type]) + ret = dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type, + (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); if (ret != sizeof(struct v1_disk_dqblk)) { printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", - dquot->dq_sb->s_id); + dquot->dq_dqh->dqh_sb->s_id); if (ret >= 0) ret = -EIO; goto out; @@ -100,9 +101,9 @@ __le32 dqh_version; /* File version */ }; -static int v1_check_quota_file(struct super_block *sb, int type) +static int v1_check_quota_file(struct dqhash *hash, int type) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; ulong blocks; size_t off; struct v2_disk_dqheader dqhead; @@ -118,22 +119,26 @@ if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk)) return 0; /* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */ - size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); + size = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); if (size != sizeof(struct v2_disk_dqheader)) return 1; /* Probably not new format */ if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type]) return 1; /* Definitely not new format */ - printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file. It probably contains newer quota format.\n", sb->s_id); + printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file." 
+ " It probably contains newer quota format.\n", hash->dqh_sb->s_id); return 0; /* Seems like a new format file -> refuse it */ } -static int v1_read_file_info(struct super_block *sb, int type) +static int v1_read_file_info(struct dqhash *hash, int type) { - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); struct v1_disk_dqblk dqblk; int ret; - if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) { + if ((ret = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dqblk, sizeof(struct v1_disk_dqblk), + v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) { if (ret >= 0) ret = -EIO; goto out; @@ -145,14 +150,14 @@ return ret; } -static int v1_write_file_info(struct super_block *sb, int type) +static int v1_write_file_info(struct dqhash *hash, int type) { - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); struct v1_disk_dqblk dqblk; int ret; dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY; - if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, + if ((ret = hash->dqh_sb->s_op->quota_read(hash, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) { if (ret >= 0) ret = -EIO; @@ -160,7 +165,7 @@ } dqblk.dqb_itime = dqopt->info[type].dqi_igrace; dqblk.dqb_btime = dqopt->info[type].dqi_bgrace; - ret = sb->s_op->quota_write(sb, type, (char *)&dqblk, + ret = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0)); if (ret == sizeof(struct v1_disk_dqblk)) ret = 0; Index: linux-2.6.14/fs/quota_v2.c =================================================================== --- linux-2.6.14.orig/fs/quota_v2.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/quota_v2.c 2005-10-31 11:05:45.000000000 -0600 @@ -26,14 +26,15 @@ #define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader))) /* Check 
whether given file is really vfsv0 quotafile */ -static int v2_check_quota_file(struct super_block *sb, int type) +static int v2_check_quota_file(struct dqhash *hash, int type) { struct v2_disk_dqheader dqhead; ssize_t size; static const uint quota_magics[] = V2_INITQMAGICS; static const uint quota_versions[] = V2_INITQVERSIONS; - size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); + size = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); if (size != sizeof(struct v2_disk_dqheader)) { printk("failed read\n"); return 0; @@ -45,17 +46,17 @@ } /* Read information header from quota file */ -static int v2_read_file_info(struct super_block *sb, int type) +static int v2_read_file_info(struct dqhash *hash, int type) { struct v2_disk_dqinfo dinfo; - struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct mem_dqinfo *info = dqh_dqopt(hash)->info+type; ssize_t size; - size = sb->s_op->quota_read(sb, type, (char *)&dinfo, - sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); + size = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't read info structure on device %s.\n", - sb->s_id); + hash->dqh_sb->s_id); return -1; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); @@ -68,10 +69,10 @@ } /* Write information header to quota file */ -static int v2_write_file_info(struct super_block *sb, int type) +static int v2_write_file_info(struct dqhash *hash, int type) { struct v2_disk_dqinfo dinfo; - struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct mem_dqinfo *info = dqh_dqopt(hash)->info+type; ssize_t size; spin_lock(&dq_data_lock); @@ -83,11 +84,11 @@ dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks); dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk); dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry); - size = 
sb->s_op->quota_write(sb, type, (char *)&dinfo, + size = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't write info structure on device %s.\n", - sb->s_id); + hash->dqh_sb->s_id); return -1; } return 0; @@ -131,24 +132,24 @@ kfree(buf); } -static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf) +static inline ssize_t read_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf) { memset(buf, 0, V2_DQBLKSIZE); - return sb->s_op->quota_read(sb, type, (char *)buf, - V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); + return hash->dqh_sb->s_op->quota_read(hash, type, + (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); } -static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf) +static inline ssize_t write_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf) { - return sb->s_op->quota_write(sb, type, (char *)buf, - V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); + return hash->dqh_sb->s_op->quota_write(hash, type, + (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); } /* Remove empty block from list and return it */ -static int get_free_dqblk(struct super_block *sb, int type) +static int get_free_dqblk(struct dqhash *hash, int type) { dqbuf_t buf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int ret, blk; @@ -156,18 +157,18 @@ return -ENOMEM; if (info->u.v2_i.dqi_free_blk) { blk = info->u.v2_i.dqi_free_blk; - if ((ret = read_blk(sb, type, blk, buf)) < 0) + if ((ret = read_blk(hash, type, blk, buf)) < 0) goto out_buf; info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); } else { memset(buf, 0, V2_DQBLKSIZE); /* Assure block allocation... 
*/ - if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0) + if ((ret = write_blk(hash, type, info->u.v2_i.dqi_blocks, buf)) < 0) goto out_buf; blk = info->u.v2_i.dqi_blocks++; } - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); ret = blk; out_buf: freedqbuf(buf); @@ -175,9 +176,9 @@ } /* Insert empty block to the list */ -static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk) +static int put_free_dqblk(struct dqhash *hash, int type, dqbuf_t buf, uint blk) { - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -185,18 +186,18 @@ dh->dqdh_prev_free = cpu_to_le32(0); dh->dqdh_entries = cpu_to_le16(0); info->u.v2_i.dqi_free_blk = blk; - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); /* Some strange block. We had better leave it... */ - if ((err = write_blk(sb, type, blk, buf)) < 0) + if ((err = write_blk(hash, type, blk, buf)) < 0) return err; return 0; } /* Remove given block from the list of blocks with free entries */ -static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk) +static int remove_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free); int err; @@ -204,27 +205,27 @@ if (!tmpbuf) return -ENOMEM; if (nextblk) { - if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0) + if ((err = read_blk(hash, type, nextblk, tmpbuf)) < 0) goto out_buf; ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free; - if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0) + if ((err = write_blk(hash, type, nextblk, tmpbuf)) < 0) goto out_buf; } if 
(prevblk) { - if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0) + if ((err = read_blk(hash, type, prevblk, tmpbuf)) < 0) goto out_buf; ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free; - if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0) + if ((err = write_blk(hash, type, prevblk, tmpbuf)) < 0) goto out_buf; } else { info->u.v2_i.dqi_free_entry = nextblk; - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); } freedqbuf(tmpbuf); dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); /* No matter whether write succeeds block is out of list */ - if (write_blk(sb, type, blk, buf) < 0) + if (write_blk(hash, type, blk, buf) < 0) printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk); return 0; out_buf: @@ -233,10 +234,10 @@ } /* Insert given block to the beginning of list with free entries */ -static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk) +static int insert_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -244,18 +245,18 @@ return -ENOMEM; dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry); dh->dqdh_prev_free = cpu_to_le32(0); - if ((err = write_blk(sb, type, blk, buf)) < 0) + if ((err = write_blk(hash, type, blk, buf)) < 0) goto out_buf; if (info->u.v2_i.dqi_free_entry) { - if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) + if ((err = read_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) goto out_buf; ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk); - if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) + if ((err = write_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) goto out_buf; } freedqbuf(tmpbuf); info->u.v2_i.dqi_free_entry = blk; - 
mark_info_dirty(sb, type); + mark_info_dirty(hash, type); return 0; out_buf: freedqbuf(tmpbuf); @@ -265,8 +266,9 @@ /* Find space for dquot */ static uint find_free_dqentry(struct dquot *dquot, int *err) { - struct super_block *sb = dquot->dq_sb; - struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type; + // struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; + struct mem_dqinfo *info = dqh_dqopt(dqh)->info+dquot->dq_type; uint blk, i; struct v2_disk_dqdbheader *dh; struct v2_disk_dqblk *ddquot; @@ -282,11 +284,11 @@ ddquot = GETENTRIES(buf); if (info->u.v2_i.dqi_free_entry) { blk = info->u.v2_i.dqi_free_entry; - if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0) + if ((*err = read_blk(dqh, dquot->dq_type, blk, buf)) < 0) goto out_buf; } else { - blk = get_free_dqblk(sb, dquot->dq_type); + blk = get_free_dqblk(dqh, dquot->dq_type); if ((int)blk < 0) { *err = blk; freedqbuf(buf); @@ -295,10 +297,10 @@ memset(buf, 0, V2_DQBLKSIZE); /* This is enough as block is already zeroed and entry list is empty... */ info->u.v2_i.dqi_free_entry = blk; - mark_info_dirty(sb, dquot->dq_type); + mark_info_dirty(dqh, dquot->dq_type); } if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? 
*/ - if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) { + if ((*err = remove_free_dqentry(dqh, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); goto out_buf; } @@ -313,7 +315,7 @@ goto out_buf; } #endif - if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) { + if ((*err = write_blk(dqh, dquot->dq_type, blk, buf)) < 0) { printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk); goto out_buf; } @@ -328,7 +330,8 @@ /* Insert reference to structure into the trie */ static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth) { - struct super_block *sb = dquot->dq_sb; + // struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; dqbuf_t buf; int ret = 0, newson = 0, newact = 0; __le32 *ref; @@ -337,7 +340,7 @@ if (!(buf = getdqbuf())) return -ENOMEM; if (!*treeblk) { - ret = get_free_dqblk(sb, dquot->dq_type); + ret = get_free_dqblk(dqh, dquot->dq_type); if (ret < 0) goto out_buf; *treeblk = ret; @@ -345,7 +348,7 @@ newact = 1; } else { - if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) { + if ((ret = read_blk(dqh, dquot->dq_type, *treeblk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk); goto out_buf; } @@ -368,10 +371,10 @@ ret = do_insert_tree(dquot, &newblk, depth+1); if (newson && ret >= 0) { ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk); - ret = write_blk(sb, dquot->dq_type, *treeblk, buf); + ret = write_blk(dqh, dquot->dq_type, *treeblk, buf); } else if (newact && ret < 0) - put_free_dqblk(sb, dquot->dq_type, buf, *treeblk); + put_free_dqblk(dqh, dquot->dq_type, buf, *treeblk); out_buf: freedqbuf(buf); return ret; @@ -408,10 +411,11 @@ if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) ddquot.dqb_itime = cpu_to_le64(1); spin_unlock(&dq_data_lock); - ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, + ret = 
dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type, (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off); if (ret != sizeof(struct v2_disk_dqblk)) { - printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id); + printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", + dquot->dq_dqh->dqh_sb->s_id); if (ret >= 0) ret = -ENOSPC; } @@ -425,7 +429,8 @@ /* Free dquot entry in data block */ static int free_dqentry(struct dquot *dquot, uint blk) { - struct super_block *sb = dquot->dq_sb; + // struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; int type = dquot->dq_type; struct v2_disk_dqdbheader *dh; dqbuf_t buf = getdqbuf(); @@ -439,15 +444,15 @@ (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS)); goto out_buf; } - if ((ret = read_blk(sb, type, blk, buf)) < 0) { + if ((ret = read_blk(dqh, type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); goto out_buf; } dh = (struct v2_disk_dqdbheader *)buf; dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? 
*/ - if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || - (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { + if ((ret = remove_free_dqentry(dqh, type, buf, blk)) < 0 || + (ret = put_free_dqblk(dqh, type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't move quota data block (%u) " "to free list.\n", blk); goto out_buf; @@ -458,13 +463,13 @@ sizeof(struct v2_disk_dqblk)); if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { /* Insert will write block itself */ - if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) { + if ((ret = insert_free_dqentry(dqh, type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); goto out_buf; } } else - if ((ret = write_blk(sb, type, blk, buf)) < 0) { + if ((ret = write_blk(dqh, type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't write quota data " "block %u\n", blk); goto out_buf; @@ -479,7 +484,8 @@ /* Remove reference to dquot from tree */ static int remove_tree(struct dquot *dquot, uint *blk, int depth) { - struct super_block *sb = dquot->dq_sb; + // struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; int type = dquot->dq_type; dqbuf_t buf = getdqbuf(); int ret = 0; @@ -488,7 +494,7 @@ if (!buf) return -ENOMEM; - if ((ret = read_blk(sb, type, *blk, buf)) < 0) { + if ((ret = read_blk(dqh, type, *blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); goto out_buf; } @@ -505,11 +511,11 @@ for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? 
*/ /* Don't put the root block into the free block list */ if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) { - put_free_dqblk(sb, type, buf, *blk); + put_free_dqblk(dqh, type, buf, *blk); *blk = 0; } else - if ((ret = write_blk(sb, type, *blk, buf)) < 0) + if ((ret = write_blk(dqh, type, *blk, buf)) < 0) printk(KERN_ERR "VFS: Can't write quota tree " "block %u.\n", *blk); } @@ -538,7 +544,7 @@ if (!buf) return -ENOMEM; - if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { + if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); goto out_buf; } @@ -577,7 +583,7 @@ if (!buf) return -ENOMEM; - if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { + if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); goto out_buf; } @@ -609,7 +615,7 @@ #ifdef __QUOTA_V2_PARANOIA /* Invalidated quota? */ - if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) { + if (!dquot->dq_dqh || !dqh_dqopt(dquot->dq_dqh)->files[type]) { printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); return -EIO; } @@ -626,7 +632,7 @@ } else { dquot->dq_off = offset; - if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, + if ((ret = dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type, (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset)) != sizeof(struct v2_disk_dqblk)) { if (ret >= 0) Index: linux-2.6.14/fs/read_write.c =================================================================== --- linux-2.6.14.orig/fs/read_write.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/read_write.c 2005-10-31 11:05:45.000000000 -0600 @@ -622,12 +622,70 @@ return ret; } +ssize_t vfs_sendfile(struct file *out_file, struct file *in_file, loff_t *ppos, + size_t count, loff_t max) +{ + struct inode * in_inode, * out_inode; + loff_t pos; + ssize_t ret; + + /* verify in_file */ + in_inode = 
in_file->f_dentry->d_inode; + if (!in_inode) + return -EINVAL; + if (!in_file->f_op || !in_file->f_op->sendfile) + return -EINVAL; + + if (!ppos) + ppos = &in_file->f_pos; + else + if (!(in_file->f_mode & FMODE_PREAD)) + return -ESPIPE; + + ret = rw_verify_area(FLOCK_VERIFY_READ, in_file, ppos, count); + if (ret) + return ret; + + /* verify out_file */ + out_inode = out_file->f_dentry->d_inode; + if (!out_inode) + return -EINVAL; + if (!out_file->f_op || !out_file->f_op->sendpage) + return -EINVAL; + + ret = rw_verify_area(FLOCK_VERIFY_WRITE, out_file, &out_file->f_pos, count); + if (ret) + return ret; + + ret = security_file_permission (out_file, MAY_WRITE); + if (ret) + return ret; + + if (!max) + max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); + + pos = *ppos; + if (unlikely(pos < 0)) + return -EINVAL; + if (unlikely(pos + count > max)) { + if (pos >= max) + return -EOVERFLOW; + count = max - pos; + } + + ret = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + + if (*ppos > max) + return -EOVERFLOW; + return ret; +} + +EXPORT_SYMBOL(vfs_sendfile); + static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { struct file * in_file, * out_file; - struct inode * in_inode, * out_inode; - loff_t pos; ssize_t retval; int fput_needed_in, fput_needed_out; @@ -640,21 +698,6 @@ goto out; if (!(in_file->f_mode & FMODE_READ)) goto fput_in; - retval = -EINVAL; - in_inode = in_file->f_dentry->d_inode; - if (!in_inode) - goto fput_in; - if (!in_file->f_op || !in_file->f_op->sendfile) - goto fput_in; - retval = -ESPIPE; - if (!ppos) - ppos = &in_file->f_pos; - else - if (!(in_file->f_mode & FMODE_PREAD)) - goto fput_in; - retval = rw_verify_area(READ, in_file, ppos, count); - if (retval) - goto fput_in; retval = security_file_permission (in_file, MAY_READ); if (retval) @@ -669,33 +712,8 @@ goto fput_in; if (!(out_file->f_mode & FMODE_WRITE)) goto fput_out; - retval = -EINVAL; - if (!out_file->f_op || 
!out_file->f_op->sendpage) - goto fput_out; - out_inode = out_file->f_dentry->d_inode; - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); - if (retval) - goto fput_out; - - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) - goto fput_out; - - if (!max) - max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); - - pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; - if (unlikely(pos + count > max)) { - retval = -EOVERFLOW; - if (pos >= max) - goto fput_out; - count = max - pos; - } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + retval = vfs_sendfile(out_file, in_file, ppos, count, max); if (retval > 0) { current->rchar += retval; Index: linux-2.6.14/fs/reiserfs/bitmap.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/bitmap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/bitmap.c 2005-10-31 11:05:45.000000000 -0600 @@ -13,6 +13,7 @@ #include #include #include +#include #define PREALLOCATION_SIZE 9 @@ -1021,6 +1022,7 @@ int passno = 0; int nr_allocated = 0; int bigalloc = 0; + int blocks; determine_prealloc_size(hint); if (!hint->formatted_node) { @@ -1034,6 +1036,9 @@ DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); if (quota_ret) /* Quota exceeded? 
*/ return QUOTA_EXCEEDED; + if (DLIMIT_ALLOC_BLOCK(hint->inode, amount_needed)) + goto out_dlimit; + if (hint->preallocate && hint->prealloc_size) { #ifdef REISERQUOTA_DEBUG reiserfs_debug(s, REISERFS_DEBUG_CODE, @@ -1045,7 +1050,12 @@ hint->prealloc_size); if (quota_ret) hint->preallocate = hint->prealloc_size = 0; + if (DLIMIT_ALLOC_BLOCK(hint->inode, hint->prealloc_size)) { + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, hint->prealloc_size); + hint->preallocate=hint->prealloc_size=0; + } } + /* for unformatted nodes, force large allocations */ bigalloc = amount_needed; } @@ -1093,7 +1103,10 @@ nr_allocated, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ + /* Free not allocated blocks */ + blocks = amount_needed + hint->prealloc_size - nr_allocated; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } while (nr_allocated--) reiserfs_free_block(hint->th, hint->inode, @@ -1125,13 +1138,17 @@ REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + - hint->prealloc_size - nr_allocated - - REISERFS_I(hint->inode)-> - i_prealloc_count); + blocks = amount_needed + hint->prealloc_size - nr_allocated - + REISERFS_I(hint->inode)->i_prealloc_count; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } return CARRY_ON; + +out_dlimit: + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed); + return NO_DISK_SPACE; } /* grab new blocknrs from preallocated list */ Index: linux-2.6.14/fs/reiserfs/file.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/file.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/file.c 2005-10-31 11:05:45.000000000 -0600 @@ -1555,6 +1555,7 @@ .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .sendfile = generic_file_sendfile, + 
.sendpage = generic_file_sendpage, .aio_read = generic_file_aio_read, .aio_write = reiserfs_aio_write, }; Index: linux-2.6.14/fs/reiserfs/inode.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -17,6 +17,8 @@ #include #include #include +#include +#include extern int reiserfs_default_io_size; /* default io size devuned in super.c */ @@ -58,6 +60,7 @@ * after delete_object so that quota updates go into the same transaction as * stat data deletion */ DQUOT_FREE_INODE(inode); + DLIMIT_FREE_INODE(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) { up(&inode->i_sem); @@ -1125,6 +1128,8 @@ struct buffer_head *bh; struct item_head *ih; __u32 rdev; + uid_t uid; + gid_t gid; //int version = ITEM_VERSION_1; bh = PATH_PLAST_BUFFER(path); @@ -1148,12 +1153,13 @@ (struct stat_data_v1 *)B_I_PITEM(bh, ih); unsigned long blocks; + uid = sd_v1_uid(sd); + gid = sd_v1_gid(sd); + set_inode_item_key_version(inode, KEY_FORMAT_3_5); set_inode_sd_version(inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); inode->i_nlink = sd_v1_nlink(sd); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); inode->i_size = sd_v1_size(sd); inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); @@ -1195,11 +1201,12 @@ // (directories and symlinks) struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + uid = sd_v2_uid(sd); + gid = sd_v2_gid(sd); + inode->i_mode = sd_v2_mode(sd); inode->i_nlink = sd_v2_nlink(sd); - inode->i_uid = sd_v2_uid(sd); inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); inode->i_ctime.tv_sec = sd_v2_ctime(sd); @@ -1229,6 +1236,10 @@ sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = 
INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, 0); + pathrelse(path); if (S_ISREG(inode->i_mode)) { inode->i_op = &reiserfs_file_inode_operations; @@ -1251,13 +1262,15 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); __u16 flags; + set_sd_v2_uid(sd_v2, uid); + set_sd_v2_gid(sd_v2, gid); set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, inode->i_nlink); - set_sd_v2_uid(sd_v2, inode->i_uid); set_sd_v2_size(sd_v2, size); - set_sd_v2_gid(sd_v2, inode->i_gid); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); @@ -1788,6 +1801,10 @@ BUG_ON(!th->t_trans_id); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_bad_dlimit; + } if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; goto out_end_trans; @@ -1973,6 +1990,9 @@ DQUOT_FREE_INODE(inode); out_end_trans: + DLIMIT_FREE_INODE(inode); + + out_bad_dlimit: journal_end(th, th->t_super, th->t_blocks_allocated); /* Drop can be outside and it needs more credits so it's better to have it outside */ DQUOT_DROP(inode); @@ -2688,6 +2708,14 @@ inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (sd_attrs & REISERFS_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (sd_attrs & REISERFS_BARRIER_FL) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (sd_attrs & REISERFS_APPEND_FL) inode->i_flags |= S_APPEND; else @@ -2710,6 +2738,14 @@ *sd_attrs |= REISERFS_IMMUTABLE_FL; else *sd_attrs &= ~REISERFS_IMMUTABLE_FL; + if (inode->i_flags & S_IUNLINK) + *sd_attrs |= REISERFS_IUNLINK_FL; + else + *sd_attrs &= ~REISERFS_IUNLINK_FL; + if (inode->i_flags & S_BARRIER) + *sd_attrs |= REISERFS_BARRIER_FL; + else + 
*sd_attrs &= ~REISERFS_BARRIER_FL; if (inode->i_flags & S_SYNC) *sd_attrs |= REISERFS_SYNC_FL; else @@ -2885,6 +2921,27 @@ reiserfs_get_blocks_direct_io, NULL); } +/* Mirror the inode's immutable/iunlink/barrier state into the reiserfs attribute word (i_attrs, the same word the flag ioctls use); touch ctime only when it changes. */ +static void reiserfs_setattr_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = REISERFS_I(inode)->i_attrs; + newflags = oldflags & ~(REISERFS_IMMUTABLE_FL | + REISERFS_IUNLINK_FL | REISERFS_BARRIER_FL); + if (IS_IMMUTABLE(inode)) + newflags |= REISERFS_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= REISERFS_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= REISERFS_BARRIER_FL; + + if (oldflags ^ newflags) { + REISERFS_I(inode)->i_attrs = newflags; + inode->i_ctime = CURRENT_TIME; + } +} + int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -2929,9 +2986,14 @@ } error = inode_change_ok(inode, attr); + + if (!error && attr->ia_valid & ATTR_ATTR_FLAG) + reiserfs_setattr_flags(inode); + if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) { error = reiserfs_chown_xattrs(inode, attr); if (!error) { @@ -2961,6 +3023,8 @@ inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) && IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; mark_inode_dirty(inode); error = journal_end(&th, inode->i_sb, jbegin_count); Index: linux-2.6.14/fs/reiserfs/ioctl.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/ioctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/ioctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -22,7 +22,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { - unsigned int flags; + unsigned int flags, oldflags; switch (cmd) { case
REISERFS_IOC_UNPACK: @@ -41,13 +41,15 @@ flags = REISERFS_I(inode)->i_attrs; i_attrs_to_sd_attrs(inode, (__u16 *) & flags); + flags &= REISERFS_FL_USER_VISIBLE; return put_user(flags, (int __user *)arg); case REISERFS_IOC_SETFLAGS:{ if (!reiserfs_attrs(inode->i_sb)) return -ENOTTY; - if (IS_RDONLY(inode)) - return -EROFS; + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) + return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -56,10 +58,12 @@ if (get_user(flags, (int __user *)arg)) return -EFAULT; - if (((flags ^ REISERFS_I(inode)-> - i_attrs) & (REISERFS_IMMUTABLE_FL | - REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) + oldflags = REISERFS_I(inode) -> i_attrs; + if (((oldflags & REISERFS_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | + REISERFS_APPEND_FL))) && + !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if ((flags & REISERFS_NOTAIL_FL) && @@ -70,6 +74,9 @@ if (result) return result; } + + flags = flags & REISERFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE; sd_attrs_to_i_attrs(flags, inode); REISERFS_I(inode)->i_attrs = flags; inode->i_ctime = CURRENT_TIME_SEC; @@ -81,7 +88,8 @@ case REISERFS_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *)arg)) return -EFAULT; Index: linux-2.6.14/fs/reiserfs/namei.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/namei.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/namei.c 2005-10-31 11:05:45.000000000 -0600 @@ -19,6 +19,7 @@ #include #include #include +#include #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--; @@
-365,6 +366,7 @@ reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); } + vx_propagate_xid(nd, inode); /* Propogate the priv_object flag so we know we're in the priv tree */ if (is_reiserfs_priv_object(dir)) @@ -604,6 +606,7 @@ } else { inode->i_gid = current->fsgid; } + inode->i_xid = vx_current_fsxid(inode->i_sb); DQUOT_INIT(inode); return 0; } Index: linux-2.6.14/fs/reiserfs/stree.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/stree.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/stree.c 2005-10-31 11:05:45.000000000 -0600 @@ -57,6 +57,7 @@ #include #include #include +#include /* Does the buffer contain a disk block which is in the tree. */ inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) @@ -1365,6 +1366,7 @@ "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); /* Return deleted body length */ @@ -1453,6 +1455,7 @@ #endif DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); + DLIMIT_FREE_SPACE(inode, quota_cut_bytes); } break; } @@ -1808,6 +1811,7 @@ "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?'); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); return n_ret_value; } @@ -2048,6 +2052,11 @@ pathrelse(p_s_search_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) { + DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); + pathrelse(p_s_search_path); + return -ENOSPC; + } init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); #ifdef DISPLACE_NEW_PACKING_LOCALITIES @@ -2100,6 +2109,7 @@ n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); #endif + DLIMIT_FREE_SPACE(inode, n_pasted_size); DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); return retval; } 
@@ -2137,6 +2147,11 @@ pathrelse(p_s_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) { + DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + pathrelse(p_s_path); + return -ENOSPC; + } } init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); @@ -2184,7 +2199,9 @@ "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); #endif - if (inode) + if (inode) { + DLIMIT_FREE_SPACE(inode, quota_bytes); DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + } return retval; } Index: linux-2.6.14/fs/reiserfs/super.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/super.c 2005-10-31 11:05:45.000000000 -0600 @@ -138,7 +138,7 @@ } #ifdef CONFIG_QUOTA -static int reiserfs_quota_on_mount(struct super_block *, int); +static int reiserfs_quota_on_mount(struct dqhash *, int); #endif /* look for uncompleted unlinks and truncates and complete them */ @@ -178,7 +178,7 @@ /* Turn on quotas so that they are updated correctly */ for (i = 0; i < MAXQUOTAS; i++) { if (REISERFS_SB(s)->s_qf_names[i]) { - int ret = reiserfs_quota_on_mount(s, i); + int ret = reiserfs_quota_on_mount(s->s_dqh, i); if (ret < 0) reiserfs_warning(s, "reiserfs: cannot turn on journalled quota: error %d", @@ -292,8 +292,8 @@ #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(s)->files[i]) - vfs_quota_off_mount(s, i); + if (dqh_dqopt(s->s_dqh)->files[i]) + vfs_quota_off_mount(s->s_dqh, i); } if (ms_active_set) /* Restore the flag back */ @@ -584,9 +584,9 @@ } #ifdef CONFIG_QUOTA -static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, +static ssize_t reiserfs_quota_write(struct dqhash *, int, const char *, size_t, loff_t); -static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, +static ssize_t reiserfs_quota_read(struct 
dqhash *, int, char *, size_t, loff_t); #endif @@ -619,8 +619,8 @@ static int reiserfs_acquire_dquot(struct dquot *); static int reiserfs_release_dquot(struct dquot *); static int reiserfs_mark_dquot_dirty(struct dquot *); -static int reiserfs_write_info(struct super_block *, int); -static int reiserfs_quota_on(struct super_block *, int, int, char *); +static int reiserfs_write_info(struct dqhash *, int); +static int reiserfs_quota_on(struct dqhash *, int, int, char *); static struct dquot_operations reiserfs_quota_operations = { .initialize = reiserfs_dquot_initialize, @@ -888,6 +888,9 @@ {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, #endif +#ifndef CONFIG_INOXID_NONE + {"tagxid", .setmask = 1<s_dqh)) { reiserfs_warning(s, "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); return 0; @@ -1062,7 +1065,7 @@ } /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ if (!(*mount_options & (1 << REISERFS_QUOTA)) - && sb_any_quota_enabled(s)) { + && dqh_any_quota_enabled(s->s_dqh)) { reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on."); return 0; @@ -1167,6 +1170,11 @@ return -EINVAL; } + if ((mount_options & (1 << REISERFS_TAGXID)) && !(s->s_flags & MS_TAGXID)) { + reiserfs_warning(s, "reiserfs: tagxid not permitted on remount."); + return -EINVAL; + } + handle_attrs(s); /* Add options that are safe here */ @@ -1469,7 +1477,7 @@ s->s_export_op = &reiserfs_export_ops; #ifdef CONFIG_QUOTA s->s_qcop = &reiserfs_qctl_operations; - s->dq_op = &reiserfs_quota_operations; + s->s_qop = &reiserfs_quota_operations; #endif /* new format is limited by the 32 bit wide i_blocks field, want to @@ -1742,6 +1750,10 @@ goto error; } + /* map mount option tagxid */ + if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGXID)) + s->s_flags |= MS_TAGXID; + rs = SB_DISK_SUPER_BLOCK(s); /* Let's do basic 
sanity check to verify that underlying device is not smaller than the filesystem. If the check fails then abort and scream, @@ -2018,16 +2030,16 @@ struct reiserfs_transaction_handle th; int ret, err; - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock(dquot->dq_dqh->dqh_sb); ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + journal_begin(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb)); if (ret) goto out; ret = dquot_commit(dquot); err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + journal_end(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb)); if (!ret && err) ret = err; out: @@ -2040,20 +2052,20 @@ struct reiserfs_transaction_handle th; int ret, err; - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock(dquot->dq_dqh->dqh_sb); ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + journal_begin(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb)); if (ret) goto out; ret = dquot_acquire(dquot); err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + journal_end(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb)); if (!ret && err) ret = err; out: - reiserfs_write_unlock(dquot->dq_sb); + reiserfs_write_unlock(dquot->dq_dqh->dqh_sb); return ret; } @@ -2062,37 +2074,38 @@ struct reiserfs_transaction_handle th; int ret, err; - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock(dquot->dq_dqh->dqh_sb); ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + journal_begin(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb)); if (ret) goto out; ret = dquot_release(dquot); err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + journal_end(&th, dquot->dq_dqh->dqh_sb, + 
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb)); if (!ret && err) ret = err; out: - reiserfs_write_unlock(dquot->dq_sb); + reiserfs_write_unlock(dquot->dq_dqh->dqh_sb); return ret; } static int reiserfs_mark_dquot_dirty(struct dquot *dquot) { /* Are we journalling quotas? */ - if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + if (REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] || + REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return reiserfs_write_dquot(dquot); } else return dquot_mark_dquot_dirty(dquot); } -static int reiserfs_write_info(struct super_block *sb, int type) +static int reiserfs_write_info(struct dqhash *hash, int type) { struct reiserfs_transaction_handle th; + struct super_block *sb = hash->dqh_sb; int ret, err; /* Data block + inode block */ @@ -2100,7 +2113,7 @@ ret = journal_begin(&th, sb, 2); if (ret) goto out; - ret = dquot_commit_info(sb, type); + ret = dquot_commit_info(hash, type); err = journal_end(&th, sb, 2); if (!ret && err) ret = err; @@ -2112,18 +2125,21 @@ /* * Turn on quotas during mount time - we need to find the quota file and such... 
*/ -static int reiserfs_quota_on_mount(struct super_block *sb, int type) +static int reiserfs_quota_on_mount(struct dqhash *hash, int type) { - return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], + struct super_block *sb = hash->dqh_sb; + + return vfs_quota_on_mount(hash, REISERFS_SB(sb)->s_qf_names[type], REISERFS_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ -static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, +static int reiserfs_quota_on(struct dqhash *hash, int type, int format_id, char *path) { + struct super_block *sb = hash->dqh_sb; int err; struct nameidata nd; @@ -2148,7 +2164,7 @@ if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); } /* Quotafile not of fs root? */ if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) @@ -2156,17 +2172,18 @@ "reiserfs: Quota file not on filesystem root. " "Journalled quota will not work."); path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); } /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... 
As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, +static ssize_t reiserfs_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; unsigned long blk = off >> sb->s_blocksize_bits; int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; size_t toread; @@ -2208,10 +2225,11 @@ /* Write to quotafile (we know the transaction is already started and has * enough credits) */ -static ssize_t reiserfs_quota_write(struct super_block *sb, int type, +static ssize_t reiserfs_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; unsigned long blk = off >> sb->s_blocksize_bits; int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; Index: linux-2.6.14/fs/reiserfs/xattr.c =================================================================== --- linux-2.6.14.orig/fs/reiserfs/xattr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/reiserfs/xattr.c 2005-10-31 11:05:45.000000000 -0600 @@ -835,7 +835,7 @@ if (dir->d_inode->i_nlink <= 2) { root = get_xa_root(inode->i_sb); reiserfs_write_lock_xattrs(inode->i_sb); - err = vfs_rmdir(root->d_inode, dir); + err = vfs_rmdir(root->d_inode, dir, NULL); reiserfs_write_unlock_xattrs(inode->i_sb); dput(root); } else { @@ -1352,7 +1352,7 @@ /* * Nobody gets write access to a read-only fs. 
*/ - if (IS_RDONLY(inode) && + if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; Index: linux-2.6.14/fs/stat.c =================================================================== --- linux-2.6.14.orig/fs/stat.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/stat.c 2005-10-31 11:05:45.000000000 -0600 @@ -27,6 +27,7 @@ stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; stat->gid = inode->i_gid; + stat->xid = inode->i_xid; stat->rdev = inode->i_rdev; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; Index: linux-2.6.14/fs/super.c =================================================================== --- linux-2.6.14.orig/fs/super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/super.c 2005-10-31 11:05:45.000000000 -0600 @@ -37,6 +37,8 @@ #include /* for the emergency remount stuff */ #include #include +#include +#include #include @@ -77,15 +79,18 @@ s->s_count = S_BIAS; atomic_set(&s->s_active, 1); sema_init(&s->s_vfs_rename_sem,1); - sema_init(&s->s_dquot.dqio_sem, 1); - sema_init(&s->s_dquot.dqonoff_sem, 1); - init_rwsem(&s->s_dquot.dqptr_sem); + // sema_init(&s->s_dquot.dqio_sem, 1); + // sema_init(&s->s_dquot.dqonoff_sem, 1); + // init_rwsem(&s->s_dquot.dqptr_sem); init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; - s->dq_op = sb_dquot_ops; + // s->dq_op = sb_dquot_ops; + s->s_qop = sb_dquot_ops; s->s_qcop = sb_quotactl_ops; s->s_op = &default_op; s->s_time_gran = 1000000000; + /* quick hack to make dqhash id unique, sufficient for now */ + s->s_dqh = new_dqhash(s, (unsigned long)s); } out: return s; @@ -100,6 +105,7 @@ static inline void destroy_super(struct super_block *s) { security_sb_free(s); + dqhput(s->s_dqh); kfree(s); } @@ -804,7 +810,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); - struct super_block *sb = ERR_PTR(-ENOMEM); + struct 
super_block *sb; struct vfsmount *mnt; int error; char *secdata = NULL; @@ -812,6 +818,12 @@ if (!type) return ERR_PTR(-ENODEV); + sb = ERR_PTR(-EPERM); + if ((type->fs_flags & FS_BINARY_MOUNTDATA) && + !capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_BINARY_MOUNT)) + goto out; + + sb = ERR_PTR(-ENOMEM); mnt = alloc_vfsmnt(name); if (!mnt) goto out; @@ -833,6 +845,13 @@ sb = type->get_sb(type, flags, name, data); if (IS_ERR(sb)) goto out_free_secdata; + + error = -EPERM; + if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto out_sb; + error = security_sb_kern_mount(sb, secdata); if (error) goto out_sb; Index: linux-2.6.14/fs/sysfs/mount.c =================================================================== --- linux-2.6.14.orig/fs/sysfs/mount.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/sysfs/mount.c 2005-10-31 11:05:45.000000000 -0600 @@ -11,8 +11,6 @@ #include "sysfs.h" -/* Random magic number */ -#define SYSFS_MAGIC 0x62656572 struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; @@ -38,7 +36,7 @@ sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; + sb->s_magic = SYSFS_SUPER_MAGIC; sb->s_op = &sysfs_ops; sb->s_time_gran = 1; sysfs_sb = sb; Index: linux-2.6.14/fs/udf/super.c =================================================================== --- linux-2.6.14.orig/fs/udf/super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/udf/super.c 2005-10-31 11:05:45.000000000 -0600 @@ -1564,7 +1564,7 @@ /* Fill in the rest of the superblock */ sb->s_op = &udf_sb_ops; - sb->dq_op = NULL; + sb->s_qop = NULL; sb->s_dirt = 0; sb->s_magic = UDF_SUPER_MAGIC; sb->s_time_gran = 1000; Index: linux-2.6.14/fs/ufs/super.c =================================================================== --- linux-2.6.14.orig/fs/ufs/super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/ufs/super.c 2005-10-31 11:05:45.000000000 -0600 @@ 
-873,7 +873,7 @@ * Read ufs_super_block into internal data structures */ sb->s_op = &ufs_super_ops; - sb->dq_op = NULL; /***/ + sb->s_qop = NULL; /***/ sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno); @@ -1197,8 +1197,8 @@ } #ifdef CONFIG_QUOTA -static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t); -static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t); +static ssize_t ufs_quota_read(struct dqhash *, int, char *,size_t, loff_t); +static ssize_t ufs_quota_write(struct dqhash *, int, const char *, size_t, loff_t); #endif static struct super_operations ufs_super_ops = { @@ -1223,10 +1223,11 @@ * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data, +static ssize_t ufs_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); @@ -1262,10 +1263,11 @@ } /* Write to quotafile */ -static ssize_t ufs_quota_write(struct super_block *sb, int type, +static ssize_t ufs_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); Index: linux-2.6.14/fs/xattr.c =================================================================== --- linux-2.6.14.orig/fs/xattr.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xattr.c 2005-10-31 
11:05:45.000000000 -0600 @@ -24,7 +24,7 @@ */ static long setxattr(struct dentry *d, char __user *name, void __user *value, - size_t size, int flags) + size_t size, int flags, struct vfsmount *mnt) { int error; void *kvalue = NULL; @@ -55,6 +55,9 @@ error = security_inode_setxattr(d, kname, kvalue, size, flags); if (error) goto out; + error = -EROFS; + if (MNT_IS_RDONLY(mnt)) + goto out; error = -EOPNOTSUPP; if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { error = d->d_inode->i_op->setxattr(d, kname, kvalue, @@ -89,7 +92,7 @@ error = user_path_walk(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -104,7 +107,7 @@ error = user_path_walk_link(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -119,7 +122,7 @@ f = fget(fd); if (!f) return error; - error = setxattr(f->f_dentry, name, value, size, flags); + error = setxattr(f->f_dentry, name, value, size, flags, f->f_vfsmnt); fput(f); return error; } @@ -306,7 +309,7 @@ * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, char __user *name, struct vfsmount *mnt) { int error; char kname[XATTR_NAME_MAX + 1]; @@ -322,6 +325,9 @@ error = security_inode_removexattr(d, kname); if (error) goto out; + error = -EROFS; + if (MNT_IS_RDONLY(mnt)) + goto out; down(&d->d_inode->i_sem); error = d->d_inode->i_op->removexattr(d, kname); up(&d->d_inode->i_sem); @@ -341,7 +347,7 @@ error = user_path_walk(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -355,7 +361,7 @@ error = user_path_walk_link(path, &nd); if (error) return error; - error = 
removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -369,7 +375,7 @@ f = fget(fd); if (!f) return error; - error = removexattr(f->f_dentry, name); + error = removexattr(f->f_dentry, name, f->f_vfsmnt); fput(f); return error; } Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_file.c =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_file.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_file.c 2005-10-31 11:05:45.000000000 -0600 @@ -551,6 +551,7 @@ .aio_read = linvfs_aio_read, .aio_write = linvfs_aio_write, .sendfile = linvfs_sendfile, + .sendpage = generic_file_sendpage, .unlocked_ioctl = linvfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = linvfs_compat_ioctl, Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_ioctl.c =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_ioctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_ioctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -1113,6 +1113,8 @@ #define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ #define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ #define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ +#define LINUX_XFLAG_BARRIER 0x04000000 /* chroot() barrier */ +#define LINUX_XFLAG_IUNLINK 0x08000000 /* immutable unlink */ STATIC unsigned int xfs_merge_ioc_xflags( @@ -1153,6 +1155,10 @@ if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= LINUX_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= LINUX_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= LINUX_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= LINUX_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_iops.c =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_iops.c 2005-10-27 
19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_iops.c 2005-10-31 11:09:53.000000000 -0600 @@ -68,6 +68,7 @@ #include #include +#include /* @@ -238,6 +239,7 @@ d_add(dentry, NULL); return NULL; } + vx_propagate_xid(nd, LINVFS_GET_IP(cvp)); return d_splice_alias(LINVFS_GET_IP(cvp), dentry); } @@ -470,6 +472,27 @@ return 0; } +STATIC void +linvfs_setattr_flags( + struct inode *inode, + vattr_t *vap) +{ + unsigned int oldflags, newflags; + + oldflags = vap->va_xflags; + newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE | + XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER); + if (IS_IMMUTABLE(inode)) + newflags |= XFS_XFLAG_IMMUTABLE; + if (IS_IUNLINK(inode)) + newflags |= XFS_XFLAG_IUNLINK; + if (IS_BARRIER(inode)) + newflags |= XFS_XFLAG_BARRIER; + + if (oldflags ^ newflags) + vap->va_xflags = newflags; +} + STATIC int linvfs_setattr( struct dentry *dentry, @@ -482,6 +505,10 @@ int flags = 0; int error; + error = inode_change_ok(inode, attr); + if (error) + return error; + memset(&vattr, 0, sizeof(vattr_t)); if (ia_valid & ATTR_UID) { vattr.va_mask |= XFS_AT_UID; @@ -491,6 +518,10 @@ vattr.va_mask |= XFS_AT_GID; vattr.va_gid = attr->ia_gid; } + if ((ia_valid & ATTR_XID) && IS_TAGXID(inode)) { + vattr.va_mask |= XFS_AT_XID; + vattr.va_xid = attr->ia_xid; + } if (ia_valid & ATTR_SIZE) { vattr.va_mask |= XFS_AT_SIZE; vattr.va_size = attr->ia_size; @@ -521,6 +552,11 @@ flags |= ATTR_NONBLOCK; #endif + if (ia_valid & ATTR_ATTR_FLAG) { + vattr.va_mask |= XFS_AT_XFLAGS; + linvfs_setattr_flags(inode, &vattr); + } + VOP_SETATTR(vp, &vattr, flags, NULL, error); if (error) return -error; Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_linux.h =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_linux.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_linux.h 2005-10-31 11:05:45.000000000 -0600 @@ -152,6 +152,7 @@ #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define 
current_fsgid(cred) (current->fsgid) +#define current_fsxid(cred,vp) (vx_current_fsxid(LINVFS_GET_IP(vp)->i_sb)) #define NBPP PAGE_SIZE #define DPPSHFT (PAGE_SHIFT - 9) Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_super.c =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_super.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_super.c 2005-10-31 11:05:45.000000000 -0600 @@ -176,6 +176,7 @@ inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; + inode->i_xid = ip->i_d.di_xid; switch (inode->i_mode & S_IFMT) { case S_IFBLK: @@ -204,6 +205,14 @@ inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; else @@ -675,6 +684,12 @@ int error; VFS_PARSEARGS(vfsp, options, args, 1, error); + if ((args->flags & XFSMNT_TAGXID) && + !(sb->s_flags & MS_TAGXID)) { + printk("XFS: %s: tagxid not permitted on remount.\n", + sb->s_id); + error = EINVAL; + } if (!error) VFS_MNTUPDATE(vfsp, flags, args, error); kmem_free(args, sizeof(*args)); @@ -702,10 +717,10 @@ STATIC int linvfs_getxstate( - struct super_block *sb, + struct dqhash *hash, struct fs_quota_stat *fqs) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = LINVFS_GET_VFS(hash->dqh_sb); int error; VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error); @@ -714,11 +729,11 @@ STATIC int linvfs_setxstate( - struct super_block *sb, + struct dqhash *hash, unsigned int flags, int op) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = LINVFS_GET_VFS(hash->dqh_sb); int error; VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error); @@ -727,12 +742,12 @@ STATIC int linvfs_getxquota( - 
struct super_block *sb, + struct dqhash *hash, int type, qid_t id, struct fs_disk_quota *fdq) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = LINVFS_GET_VFS(hash->dqh_sb); int error, getmode; getmode = (type == USRQUOTA) ? Q_XGETQUOTA : @@ -743,12 +758,12 @@ STATIC int linvfs_setxquota( - struct super_block *sb, + struct dqhash *hash, int type, qid_t id, struct fs_disk_quota *fdq) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = LINVFS_GET_VFS(hash->dqh_sb); int error, setmode; setmode = (type == USRQUOTA) ? Q_XSETQLIM : @@ -786,6 +801,9 @@ sb->s_export_op = &linvfs_export_ops; #endif sb->s_qcop = &linvfs_qops; +#ifdef CONFIG_QUOTA + sb->s_dqh->dqh_qcop = &linvfs_qops; +#endif sb->s_op = &linvfs_sops; VFS_MOUNT(vfsp, args, NULL, error); Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_sysctl.c =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_sysctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_sysctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -76,74 +76,74 @@ STATIC ctl_table xfs_table[] = { {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, 
&xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.error_level.min, &xfs_params.error_level.max}, {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max}, {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max}, {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max}, {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max}, {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val, sizeof(int), 0644, NULL, 
&proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, #endif /* CONFIG_PROC_FS */ Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_vnode.c =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_vnode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_vnode.c 2005-10-31 11:05:45.000000000 -0600 @@ -120,6 +120,7 @@ inode->i_nlink = vap->va_nlink; inode->i_uid = vap->va_uid; inode->i_gid = vap->va_gid; + inode->i_xid = vap->va_xid; inode->i_blocks = vap->va_nblocks; inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; @@ -128,6 +129,14 @@ inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else Index: linux-2.6.14/fs/xfs/linux-2.6/xfs_vnode.h =================================================================== --- linux-2.6.14.orig/fs/xfs/linux-2.6/xfs_vnode.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/linux-2.6/xfs_vnode.h 2005-10-31 11:05:45.000000000 -0600 @@ -399,6 +399,7 @@ xfs_nlink_t va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ + xid_t va_xid; /* owner context id */ xfs_ino_t va_nodeid; /* file id */ xfs_off_t va_size; /* file size in bytes */ u_long va_blocksize; /* blocksize preferred for i/o */ @@ -447,13 +448,15 @@ #define
XFS_AT_PROJID 0x04000000 #define XFS_AT_SIZE_NOPERM 0x08000000 #define XFS_AT_GENCOUNT 0x10000000 +#define XFS_AT_XID 0x20000000 #define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ - XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) + XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT|\ + XFS_AT_XID) #define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ Index: linux-2.6.14/fs/xfs/quota/xfs_qm_syscalls.c =================================================================== --- linux-2.6.14.orig/fs/xfs/quota/xfs_qm_syscalls.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/quota/xfs_qm_syscalls.c 2005-10-31 11:05:45.000000000 -0600 @@ -229,7 +229,7 @@ xfs_qoff_logitem_t *qoffstart; int nculprits; - if (!force && !capable(CAP_SYS_ADMIN)) + if (!force && !capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); /* * No file system can have quotas enabled on disk but not in core. 
@@ -398,7 +398,7 @@ int error; xfs_inode_t *qip; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); error = 0; if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) { @@ -443,7 +443,7 @@ uint accflags; __int64_t sbflags; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); @@ -614,7 +614,7 @@ int error; xfs_qcnt_t hard, soft; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); if ((newlim->d_fieldmask & Index: linux-2.6.14/fs/xfs/xfs_clnt.h =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_clnt.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_clnt.h 2005-10-31 11:05:45.000000000 -0600 @@ -106,5 +106,6 @@ #define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */ #define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename * symlink,mkdir,rmdir,mknod */ +#define XFSMNT_TAGXID 0x80000000 /* context xid tagging */ #endif /* __XFS_CLNT_H__ */ Index: linux-2.6.14/fs/xfs/xfs_dinode.h =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_dinode.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_dinode.h 2005-10-31 11:05:45.000000000 -0600 @@ -72,7 +72,8 @@ __uint32_t di_gid; /* owner's group id */ __uint32_t di_nlink; /* number of links to file */ __uint16_t di_projid; /* owner's project id */ - __uint8_t di_pad[8]; /* unused, zeroed space */ + __uint16_t di_xid; /* vserver context id */ + __uint8_t di_pad[6]; /* unused, zeroed space */ __uint16_t di_flushiter; /* incremented on flush */ xfs_timestamp_t di_atime; /* time last accessed */ xfs_timestamp_t di_mtime; /* time last modified */ @@ -397,6 +398,9 @@ #define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */ #define 
XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */ #define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ +#define XFS_DIFLAG_BARRIER_BIT 12 /* chroot() barrier */ +#define XFS_DIFLAG_IUNLINK_BIT 13 /* immutable unlink */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -408,11 +412,15 @@ #define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT) #define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT) #define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) +#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT) +#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT) + #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ - XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS) + XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | \ + XFS_DIFLAG_BARRIER | XFS_DIFLAG_IUNLINK) #endif /* __XFS_DINODE_H__ */ Index: linux-2.6.14/fs/xfs/xfs_fs.h =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_fs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -80,6 +80,8 @@ #define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ #define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ #define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ +#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define XFS_XFLAG_IUNLINK 0x00008000 /* immutable unlink */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* @@ -307,7 +309,8 @@ __s32 bs_extents; /* number of extents */ __u32 bs_gen; /* generation count */ __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused 
*/ + __u16 bs_xid; /* context id */ + unsigned char bs_pad[12]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ Index: linux-2.6.14/fs/xfs/xfs_inode.c =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_inode.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -67,6 +67,7 @@ #include "xfs_mac.h" #include "xfs_acl.h" +#include kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -747,20 +748,35 @@ xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; xfs_arch_t arch = ARCH_CONVERT; + uint32_t uid = 0, gid = 0; + uint16_t xid = 0; ASSERT(dir); + if (dir < 0) { + xid = mem_core->di_xid; + /* FIXME supposed to use superblock flag */ + uid = XIDINO_UID(1, mem_core->di_uid, xid); + gid = XIDINO_GID(1, mem_core->di_gid, xid); + xid = XIDINO_XID(1, xid); + } + INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); - INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); - INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); + INT_XLATE(buf_core->di_uid, uid, dir, arch); + INT_XLATE(buf_core->di_gid, gid, dir, arch); + INT_XLATE(buf_core->di_xid, xid, dir, arch); INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); if (dir > 0) { + /* FIXME supposed to use superblock flag */ + mem_core->di_uid = INOXID_UID(1, uid, gid); + mem_core->di_gid = INOXID_GID(1, uid, gid); + mem_core->di_xid = INOXID_XID(1, uid, gid, xid); memcpy(mem_core->di_pad, buf_core->di_pad, 
sizeof(buf_core->di_pad)); } else { @@ -809,6 +825,10 @@ flags |= XFS_XFLAG_PREALLOC; if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= XFS_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= XFS_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= XFS_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= XFS_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) @@ -1134,6 +1154,7 @@ ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(cr); ip->i_d.di_gid = current_fsgid(cr); + ip->i_d.di_xid = current_fsxid(cr, vp); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); Index: linux-2.6.14/fs/xfs/xfs_itable.c =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_itable.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_itable.c 2005-10-31 11:05:45.000000000 -0600 @@ -97,6 +97,7 @@ buf->bs_mode = dic->di_mode; buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; + buf->bs_xid = dic->di_xid; buf->bs_size = dic->di_size; buf->bs_atime.tv_sec = dic->di_atime.t_sec; buf->bs_atime.tv_nsec = dic->di_atime.t_nsec; @@ -172,6 +173,7 @@ buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT); buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT); buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT); + buf->bs_xid = INT_GET(dic->di_xid, ARCH_CONVERT); buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT); buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT); buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT); Index: linux-2.6.14/fs/xfs/xfs_mount.h =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_mount.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_mount.h 2005-10-31 11:05:45.000000000 -0600 @@ -421,6 +421,7 @@ * allocation */ #define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */ #define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */ +#define 
XFS_MOUNT_TAGXID 0x80000000 /* context xid tagging */ /* * Default minimum read and write sizes. Index: linux-2.6.14/fs/xfs/xfs_vfsops.c =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_vfsops.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_vfsops.c 2005-10-31 11:05:45.000000000 -0600 @@ -321,6 +321,8 @@ if (ap->flags & XFSMNT_NOUUID) mp->m_flags |= XFS_MOUNT_NOUUID; + if (ap->flags & XFSMNT_TAGXID) + mp->m_flags |= XFS_MOUNT_TAGXID; if (ap->flags & XFSMNT_NOLOGFLUSH) mp->m_flags |= XFS_MOUNT_NOLOGFLUSH; @@ -393,6 +395,8 @@ return XFS_ERROR(EINVAL); } + if (ap->flags & XFSMNT_TAGXID) + vfs->vfs_super->s_flags |= MS_TAGXID; return 0; } @@ -1633,6 +1637,7 @@ #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ +#define MNTOPT_TAGXID "tagxid" /* context xid tagging for inodes */ STATIC unsigned long suffix_strtoul(const char *cp, char **endp, unsigned int base) @@ -1797,6 +1802,10 @@ args->flags &= ~XFSMNT_IDELETE; } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { args->flags |= XFSMNT_IDELETE; +#ifndef CONFIG_INOXID_NONE + } else if (!strcmp(this_char, MNTOPT_TAGXID)) { + args->flags |= XFSMNT_TAGXID; +#endif } else if (!strcmp(this_char, "osyncisdsync")) { /* no-op, this is now the default */ printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); Index: linux-2.6.14/fs/xfs/xfs_vnodeops.c =================================================================== --- linux-2.6.14.orig/fs/xfs/xfs_vnodeops.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/fs/xfs/xfs_vnodeops.c 2005-10-31 11:05:45.000000000 -0600 @@ -166,6 +166,7 @@ vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; + vap->va_xid = ip->i_d.di_xid; vap->va_projid = ip->i_d.di_projid; /* @@ -300,6 +301,7 @@ uint commit_flags=0; 
uid_t uid=0, iuid=0; gid_t gid=0, igid=0; + xid_t xid=0, ixid=0; int timeflags = 0; vnode_t *vp; xfs_prid_t projid=0, iprojid=0; @@ -356,6 +358,7 @@ (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { uint qflags = 0; + /* FIXME: handle xid? */ if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = vap->va_uid; qflags |= XFS_QMOPT_UQUOTA; @@ -436,6 +439,8 @@ if (mask & (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| XFS_AT_GID|XFS_AT_PROJID)) { + /* FIXME: handle xid? */ + /* * CAP_FOWNER overrides the following restrictions: * @@ -484,7 +489,7 @@ * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_XID|XFS_AT_PROJID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change @@ -492,10 +497,12 @@ * would have changed also. */ iuid = ip->i_d.di_uid; - iprojid = ip->i_d.di_projid; igid = ip->i_d.di_gid; - gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + ixid = ip->i_d.di_xid; + iprojid = ip->i_d.di_projid; uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; + gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + xid = (mask & XFS_AT_XID) ? vap->va_xid : ixid; projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : iprojid; @@ -523,6 +530,7 @@ if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { + /* FIXME: handle xid? */ ASSERT(tp); code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? @@ -755,7 +763,7 @@ * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_XID|XFS_AT_PROJID)) { /* * CAP_FSETID overrides the following restrictions: * @@ -771,6 +779,12 @@ * Change the ownerships and register quota modifications * in the transaction. 
*/ + if (ixid != xid) { + if (XFS_IS_GQUOTA_ON(mp)) { + /* FIXME: handle xid quota? */ + } + ip->i_d.di_xid = xid; + } if (iuid != uid) { if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & XFS_AT_UID); @@ -851,6 +865,10 @@ di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + di_flags |= XFS_DIFLAG_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + di_flags |= XFS_DIFLAG_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) di_flags |= XFS_DIFLAG_APPEND; if (vap->va_xflags & XFS_XFLAG_SYNC) Index: linux-2.6.14/include/asm-arm/tlb.h =================================================================== --- linux-2.6.14.orig/include/asm-arm/tlb.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-arm/tlb.h 2005-10-31 11:05:45.000000000 -0600 @@ -20,6 +20,7 @@ #include #include #include +#include /* * TLB handling. This allows us to remove pages from the page Index: linux-2.6.14/include/asm-arm26/tlb.h =================================================================== --- linux-2.6.14.orig/include/asm-arm26/tlb.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-arm26/tlb.h 2005-10-31 11:05:45.000000000 -0600 @@ -3,6 +3,7 @@ #include #include +#include /* * TLB handling. This allows us to remove pages from the page Index: linux-2.6.14/include/asm-arm26/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-arm26/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-arm26/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -304,6 +304,8 @@ #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) +#define __NR_vserver (__NR_SYSCALL_BASE+313) + /* * The following SWIs are ARM private. 
FIXME - make appropriate for arm26 */ Index: linux-2.6.14/include/asm-generic/tlb.h =================================================================== --- linux-2.6.14.orig/include/asm-generic/tlb.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-generic/tlb.h 2005-10-31 11:05:45.000000000 -0600 @@ -15,6 +15,7 @@ #include #include +#include #include #include Index: linux-2.6.14/include/asm-i386/elf.h =================================================================== --- linux-2.6.14.orig/include/asm-i386/elf.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-i386/elf.h 2005-10-31 11:05:45.000000000 -0600 @@ -71,7 +71,7 @@ the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#define ELF_ET_DYN_BASE ((TASK_UNMAPPED_BASE) * 2) /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is now struct_user_regs, they are different) */ Index: linux-2.6.14/include/asm-i386/page.h =================================================================== --- linux-2.6.14.orig/include/asm-i386/page.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-i386/page.h 2005-10-31 11:05:45.000000000 -0600 @@ -109,19 +109,27 @@ #endif /* __ASSEMBLY__ */ -#ifdef __ASSEMBLY__ +#if defined(CONFIG_SPLIT_3GB) #define __PAGE_OFFSET (0xC0000000) -#define __PHYSICAL_START CONFIG_PHYSICAL_START -#else -#define __PAGE_OFFSET (0xC0000000UL) -#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) +#elif defined(CONFIG_SPLIT_25GB) +#define __PAGE_OFFSET (0xA0000000) +#elif defined(CONFIG_SPLIT_2GB) +#define __PAGE_OFFSET (0x80000000) +#elif defined(CONFIG_SPLIT_15GB) +#define __PAGE_OFFSET (0x60000000) +#elif defined(CONFIG_SPLIT_1GB) +#define __PAGE_OFFSET (0x40000000) #endif + +#define __PHYSICAL_START CONFIG_PHYSICAL_START #define __KERNEL_START (__PAGE_OFFSET + __PHYSICAL_START) +#define __MAXMEM 
(-__PAGE_OFFSET-__VMALLOC_RESERVE) #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define PHYSICAL_START ((unsigned long)__PHYSICAL_START) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) -#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) +#define MAXMEM ((unsigned long)__MAXMEM) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) Index: linux-2.6.14/include/asm-i386/processor.h =================================================================== --- linux-2.6.14.orig/include/asm-i386/processor.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-i386/processor.h 2005-10-31 11:05:45.000000000 -0600 @@ -310,9 +310,10 @@ extern int bootloader_type; /* - * User space process size: 3GB (default). + * User space process size: (3GB default). */ -#define TASK_SIZE (PAGE_OFFSET) +#define __TASK_SIZE (__PAGE_OFFSET) +#define TASK_SIZE ((unsigned long)__TASK_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. 
Index: linux-2.6.14/include/asm-ia64/tlb.h =================================================================== --- linux-2.6.14.orig/include/asm-ia64/tlb.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-ia64/tlb.h 2005-10-31 11:05:45.000000000 -0600 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/include/asm-parisc/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-parisc/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-parisc/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -755,8 +755,9 @@ #define __NR_mbind (__NR_Linux + 260) #define __NR_get_mempolicy (__NR_Linux + 261) #define __NR_set_mempolicy (__NR_Linux + 262) +#define __NR_vserver (__NR_Linux + 263) -#define __NR_Linux_syscalls 263 +#define __NR_Linux_syscalls 264 #define HPUX_GATEWAY_ADDR 0xC0000004 #define LINUX_GATEWAY_ADDR 0x100 Index: linux-2.6.14/include/asm-ppc/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-ppc/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-ppc/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -261,7 +261,7 @@ #define __NR_fadvise64_64 254 #define __NR_rtas 255 #define __NR_sys_debug_setcontext 256 -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 /* 258 currently unused */ /* Number 259 is reserved for new sys_mbind */ /* Number 260 is reserved for new sys_get_mempolicy */ Index: linux-2.6.14/include/asm-ppc64/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-ppc64/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-ppc64/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -267,7 +267,7 @@ /* #define __NR_fadvise64_64 254 32bit only */ #define __NR_rtas 255 /* Number 256 is reserved for sys_debug_setcontext */ -/* Number 257 is reserved for vserver */ 
+#define __NR_vserver 257 /* 258 currently unused */ #define __NR_mbind 259 #define __NR_get_mempolicy 260 Index: linux-2.6.14/include/asm-s390/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-s390/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-s390/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -255,7 +255,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ +#define __NR_vserver 263 #define __NR_fadvise64_64 264 #define __NR_statfs64 265 #define __NR_fstatfs64 266 Index: linux-2.6.14/include/asm-sparc/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-sparc/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-sparc/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -283,7 +283,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 Index: linux-2.6.14/include/asm-sparc64/tlb.h =================================================================== --- linux-2.6.14.orig/include/asm-sparc64/tlb.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-sparc64/tlb.h 2005-10-31 11:05:45.000000000 -0600 @@ -3,6 +3,7 @@ #include #include +#include #include #include #include Index: linux-2.6.14/include/asm-sparc64/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-sparc64/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-sparc64/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -285,7 +285,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define 
__NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 Index: linux-2.6.14/include/asm-x86_64/unistd.h =================================================================== --- linux-2.6.14.orig/include/asm-x86_64/unistd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/asm-x86_64/unistd.h 2005-10-31 11:05:45.000000000 -0600 @@ -532,7 +532,7 @@ #define __NR_utimes 235 __SYSCALL(__NR_utimes, sys_utimes) #define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) +__SYSCALL(__NR_vserver, sys_vserver) #define __NR_mbind 237 __SYSCALL(__NR_mbind, sys_mbind) #define __NR_set_mempolicy 238 Index: linux-2.6.14/include/linux/capability.h =================================================================== --- linux-2.6.14.orig/include/linux/capability.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/capability.h 2005-10-31 11:05:45.000000000 -0600 @@ -234,6 +234,7 @@ arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ /* Allow setting zone reclaim policy */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -287,6 +288,11 @@ #define CAP_AUDIT_CONTROL 30 +/* Allow context manipulations */ +/* Allow changing context info on files */ + +#define CAP_CONTEXT 31 + #ifdef __KERNEL__ /* * Bounding set Index: linux-2.6.14/include/linux/cyclades.h =================================================================== --- linux-2.6.14.orig/include/linux/cyclades.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/cyclades.h 2005-10-31 11:05:45.000000000 -0600 @@ -585,7 +585,7 @@ int custom_divisor; int x_char; /* to be pushed out ASAP */ int close_delay; - unsigned short closing_wait; + unsigned int closing_wait; unsigned long event; unsigned long last_active; int count; /* # of fd on device */ Index: linux-2.6.14/include/linux/devpts_fs.h 
=================================================================== --- linux-2.6.14.orig/include/linux/devpts_fs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/devpts_fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -30,5 +30,7 @@ #endif +#define DEVPTS_SUPER_MAGIC 0x00001cd1 + #endif /* _LINUX_DEVPTS_FS_H */ Index: linux-2.6.14/include/linux/dtlk.h =================================================================== --- linux-2.6.14.orig/include/linux/dtlk.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/dtlk.h 2005-10-31 11:05:45.000000000 -0600 @@ -27,7 +27,7 @@ #define DTLK_CLEAR 0x18 /* stops speech */ -#define DTLK_MAX_RETRIES (loops_per_jiffy/(10000/HZ)) +#define DTLK_MAX_RETRIES (HZ*(loops_per_jiffy >> 3)/1250) /* TTS Port Status Flags */ #define TTS_READABLE 0x80 /* mask for bit which is nonzero if a Index: linux-2.6.14/include/linux/ext2_fs.h =================================================================== --- linux-2.6.14.orig/include/linux/ext2_fs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/ext2_fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -192,10 +192,17 @@ #define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT2_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT2_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT2_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif /* * ioctl commands @@ -240,7 +247,7 @@ struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - 
__u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -272,6 +279,7 @@ #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #endif @@ -315,6 +323,7 @@ #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_TAGXID (1<<24) /* Enable Context Tags */ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt Index: linux-2.6.14/include/linux/ext3_fs.h =================================================================== --- linux-2.6.14.orig/include/linux/ext3_fs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/ext3_fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -185,10 +185,20 @@ #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_IOC_SETXID FIOC_SETXIDJ +#endif /* * Inode dynamic state flags @@ -287,7 +297,7 @@ struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __le16 l_i_uid_high; /* these 2 fields */ __le16 
l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -321,6 +331,7 @@ #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -375,6 +386,7 @@ #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT3_MOUNT_TAGXID (1<<24) /* Enable Context Tags */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H Index: linux-2.6.14/include/linux/ext3_jbd.h =================================================================== --- linux-2.6.14.orig/include/linux/ext3_jbd.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/ext3_jbd.h 2005-10-31 11:05:45.000000000 -0600 @@ -77,10 +77,10 @@ #define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) -#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) +#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? \ + (DQUOT_INIT_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? 
\ + (DQUOT_DEL_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) #else #define EXT3_QUOTA_TRANS_BLOCKS(sb) 0 #define EXT3_QUOTA_INIT_BLOCKS(sb) 0 Index: linux-2.6.14/include/linux/fs.h =================================================================== --- linux-2.6.14.orig/include/linux/fs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -10,6 +10,7 @@ #include #include #include +#include /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -105,6 +106,8 @@ #define MS_REC 16384 #define MS_VERBOSE 32768 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_TAGXID (1<<24) /* tag inodes with context information */ +#define MS_XID (1<<25) /* use specific xid for this mount */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -132,6 +135,8 @@ #define S_NOCMTIME 128 /* Do not update file c/mtime */ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE 512 /* Inode is fs-internal */ +#define S_BARRIER 1024 /* Barrier for chroot() */ +#define S_IUNLINK 2048 /* Immutable unlink */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -148,25 +153,31 @@ */ #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) +#define IS_RDONLY(inode) __IS_FLG(inode, MS_RDONLY) #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) +#define IS_TAGXID(inode) __IS_FLG(inode, MS_TAGXID) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IUNLINK(inode) ((inode)->i_flags & S_IUNLINK) +#define 
IS_IXORUNLINK(inode) ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER)) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) +#define IS_COW_LINK(inode) (((inode)->i_nlink > 1) && IS_IUNLINK(inode)) + /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -264,6 +275,7 @@ #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 +#define ATTR_XID 8192 /* * This is the Inode Attributes structure, used for notify_change(). It @@ -279,6 +291,7 @@ umode_t ia_mode; uid_t ia_uid; gid_t ia_gid; + xid_t ia_xid; loff_t ia_size; struct timespec ia_atime; struct timespec ia_mtime; @@ -290,6 +303,9 @@ */ #include +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ +#define ATTR_FLAG_IUNLINK 1024 /* Immutable unlink */ + /* * oh the beauties of C type declarations. 
*/ @@ -430,6 +446,7 @@ unsigned int i_nlink; uid_t i_uid; gid_t i_gid; + xid_t i_xid; dev_t i_rdev; loff_t i_size; struct timespec i_atime; @@ -450,6 +467,7 @@ struct address_space *i_mapping; struct address_space i_data; #ifdef CONFIG_QUOTA + struct dqhash *i_dqh; struct dquot *i_dquot[MAXQUOTAS]; #endif /* These three should probably be a union */ @@ -585,6 +603,7 @@ struct fown_struct f_owner; unsigned int f_uid, f_gid; struct file_ra_state f_ra; + xid_t f_xid; unsigned long f_version; void *f_security; @@ -665,6 +684,7 @@ unsigned char fl_type; loff_t fl_start; loff_t fl_end; + xid_t fl_xid; struct fasync_struct * fl_fasync; /* for lease break notifications */ unsigned long fl_break_time; /* for nonblocking lease breaks */ @@ -765,7 +785,7 @@ unsigned long long s_maxbytes; /* Max file size */ struct file_system_type *s_type; struct super_operations *s_op; - struct dquot_operations *dq_op; + struct dquot_operations *s_qop; struct quotactl_ops *s_qcop; struct export_operations *s_export_op; unsigned long s_flags; @@ -788,7 +808,7 @@ struct block_device *s_bdev; struct list_head s_instances; - struct quota_info s_dquot; /* Diskquota specific options */ + struct dqhash *s_dqh; /* Diskquota hash */ int s_frozen; wait_queue_head_t s_wait_unfrozen; @@ -857,12 +877,12 @@ * VFS helper functions.. 
*/ extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); -extern int vfs_mkdir(struct inode *, struct dentry *, int); -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int); -extern int vfs_link(struct dentry *, struct inode *, struct dentry *); -extern int vfs_rmdir(struct inode *, struct dentry *); -extern int vfs_unlink(struct inode *, struct dentry *); +extern int vfs_mkdir(struct inode *, struct dentry *, int, struct nameidata *); +extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t, struct nameidata *); +extern int vfs_symlink(struct inode *, struct dentry *, const char *, int, struct nameidata *); +extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct nameidata *); +extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *); +extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* @@ -1005,6 +1025,7 @@ unsigned long, loff_t *); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *); +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t); /* * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called @@ -1033,8 +1054,8 @@ int (*show_options)(struct seq_file *, struct vfsmount *); - ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); - ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); + ssize_t (*quota_read)(struct dqhash *, int, char *, size_t, loff_t); + ssize_t (*quota_write)(struct dqhash *, int, const char *, size_t, loff_t); }; /* Inode state bits. Protected by inode_lock. 
*/ @@ -1063,8 +1084,16 @@ static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry) { - /* per-mountpoint checks will go here */ - update_atime(dentry->d_inode); + struct inode *inode = dentry->d_inode; + + if (MNT_IS_NOATIME(mnt)) + return; + if (S_ISDIR(inode->i_mode) && MNT_IS_NODIRATIME(mnt)) + return; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) + return; + + update_atime(inode); } static inline void file_accessed(struct file *file) @@ -1464,7 +1493,7 @@ extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern int remove_suid(struct dentry *); -extern void remove_dquot_ref(struct super_block *, int, struct list_head *); +extern void remove_dquot_ref(struct dqhash *, int, struct list_head *); extern struct semaphore iprune_sem; extern void __insert_inode_hash(struct inode *, unsigned long hashval); @@ -1504,6 +1533,7 @@ ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); +extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t, loff_t *, int); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); @@ -1622,6 +1652,7 @@ extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, void *, filldir_t); +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *)); extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int simple_statfs(struct super_block *, struct kstatfs *); extern int simple_link(struct dentry *, struct inode *, struct dentry *); Index: linux-2.6.14/include/linux/generic_serial.h =================================================================== --- 
linux-2.6.14.orig/include/linux/generic_serial.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/generic_serial.h 2005-10-31 11:05:45.000000000 -0600 @@ -42,7 +42,7 @@ int blocked_open; struct tty_struct *tty; unsigned long event; - unsigned short closing_wait; + unsigned int closing_wait; int close_delay; struct real_driver *rd; int wakeup_chars; Index: linux-2.6.14/include/linux/hayesesp.h =================================================================== --- linux-2.6.14.orig/include/linux/hayesesp.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/hayesesp.h 2005-10-31 11:05:45.000000000 -0600 @@ -88,8 +88,8 @@ int stat_flags; int custom_divisor; int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; + unsigned int closing_wait; + unsigned int closing_wait2; int IER; /* Interrupt Enable Register */ int MCR; /* Modem control register */ unsigned long event; Index: linux-2.6.14/include/linux/init_task.h =================================================================== --- linux-2.6.14.orig/include/linux/init_task.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/init_task.h 2005-10-31 11:05:45.000000000 -0600 @@ -122,6 +122,10 @@ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ + .xid = 0, \ + .vx_info = NULL, \ + .nid = 0, \ + .nx_info = NULL, \ } Index: linux-2.6.14/include/linux/ip.h =================================================================== --- linux-2.6.14.orig/include/linux/ip.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/ip.h 2005-10-31 11:05:45.000000000 -0600 @@ -139,6 +139,7 @@ /* Socket demultiplex comparisons on incoming packets. 
*/ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ + __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */ __u16 dport; /* Destination port */ __u16 num; /* Local port */ __u32 saddr; /* Sending source */ Index: linux-2.6.14/include/linux/ipc.h =================================================================== --- linux-2.6.14.orig/include/linux/ipc.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/ipc.h 2005-10-31 11:05:45.000000000 -0600 @@ -66,6 +66,7 @@ mode_t mode; unsigned long seq; void *security; + xid_t xid; }; #endif /* __KERNEL__ */ Index: linux-2.6.14/include/linux/jiffies.h =================================================================== --- linux-2.6.14.orig/include/linux/jiffies.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/jiffies.h 2005-10-31 11:05:45.000000000 -0600 @@ -38,6 +38,14 @@ # define SHIFT_HZ 9 #elif HZ >= 768 && HZ < 1536 # define SHIFT_HZ 10 +#elif HZ >= 1536 && HZ < 3072 +# define SHIFT_HZ 11 +#elif HZ >= 3072 && HZ < 6144 +# define SHIFT_HZ 12 +#elif HZ >= 6144 && HZ < 12288 +# define SHIFT_HZ 13 +#elif HZ >= 12288 && HZ < 24576 +# define SHIFT_HZ 14 #else # error You lose. 
#endif Index: linux-2.6.14/include/linux/kernel.h =================================================================== --- linux-2.6.14.orig/include/linux/kernel.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/kernel.h 2005-10-31 11:05:45.000000000 -0600 @@ -17,6 +17,7 @@ #include extern const char linux_banner[]; +extern const char vx_linux_banner[]; #define INT_MAX ((int)(~0U>>1)) #define INT_MIN (-INT_MAX - 1) Index: linux-2.6.14/include/linux/major.h =================================================================== --- linux-2.6.14.orig/include/linux/major.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/major.h 2005-10-31 11:05:45.000000000 -0600 @@ -15,6 +15,7 @@ #define HD_MAJOR IDE0_MAJOR #define PTY_SLAVE_MAJOR 3 #define TTY_MAJOR 4 +#define VROOT_MAJOR 4 #define TTYAUX_MAJOR 5 #define LP_MAJOR 6 #define VCS_MAJOR 7 Index: linux-2.6.14/include/linux/mount.h =================================================================== --- linux-2.6.14.orig/include/linux/mount.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/mount.h 2005-10-31 11:05:45.000000000 -0600 @@ -20,6 +20,10 @@ #define MNT_NOSUID 1 #define MNT_NODEV 2 #define MNT_NOEXEC 4 +#define MNT_RDONLY 8 +#define MNT_NOATIME 16 +#define MNT_NODIRATIME 32 +#define MNT_XID 256 struct vfsmount { @@ -37,8 +41,13 @@ struct list_head mnt_list; struct list_head mnt_expire; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ + xid_t mnt_xid; /* xid tagging used for vfsmount */ }; +#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY)) +#define MNT_IS_NOATIME(m) ((m) && ((m)->mnt_flags & MNT_NOATIME)) +#define MNT_IS_NODIRATIME(m) ((m) && ((m)->mnt_flags & MNT_NODIRATIME)) + static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) Index: linux-2.6.14/include/linux/namespace.h =================================================================== --- 
linux-2.6.14.orig/include/linux/namespace.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/namespace.h 2005-10-31 11:05:45.000000000 -0600 @@ -14,6 +14,7 @@ extern int copy_namespace(int, struct task_struct *); extern void __put_namespace(struct namespace *namespace); +extern void umount_unused(struct vfsmount *, struct fs_struct *); static inline void put_namespace(struct namespace *namespace) { Index: linux-2.6.14/include/linux/net.h =================================================================== --- linux-2.6.14.orig/include/linux/net.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/net.h 2005-10-31 11:05:45.000000000 -0600 @@ -62,6 +62,7 @@ #define SOCK_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 +#define SOCK_USER_SOCKET 4 #ifndef ARCH_HAS_SOCKET_TYPES /** Index: linux-2.6.14/include/linux/nfs_mount.h =================================================================== --- linux-2.6.14.orig/include/linux/nfs_mount.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/nfs_mount.h 2005-10-31 11:05:45.000000000 -0600 @@ -61,6 +61,7 @@ #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_TAGXID 0x8000 /* tagxid */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif Index: linux-2.6.14/include/linux/percpu.h =================================================================== --- linux-2.6.14.orig/include/linux/percpu.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/percpu.h 2005-10-31 11:05:45.000000000 -0600 @@ -8,7 +8,7 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#define PERCPU_ENOUGH_ROOM 65536 #endif /* Must be an lvalue. 
*/ Index: linux-2.6.14/include/linux/proc_fs.h =================================================================== --- linux-2.6.14.orig/include/linux/proc_fs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/proc_fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -55,6 +55,7 @@ nlink_t nlink; uid_t uid; gid_t gid; + int vx_flags; unsigned long size; struct inode_operations * proc_iops; struct file_operations * proc_fops; @@ -245,9 +246,11 @@ struct proc_inode { struct task_struct *task; int type; + int vx_flags; union { int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); int (*proc_read)(struct task_struct *task, char *page); + int (*proc_vid_read)(int vid, char *page); } op; struct proc_dir_entry *pde; struct inode vfs_inode; Index: linux-2.6.14/include/linux/ptrace.h =================================================================== --- linux-2.6.14.orig/include/linux/ptrace.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/ptrace.h 2005-10-31 11:05:45.000000000 -0600 @@ -77,6 +77,8 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ +#include +#include extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); Index: linux-2.6.14/include/linux/quota.h =================================================================== --- linux-2.6.14.orig/include/linux/quota.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/quota.h 2005-10-31 11:05:45.000000000 -0600 @@ -56,6 +56,13 @@ #define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10)) #define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS) +/* are NULL dqhash ptrs valid? 
*/ +#ifdef HANDLE_DQHASH_NULL +#define dqhash_valid(hash) ((hash) != NULL) +#else +#define dqhash_valid(hash) (0 == 0) +#endif + #define MAXQUOTAS 2 #define USRQUOTA 0 /* element used for user quotas */ #define GRPQUOTA 1 /* element used for group quotas */ @@ -175,19 +182,24 @@ } u; }; -struct super_block; +// struct super_block; +struct dqhash; #define DQF_MASK 0xffff /* Mask for format specific flags */ #define DQF_INFO_DIRTY_B 16 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ -extern void mark_info_dirty(struct super_block *sb, int type); +extern void mark_info_dirty(struct dqhash *hash, int type); + #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) #define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list)) #define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) -#define sb_dqopt(sb) (&(sb)->s_dquot) -#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) +// #define sb_dqopt(sb) (&(sb)->s_dquot) +// #define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) + +#define dqh_dqopt(hash) (&(hash)->dqh_dqopt) +#define dqh_dqinfo(hash, type) (dqh_dqopt(hash)->info+(type)) struct dqstats { int lookups; @@ -218,7 +230,7 @@ struct semaphore dq_lock; /* dquot IO lock */ atomic_t dq_count; /* Use count */ wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */ - struct super_block *dq_sb; /* superblock this applies to */ + struct dqhash *dq_dqh; /* quota hash backpointer */ unsigned int dq_id; /* ID this applies to (uid, gid) */ loff_t dq_off; /* Offset of dquot on disk */ unsigned long dq_flags; /* See DQ_* */ @@ -233,13 +245,14 @@ /* Operations which must be implemented by each quota format */ struct quota_format_ops { - int (*check_quota_file)(struct super_block *sb, int type); /* Detect whether file is in our format */ - int (*read_file_info)(struct super_block *sb, int type); /* Read main info about file - called on quotaon() */ - int (*write_file_info)(struct 
super_block *sb, int type); /* Write main info about file */ - int (*free_file_info)(struct super_block *sb, int type); /* Called on quotaoff() */ - int (*read_dqblk)(struct dquot *dquot); /* Read structure for one user */ - int (*commit_dqblk)(struct dquot *dquot); /* Write structure for one user */ - int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ + int (*check_quota_file)(struct dqhash *, int); /* Detect whether file is in our format */ + int (*read_file_info)(struct dqhash *, int); /* Read main info about file - called on quotaon() */ + int (*write_file_info)(struct dqhash *, int); /* Write main info about file */ + int (*free_file_info)(struct dqhash *, int); /* Called on quotaoff() */ + + int (*read_dqblk)(struct dquot *); /* Read structure for one user */ + int (*commit_dqblk)(struct dquot *); /* Write structure for one user */ + int (*release_dqblk)(struct dquot *); /* Called when last reference to dquot is being dropped */ }; /* Operations working with dquots */ @@ -255,22 +268,22 @@ int (*acquire_dquot) (struct dquot *); /* Quota is going to be created on disk */ int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ - int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ + int (*write_info) (struct dqhash *, int); /* Write of quota "superblock" */ }; /* Operations handling requests from userspace */ struct quotactl_ops { - int (*quota_on)(struct super_block *, int, int, char *); - int (*quota_off)(struct super_block *, int); - int (*quota_sync)(struct super_block *, int); - int (*get_info)(struct super_block *, int, struct if_dqinfo *); - int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); - int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); - int (*get_xstate)(struct super_block 
*, struct fs_quota_stat *); - int (*set_xstate)(struct super_block *, unsigned int, int); - int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); - int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); + int (*quota_on)(struct dqhash *, int, int, char *); + int (*quota_off)(struct dqhash *, int); + int (*quota_sync)(struct dqhash *, int); + int (*get_info)(struct dqhash *, int, struct if_dqinfo *); + int (*set_info)(struct dqhash *, int, struct if_dqinfo *); + int (*get_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *); + int (*set_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *); + int (*get_xstate)(struct dqhash *, struct fs_quota_stat *); + int (*set_xstate)(struct dqhash *, unsigned int, int); + int (*get_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *); + int (*set_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *); }; struct quota_format_type { @@ -294,16 +307,15 @@ struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -/* Inline would be better but we need to dereference super_block which is not defined yet */ -int mark_dquot_dirty(struct dquot *dquot); #define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags) -#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \ - (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) +#define dqh_has_quota_enabled(hash, type) (dqhash_valid(hash) && ((type)==USRQUOTA ? 
\ + (dqh_dqopt(hash)->flags & DQUOT_USR_ENABLED) : (dqh_dqopt(hash)->flags & DQUOT_GRP_ENABLED))) + +#define dqh_any_quota_enabled(hash) (dqhash_valid(hash) && \ + (dqh_has_quota_enabled(hash, USRQUOTA) || dqh_has_quota_enabled(hash, GRPQUOTA))) -#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ - sb_has_quota_enabled(sb, GRPQUOTA)) int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); @@ -318,6 +330,52 @@ {QFMT_VFS_V0, "quota_v2"},\ {0, NULL}} +struct dqhash { + struct list_head dqh_list; /* List of all quota hashes */ + unsigned int dqh_id; /* ID for hash */ + atomic_t dqh_count; /* Use count */ + struct quota_info dqh_dqopt; /* Diskquota specific options */ + struct dquot_operations *dqh_qop; + struct quotactl_ops *dqh_qcop; + struct super_block *dqh_sb; /* super block */ +// struct list_head dqh_hash[NR_DQHASH]; + unsigned int dqh_hash_bits; + unsigned int dqh_hash_mask; + struct hlist_head *dqh_hash; +}; + +#if defined(CONFIG_QUOTA) + + +struct dqhash *new_dqhash(struct super_block *, unsigned int); +void destroy_dqhash(struct dqhash *); +struct dqhash *find_dqhash(unsigned int); + +static inline void dqhput(struct dqhash *hash) +{ + if (dqhash_valid(hash)) + if (atomic_dec_and_test(&hash->dqh_count)) + destroy_dqhash(hash); +} + +static inline struct dqhash *dqhget(struct dqhash *hash) +{ + if (dqhash_valid(hash)) + atomic_inc(&hash->dqh_count); + return hash; +} + +#else /* CONFIG_QUOTA */ + +#define new_dqhash(sb, dqdom) (0) +#define find_dqhash(dqdom) (0) +#define destroy_dqhash(hash) do { } while(0) + +#define dqhput(hash) do { } while(0) +#define dqhget(hash) (hash) + +#endif /* CONFIG_QUOTA */ + #else # /* nodep */ include Index: linux-2.6.14/include/linux/quotaops.h =================================================================== --- linux-2.6.14.orig/include/linux/quotaops.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/quotaops.h 2005-10-31 
11:05:45.000000000 -0600 @@ -20,7 +20,7 @@ /* * declaration of quota_function calls in kernel. */ -extern void sync_dquots(struct super_block *sb, int type); +extern void sync_dquots(struct dqhash *hash, int type); extern int dquot_initialize(struct inode *inode, int type); extern int dquot_drop(struct inode *inode); @@ -35,19 +35,19 @@ extern int dquot_commit(struct dquot *dquot); extern int dquot_acquire(struct dquot *dquot); extern int dquot_release(struct dquot *dquot); -extern int dquot_commit_info(struct super_block *sb, int type); +extern int dquot_commit_info(struct dqhash *hash, int type); extern int dquot_mark_dquot_dirty(struct dquot *dquot); -extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); -extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, +extern int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path); +extern int vfs_quota_on_mount(struct dqhash *hash, char *qf_name, int format_id, int type); -extern int vfs_quota_off(struct super_block *sb, int type); -#define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type) -extern int vfs_quota_sync(struct super_block *sb, int type); -extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); -extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +extern int vfs_quota_off(struct dqhash *hash, int type); +#define vfs_quota_off_mount(dqh, type) vfs_quota_off(dqh, type) +extern int vfs_quota_sync(struct dqhash *hash, int type); +extern int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii); +extern int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii); +extern int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di); +extern int vfs_set_dqblk(struct 
dqhash *hash, int type, qid_t id, struct if_dqblk *di); /* * Operations supported for diskquotas. @@ -62,9 +62,12 @@ * need a lot of space in journal for dquot structure allocation. */ static __inline__ void DQUOT_INIT(struct inode *inode) { - BUG_ON(!inode->i_sb); - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) - inode->i_sb->dq_op->initialize(inode, -1); + if (!dqhash_valid(inode->i_dqh)) + return; + BUG_ON(!inode->i_dqh); + // printk("DQUOT_INIT(%p,%p,%d)\n", inode, inode->i_dqh, dqh_any_quota_enabled(inode->i_dqh)); + if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode)) + inode->i_dqh->dqh_qop->initialize(inode, -1); } /* The same as with DQUOT_INIT */ @@ -73,8 +76,8 @@ /* Here we can get arbitrary inode from clear_inode() so we have * to be careful. OTOH we don't need locking as quota operations * are allowed to change only at mount time */ - if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op - && inode->i_sb->dq_op->drop) { + if (!IS_NOQUOTA(inode) && inode->i_dqh && inode->i_dqh->dqh_qop + && inode->i_dqh->dqh_qop->drop) { int cnt; /* Test before calling to rule out calls from proc and such * where we are not allowed to block. 
Note that this is @@ -85,7 +88,7 @@ if (inode->i_dquot[cnt] != NODQUOT) break; if (cnt < MAXQUOTAS) - inode->i_sb->dq_op->drop(inode); + inode->i_dqh->dqh_qop->drop(inode); } } @@ -93,9 +96,9 @@ * a transaction (deadlocks possible otherwise) */ static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (dqh_any_quota_enabled(inode->i_dqh)) { /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 1) == NO_QUOTA) return 1; } else @@ -113,9 +116,9 @@ static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (dqh_any_quota_enabled(inode->i_dqh)) { /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 0) == NO_QUOTA) return 1; } else @@ -133,9 +136,9 @@ static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (dqh_any_quota_enabled(inode->i_dqh)) { DQUOT_INIT(inode); - if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->alloc_inode(inode, 1) == NO_QUOTA) return 1; } return 0; @@ -143,8 +146,8 @@ static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) - inode->i_sb->dq_op->free_space(inode, nr); + if (dqh_any_quota_enabled(inode->i_dqh)) + inode->i_dqh->dqh_qop->free_space(inode, nr); else inode_sub_bytes(inode, nr); } @@ -157,29 +160,30 @@ static __inline__ void DQUOT_FREE_INODE(struct inode *inode) { - if (sb_any_quota_enabled(inode->i_sb)) - inode->i_sb->dq_op->free_inode(inode, 1); + if (dqh_any_quota_enabled(inode->i_dqh)) + inode->i_dqh->dqh_qop->free_inode(inode, 1); } static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) { - if 
(sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { + if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode)) { DQUOT_INIT(inode); - if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->transfer(inode, iattr) == NO_QUOTA) return 1; } return 0; } /* The following two functions cannot be called inside a transaction */ -#define DQUOT_SYNC(sb) sync_dquots(sb, -1) +#define DQUOT_SYNC(hash) sync_dquots(hash, -1) -static __inline__ int DQUOT_OFF(struct super_block *sb) +static __inline__ int DQUOT_OFF(struct dqhash *hash) { int ret = -ENOSYS; - if (sb_any_quota_enabled(sb) && sb->s_qcop && sb->s_qcop->quota_off) - ret = sb->s_qcop->quota_off(sb, -1); + if (dqh_any_quota_enabled(hash) && hash->dqh_qcop && + hash->dqh_qcop->quota_off) + ret = hash->dqh_qcop->quota_off(hash, -1); return ret; } @@ -195,8 +199,8 @@ #define DQUOT_DROP(inode) do { } while(0) #define DQUOT_ALLOC_INODE(inode) (0) #define DQUOT_FREE_INODE(inode) do { } while(0) -#define DQUOT_SYNC(sb) do { } while(0) -#define DQUOT_OFF(sb) do { } while(0) +#define DQUOT_SYNC(hash) do { } while(0) +#define DQUOT_OFF(hash) do { } while(0) #define DQUOT_TRANSFER(inode, iattr) (0) extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { Index: linux-2.6.14/include/linux/reiserfs_fs.h =================================================================== --- linux-2.6.14.orig/include/linux/reiserfs_fs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/reiserfs_fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -829,6 +829,18 @@ #define REISERFS_COMPR_FL EXT2_COMPR_FL #define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL +/* unfortunately reiserfs sdattr is only 16 bit */ +#define REISERFS_BARRIER_FL (EXT2_BARRIER_FL >> 16) +#define REISERFS_IUNLINK_FL (EXT2_IUNLINK_FL >> 16) + +#ifdef CONFIG_VSERVER_LEGACY +#define REISERFS_FL_USER_VISIBLE (REISERFS_IUNLINK_FL|0x80FF) +#define REISERFS_FL_USER_MODIFIABLE (REISERFS_IUNLINK_FL|0x80FF) +#else 
+#define REISERFS_FL_USER_VISIBLE 0x80FF +#define REISERFS_FL_USER_MODIFIABLE 0x80FF +#endif + /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ REISERFS_SYNC_FL | \ Index: linux-2.6.14/include/linux/reiserfs_fs_sb.h =================================================================== --- linux-2.6.14.orig/include/linux/reiserfs_fs_sb.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/reiserfs_fs_sb.h 2005-10-31 11:05:45.000000000 -0600 @@ -457,6 +457,7 @@ REISERFS_POSIXACL, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, + REISERFS_TAGXID, /* Actions on error */ REISERFS_ERROR_PANIC, Index: linux-2.6.14/include/linux/sched.h =================================================================== --- linux-2.6.14.orig/include/linux/sched.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/sched.h 2005-10-31 11:05:45.000000000 -0600 @@ -34,6 +34,7 @@ #include #include #include +#include #include /* For AT_VECTOR_SIZE */ @@ -60,6 +61,7 @@ #define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ #define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ +#define CLONE_KTHREAD 0x10000000 /* clone a kernel thread */ /* * List of flags we want to share for kernel threads, @@ -127,6 +129,7 @@ #define EXIT_DEAD 32 /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 +#define TASK_ONHOLD 128 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -249,11 +252,12 @@ extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); -#define set_mm_counter(mm, member, value) (mm)->_##member = (value) +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value) +#define set_mm_counter(mm, member, value) vx_ ## member ## pages_sub((mm), 
((mm)->_##member - value)) #define get_mm_counter(mm, member) ((mm)->_##member) -#define add_mm_counter(mm, member, value) (mm)->_##member += (value) -#define inc_mm_counter(mm, member) (mm)->_##member++ -#define dec_mm_counter(mm, member) (mm)->_##member-- +#define add_mm_counter(mm, member, value) vx_ ## member ## pages_add((mm), (value)) +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm)) +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm)) typedef unsigned long mm_counter_t; struct mm_struct { @@ -296,6 +300,7 @@ /* Architecture-specific MM context */ mm_context_t context; + struct vx_info *mm_vx_info; /* Token based thrashing protection. */ unsigned long swap_token_time; @@ -471,9 +476,10 @@ /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; + xid_t xid; }; -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(xid_t, uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -768,6 +774,14 @@ struct audit_context *audit_context; seccomp_t seccomp; +/* vserver context data */ + xid_t xid; + struct vx_info *vx_info; + +/* vserver network data */ + nid_t nid; + struct nx_info *nx_info; + /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; @@ -956,13 +970,19 @@ extern struct mm_struct init_mm; -#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) + +#define find_task_by_real_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, nr) +#define find_task_by_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, \ + vx_rmap_pid(nr)) + extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(xid_t, uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); @@ -1056,15 +1076,28 @@ #ifdef CONFIG_SECURITY /* code is in security.c */ extern int capable(int cap); +extern int vx_capable(int cap, int ccap); #else static inline int capable(int cap) { + if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap)) + return 0; if (cap_raised(current->cap_effective, cap)) { current->flags |= PF_SUPERPRIV; return 1; } return 0; } + +static inline int vx_capable(int cap, int ccap) +{ + if (cap_raised(current->cap_effective, cap) && + vx_ccaps(ccap)) { + current->flags |= PF_SUPERPRIV; + return 1; + } + return 0; +} #endif /* Index: linux-2.6.14/include/linux/security.h =================================================================== --- linux-2.6.14.orig/include/linux/security.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/security.h 2005-10-31 11:05:45.000000000 -0600 @@ -81,6 +81,7 @@ struct nfsctl_arg; struct sched_param; struct swap_info_struct; +// struct dqhash; /* bprm_apply_creds unsafe reasons */ #define LSM_UNSAFE_SHARE 1 @@ -1025,7 +1026,7 @@ int (*acct) (struct file * file); int (*sysctl) (struct ctl_table * table, int op); int (*capable) (struct task_struct * tsk, int cap); - int (*quotactl) (int cmds, int type, int id, struct super_block * sb); + int (*quotactl) (int cmds, int type, int id, struct dqhash *); int (*quota_on) (struct dentry * dentry); int (*syslog) (int type); int (*settime) (struct timespec *ts, struct timezone *tz); @@ -1259,9 +1260,9 @@ } static inline int security_quotactl (int cmds, int type, int id, - struct super_block *sb) + struct dqhash *hash) { - return security_ops->quotactl (cmds, type, id, sb); + return security_ops->quotactl (cmds, type, id, hash); } static inline int security_quota_on (struct dentry * dentry) @@ -1966,7 +1967,7 @@ } static inline int security_quotactl 
(int cmds, int type, int id, - struct super_block * sb) + struct dqhash * hash) { return 0; } Index: linux-2.6.14/include/linux/serial.h =================================================================== --- linux-2.6.14.orig/include/linux/serial.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/serial.h 2005-10-31 11:05:45.000000000 -0600 @@ -39,12 +39,12 @@ int xmit_fifo_size; int custom_divisor; int baud_base; - unsigned short close_delay; + unsigned int close_delay; char io_type; char reserved_char[1]; int hub6; - unsigned short closing_wait; /* time to wait before closing */ - unsigned short closing_wait2; /* no longer used... */ + unsigned int closing_wait; /* time to wait before closing */ + unsigned int closing_wait2; /* no longer used... */ unsigned char *iomem_base; unsigned short iomem_reg_shift; unsigned int port_high; Index: linux-2.6.14/include/linux/serialP.h =================================================================== --- linux-2.6.14.orig/include/linux/serialP.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/serialP.h 2005-10-31 11:05:45.000000000 -0600 @@ -41,8 +41,8 @@ int count; u8 *iomem_base; u16 iomem_reg_shift; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ + unsigned int close_delay; + unsigned int closing_wait; /* time to wait before closing */ struct async_icount icount; int io_type; struct async_struct *info; @@ -63,8 +63,8 @@ int quot; int x_char; /* xon/xoff character */ int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; /* obsolete */ + unsigned int closing_wait; + unsigned int closing_wait2; /* obsolete */ int IER; /* Interrupt Enable Register */ int MCR; /* Modem control register */ int LCR; /* Line control register */ Index: linux-2.6.14/include/linux/shmem_fs.h =================================================================== --- linux-2.6.14.orig/include/linux/shmem_fs.h 2005-10-27 19:02:08.000000000 -0500 
+++ linux-2.6.14/include/linux/shmem_fs.h 2005-10-31 11:05:45.000000000 -0600 @@ -8,6 +8,9 @@ #define SHMEM_NR_DIRECT 16 +#define TMPFS_SUPER_MAGIC 0x01021994 + + struct shmem_inode_info { spinlock_t lock; unsigned long flags; Index: linux-2.6.14/include/linux/stat.h =================================================================== --- linux-2.6.14.orig/include/linux/stat.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/stat.h 2005-10-31 11:05:45.000000000 -0600 @@ -63,6 +63,7 @@ unsigned int nlink; uid_t uid; gid_t gid; + xid_t xid; dev_t rdev; loff_t size; struct timespec atime; Index: linux-2.6.14/include/linux/sunrpc/auth.h =================================================================== --- linux-2.6.14.orig/include/linux/sunrpc/auth.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/sunrpc/auth.h 2005-10-31 11:05:45.000000000 -0600 @@ -28,6 +28,7 @@ struct auth_cred { uid_t uid; gid_t gid; + xid_t xid; struct group_info *group_info; }; Index: linux-2.6.14/include/linux/sunrpc/clnt.h =================================================================== --- linux-2.6.14.orig/include/linux/sunrpc/clnt.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/sunrpc/clnt.h 2005-10-31 11:05:45.000000000 -0600 @@ -52,7 +52,8 @@ cl_chatty : 1,/* be verbose */ cl_autobind : 1,/* use getport() */ cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_dead : 1,/* abandoned */ + cl_tagxid : 1;/* do xid tagging */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ struct rpc_portmap * cl_pmap; /* port mapping */ Index: linux-2.6.14/include/linux/syscalls.h =================================================================== --- linux-2.6.14.orig/include/linux/syscalls.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/syscalls.h 2005-10-31 11:05:45.000000000 -0600 @@ -291,6 +291,8 @@ asmlinkage long sys_unlink(const char __user *pathname); asmlinkage long sys_rename(const char __user 
*oldname, const char __user *newname); +asmlinkage long sys_copyfile(const char __user *from, const char __user *to, + umode_t mode); asmlinkage long sys_chmod(const char __user *filename, mode_t mode); asmlinkage long sys_fchmod(unsigned int fd, mode_t mode); Index: linux-2.6.14/include/linux/sysctl.h =================================================================== --- linux-2.6.14.orig/include/linux/sysctl.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/sysctl.h 2005-10-31 11:05:45.000000000 -0600 @@ -146,6 +146,7 @@ KERN_RANDOMIZE=68, /* int: randomize virtual address space */ KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ + KERN_VSHELPER=71, /* string: path to vshelper policy agent */ }; @@ -832,6 +833,9 @@ typedef int proc_handler (ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); +typedef int virt_handler (struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp); + extern int proc_dostring(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec(ctl_table *, int, struct file *, @@ -913,6 +917,7 @@ mode_t mode; ctl_table *child; proc_handler *proc_handler; /* Callback for text formatting */ + virt_handler *virt_handler; /* Context virtualization */ ctl_handler *strategy; /* Callback function for all r/w */ struct proc_dir_entry *de; /* /proc control block */ void *extra1; Index: linux-2.6.14/include/linux/sysfs.h =================================================================== --- linux-2.6.14.orig/include/linux/sysfs.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/sysfs.h 2005-10-31 11:05:45.000000000 -0600 @@ -12,6 +12,8 @@ #include +#define SYSFS_SUPER_MAGIC 0x62656572 + struct kobject; struct module; Index: linux-2.6.14/include/linux/types.h =================================================================== --- 
linux-2.6.14.orig/include/linux/types.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/linux/types.h 2005-10-31 11:05:45.000000000 -0600 @@ -36,6 +36,8 @@ typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; +typedef unsigned int xid_t; +typedef unsigned int nid_t; #ifdef CONFIG_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ Index: linux-2.6.14/include/linux/vroot.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vroot.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,45 @@ + +/* + * include/linux/vroot.h + * + * written by Herbert Pötzl, 9/11/2002 + * ported to 2.6 by Herbert Pötzl, 30/12/2004 + * + * Copyright (C) 2002-2005 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. + */ + +#ifndef _LINUX_VROOT_H +#define _LINUX_VROOT_H + + +#ifdef __KERNEL__ + +/* Possible states of device */ +enum { + Vr_unbound, + Vr_bound, +}; + +struct vroot_device { + int vr_number; + int vr_refcnt; + + struct semaphore vr_ctl_mutex; + struct block_device *vr_device; + int vr_state; +}; + +#endif /* __KERNEL__ */ + +#define MAX_VROOT_DEFAULT 8 + +/* + * IOCTL commands --- we will commandeer 0x56 ('V') + */ + +#define VROOT_SET_DEV 0x5600 +#define VROOT_CLR_DEV 0x5601 + +#endif /* _LINUX_VROOT_H */ Index: linux-2.6.14/include/linux/vs_base.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_base.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,104 @@ +#ifndef _VX_VS_BASE_H +#define _VX_VS_BASE_H + + +#include "vserver/context.h" + + +#define vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define vx_check(c,m) __vx_check(vx_current_xid(),c,m) + +#define vx_weak_check(c,m) ((m) ? 
vx_check(c,m) : 1) + + +/* + * check current context for ADMIN/WATCH and + * optionally against supplied argument + */ +static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && + (id == cid)) + return 1; + } + if (mode & VX_ATR_MASK) { + if ((mode & VX_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode & VX_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + return (((mode & VX_ADMIN) && (cid == 0)) || + ((mode & VX_WATCH) && (cid == 1)) || + ((mode & VX_HOSTID) && (id == 0))); +} + + +#define __vx_state(v) ((v) ? ((v)->vx_state) : 0) + +#define vx_info_state(v,m) (__vx_state(v) & (m)) + + +/* generic flag merging */ + +#define vx_check_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m))) + +#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m))) + +#define vx_check_bit(v,n) ((v) & (1LL << (n))) + + +/* context flags */ + +#define __vx_flags(v) ((v) ? (v)->vx_flags : 0) + +#define vx_current_flags() __vx_flags(current->vx_info) + +#define vx_info_flags(v,m,f) \ + vx_check_flags(__vx_flags(v),(m),(f)) + +#define task_vx_flags(t,m,f) \ + ((t) && vx_info_flags((t)->vx_info, (m), (f))) + +#define vx_flags(m,f) vx_info_flags(current->vx_info,(m),(f)) + + +/* context caps */ + +#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0) + +#define vx_current_ccaps() __vx_ccaps(current->vx_info) + +#define vx_info_ccaps(v,c) (__vx_ccaps(v) & (c)) + +#define vx_ccaps(c) vx_info_ccaps(current->vx_info,(c)) + + +#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 ) + +#define vx_info_mcaps(v,c) (__vx_mcaps(v) & (c)) + +#define vx_mcaps(c) vx_info_mcaps(current->vx_info,(c)) + + +#define vx_current_bcaps() \ + (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ?
\ + current->vx_info->vx_bcaps : cap_bset) + + +#define vx_current_initpid(n) \ + (current->vx_info && \ + (current->vx_info->vx_initpid == (n))) + + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_context.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_context.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,219 @@ +#ifndef _VX_VS_CONTEXT_H +#define _VX_VS_CONTEXT_H + + +#include +#include "vserver/debug.h" + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return NULL; + + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_get_vx_info(vxi, _here); + + atomic_inc(&vxi->vx_usecnt); + return vxi; +} + + +extern void free_vx_info(struct vx_info *); + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline void __put_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_put_vx_info(vxi, _here); + + if (atomic_dec_and_test(&vxi->vx_usecnt)) + free_vx_info(vxi); +} + + +#define init_vx_info(p,i) __init_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (vxi) { + vxlprintk(VXD_CBIT(xid, 3), + "init_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_init_vx_info(vxi, vxp, _here); + + atomic_inc(&vxi->vx_usecnt); + } + *vxp = vxi; +} + + +#define set_vx_info(p,i) 
__set_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_set_vx_info(vxi, vxp, _here); + + atomic_inc(&vxi->vx_usecnt); + vxo = xchg(vxp, vxi); + BUG_ON(vxo); +} + + +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + vxo = xchg(vxp, NULL); + if (!vxo) + return; + + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + _file, _line); + __vxh_clr_vx_info(vxo, vxp, _here); + + if (atomic_dec_and_test(&vxo->vx_usecnt)) + free_vx_info(vxo); +} + + +#define claim_vx_info(v,p) \ + __claim_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __claim_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + __vxh_claim_vx_info(vxi, task, _here); + + atomic_inc(&vxi->vx_tasks); +} + + +extern void unhash_vx_info(struct vx_info *); + +#define release_vx_info(v,p) \ + __release_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __release_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + __vxh_release_vx_info(vxi, task, _here); + + might_sleep(); + + if 
(atomic_dec_and_test(&vxi->vx_tasks)) + unhash_vx_info(vxi); +} + + +#define task_get_vx_info(p) \ + __task_get_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxi; + + task_lock(p); + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", + p, _file, _line); + vxi = __get_vx_info(p->vx_info, _file, _line, _here); + task_unlock(p); + return vxi; +} + + +static inline void __wakeup_vx_info(struct vx_info *vxi) +{ + if (waitqueue_active(&vxi->vx_wait)) + wake_up_interruptible(&vxi->vx_wait); +} + + +#define enter_vx_info(v,s) __enter_vx_info(v,s,__FILE__,__LINE__) + +static inline void __enter_vx_info(struct vx_info *vxi, + struct vx_info_save *vxis, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]", + vxi, vxi ? vxi->vx_id : 0, vxis, current, + current->xid, current->vx_info, _file, _line); + vxis->vxi = xchg(&current->vx_info, vxi); + vxis->xid = current->xid; + current->xid = vxi ?
vxi->vx_id : 0; +} + +#define leave_vx_info(s) __leave_vx_info(s,__FILE__,__LINE__) + +static inline void __leave_vx_info(struct vx_info_save *vxis, + const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]", + vxis, vxis->xid, vxis->vxi, current, + current->xid, current->vx_info, _file, _line); + xchg(&current->vx_info, vxis->vxi); + current->xid = vxis->xid; +} + + +static inline void __enter_vx_admin(struct vx_info_save *vxis) +{ + vxis->vxi = xchg(&current->vx_info, NULL); + vxis->xid = current->xid; + current->xid = 0; +} + +static inline void __leave_vx_admin(struct vx_info_save *vxis) +{ + if (vxis->vxi) + xchg(&current->vx_info, vxis->vxi); + current->xid = vxis->xid; +} + +extern void exit_vx_info(struct task_struct *); + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_cvirt.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_cvirt.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,109 @@ +#ifndef _VX_VS_CVIRT_H +#define _VX_VS_CVIRT_H + + +#include "vserver/cvirt.h" +#include "vserver/debug.h" + + +/* utsname virtualization */ + +static inline struct new_utsname *vx_new_utsname(void) +{ + if (current->vx_info) + return &current->vx_info->cvirt.utsname; + return &system_utsname; +} + +#define vx_new_uts(x) ((vx_new_utsname())->x) + + +/* pid faking stuff */ + + +#define vx_info_map_pid(v,p) \ + __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p) +#define vx_map_pid(p) vx_info_map_pid(current->vx_info, p) +#define vx_map_tgid(p) vx_map_pid(p) + +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_map_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid && pid ==
vxi->vx_initpid)?1:pid, + func, file, line); + if (pid == 0) + return 0; + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} + +#define vx_info_rmap_pid(v,p) \ + __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p) +#define vx_rmap_tgid(p) vx_rmap_pid(p) + +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_rmap_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + func, file, line); + if ((pid == 1) && vxi->vx_initpid) + return vxi->vx_initpid; + if (pid == vxi->vx_initpid) + return ~0U; + } + return pid; +} + + +static inline void vx_activate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_inc(&vxi->cvirt.nr_running); + } +} + +static inline void vx_deactivate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_dec(&vxi->cvirt.nr_running); + } +} + +static inline void vx_uninterruptible_inc(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_inc(&vxi->cvirt.nr_uninterruptible); +} + +static inline void vx_uninterruptible_dec(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_dec(&vxi->cvirt.nr_uninterruptible); +} + + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_dlimit.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_dlimit.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,214 @@ +#ifndef _VX_VS_DLIMIT_H +#define _VX_VS_DLIMIT_H + + +#include "vserver/dlimit.h" +#include "vserver/debug.h" + + +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) + 
+static inline struct dl_info *__get_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return NULL; + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + atomic_inc(&dli->dl_usecnt); + return dli; +} + + +#define free_dl_info(i) \ + call_rcu(&i->dl_rcu, rcu_free_dl_info); + +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) + +static inline void __put_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return; + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&dli->dl_usecnt)) + free_dl_info(dli); +} + + +#define __dlimit_char(d) ((d)?'*':' ') + +static inline int __dl_alloc_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + int ret = 0; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_space_used + nr > dli->dl_space_total); + if (!ret) + dli->dl_space_used += nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "ALLOC (%p,#%d)%c %lld bytes (%d)", + sb, xid, __dlimit_char(dli), (long long)nr, + ret, file, line); + return ret; +} + +static inline void __dl_free_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *_file, int _line) +{ + struct dl_info *dli = NULL; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_space_used > nr) + dli->dl_space_used -= nr; + else + dli->dl_space_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "FREE (%p,#%d)%c %lld bytes", + sb, xid, __dlimit_char(dli), (long long)nr, + _file, _line); +} + +static inline int __dl_alloc_inode(struct super_block 
*sb, + xid_t xid, const char *_file, int _line) +{ + struct dl_info *dli; + int ret = 0; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_inodes_used >= dli->dl_inodes_total); + if (!ret) + dli->dl_inodes_used++; +#if 0 + else + vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d", + sb, xid, + dli->dl_inodes_used, dli->dl_inodes_total, + file, line); +#endif + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "ALLOC (%p,#%d)%c inode (%d)", + sb, xid, __dlimit_char(dli), ret, _file, _line); + return ret; +} + +static inline void __dl_free_inode(struct super_block *sb, + xid_t xid, const char *_file, int _line) +{ + struct dl_info *dli; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_used > 1) + dli->dl_inodes_used--; + else + dli->dl_inodes_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "FREE (%p,#%d)%c inode", + sb, xid, __dlimit_char(dli), _file, _line); +} + +static inline void __dl_adjust_block(struct super_block *sb, xid_t xid, + unsigned int *free_blocks, unsigned int *root_blocks, + const char *_file, int _line) +{ + struct dl_info *dli; + uint64_t broot, bfree; + + dli = locate_dl_info(sb, xid); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + broot = (dli->dl_space_total - + (dli->dl_space_total >> 10) * dli->dl_nrlmult) + >> sb->s_blocksize_bits; + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + spin_unlock(&dli->dl_lock); + + vxlprintk(VXD_CBIT(dlim, 2), + "ADJUST: %lld,%lld on %d,%d [mult=%d]", + (long long)bfree, (long long)broot, + *free_blocks, *root_blocks, dli->dl_nrlmult, + _file, _line); + if (free_blocks) { + if (*free_blocks > bfree) + *free_blocks = bfree; + } + if (root_blocks) { + if (*root_blocks > broot) + *root_blocks = broot; + } + put_dl_info(dli); +} + +#define DLIMIT_ALLOC_SPACE(in, bytes) \ + 
__dl_alloc_space((in)->i_sb, (in)->i_xid, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_SPACE(in, bytes) \ + __dl_free_space((in)->i_sb, (in)->i_xid, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_ALLOC_BLOCK(in, nr) \ + __dl_alloc_space((in)->i_sb, (in)->i_xid, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_BLOCK(in, nr) \ + __dl_free_space((in)->i_sb, (in)->i_xid, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + + +#define DLIMIT_ALLOC_INODE(in) \ + __dl_alloc_inode((in)->i_sb, (in)->i_xid, __FILE__, __LINE__ ) + +#define DLIMIT_FREE_INODE(in) \ + __dl_free_inode((in)->i_sb, (in)->i_xid, __FILE__, __LINE__ ) + + +#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb) \ + __dl_adjust_block(sb, xid, fb, rb, __FILE__, __LINE__ ) + + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_limit.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_limit.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,161 @@ +#ifndef _VX_VS_LIMIT_H +#define _VX_VS_LIMIT_H + + +#include "vserver/limit.h" +#include "vserver/debug.h" + + +#define vx_acc_cres(v,d,p,r) \ + __vx_acc_cres(v, r, d, p, __FILE__, __LINE__) + +#define vx_acc_cres_cond(x,d,p,r) \ + __vx_acc_cres(((x) == vx_current_xid()) ? 
current->vx_info : 0, \ + r, d, p, __FILE__, __LINE__) + + +static inline void __vx_acc_cres(struct vx_info *vxi, + int res, int dir, void *_data, char *_file, int _line) +{ + vxlprintk(VXD_RLIMIT_COND(res), + "vx_acc_cres[%5d,%s,%2d]: %5d%s (%p)", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + (dir>0)?"++":"--", _data, _file, _line); + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); + } +} + +#define vx_add_cres(v,a,p,r) \ + __vx_add_cres(v, r, a, p, __FILE__, __LINE__) +#define vx_sub_cres(v,a,p,r) vx_add_cres(v,-(a),p,r) + +#define vx_add_cres_cond(x,a,p,r) \ + __vx_add_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \ + r, a, p, __FILE__, __LINE__) +#define vx_sub_cres_cond(x,a,p,r) vx_add_cres_cond(x,-(a),p,r) + + +static inline void __vx_add_cres(struct vx_info *vxi, + int res, int amount, void *_data, char *_file, int _line) +{ + vxlprintk(VXD_RLIMIT_COND(res), + "vx_add_cres[%5d,%s,%2d]: %5d += %5d (%p)", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + amount, _data, _file, _line); + if (amount == 0) + return; + if (vxi) + atomic_add(amount, &vxi->limit.rcur[res]); +} + + +/* process and file limits */ + +#define vx_nproc_inc(p) \ + vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC) + +#define vx_nproc_dec(p) \ + vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC) + +#define vx_files_inc(f) \ + vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE) + +#define vx_files_dec(f) \ + vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE) + +#define vx_locks_inc(l) \ + vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS) + +#define vx_locks_dec(l) \ + vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS) + +#define vx_openfd_inc(f) \ + vx_acc_cres(current->vx_info, 1, (void *)(long)(f), VLIMIT_OPENFD) + +#define vx_openfd_dec(f) \ + vx_acc_cres(current->vx_info,-1, (void *)(long)(f), VLIMIT_OPENFD) + + +#define vx_cres_avail(v,n,r) \ 
+ __vx_cres_avail(v, r, n, __FILE__, __LINE__) + +static inline int __vx_cres_avail(struct vx_info *vxi, + int res, int num, char *_file, int _line) +{ + unsigned long value; + + vxlprintk(VXD_RLIMIT_COND(res), + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + num, _file, _line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + num <= vxi->limit.rlim[res]) + return 1; + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_nproc_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NPROC) + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NOFILE) + +#define vx_locks_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_LOCKS) + +#define vx_openfd_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_OPENFD) + + +/* socket limits */ + +#define vx_sock_inc(s) \ + vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK) + +#define vx_sock_dec(s) \ + vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_NSOCK) + + +/* ipc resource limits */ + +#define vx_ipcmsg_add(v,u,a) \ + vx_add_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_sub(v,u,a) \ + vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_avail(v,a) \ + vx_cres_avail(v, a, RLIMIT_MSGQUEUE) + + +#define vx_ipcshm_add(v,k,a) \ + vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_sub(v,k,a) \ + vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_avail(v,a) \ + vx_cres_avail(v, a, VLIMIT_SHMEM) + + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_memory.h =================================================================== --- /dev/null 1970-01-01 
00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_memory.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,140 @@ +#ifndef _VX_VS_MEMORY_H +#define _VX_VS_MEMORY_H + + +#include "vserver/limit.h" +#include "vserver/debug.h" + + +#define vx_acc_page(m,d,v,r) \ + __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__) + +static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi, + int res, int dir, char *file, int line) +{ /* account one page on *v (if non-NULL) and on vxi->limit.rcur[res] (if vxi): dir > 0 adds, anything else removes */ + if (VXD_RLIMIT(res, RLIMIT_RSS) || + VXD_RLIMIT(res, VLIMIT_ANON) || + VXD_RLIMIT(res, RLIMIT_AS) || + VXD_RLIMIT(res, RLIMIT_MEMLOCK)) + vxlprintk(1, "vx_acc_page[%5d,%s,%2d]: %5d%s", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + ((dir>0)?"++":"--"), file, line); /* same sign test as the inc/dec below, so a removal (dir <= 0) is logged as "--" */ + if (v) { + if (dir > 0) + ++(*v); + else + --(*v); + } + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); + } +} + + +#define vx_acc_pages(m,p,v,r) \ + __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__) + +static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi, + int res, int pages, char *_file, int _line) +{ + if (VXD_RLIMIT(res, RLIMIT_RSS) || + VXD_RLIMIT(res, VLIMIT_ANON) || + VXD_RLIMIT(res, RLIMIT_AS) || + VXD_RLIMIT(res, RLIMIT_MEMLOCK)) + vxlprintk(1, "vx_acc_pages[%5d,%s,%2d]: %5d += %5d", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + pages, _file, _line); + if (pages == 0) + return; + if (v) + *v += pages; + if (vxi) + atomic_add(pages, &vxi->limit.rcur[res]); +} + + + +#define vx_acc_vmpage(m,d) \ + vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) \ + vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspage(m,d) \ + vx_acc_page(m, d, _rss, RLIMIT_RSS) +#define vx_acc_anon_rsspage(m,d) \ + vx_acc_page(m, d, _anon_rss, VLIMIT_ANON) + +#define vx_acc_vmpages(m,p) \ + vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) \ +
vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspages(m,p) \ + vx_acc_pages(m, p, _rss, RLIMIT_RSS) +#define vx_acc_anon_rsspages(m,p) \ + vx_acc_pages(m, p, _anon_rss, VLIMIT_ANON) + +#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1) +#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1) +#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p) +#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p)) + +#define vx_anon_rsspages_inc(m) vx_acc_anon_rsspage(m, 1) +#define vx_anon_rsspages_dec(m) vx_acc_anon_rsspage(m,-1) +#define vx_anon_rsspages_add(m,p) vx_acc_anon_rsspages(m, p) +#define vx_anon_rsspages_sub(m,p) vx_acc_anon_rsspages(m,-(p)) + + +#define vx_pages_avail(m,p,r) \ + __vx_pages_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__) + +static inline int __vx_pages_avail(struct vx_info *vxi, + int res, int pages, char *_file, int _line) +{ + unsigned long value; + + if (VXD_RLIMIT(res, RLIMIT_RSS) || + VXD_RLIMIT(res, RLIMIT_AS) || + VXD_RLIMIT(res, RLIMIT_MEMLOCK)) + vxlprintk(1, "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + pages, _file, _line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + pages <= vxi->limit.rlim[res]) + return 1; + 
atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS) +#define vx_anonpages_avail(m,p) vx_pages_avail(m, p, VLIMIT_ANON) + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_network.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_network.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,216 @@ +#ifndef _NX_VS_NETWORK_H +#define _NX_VS_NETWORK_H + + +#include "vserver/network.h" +#include "vserver/debug.h" + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) +{ + if (!nxi) + return NULL; + + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + return nxi; +} + + +extern void free_nx_info(struct nx_info *); + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxi->nx_usecnt)) + free_nx_info(nxi); +} + + +#define init_nx_info(p,i) __init_nx_info(p,i,__FILE__,__LINE__) + +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + if (nxi) { + vxlprintk(VXD_CBIT(nid, 3), + "init_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + } + *nxp = nxi; +} + + +#define set_nx_info(p,i) 
__set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + struct nx_info *nxo; + + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + nxo = xchg(nxp, nxi); + BUG_ON(nxo); +} + +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + struct nx_info *nxo; + + nxo = xchg(nxp, NULL); + if (!nxo) + return; + + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxo->nx_usecnt)) + free_nx_info(nxo); +} + + +#define claim_nx_info(v,p) __claim_nx_info(v,p,__FILE__,__LINE__) + +static inline void __claim_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + atomic_inc(&nxi->nx_tasks); +} + + +extern void unhash_nx_info(struct nx_info *); + +#define release_nx_info(v,p) __release_nx_info(v,p,__FILE__,__LINE__) + +static inline void __release_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + might_sleep(); + + if (atomic_dec_and_test(&nxi->nx_tasks)) + unhash_nx_info(nxi); +} + + +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + 
+ task_lock(p); + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", + p, _file, _line); + nxi = __get_nx_info(p->nx_info, _file, _line); + task_unlock(p); + return nxi; +} + + +#define nx_task_nid(t) ((t)->nid) + +#define nx_current_nid() nx_task_nid(current) + +#define nx_check(c,m) __nx_check(nx_current_nid(),c,m) + +#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) + + +#define __nx_state(v) ((v) ? ((v)->nx_state) : 0) + +#define nx_info_state(v,m) (__nx_state(v) & (m)) + + +#define __nx_flags(v) ((v) ? (v)->nx_flags : 0) + +#define nx_current_flags() __nx_flags(current->nx_info) + +#define nx_info_flags(v,m,f) \ + vx_check_flags(__nx_flags(v),(m),(f)) + +#define task_nx_flags(t,m,f) \ + ((t) && nx_info_flags((t)->nx_info, (m), (f))) + +#define nx_flags(m,f) nx_info_flags(current->nx_info,(m),(f)) + + +/* context caps */ + +#define __nx_ncaps(v) ((v) ? (v)->nx_ncaps : 0) + +#define nx_current_ncaps() __nx_ncaps(current->nx_info) + +#define nx_info_ncaps(v,c) (__nx_ncaps(v) & (c)) + +#define nx_ncaps(c) nx_info_ncaps(current->nx_info,(c)) + + +static inline int addr_in_nx_info(struct nx_info *nxi, uint32_t addr) +{ /* returns 1 when nxi is NULL or addr equals one of the first nbipv4 entries of nxi->ipv4[], else 0 */ + int n,i; + + if (!nxi) + return 1; + + n = nxi->nbipv4; + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == addr) + return 1; + } + return 0; +} + +static inline void exit_nx_info(struct task_struct *p) +{ + if (p->nx_info) + release_nx_info(p->nx_info, p); +} + + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_sched.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_sched.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,96 @@ +#ifndef _VX_VS_SCHED_H +#define _VX_VS_SCHED_H + +#ifndef CONFIG_VSERVER +#warning config options missing +#endif + +#include "vserver/sched.h" + + +#define VAVAVOOM_RATIO 50 + +#define MAX_PRIO_BIAS 20 +#define MIN_PRIO_BIAS -20 + + +static inline int vx_tokens_avail(struct vx_info *vxi) +{ + return
atomic_read(&vxi->sched.tokens); +} + +static inline void vx_consume_token(struct vx_info *vxi) +{ + atomic_dec(&vxi->sched.tokens); +} + +static inline int vx_need_resched(struct task_struct *p) +{ +#ifdef CONFIG_VSERVER_HARDCPU + struct vx_info *vxi = p->vx_info; +#endif + int slice = --p->time_slice; + +#ifdef CONFIG_VSERVER_HARDCPU + if (vxi) { + int tokens; + + if ((tokens = vx_tokens_avail(vxi)) > 0) + vx_consume_token(vxi); + /* for tokens > 0, one token was consumed */ + if (tokens < 2) + return 1; + } +#endif + return (slice == 0); +} + + +static inline void vx_onhold_inc(struct vx_info *vxi) +{ + int onhold = atomic_read(&vxi->cvirt.nr_onhold); + + atomic_inc(&vxi->cvirt.nr_onhold); + if (!onhold) + vxi->cvirt.onhold_last = jiffies; +} + +static inline void __vx_onhold_update(struct vx_info *vxi) +{ + int cpu = smp_processor_id(); + uint32_t now = jiffies; + uint32_t delta = now - vxi->cvirt.onhold_last; + + vxi->cvirt.onhold_last = now; + vxi->sched.cpu[cpu].hold_ticks += delta; +} + +static inline void vx_onhold_dec(struct vx_info *vxi) +{ + if (atomic_dec_and_test(&vxi->cvirt.nr_onhold)) + __vx_onhold_update(vxi); +} + +static inline void vx_account_user(struct vx_info *vxi, + cputime_t cputime, int nice) +{ + int cpu = smp_processor_id(); + + if (!vxi) + return; + vxi->sched.cpu[cpu].user_ticks += cputime; +} + +static inline void vx_account_system(struct vx_info *vxi, + cputime_t cputime, int idle) +{ + int cpu = smp_processor_id(); + + if (!vxi) + return; + vxi->sched.cpu[cpu].sys_ticks += cputime; +} + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vs_socket.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vs_socket.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,58 @@ +#ifndef _VX_VS_SOCKET_H +#define _VX_VS_SOCKET_H + + +#include "vserver/debug.h" + + +/* socket accounting */ + +#include + +static inline int 
vx_sock_type(int family) +{ + int type = 4; + + if (family > 0 && family < 3) + type = family; + else if (family == PF_INET6) + type = 3; + return type; +} + +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + + +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + +#else +#warning duplicate inclusion +#endif Index: linux-2.6.14/include/linux/vserver/context.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/context.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,167 @@ +#ifndef _VX_CONTEXT_H +#define _VX_CONTEXT_H + +#include + + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ + +#define VX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +/* context flags */ + +#define VXF_INFO_LOCK 0x00000001 +#define VXF_INFO_SCHED 0x00000002 +#define VXF_INFO_NPROC 0x00000004 +#define VXF_INFO_PRIVATE 0x00000008 + +#define VXF_INFO_INIT 0x00000010 +#define VXF_INFO_HIDE 0x00000020 +#define VXF_INFO_ULIMIT 0x00000040 +#define VXF_INFO_NSPACE 0x00000080 + +#define VXF_SCHED_HARD 0x00000100 +#define VXF_SCHED_PRIO 0x00000200 +#define VXF_SCHED_PAUSE 0x00000400 + +#define VXF_VIRT_MEM 0x00010000 
+#define VXF_VIRT_UPTIME 0x00020000 +#define VXF_VIRT_CPU 0x00040000 +#define VXF_VIRT_LOAD 0x00080000 + +#define VXF_HIDE_MOUNT 0x01000000 +#define VXF_HIDE_NETIF 0x02000000 + +#define VXF_STATE_SETUP (1ULL<<32) +#define VXF_STATE_INIT (1ULL<<33) + +#define VXF_SC_HELPER (1ULL<<36) +#define VXF_REBOOT_KILL (1ULL<<37) + +#define VXF_FORK_RSS (1ULL<<48) +#define VXF_PROLIFIC (1ULL<<49) + +#define VXF_IGNEG_NICE (1ULL<<52) + +#define VXF_ONE_TIME (0x0003ULL<<32) + +#define VXF_INIT_SET (VXF_STATE_SETUP|VXF_STATE_INIT) + + +/* context caps */ + +#define VXC_CAP_MASK 0x00000000 + +#define VXC_SET_UTSNAME 0x00000001 +#define VXC_SET_RLIMIT 0x00000002 + +#define VXC_RAW_ICMP 0x00000100 +#define VXC_SYSLOG 0x00001000 + +#define VXC_SECURE_MOUNT 0x00010000 +#define VXC_SECURE_REMOUNT 0x00020000 +#define VXC_BINARY_MOUNT 0x00040000 + +#define VXC_QUOTA_CTL 0x00100000 + + +/* context state changes */ + +enum { + VSC_STARTUP = 1, + VSC_SHUTDOWN, + + VSC_NETUP, + VSC_NETDOWN, +}; + + +#ifdef __KERNEL__ + +#include +#include +#include + +#include "limit_def.h" +#include "sched_def.h" +#include "cvirt_def.h" + +struct vx_info { + struct hlist_node vx_hlist; /* linked list of contexts */ + xid_t vx_id; /* context id */ + atomic_t vx_usecnt; /* usage count */ + atomic_t vx_tasks; /* tasks count */ + struct vx_info *vx_parent; /* parent context */ + int vx_state; /* context state */ + + struct namespace *vx_namespace; /* private namespace */ + struct fs_struct *vx_fs; /* private namespace fs */ + uint64_t vx_flags; /* context flags */ + uint64_t vx_bcaps; /* bounding caps (system) */ + uint64_t vx_ccaps; /* context caps (vserver) */ + + pid_t vx_initpid; /* PID of fake init process */ + + wait_queue_head_t vx_wait; /* context exit waitqueue */ + + struct _vx_limit limit; /* vserver limits */ + struct _vx_sched sched; /* vserver scheduler */ + struct _vx_cvirt cvirt; /* virtual/bias stuff */ + struct _vx_cacct cacct; /* context accounting */ + + char vx_name[65]; /* vserver name */ 
+}; + +struct vx_info_save { + struct vx_info *vxi; + xid_t xid; +}; + + +/* status flags */ + +#define VXS_HASHED 0x0001 +#define VXS_PAUSED 0x0010 +#define VXS_ONHOLD 0x0020 +#define VXS_SHUTDOWN 0x0100 +#define VXS_RELEASED 0x8000 + +/* check conditions */ + +#define VX_ADMIN 0x0001 +#define VX_WATCH 0x0002 +#define VX_HIDE 0x0004 +#define VX_HOSTID 0x0008 + +#define VX_IDENT 0x0010 +#define VX_EQUIV 0x0020 +#define VX_PARENT 0x0040 +#define VX_CHILD 0x0080 + +#define VX_ARG_MASK 0x00F0 + +#define VX_DYNAMIC 0x0100 +#define VX_STATIC 0x0200 + +#define VX_ATR_MASK 0x0F00 + + +extern void claim_vx_info(struct vx_info *, struct task_struct *); +extern void release_vx_info(struct vx_info *, struct task_struct *); + +extern struct vx_info *lookup_vx_info(int); +extern struct vx_info *lookup_or_create_vx_info(int); + +extern int get_xid_list(int, unsigned int *, int); +extern int xid_is_hashed(xid_t); + +extern int vx_migrate_task(struct task_struct *, struct vx_info *); + +extern long vs_state_change(struct vx_info *, unsigned int); + + +#endif /* __KERNEL__ */ +#else /* _VX_CONTEXT_H */ +#warning duplicate inclusion +#endif /* _VX_CONTEXT_H */ Index: linux-2.6.14/include/linux/vserver/context_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/context_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,79 @@ +#ifndef _VX_CONTEXT_CMD_H +#define _VX_CONTEXT_CMD_H + + +/* vinfo commands */ + +#define VCMD_task_xid VC_CMD(VINFO, 1, 0) + +#ifdef __KERNEL__ +extern int vc_task_xid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_vx_info VC_CMD(VINFO, 5, 0) + +struct vcmd_vx_info_v0 { + uint32_t xid; + uint32_t initpid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_vx_info(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* context commands */ + +#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0) +#define VCMD_ctx_create 
VC_CMD(VPROC, 1, 1) + +struct vcmd_ctx_create { + uint64_t flagword; +}; + +#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 0) + +#ifdef __KERNEL__ +extern int vc_ctx_create(uint32_t, void __user *); +extern int vc_ctx_migrate(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* flag commands */ + +#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0) +#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0) + +struct vcmd_ctx_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_cflags(uint32_t, void __user *); +extern int vc_set_cflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* context caps commands */ + +#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0) +#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0) + +struct vcmd_ctx_caps_v0 { + uint64_t bcaps; + uint64_t ccaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ccaps(uint32_t, void __user *); +extern int vc_set_ccaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_CONTEXT_CMD_H */ Index: linux-2.6.14/include/linux/vserver/cvirt.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/cvirt.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,25 @@ +#ifndef _VX_CVIRT_H +#define _VX_CVIRT_H + +#ifdef __KERNEL__ + +struct timespec; + +void vx_vsi_uptime(struct timespec *, struct timespec *); + + +struct vx_info; + +void vx_update_load(struct vx_info *); + + +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp); + + +int vx_do_syslog(int, char __user *, int); + +#endif /* __KERNEL__ */ +#else /* _VX_CVIRT_H */ +#warning duplicate inclusion +#endif /* _VX_CVIRT_H */ Index: linux-2.6.14/include/linux/vserver/cvirt_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/cvirt_cmd.h 2005-10-31 11:05:45.000000000 
-0600 @@ -0,0 +1,34 @@ +#ifndef _VX_CVIRT_CMD_H +#define _VX_CVIRT_CMD_H + +/* virtual host info name commands */ + +#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0) +#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0) + +struct vcmd_vhi_name_v0 { + uint32_t field; + char name[65]; +}; + + +enum vhi_name_field { + VHIN_CONTEXT=0, + VHIN_SYSNAME, + VHIN_NODENAME, + VHIN_RELEASE, + VHIN_VERSION, + VHIN_MACHINE, + VHIN_DOMAINNAME, +}; + + +#ifdef __KERNEL__ + +#include + +extern int vc_set_vhi_name(uint32_t, void __user *); +extern int vc_get_vhi_name(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_CVIRT_CMD_H */ Index: linux-2.6.14/include/linux/vserver/cvirt_def.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/cvirt_def.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,115 @@ +#ifndef _VX_CVIRT_DEF_H +#define _VX_CVIRT_DEF_H + +#include +#include +#include +#include +#include +#include + + +struct _vx_usage_stat { + uint64_t user; + uint64_t nice; + uint64_t system; + uint64_t softirq; + uint64_t irq; + uint64_t idle; + uint64_t iowait; +}; + +struct _vx_syslog { + wait_queue_head_t log_wait; + spinlock_t logbuf_lock; /* lock for the log buffer */ + + unsigned long log_start; /* next char to be read by syslog() */ + unsigned long con_start; /* next char to be sent to consoles */ + unsigned long log_end; /* most-recently-written-char + 1 */ + unsigned long logged_chars; /* #chars since last read+clear operation */ + + char log_buf[1024]; +}; + + +/* context sub struct */ + +struct _vx_cvirt { +// int max_threads; /* maximum allowed threads */ + atomic_t nr_threads; /* number of current threads */ + atomic_t nr_running; /* number of running threads */ + atomic_t nr_uninterruptible; /* number of uninterruptible threads */ + + atomic_t nr_onhold; /* processes on hold */ + uint32_t onhold_last; /* jiffies when put on hold */ + + struct timespec 
bias_idle; + struct timespec bias_uptime; /* context creation point */ + uint64_t bias_clock; /* offset in clock_t */ + + struct new_utsname utsname; + + spinlock_t load_lock; /* lock for the load averages */ + atomic_t load_updates; /* nr of load updates done so far */ + uint32_t load_last; /* last time load was calculated */ + uint32_t load[3]; /* load averages 1,5,15 */ + + atomic_t total_forks; /* number of forks so far */ + + struct _vx_usage_stat cpustat[NR_CPUS]; + + struct _vx_syslog syslog; +}; + + +#ifdef CONFIG_VSERVER_DEBUG + +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt) +{ + printk("\t_vx_cvirt:\n"); + printk("\t threads: %4d, %4d, %4d, %4d\n", + atomic_read(&cvirt->nr_threads), + atomic_read(&cvirt->nr_running), + atomic_read(&cvirt->nr_uninterruptible), + atomic_read(&cvirt->nr_onhold)); + /* add rest here */ + printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks)); +} + +#endif + + +struct _vx_sock_acc { + atomic_t count; + atomic_t total; +}; + +/* context sub struct */ + +struct _vx_cacct { + struct _vx_sock_acc sock[5][3]; +}; + +#ifdef CONFIG_VSERVER_DEBUG + +static inline void __dump_vx_cacct(struct _vx_cacct *cacct) +{ + int i,j; + + printk("\t_vx_cacct:"); /* NOTE(review): no trailing newline here, unlike the header line in __dump_vx_cvirt, so the first row continues on this line; confirm this is intended */ + for (i=0; i<5; i++) { + struct _vx_sock_acc *ptr = cacct->sock[i]; + + printk("\t [%d] =", i); + for (j=0; j<3; j++) { + printk(" [%d] = %8d, %8d", j, + atomic_read(&ptr[j].count), + atomic_read(&ptr[j].total)); + } + printk("\n"); + } +} + +#endif + +#endif /* _VX_CVIRT_DEF_H */ Index: linux-2.6.14/include/linux/vserver/debug.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/debug.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,297 @@ +#ifndef _VX_DEBUG_H +#define _VX_DEBUG_H + +#ifndef CONFIG_VSERVER +#warning config options missing +#endif + +#define VXD_CBIT(n,m) (vx_debug_ ## n & (1 << (m))) +#define VXD_CMIN(n,m) (vx_debug_ ## n > (m)) +#define
VXD_MASK(n,m) (vx_debug_ ## n & (m)) + +#define VXD_QPOS(v,p) (((uint32_t)(v) >> ((p)*8)) & 0xFF) +#define VXD_QUAD(v) VXD_QPOS(v,0), VXD_QPOS(v,1), \ + VXD_QPOS(v,2), VXD_QPOS(v,3) + +#define __FUNC__ __func__ + + +#ifdef CONFIG_VSERVER_DEBUG + +extern unsigned int vx_debug_switch; +extern unsigned int vx_debug_xid; +extern unsigned int vx_debug_nid; +extern unsigned int vx_debug_net; +extern unsigned int vx_debug_limit; +extern unsigned int vx_debug_dlim; +extern unsigned int vx_debug_quota; +extern unsigned int vx_debug_cvirt; +extern unsigned int vx_debug_misc; + + +#define VX_LOGLEVEL "vxD: " +#define VX_WARNLEVEL KERN_WARNING "vxW: " + +#define vxdprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f "\n" , ##x); \ + } while (0) + +#define vxlprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " @%s:%d\n", x); \ + } while (0) + +#define vxfprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \ + } while (0) + + +#define vxwprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_WARNLEVEL f "\n" , ##x); \ + } while (0) + + +#define vxd_path(d,m) \ + ({ static char _buffer[PATH_MAX]; \ + d_path((d), (m), _buffer, sizeof(_buffer)); }) + + +void dump_vx_info(struct vx_info *, int); +void dump_vx_info_inactive(int); + +#else /* CONFIG_VSERVER_DEBUG */ + +#define vx_debug_switch 0 +#define vx_debug_xid 0 +#define vx_debug_nid 0 +#define vx_debug_net 0 +#define vx_debug_limit 0 +#define vx_debug_dlim 0 +#define vx_debug_cvirt 0 + +#define vxdprintk(x...) do { } while (0) +#define vxlprintk(x...) do { } while (0) +#define vxfprintk(x...) do { } while (0) +#define vxwprintk(x...) 
do { } while (0) + +#define vxd_path "" + + +#endif /* CONFIG_VSERVER_DEBUG */ + + +/* history stuff */ + +#ifdef CONFIG_VSERVER_HISTORY + + +extern unsigned volatile int vxh_active; + +struct _vxhe_vxi { + struct vx_info *ptr; + unsigned xid; + unsigned usecnt; + unsigned tasks; +}; + +struct _vxhe_set_clr { + void *data; +}; + +struct _vxhe_loc_lookup { + unsigned arg; +}; + +enum { + VXH_UNUSED=0, + VXH_THROW_OOPS=1, + + VXH_GET_VX_INFO, + VXH_PUT_VX_INFO, + VXH_INIT_VX_INFO, + VXH_SET_VX_INFO, + VXH_CLR_VX_INFO, + VXH_CLAIM_VX_INFO, + VXH_RELEASE_VX_INFO, + VXH_ALLOC_VX_INFO, + VXH_DEALLOC_VX_INFO, + VXH_HASH_VX_INFO, + VXH_UNHASH_VX_INFO, + VXH_LOC_VX_INFO, + VXH_LOOKUP_VX_INFO, + VXH_CREATE_VX_INFO, +}; + +struct _vx_hist_entry { + void *loc; + unsigned short seq; + unsigned short type; + struct _vxhe_vxi vxi; + union { + struct _vxhe_set_clr sc; + struct _vxhe_loc_lookup ll; + }; +}; + +struct _vx_hist_entry *vxh_advance(void *loc); + + +static inline +void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi) +{ + entry->vxi.ptr = vxi; + if (vxi) { + entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt); + entry->vxi.tasks = atomic_read(&vxi->vx_tasks); + entry->vxi.xid = vxi->vx_id; + } +} + + +#define __HERE__ \ + ({ __label__ __vxh_label; __vxh_label:; &&__vxh_label; }) + +#define __VXH_BODY(__type, __data, __here) \ + struct _vx_hist_entry *entry; \ + \ + preempt_disable(); \ + entry = vxh_advance(__here); \ + __data; \ + entry->type = __type; \ + preempt_enable(); + + + /* pass vxi only */ + +#define __VXH_SMPL \ + __vxh_copy_vxi(entry, vxi) + +static inline +void __vxh_smpl(struct vx_info *vxi, int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_SMPL, __here) +} + + /* pass vxi and data (void *) */ + +#define __VXH_DATA \ + __vxh_copy_vxi(entry, vxi); \ + entry->sc.data = data + +static inline +void __vxh_data(struct vx_info *vxi, void *data, + int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_DATA, __here) +} + + /* pass vxi and arg 
(long) */ + +#define __VXH_LONG \ + __vxh_copy_vxi(entry, vxi); \ + entry->ll.arg = arg + +static inline +void __vxh_long(struct vx_info *vxi, long arg, + int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_LONG, __here) +} + + +static inline +void __vxh_throw_oops(void *__here) +{ + __VXH_BODY(VXH_THROW_OOPS, {}, __here); + /* prevent further acquisition */ + vxh_active = 0; +} + + +#define vxh_throw_oops() __vxh_throw_oops(__HERE__); + +#define __vxh_get_vx_info(v,h) __vxh_smpl(v, VXH_GET_VX_INFO, h); +#define __vxh_put_vx_info(v,h) __vxh_smpl(v, VXH_PUT_VX_INFO, h); + +#define __vxh_init_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_INIT_VX_INFO, h); +#define __vxh_set_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_SET_VX_INFO, h); +#define __vxh_clr_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_CLR_VX_INFO, h); + +#define __vxh_claim_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_CLAIM_VX_INFO, h); +#define __vxh_release_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_RELEASE_VX_INFO, h); + +#define vxh_alloc_vx_info(v) \ + __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__); +#define vxh_dealloc_vx_info(v) \ + __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__); + +#define vxh_hash_vx_info(v) \ + __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__); +#define vxh_unhash_vx_info(v) \ + __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__); + +#define vxh_loc_vx_info(v,l) \ + __vxh_long(v,l, VXH_LOC_VX_INFO, __HERE__); +#define vxh_lookup_vx_info(v,l) \ + __vxh_long(v,l, VXH_LOOKUP_VX_INFO, __HERE__); +#define vxh_create_vx_info(v,l) \ + __vxh_long(v,l, VXH_CREATE_VX_INFO, __HERE__); + +extern void vxh_dump_history(void); + + +#else /* CONFIG_VSERVER_HISTORY */ + +#define __HERE__ 0 + +#define vxh_throw_oops() do { } while (0) + +#define __vxh_get_vx_info(v,h) do { } while (0) +#define __vxh_put_vx_info(v,h) do { } while (0) + +#define __vxh_init_vx_info(v,d,h) do { } while (0) +#define __vxh_set_vx_info(v,d,h) do { } while (0) +#define __vxh_clr_vx_info(v,d,h) do { } while (0) + +#define __vxh_claim_vx_info(v,d,h) do { } while (0) 
+#define __vxh_release_vx_info(v,d,h) do { } while (0) + +#define vxh_alloc_vx_info(v) do { } while (0) +#define vxh_dealloc_vx_info(v) do { } while (0) + +#define vxh_hash_vx_info(v) do { } while (0) +#define vxh_unhash_vx_info(v) do { } while (0) + +#define vxh_loc_vx_info(a,v) do { } while (0) +#define vxh_lookup_vx_info(a,v) do { } while (0) +#define vxh_create_vx_info(a,v) do { } while (0) + +#define vxh_dump_history() do { } while (0) + + +#endif /* CONFIG_VSERVER_HISTORY */ + + +#ifdef CONFIG_VSERVER_DEBUG +#define vxd_assert_lock(l) assert_spin_locked(l) +#define vxd_assert(c,f,x...) vxlprintk(!(c), \ + "assertion [" f "] failed.", ##x, __FILE__, __LINE__) +#else +#define vxd_assert_lock(l) do { } while (0) +#define vxd_assert(c,f,x...) do { } while (0) +#endif + + +#endif /* _VX_DEBUG_H */ Index: linux-2.6.14/include/linux/vserver/debug_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/debug_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,14 @@ +#ifndef _VX_DEBUG_CMD_H +#define _VX_DEBUG_CMD_H + + +/* debug commands */ + +#define VCMD_dump_history VC_CMD(DEBUG, 1, 0) + +#ifdef __KERNEL__ + +extern int vc_dump_history(uint32_t); + +#endif /* __KERNEL__ */ +#endif /* _VX_DEBUG_CMD_H */ Index: linux-2.6.14/include/linux/vserver/dlimit.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/dlimit.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,53 @@ +#ifndef _VX_DLIMIT_H +#define _VX_DLIMIT_H + +#include "switch.h" + +#define CDLIM_UNSET (0ULL) +#define CDLIM_INFINITY (~0ULL) +#define CDLIM_KEEP (~1ULL) + + +#ifdef __KERNEL__ + +#include + +struct super_block; + +struct dl_info { + struct hlist_node dl_hlist; /* linked list of contexts */ + struct rcu_head dl_rcu; /* the rcu head */ + xid_t dl_xid; /* context id */ + atomic_t dl_usecnt; /* 
usage count */ + atomic_t dl_refcnt; /* reference count */ + + struct super_block *dl_sb; /* associated superblock */ + + spinlock_t dl_lock; /* protect the values */ + + uint64_t dl_space_used; /* used space in bytes */ + uint64_t dl_space_total; /* maximum space in bytes */ + uint32_t dl_inodes_used; /* used inodes */ + uint32_t dl_inodes_total; /* maximum inodes */ + + unsigned int dl_nrlmult; /* non root limit mult */ +}; + +struct rcu_head; + +extern void rcu_free_dl_info(struct rcu_head *); +extern void unhash_dl_info(struct dl_info *); + +extern struct dl_info *locate_dl_info(struct super_block *, xid_t); + + +struct kstatfs; + +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); + +typedef uint64_t dlsize_t; + +#endif /* __KERNEL__ */ +#else /* _VX_DLIMIT_H */ +#warning duplicate inclusion +#endif /* _VX_DLIMIT_H */ Index: linux-2.6.14/include/linux/vserver/dlimit_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/dlimit_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,68 @@ +#ifndef _VX_DLIMIT_CMD_H +#define _VX_DLIMIT_CMD_H + +/* dlimit vserver commands */ + +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) + +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) + +struct vcmd_ctx_dlimit_base_v0 { + const char __user *name; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0 { + const char __user *name; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + + +#ifdef __KERNEL__ + +#ifdef CONFIG_COMPAT + +struct vcmd_ctx_dlimit_base_v0_x32 { + compat_uptr_t name_ptr; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0_x32 { + compat_uptr_t 
name_ptr; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + +#endif /* CONFIG_COMPAT */ + +#include + +extern int vc_add_dlimit(uint32_t, void __user *); +extern int vc_rem_dlimit(uint32_t, void __user *); + +extern int vc_set_dlimit(uint32_t, void __user *); +extern int vc_get_dlimit(uint32_t, void __user *); + +#ifdef CONFIG_COMPAT + +extern int vc_add_dlimit_x32(uint32_t, void __user *); +extern int vc_rem_dlimit_x32(uint32_t, void __user *); + +extern int vc_set_dlimit_x32(uint32_t, void __user *); +extern int vc_get_dlimit_x32(uint32_t, void __user *); + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ +#endif /* _VX_DLIMIT_CMD_H */ Index: linux-2.6.14/include/linux/vserver/global.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/global.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,12 @@ +#ifndef _VX_GLOBAL_H +#define _VX_GLOBAL_H + +#ifndef CONFIG_VSERVER +#warning config options missing +#endif + + +extern atomic_t vx_global_ctotal; +extern atomic_t vx_global_cactive; + +#endif /* _VX_GLOBAL_H */ Index: linux-2.6.14/include/linux/vserver/inode.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/inode.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,41 @@ +#ifndef _VX_INODE_H +#define _VX_INODE_H + + +#define IATTR_XID 0x01000000 + +#define IATTR_ADMIN 0x00000001 +#define IATTR_WATCH 0x00000002 +#define IATTR_HIDE 0x00000004 +#define IATTR_FLAGS 0x00000007 + +#define IATTR_BARRIER 0x00010000 +#define IATTR_IUNLINK 0x00020000 +#define IATTR_IMMUTABLE 0x00040000 + +#ifdef __KERNEL__ + +#ifndef CONFIG_VSERVER +#warning config 
options missing +#endif + +#ifdef CONFIG_VSERVER_PROC_SECURE +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#else +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#endif + +#define vx_hide_check(c,m) (((m) & IATTR_HIDE) ? vx_check(c,m) : 1) + +#endif /* __KERNEL__ */ + +/* inode ioctls */ + +#define FIOC_GETXFLG _IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + +#else /* _VX_INODE_H */ +#warning duplicate inclusion +#endif /* _VX_INODE_H */ Index: linux-2.6.14/include/linux/vserver/inode_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/inode_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,57 @@ +#ifndef _VX_INODE_CMD_H +#define _VX_INODE_CMD_H + +/* inode vserver commands */ + +#define VCMD_get_iattr_v0 VC_CMD(INODE, 1, 0) +#define VCMD_set_iattr_v0 VC_CMD(INODE, 2, 0) + +#define VCMD_get_iattr VC_CMD(INODE, 1, 1) +#define VCMD_set_iattr VC_CMD(INODE, 2, 1) + +struct vcmd_ctx_iattr_v0 { + /* device handle in id */ + uint64_t ino; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +struct vcmd_ctx_iattr_v1 { + const char __user *name; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + + +#ifdef __KERNEL__ + +#ifdef CONFIG_COMPAT + +struct vcmd_ctx_iattr_v1_x32 { + compat_uptr_t name_ptr; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +#endif /* CONFIG_COMPAT */ + +#include + +extern int vc_get_iattr_v0(uint32_t, void __user *); +extern int vc_set_iattr_v0(uint32_t, void __user *); + +extern int vc_get_iattr(uint32_t, void __user *); +extern int vc_set_iattr(uint32_t, void __user *); + +#ifdef CONFIG_COMPAT + +extern int vc_get_iattr_x32(uint32_t, void __user *); +extern int vc_set_iattr_x32(uint32_t, void __user *); + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ +#endif /* _VX_INODE_CMD_H */ Index: 
linux-2.6.14/include/linux/vserver/legacy.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/legacy.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,48 @@ +#ifndef _VX_LEGACY_H +#define _VX_LEGACY_H + +#include "switch.h" + +/* compatibility vserver commands */ + +#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1) +#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3) + +#define VCMD_create_context VC_CMD(VSETUP, 1, 0) + +/* compatibility vserver arguments */ + +struct vcmd_new_s_context_v1 { + uint32_t remove_cap; + uint32_t flags; +}; + +struct vcmd_set_ipv4root_v3 { + /* number of pairs in id */ + uint32_t broadcast; + struct { + uint32_t ip; + uint32_t mask; + } nx_mask_pair[NB_IPV4ROOT]; +}; + + +#define VX_INFO_LOCK 1 /* Can't request a new vx_id */ +#define VX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define VX_INFO_PRIVATE 8 /* No one can join this security context */ +#define VX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ +#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */ +#define VX_INFO_ULIMIT 64 /* Use ulimit of the current process */ + /* to become the global limits */ + /* of the context */ +#define VX_INFO_NAMESPACE 128 /* save private namespace */ + + +#ifdef __KERNEL__ +extern int vc_new_s_context(uint32_t, void __user *); +extern int vc_set_ipv4root(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_LEGACY_H */ Index: linux-2.6.14/include/linux/vserver/limit.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/limit.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,26 @@ +#ifndef _VX_LIMIT_H +#define _VX_LIMIT_H + + +#define VLIMIT_NSOCK 16 +#define VLIMIT_OPENFD 17 +#define VLIMIT_ANON 18 +#define VLIMIT_SHMEM 19 + +#ifdef __KERNEL__ + 
+struct sysinfo; + +void vx_vsi_meminfo(struct sysinfo *); +void vx_vsi_swapinfo(struct sysinfo *); + +#define VXD_RLIMIT(r,l) (VXD_CBIT(limit, (l)) && ((r) == (l))) + +#define VXD_RLIMIT_COND(r) (VXD_CBIT(limit, (r))) + +#define NUM_LIMITS 24 + +extern const char *vlimit_name[NUM_LIMITS]; + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_H */ Index: linux-2.6.14/include/linux/vserver/limit_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/limit_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,36 @@ +#ifndef _VX_LIMIT_CMD_H +#define _VX_LIMIT_CMD_H + +/* rlimit vserver commands */ + +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) + +struct vcmd_ctx_rlimit_v0 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +}; + +struct vcmd_ctx_rlimit_mask_v0 { + uint32_t minimum; + uint32_t softlimit; + uint32_t maximum; +}; + +#define CRLIM_UNSET (0ULL) +#define CRLIM_INFINITY (~0ULL) +#define CRLIM_KEEP (~1ULL) + +#ifdef __KERNEL__ + +#include + +extern int vc_get_rlimit(uint32_t, void __user *); +extern int vc_set_rlimit(uint32_t, void __user *); +extern int vc_get_rlimit_mask(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_CMD_H */ Index: linux-2.6.14/include/linux/vserver/limit_def.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/limit_def.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,37 @@ +#ifndef _VX_LIMIT_DEF_H +#define _VX_LIMIT_DEF_H + +#include +#include + +#include "limit.h" + +/* context sub struct */ + +struct _vx_limit { +// atomic_t ticks; + + unsigned long rlim[NUM_LIMITS]; /* Context limit */ + unsigned long rmax[NUM_LIMITS]; /* Context maximum */ + atomic_t rcur[NUM_LIMITS]; /* Current 
value */ + atomic_t lhit[NUM_LIMITS]; /* Limit hits */ +}; + +#ifdef CONFIG_VSERVER_DEBUG + +static inline void __dump_vx_limit(struct _vx_limit *limit) +{ + int i; + + printk("\t_vx_limit:"); + for (i=0; irlim[i], limit->rmax[i], + atomic_read(&limit->rcur[i]), + atomic_read(&limit->lhit[i])); + } +} + +#endif + +#endif /* _VX_LIMIT_DEF_H */ Index: linux-2.6.14/include/linux/vserver/namespace.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/namespace.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,14 @@ +#ifndef _VX_NAMESPACE_H +#define _VX_NAMESPACE_H + +#include + +struct vx_info; +struct namespace; +struct fs_struct; + +extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *); + +#else /* _VX_NAMESPACE_H */ +#warning duplicate inclusion +#endif /* _VX_NAMESPACE_H */ Index: linux-2.6.14/include/linux/vserver/namespace_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/namespace_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,16 @@ +#ifndef _VX_NAMESPACE_CMD_H +#define _VX_NAMESPACE_CMD_H + +#define VCMD_enter_namespace VC_CMD(PROCALT, 1, 0) +#define VCMD_cleanup_namespace VC_CMD(PROCALT, 2, 0) +#define VCMD_set_namespace VC_CMD(PROCALT, 3, 0) + + +#ifdef __KERNEL__ + +extern int vc_enter_namespace(uint32_t, void __user *); +extern int vc_cleanup_namespace(uint32_t, void __user *); +extern int vc_set_namespace(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_NAMESPACE_CMD_H */ Index: linux-2.6.14/include/linux/vserver/network.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/network.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,95 @@ +#ifndef _VX_NETWORK_H +#define _VX_NETWORK_H + 
+#include + + +#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ + +#define NX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +#define NB_IPV4ROOT 16 + + +/* network flags */ + +#define NXF_STATE_SETUP (1ULL<<32) + +#define NXF_SC_HELPER (1ULL<<36) + +#define NXF_ONE_TIME (0x0001ULL<<32) + +#define NXF_INIT_SET (0) + + +/* address types */ + +#define NXA_TYPE_IPV4 1 +#define NXA_TYPE_IPV6 2 + +#define NXA_MOD_BCAST (1<<8) + +#define NXA_TYPE_ANY (~0) + + +#ifdef __KERNEL__ + +#include +#include +#include +#include + + +struct nx_info { + struct hlist_node nx_hlist; /* linked list of nxinfos */ + nid_t nx_id; /* vnet id */ + atomic_t nx_usecnt; /* usage count */ + atomic_t nx_tasks; /* tasks count */ + int nx_state; /* context state */ + + uint64_t nx_flags; /* network flag word */ + uint64_t nx_ncaps; /* network capabilities */ + + int nbipv4; + __u32 ipv4[NB_IPV4ROOT]; /* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and for bind any service */ + /* The other must be used explicitly */ + __u32 mask[NB_IPV4ROOT]; /* Netmask for each ipv4 */ + /* Used to select the proper source */ + /* address for sockets */ + __u32 v4_bcast; /* Broadcast address to receive UDP */ + + char nx_name[65]; /* network context name */ +}; + + +/* status flags */ + +#define NXS_HASHED 0x0001 +#define NXS_SHUTDOWN 0x0100 +#define NXS_RELEASED 0x8000 + +extern struct nx_info *lookup_nx_info(int); + +extern int get_nid_list(int, unsigned int *, int); +extern int nid_is_hashed(nid_t); + +extern int nx_migrate_task(struct task_struct *, struct nx_info *); + +extern long vs_net_change(struct nx_info *, unsigned int); + +struct in_ifaddr; +struct net_device; + +int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *); +int dev_in_nx_info(struct net_device *, struct nx_info *); + +struct sock; + +int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *); + +#endif /* __KERNEL__ */ +#else /* _VX_NETWORK_H */ +#warning duplicate inclusion +#endif 
/* _VX_NETWORK_H */ Index: linux-2.6.14/include/linux/vserver/network_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/network_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,89 @@ +#ifndef _VX_NETWORK_CMD_H +#define _VX_NETWORK_CMD_H + + +/* vinfo commands */ + +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +#ifdef __KERNEL__ +extern int vc_task_nid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_nx_info_v0 { + uint32_t nid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_nx_info(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0) +#define VCMD_net_create VC_CMD(VNET, 1, 1) + +struct vcmd_net_create { + uint64_t flagword; +}; + +#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0) + +#define VCMD_net_add VC_CMD(NETALT, 1, 0) +#define VCMD_net_remove VC_CMD(NETALT, 2, 0) + +struct vcmd_net_addr_v0 { + uint16_t type; + uint16_t count; + uint32_t ip[4]; + uint32_t mask[4]; + /* more to come */ +}; + + +#ifdef __KERNEL__ +extern int vc_net_create(uint32_t, void __user *); +extern int vc_net_migrate(uint32_t, void __user *); + +extern int vc_net_add(uint32_t, void __user *); +extern int vc_net_remove(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* flag commands */ + +#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0) +#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0) + +struct vcmd_net_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_nflags(uint32_t, void __user *); +extern int vc_set_nflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* network caps commands */ + +#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0) +#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0) + +struct vcmd_net_caps_v0 { + uint64_t ncaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ncaps(uint32_t, void 
__user *); +extern int vc_set_ncaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_NETWORK_CMD_H */ Index: linux-2.6.14/include/linux/vserver/sched.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/sched.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,25 @@ +#ifndef _VX_SCHED_H +#define _VX_SCHED_H + +#ifdef __KERNEL__ + +struct timespec; + +void vx_vsi_uptime(struct timespec *, struct timespec *); + + +struct vx_info; + +void vx_update_load(struct vx_info *); + + +struct task_struct; + +int vx_effective_vavavoom(struct vx_info *, int); + +int vx_tokens_recalc(struct vx_info *); + +#endif /* __KERNEL__ */ +#else /* _VX_SCHED_H */ +#warning duplicate inclusion +#endif /* _VX_SCHED_H */ Index: linux-2.6.14/include/linux/vserver/sched_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/sched_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,47 @@ +#ifndef _VX_SCHED_CMD_H +#define _VX_SCHED_CMD_H + +/* sched vserver commands */ + +#define VCMD_set_sched_v2 VC_CMD(SCHED, 1, 2) +#define VCMD_set_sched VC_CMD(SCHED, 1, 3) + +struct vcmd_set_sched_v2 { + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + uint64_t cpu_mask; +}; + +struct vcmd_set_sched_v3 { + uint32_t set_mask; + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + int32_t priority_bias; +}; + + +#define VXSM_FILL_RATE 0x0001 +#define VXSM_INTERVAL 0x0002 +#define VXSM_TOKENS 0x0010 +#define VXSM_TOKENS_MIN 0x0020 +#define VXSM_TOKENS_MAX 0x0040 +#define VXSM_PRIO_BIAS 0x0100 + +#define SCHED_KEEP (-2) + +#ifdef __KERNEL__ + +#include + +extern int vc_set_sched_v1(uint32_t, void __user *); +extern int vc_set_sched_v2(uint32_t, void __user *); +extern int 
vc_set_sched(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SCHED_CMD_H */ Index: linux-2.6.14/include/linux/vserver/sched_def.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/sched_def.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,54 @@ +#ifndef _VX_SCHED_DEF_H +#define _VX_SCHED_DEF_H + +#include +#include +#include +#include +#include + + +struct _vx_ticks { + uint64_t user_ticks; /* token tick events */ + uint64_t sys_ticks; /* token tick events */ + uint64_t hold_ticks; /* token ticks paused */ + uint64_t unused[5]; /* cacheline ? */ +}; + +/* context sub struct */ + +struct _vx_sched { + atomic_t tokens; /* number of CPU tokens */ + spinlock_t tokens_lock; /* lock for token bucket */ + + int fill_rate; /* Fill rate: add X tokens... */ + int interval; /* Divisor: per Y jiffies */ + int tokens_min; /* Limit: minimum for unhold */ + int tokens_max; /* Limit: no more than N tokens */ + uint32_t jiffies; /* last time accounted */ + + int priority_bias; /* bias offset for priority */ + int vavavoom; /* last calculated vavavoom */ + + cpumask_t cpus_allowed; /* cpu mask for context */ + + struct _vx_ticks cpu[NR_CPUS]; +}; + + +#ifdef CONFIG_VSERVER_DEBUG + +static inline void __dump_vx_sched(struct _vx_sched *sched) +{ + printk("\t_vx_sched:\n"); + printk("\t tokens: %4d, %4d, %4d, %4d, %4d\n", + atomic_read(&sched->tokens), + sched->fill_rate, sched->interval, + sched->tokens_min, sched->tokens_max); + printk("\t priority = %4d, %4d\n", + sched->priority_bias, sched->vavavoom); +} + +#endif + +#endif /* _VX_SCHED_DEF_H */ Index: linux-2.6.14/include/linux/vserver/signal.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/signal.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,14 @@ +#ifndef _VX_SIGNAL_H +#define _VX_SIGNAL_H + 
+#ifdef __KERNEL__ + + +struct vx_info; + +int vx_info_kill(struct vx_info *, int, int); + +#endif /* __KERNEL__ */ +#else /* _VX_SIGNAL_H */ +#warning duplicate inclusion +#endif /* _VX_SIGNAL_H */ Index: linux-2.6.14/include/linux/vserver/signal_cmd.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/signal_cmd.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,25 @@ +#ifndef _VX_SIGNAL_CMD_H +#define _VX_SIGNAL_CMD_H + +/* signalling vserver commands */ + +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) +#define VCMD_wait_exit VC_CMD(EVENT, 99, 0) + +struct vcmd_ctx_kill_v0 { + int32_t pid; + int32_t sig; +}; + +struct vcmd_wait_exit_v0 { + int32_t a; + int32_t b; +}; + +#ifdef __KERNEL__ + +extern int vc_ctx_kill(uint32_t, void __user *); +extern int vc_wait_exit(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SIGNAL_CMD_H */ Index: linux-2.6.14/include/linux/vserver/switch.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/switch.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,97 @@ +#ifndef _VX_SWITCH_H +#define _VX_SWITCH_H + +#include + +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) +#define VC_VERSION(c) ((c) & 0xFFF) + +#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) + +/* + + Syscall Matrix V2.8 + + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | + |INFO |SETUP | |MOVE | | | | | | + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICES| | + HOST | 00| 01| 02| 03| 04| 05| | 06| 07| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| 
| |SCHED. | | + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + MEMORY | | | | | | | |SWAP | | + | 16| 17| 18| 19| 20| 21| | 22| 23| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | | + | 24| 25| 26| 27| 28| 29| | 30| 31| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + DISK | | | | |DLIMIT | | |INODE | | + VFS | 32| 33| 34| 35| 36| 37| | 38| 39| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + OTHER | | | | | | | |VINFO | | + | 40| 41| 42| 43| 44| 45| | 46| 47| + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ + SPECIAL|EVENT | | | |FLAGS | | | | | + | 48| 49| 50| 51| 52| 53| | 54| 55| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT | + | 56| 57| 58| 59| 60|TEST 61| | 62| 63| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + +*/ + +#define VC_CAT_VERSION 0 + +#define VC_CAT_VSETUP 1 +#define VC_CAT_VHOST 2 + +#define VC_CAT_VPROC 9 +#define VC_CAT_PROCALT 10 +#define VC_CAT_PROCMIG 11 +#define VC_CAT_PROCTRL 12 + +#define VC_CAT_SCHED 14 + +#define VC_CAT_VNET 25 +#define VC_CAT_NETALT 26 +#define VC_CAT_NETMIG 27 +#define VC_CAT_NETCTRL 28 + +#define VC_CAT_DLIMIT 36 +#define VC_CAT_INODE 38 + +#define VC_CAT_VINFO 46 +#define VC_CAT_EVENT 48 + +#define VC_CAT_FLAGS 52 +#define VC_CAT_DEBUG 56 +#define VC_CAT_RLIMIT 60 + +#define VC_CAT_SYSTEST 61 +#define VC_CAT_COMPAT 63 + +/* interface version */ + +#define VCI_VERSION 0x00020001 +#define VCI_LEGACY_VERSION 0x000100FF + +/* query version */ + +#define VCMD_get_version VC_CMD(VERSION, 0, 0) + + +#ifdef __KERNEL__ + +#include + + +#else /* __KERNEL__ */ +#define __user +#endif /* __KERNEL__ */ + +#endif /* _VX_SWITCH_H */ Index: 
linux-2.6.14/include/linux/vserver/xid.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/include/linux/vserver/xid.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,156 @@ +#ifndef _VX_XID_H +#define _VX_XID_H + +#ifndef CONFIG_VSERVER +#warning config options missing +#endif + +#define XID_TAG(in) (IS_TAGXID(in)) + + +#ifdef CONFIG_XID_TAG_NFSD +#define XID_TAG_NFSD 1 +#else +#define XID_TAG_NFSD 0 +#endif + + +#ifdef CONFIG_INOXID_NONE + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) (0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_GID16 + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0x0000FFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (((gid) >> 16) & 0xFFFF) : 0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) \ + ((tag) ? (((gid) & 0xFFFF) | ((xid) << 16)) : (gid)) + +#endif + + +#ifdef CONFIG_INOXID_UGID24 + +#define MAX_UID 0x00FFFFFF +#define MAX_GID 0x00FFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0) + +#define XIDINO_UID(tag, uid, xid) \ + ((tag) ? (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16)) : (uid)) +#define XIDINO_GID(tag, gid, xid) \ + ((tag) ? (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24)) : (gid)) + +#endif + + +#ifdef CONFIG_INOXID_UID16 + +#define MAX_UID 0x0000FFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (((uid) >> 16) & 0xFFFF) : 0) + +#define XIDINO_UID(tag, uid, xid) \ + ((tag) ? (((uid) & 0xFFFF) | ((xid) << 16)) : (uid)) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_INTERN + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? 
(xid) : 0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_RUNTIME + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) (0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifndef CONFIG_INOXID_NONE +#define vx_current_fsxid(sb) \ + ((sb)->s_flags & MS_TAGXID ? current->xid : 0) +#else +#define vx_current_fsxid(sb) (0) +#endif + +#ifndef CONFIG_INOXID_INTERN +#define XIDINO_XID(tag, xid) (0) +#else +#define XIDINO_XID(tag, xid) ((tag) ? (xid) : 0) +#endif + +#define INOXID_UID(tag, uid, gid) \ + ((tag) ? ((uid) & MAX_UID) : (uid)) +#define INOXID_GID(tag, uid, gid) \ + ((tag) ? ((gid) & MAX_GID) : (gid)) + + +static inline uid_t vx_map_uid(uid_t uid) +{ + if ((uid > MAX_UID) && (uid != -1)) + uid = -2; + return (uid & MAX_UID); +} + +static inline gid_t vx_map_gid(gid_t gid) +{ + if ((gid > MAX_GID) && (gid != -1)) + gid = -2; + return (gid & MAX_GID); +} + + +#ifdef CONFIG_VSERVER_LEGACY +#define FIOC_GETXID _IOR('x', 1, long) +#define FIOC_SETXID _IOW('x', 2, long) +#define FIOC_SETXIDJ _IOW('x', 3, long) +#endif + +#ifdef CONFIG_XID_PROPAGATE + +int vx_parse_xid(char *string, xid_t *xid, int remove); + +void __vx_propagate_xid(struct nameidata *nd, struct inode *inode); + +#define vx_propagate_xid(n,i) __vx_propagate_xid(n,i) + +#else +#define vx_propagate_xid(n,i) do { } while (0) +#endif + +#endif /* _VX_XID_H */ Index: linux-2.6.14/include/net/af_unix.h =================================================================== --- linux-2.6.14.orig/include/net/af_unix.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/net/af_unix.h 2005-10-31 11:05:45.000000000 -0600 @@ -17,9 +17,9 @@ extern atomic_t unix_tot_inflight; -static inline struct sock *first_unix_socket(int *i) +static inline struct sock *next_unix_socket_table(int *i) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { + for 
((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { if (!hlist_empty(&unix_socket_table[*i])) return __sk_head(&unix_socket_table[*i]); } @@ -28,16 +28,19 @@ static inline struct sock *next_unix_socket(int *i, struct sock *s) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; + do { + if (s) + s = sk_next(s); + if (!s) + s = next_unix_socket_table(i); + } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH)); + return s; +} + +static inline struct sock *first_unix_socket(int *i) +{ + *i = 0; + return next_unix_socket(i, NULL); } #define forall_unix_sockets(i, s) \ Index: linux-2.6.14/include/net/irda/ircomm_tty.h =================================================================== --- linux-2.6.14.orig/include/net/irda/ircomm_tty.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/net/irda/ircomm_tty.h 2005-10-31 11:05:45.000000000 -0600 @@ -102,8 +102,8 @@ struct timer_list watchdog_timer; struct work_struct tqueue; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ + unsigned int close_delay; + unsigned int closing_wait; /* time to wait before closing */ int open_count; int blocked_open; /* # of blocked opens */ Index: linux-2.6.14/include/net/route.h =================================================================== --- linux-2.6.14.orig/include/net/route.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/net/route.h 2005-10-31 11:05:45.000000000 -0600 @@ -33,6 +33,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. 
@@ -141,6 +142,68 @@ return ip_tos2prio[IPTOS_TOS(tos)>>1]; } +#define IPI_LOOPBACK htonl(INADDR_LOOPBACK) + +/* + * ip_find_src - choose or validate fl->fl4_src against nxi->ipv4[]. + * Returns 0 and leaves fl untouched when nxi->ipv4[0] is 0. With more + * than one address, an unset source is derived from a route lookup + * (exact match, else same masked network); a source still unset then + * defaults to IPI_LOOPBACK for a loopback destination, else to + * nxi->ipv4[0]. A preset source missing from nxi->ipv4[] yields + * -EPERM; route lookup errors are returned unchanged. + */ +static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl) +{ + int err; + int i, n = nxi->nbipv4; + u32 ipv4root = nxi->ipv4[0]; + + if (ipv4root == 0) + return 0; + + if (fl->fl4_src == 0) { + if (n > 1) { + u32 foundsrc; + + err = __ip_route_output_key(rp, fl); + if (err) { + fl->fl4_src = ipv4root; + err = __ip_route_output_key(rp, fl); + } + if (err) + return err; + + foundsrc = (*rp)->rt_src; + ip_rt_put(*rp); + + for (i=0; i<n; i++) { + u32 mask = nxi->mask[i]; + u32 ipv4 = nxi->ipv4[i]; + u32 net4 = ipv4 & mask; + + if (foundsrc == ipv4) { + fl->fl4_src = ipv4; + break; + } + if (!fl->fl4_src && (foundsrc & mask) == net4) + fl->fl4_src = ipv4; + } + } + if (fl->fl4_src == 0) + fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK) + ? IPI_LOOPBACK : ipv4root; + } else { + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == fl->fl4_src) + break; + } + if (i == n) + return -EPERM; + } + return 0; +} + static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif, u8 protocol, u16 sport, u16 dport, struct sock *sk) @@ -155,7 +218,23 @@ .dport = dport } } }; int err; - if (!dst || !src) { + struct nx_info *nx_info = current->nx_info; + + if (sk) + nx_info = sk->sk_nx_info; + vxdprintk(VXD_CBIT(net, 4), /* sk may be NULL here */ + "ip_route_connect(%p) %p,%p;%lx", + sk, nx_info, sk ? sk->sk_socket : NULL, + ((sk && sk->sk_socket) ? sk->sk_socket->flags : 0)); + + if (nx_info) { + err = ip_find_src(nx_info, rp, &fl); + if (err) + return err; + if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + fl.fl4_dst = nx_info->ipv4[0]; + } + if (!fl.fl4_dst || !fl.fl4_src) { err = __ip_route_output_key(rp, &fl); if (err) return err; Index: linux-2.6.14/include/net/sock.h =================================================================== --- linux-2.6.14.orig/include/net/sock.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/net/sock.h 2005-10-31 11:05:45.000000000 -0600 @@ -115,6 +115,10 @@ atomic_t
skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; + xid_t skc_xid; + struct vx_info *skc_vx_info; + nid_t skc_nid; + struct nx_info *skc_nx_info; }; /** @@ -189,6 +193,10 @@ #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot +#define sk_xid __sk_common.skc_xid +#define sk_vx_info __sk_common.skc_vx_info +#define sk_nid __sk_common.skc_nid +#define sk_nx_info __sk_common.skc_nx_info unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; Index: linux-2.6.14/init/Kconfig =================================================================== --- linux-2.6.14.orig/init/Kconfig 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/init/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -1,5 +1,9 @@ menu "Code maturity level options" +config DEVFS_FS + bool + default y + config EXPERIMENTAL bool "Prompt for development and/or incomplete code/drivers" ---help--- Index: linux-2.6.14/init/calibrate.c =================================================================== --- linux-2.6.14.orig/init/calibrate.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/init/calibrate.c 2005-10-31 11:05:45.000000000 -0600 @@ -165,8 +165,8 @@ /* Round the value and print it */ printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, + HZ*(loops_per_jiffy >> 3)/62500, + (HZ*(loops_per_jiffy >> 3)/625) % 100, loops_per_jiffy); } Index: linux-2.6.14/init/version.c =================================================================== --- linux-2.6.14.orig/init/version.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/init/version.c 2005-10-31 11:05:45.000000000 -0600 @@ -31,3 +31,8 @@ const char linux_banner[] = "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; + +const char vx_linux_banner[] = + "Linux version %s (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") %s\n"; + Index: 
linux-2.6.14/ipc/mqueue.c =================================================================== --- linux-2.6.14.orig/ipc/mqueue.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/ipc/mqueue.c 2005-10-31 11:05:45.000000000 -0600 @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include "util.h" @@ -147,17 +149,20 @@ spin_lock(&mq_lock); if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > - p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) { + p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur || + !vx_ipcmsg_avail(p->vx_info, mq_bytes)) { spin_unlock(&mq_lock); goto out_inode; } u->mq_bytes += mq_bytes; + vx_ipcmsg_add(p->vx_info, u, mq_bytes); spin_unlock(&mq_lock); info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); if (!info->messages) { spin_lock(&mq_lock); u->mq_bytes -= mq_bytes; + vx_ipcmsg_sub(p->vx_info, u, mq_bytes); spin_unlock(&mq_lock); goto out_inode; } @@ -255,10 +260,14 @@ (info->attr.mq_maxmsg * info->attr.mq_msgsize)); user = info->user; if (user) { + struct vx_info *vxi = lookup_vx_info(user->xid); + spin_lock(&mq_lock); user->mq_bytes -= mq_bytes; + vx_ipcmsg_sub(vxi, user, mq_bytes); queues_count--; spin_unlock(&mq_lock); + put_vx_info(vxi); free_uid(user); } } @@ -730,7 +739,7 @@ if (inode) atomic_inc(&inode->i_count); - err = vfs_unlink(dentry->d_parent->d_inode, dentry); + err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL); out_err: dput(dentry); Index: linux-2.6.14/ipc/msg.c =================================================================== --- linux-2.6.14.orig/ipc/msg.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/ipc/msg.c 2005-10-31 11:05:45.000000000 -0600 @@ -99,6 +99,7 @@ msq->q_perm.mode = (msgflg & S_IRWXUGO); msq->q_perm.key = key; + msq->q_perm.xid = vx_current_xid(); msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); @@ -814,6 +815,9 @@ { struct msg_queue *msq = it; + if (!vx_check(msq->q_perm.xid, VX_IDENT)) + return 0; + return seq_printf(s, "%10d %10d %4o %10lu 
%10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", msq->q_perm.key, Index: linux-2.6.14/ipc/sem.c =================================================================== --- linux-2.6.14.orig/ipc/sem.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/ipc/sem.c 2005-10-31 11:05:45.000000000 -0600 @@ -178,6 +178,7 @@ sma->sem_perm.mode = (semflg & S_IRWXUGO); sma->sem_perm.key = key; + sma->sem_perm.xid = vx_current_xid(); sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); @@ -1334,6 +1335,9 @@ { struct sem_array *sma = it; + if (!vx_check(sma->sem_perm.xid, VX_IDENT)) + return 0; + return seq_printf(s, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", sma->sem_perm.key, Index: linux-2.6.14/ipc/shm.c =================================================================== --- linux-2.6.14.orig/ipc/shm.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/ipc/shm.c 2005-10-31 11:05:45.000000000 -0600 @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -115,7 +116,12 @@ */ static void shm_destroy (struct shmid_kernel *shp) { - shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid); + int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + + vx_ipcshm_sub(vxi, shp, numpages); + shm_tot -= numpages; + shm_rmid (shp->id); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) @@ -125,6 +131,7 @@ shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); + put_vx_info(vxi); ipc_rcu_putref(shp); } @@ -191,12 +198,15 @@ if (shm_tot + numpages >= shm_ctlall) return -ENOSPC; + if (!vx_ipcshm_avail(current->vx_info, numpages)) + return -ENOSPC; shp = ipc_rcu_alloc(sizeof(*shp)); if (!shp) return -ENOMEM; shp->shm_perm.key = key; + shp->shm_perm.xid = vx_current_xid(); shp->shm_flags = (shmflg & S_IRWXUGO); shp->mlock_user = NULL; @@ -238,6 +248,7 @@ else file->f_op = &shm_file_operations; shm_tot += numpages; + vx_ipcshm_add(current->vx_info, key, numpages); 
shm_unlock(shp); return shp->id; @@ -878,6 +889,9 @@ #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" + if (!vx_check(shp->shm_perm.xid, VX_IDENT)) + return 0; + if (sizeof(size_t) <= sizeof(int)) format = SMALL_STRING; else Index: linux-2.6.14/ipc/util.c =================================================================== --- linux-2.6.14.orig/ipc/util.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/ipc/util.c 2005-10-31 11:05:45.000000000 -0600 @@ -153,7 +153,9 @@ */ for (id = 0; id <= max_id; id++) { p = ids->entries->p[id]; - if(p==NULL) + if (p==NULL) + continue; + if (!vx_check(p->xid, VX_IDENT)) continue; if (key == p->key) return id; @@ -466,6 +468,8 @@ { /* flag will most probably be 0 or S_...UGO from */ int requested_mode, granted_mode; + if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */ + return -1; requested_mode = (flag >> 6) | (flag >> 3) | flag; granted_mode = ipcp->mode; if (current->euid == ipcp->cuid || current->euid == ipcp->uid) Index: linux-2.6.14/kernel/Makefile =================================================================== --- linux-2.6.14.orig/kernel/Makefile 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/Makefile 2005-10-31 11:05:45.000000000 -0600 @@ -9,6 +9,9 @@ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o +subdir-y += vserver +obj-y += vserver/vserver.o + obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o spinlock.o Index: linux-2.6.14/kernel/capability.c =================================================================== --- linux-2.6.14.orig/kernel/capability.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/capability.c 2005-10-31 11:05:45.000000000 -0600 @@ -11,6 +11,7 @@ #include #include #include +#include #include unsigned securebits 
= SECUREBITS_DEFAULT; /* systemwide security settings */ Index: linux-2.6.14/kernel/cpuset.c =================================================================== --- linux-2.6.14.orig/kernel/cpuset.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/cpuset.c 2005-10-31 11:05:45.000000000 -0600 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include Index: linux-2.6.14/kernel/exit.c =================================================================== --- linux-2.6.14.orig/kernel/exit.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/exit.c 2005-10-31 11:05:45.000000000 -0600 @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -235,6 +237,7 @@ ptrace_unlink(current); /* Reparent to init */ REMOVE_LINKS(current); + /* FIXME handle vchild_reaper/initpid */ current->parent = child_reaper; current->real_parent = child_reaper; SET_LINKS(current); @@ -389,6 +392,7 @@ struct file * file = xchg(&fdt->fd[i], NULL); if (file) filp_close(file, files); + vx_openfd_dec(i); } i++; set >>= 1; @@ -609,6 +613,7 @@ struct task_struct *p, *reaper = father; struct list_head *_p, *_n; + /* FIXME handle vchild_reaper/initpid */ do { reaper = next_thread(reaper); if (reaper == father) { @@ -852,6 +857,8 @@ __exit_files(tsk); __exit_fs(tsk); exit_namespace(tsk); + exit_vx_info(tsk); + exit_nx_info(tsk); exit_thread(); cpuset_exit(tsk); exit_keys(tsk); Index: linux-2.6.14/kernel/fork.c =================================================================== --- linux-2.6.14.orig/kernel/fork.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/fork.c 2005-10-31 11:05:45.000000000 -0600 @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include @@ -102,6 +105,8 @@ void free_task(struct task_struct *tsk) { free_thread_info(tsk->thread_info); + clr_vx_info(&tsk->vx_info); + clr_nx_info(&tsk->nx_info); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -198,8 +203,8 @@ 
mm->free_area_cache = oldmm->mmap_base; mm->cached_hole_size = ~0UL; mm->map_count = 0; - set_mm_counter(mm, rss, 0); - set_mm_counter(mm, anon_rss, 0); + __set_mm_counter(mm, rss, 0); + __set_mm_counter(mm, anon_rss, 0); cpus_clear(mm->cpu_vm_mask); mm->mm_rb = RB_ROOT; rb_link = &mm->mm_rb.rb_node; @@ -332,6 +337,7 @@ if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + set_vx_info(&mm->mm_vx_info, current->vx_info); return mm; } free_mm(mm); @@ -363,6 +369,7 @@ BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + clr_vx_info(&mm->mm_vx_info); free_mm(mm); } @@ -489,6 +496,7 @@ /* Copy the current MM stuff.. */ memcpy(mm, oldmm, sizeof(*mm)); + mm->mm_vx_info = NULL; if (!mm_init(mm)) goto fail_nomem; @@ -517,6 +525,7 @@ * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() */ + clr_vx_info(&mm->mm_vx_info); mm_free_pgd(mm); free_mm(mm); return retval; @@ -685,6 +694,8 @@ struct file *f = *old_fds++; if (f) { get_file(f); + /* FIXME sum it first for check and performance */ + vx_openfd_inc(open_files - i); } else { /* * The fd may be claimed in the fd bitmap but not yet @@ -880,6 +891,8 @@ { int retval; struct task_struct *p = NULL; + struct vx_info *vxi; + struct nx_info *nxi; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -908,12 +921,30 @@ if (!p) goto fork_out; + init_vx_info(&p->vx_info, current->vx_info); + init_nx_info(&p->nx_info, current->nx_info); + + /* check vserver memory */ + if (p->mm && !(clone_flags & CLONE_VM)) { + if (vx_vmpages_avail(p->mm, p->mm->total_vm)) + vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm); + else + goto bad_fork_free; + } + if (p->mm && vx_flags(VXF_FORK_RSS, 0)) { + if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, rss))) + goto bad_fork_cleanup_vm; + } + retval = -EAGAIN; + if (!vx_nproc_avail(1)) + goto bad_fork_cleanup_vm; + if (atomic_read(&p->user->processes) >= 
p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) - goto bad_fork_free; + goto bad_fork_cleanup_vm; } atomic_inc(&p->user->__count); @@ -1151,6 +1182,18 @@ nr_threads++; total_forks++; + + /* p is copy of current */ + vxi = p->vx_info; + if (vxi) { + claim_vx_info(vxi, p); + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.total_forks); + vx_nproc_inc(p); + } + nxi = p->nx_info; + if (nxi) + claim_nx_info(nxi, p); write_unlock_irq(&tasklist_lock); retval = 0; @@ -1193,6 +1236,9 @@ put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); +bad_fork_cleanup_vm: + if (p->mm && !(clone_flags & CLONE_VM)) + vx_pages_sub(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm); bad_fork_free: free_task(p); goto fork_out; @@ -1252,6 +1298,14 @@ if (pid < 0) return -EAGAIN; + + /* kernel threads are host only */ + if ((clone_flags & CLONE_KTHREAD) && !vx_check(0, VX_ADMIN)) { + vxwprintk(1, "xid=%d tried to spawn a kernel thread.", + vx_current_xid()); + return -EPERM; + } + if (unlikely(current->ptrace)) { trace = fork_traceflag (clone_flags); if (trace) Index: linux-2.6.14/kernel/kthread.c =================================================================== --- linux-2.6.14.orig/kernel/kthread.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/kthread.c 2005-10-31 11:05:45.000000000 -0600 @@ -114,7 +114,7 @@ create->result = ERR_PTR(pid); } else { wait_for_completion(&create->started); - create->result = find_task_by_pid(pid); + create->result = find_task_by_real_pid(pid); } complete(&create->done); } Index: linux-2.6.14/kernel/posix-cpu-timers.c =================================================================== --- linux-2.6.14.orig/kernel/posix-cpu-timers.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/posix-cpu-timers.c 2005-10-31 11:05:45.000000000 -0600 @@ -6,6 +6,7 @@ #include #include #include +#include static int check_clock(clockid_t 
which_clock) { Index: linux-2.6.14/kernel/posix-timers.c =================================================================== --- linux-2.6.14.orig/kernel/posix-timers.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/posix-timers.c 2005-10-31 11:05:45.000000000 -0600 @@ -47,6 +47,8 @@ #include #include #include +#include +#include #ifndef div_long_long_rem #include @@ -411,6 +413,10 @@ int posix_timer_event(struct k_itimer *timr,int si_private) { + struct vx_info_save vxis; + int ret; + + enter_vx_info(task_get_vx_info(timr->it_process), &vxis); memset(&timr->sigq->info, 0, sizeof(siginfo_t)); timr->sigq->info.si_sys_private = si_private; /* @@ -430,11 +436,11 @@ if (timr->it_sigev_notify & SIGEV_THREAD_ID) { struct task_struct *leader; - int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, + ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, timr->it_process); if (likely(ret >= 0)) - return ret; + goto out; timr->it_sigev_notify = SIGEV_SIGNAL; leader = timr->it_process->group_leader; @@ -442,8 +448,13 @@ timr->it_process = leader; } - return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, + ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq, timr->it_process); + +out: + leave_vx_info(&vxis); + put_vx_info(vxis.vxi); + return ret; } EXPORT_SYMBOL_GPL(posix_timer_event); @@ -518,7 +529,7 @@ struct task_struct *rtn = current->group_leader; if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || + (!(rtn = find_task_by_real_pid(event->sigev_notify_thread_id)) || rtn->tgid != current->tgid || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; Index: linux-2.6.14/kernel/printk.c =================================================================== --- linux-2.6.14.orig/kernel/printk.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/printk.c 2005-10-31 11:05:45.000000000 -0600 @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include @@ 
-222,18 +224,13 @@ unsigned long i, j, limit, count; int do_clear = 0; char c; - int error = 0; + int error; error = security_syslog(type); if (error) return error; - switch (type) { - case 0: /* Close log */ - break; - case 1: /* Open log */ - break; - case 2: /* Read from log */ + if ((type >= 2) && (type <= 4)) { error = -EINVAL; if (!buf || len < 0) goto out; @@ -244,6 +241,16 @@ error = -EFAULT; goto out; } + } + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return vx_do_syslog(type, buf, len); + + switch (type) { + case 0: /* Close log */ + break; + case 1: /* Open log */ + break; + case 2: /* Read from log */ error = wait_event_interruptible(log_wait, (log_start - log_end)); if (error) goto out; @@ -267,16 +274,6 @@ do_clear = 1; /* FALL THRU */ case 3: /* Read last kernel messages */ - error = -EINVAL; - if (!buf || len < 0) - goto out; - error = 0; - if (!len) - goto out; - if (!access_ok(VERIFY_WRITE, buf, len)) { - error = -EFAULT; - goto out; - } count = len; if (count > log_buf_len) count = log_buf_len; @@ -509,11 +506,14 @@ asmlinkage int printk(const char *fmt, ...) { + struct vx_info_save vxis; va_list args; int r; va_start(args, fmt); + __enter_vx_admin(&vxis); r = vprintk(fmt, args); + __leave_vx_admin(&vxis); va_end(args); return r; Index: linux-2.6.14/kernel/sched.c =================================================================== --- linux-2.6.14.orig/kernel/sched.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/sched.c 2005-10-31 11:05:45.000000000 -0600 @@ -50,6 +50,9 @@ #include #include +#include +#include +#include /* * Convert user-nice values [ -20 ... 0 ... 
19 ] @@ -236,6 +239,10 @@ task_t *migration_thread; struct list_head migration_queue; #endif +#ifdef CONFIG_VSERVER_HARDCPU + struct list_head hold_queue; + int idle_tokens; +#endif #ifdef CONFIG_SCHEDSTATS /* latency stats */ @@ -596,6 +603,7 @@ */ static void dequeue_task(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); array->nr_active--; list_del(&p->run_list); if (list_empty(array->queue + p->prio)) @@ -604,6 +612,7 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); sched_info_queued(p); list_add_tail(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); @@ -617,11 +626,13 @@ */ static void requeue_task(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); list_move_tail(&p->run_list, array->queue + p->prio); } static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); list_add(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); array->nr_active++; @@ -645,6 +656,7 @@ static int effective_prio(task_t *p) { int bonus, prio; + struct vx_info *vxi; if (rt_task(p)) return p->prio; @@ -652,6 +664,11 @@ bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; prio = p->static_prio - bonus; + + if ((vxi = p->vx_info) && + vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) + prio += vx_effective_vavavoom(vxi, MAX_USER_PRIO); + if (prio < MAX_RT_PRIO) prio = MAX_RT_PRIO; if (prio > MAX_PRIO-1) @@ -785,19 +802,77 @@ } p->timestamp = now; + vx_activate_task(p); __activate_task(p, rq); } /* * deactivate_task - remove a task from the runqueue. 
*/ -static void deactivate_task(struct task_struct *p, runqueue_t *rq) +static void __deactivate_task(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; dequeue_task(p, p->array); p->array = NULL; } +static inline +void deactivate_task(struct task_struct *p, runqueue_t *rq) +{ + vx_deactivate_task(p); + __deactivate_task(p, rq); +} + + +#ifdef CONFIG_VSERVER_HARDCPU +/* + * vx_hold_task - put a task on the hold queue + */ +static inline +void vx_hold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + __deactivate_task(p, rq); + p->state |= TASK_ONHOLD; + /* a new one on hold */ + vx_onhold_inc(vxi); + list_add_tail(&p->run_list, &rq->hold_queue); +} + +/* + * vx_unhold_task - put a task back to the runqueue + */ +static inline +void vx_unhold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + list_del(&p->run_list); + /* one less waiting */ + vx_onhold_dec(vxi); + p->state &= ~TASK_ONHOLD; + enqueue_task(p, rq->expired); + rq->nr_running++; + + if (p->static_prio < rq->best_expired_prio) + rq->best_expired_prio = p->static_prio; +} +#else +static inline +void vx_hold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + return; +} + +static inline +void vx_unhold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + return; +} +#endif /* CONFIG_VSERVER_HARDCPU */ + + /* * resched_task - mark a task 'to be rescheduled now'. * @@ -1154,6 +1229,12 @@ rq = task_rq_lock(p, &flags); old_state = p->state; + + /* we need to unhold suspended tasks */ + if (old_state & TASK_ONHOLD) { + vx_unhold_task(p->vx_info, p, rq); + old_state = p->state; + } if (!(old_state & state)) goto out; @@ -1282,6 +1363,9 @@ * the waker guarantees that the freshly woken up task is going * to be considered on this CPU.) 
*/ + /* this is to get the accounting behind the load update */ + if (old_state == TASK_UNINTERRUPTIBLE) + vx_uninterruptible_dec(p); if (!sync || cpu != this_cpu) { if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); @@ -1397,6 +1481,7 @@ p->prio = effective_prio(p); + vx_activate_task(p); if (likely(cpu == this_cpu)) { if (!(clone_flags & CLONE_VM)) { /* @@ -1408,6 +1493,7 @@ __activate_task(p, rq); else { p->prio = current->prio; + BUG_ON(p->state & TASK_ONHOLD); list_add_tail(&p->run_list, &current->run_list); p->array = current->array; p->array->nr_active++; @@ -2470,13 +2556,16 @@ void account_user_time(struct task_struct *p, cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ cputime64_t tmp; + int nice = (TASK_NICE(p) > 0); p->utime = cputime_add(p->utime, cputime); + vx_account_user(vxi, cputime, nice); /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); - if (TASK_NICE(p) > 0) + if (nice) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); @@ -2492,10 +2581,12 @@ cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ runqueue_t *rq = this_rq(); cputime64_t tmp; p->stime = cputime_add(p->stime, cputime); + vx_account_system(vxi, cputime, (p == rq->idle)); /* Add system time to cpustat.
*/ tmp = cputime_to_cputime64(cputime); @@ -2557,6 +2648,10 @@ if (p == rq->idle) { if (wake_priority_sleeper(rq)) goto out; +#ifdef CONFIG_VSERVER_HARDCPU_IDLE + if (!--rq->idle_tokens && !list_empty(&rq->hold_queue)) + set_need_resched(); +#endif rebalance_tick(cpu, rq, SCHED_IDLE); return; } @@ -2589,7 +2684,7 @@ } goto out_unlock; } - if (!--p->time_slice) { + if (vx_need_resched(p)) { dequeue_task(p, rq->active); set_tsk_need_resched(p); p->prio = effective_prio(p); @@ -2854,6 +2949,10 @@ unsigned long long now; unsigned long run_time; int cpu, idx, new_prio; + struct vx_info *vxi; +#ifdef CONFIG_VSERVER_HARDCPU + int maxidle = -HZ; +#endif /* * Test if we are atomic. Since do_exit() needs to call into @@ -2913,12 +3012,41 @@ unlikely(signal_pending(prev)))) prev->state = TASK_RUNNING; else { - if (prev->state == TASK_UNINTERRUPTIBLE) + if (prev->state == TASK_UNINTERRUPTIBLE) { rq->nr_uninterruptible++; + vx_uninterruptible_inc(prev); + } deactivate_task(prev, rq); } } +#ifdef CONFIG_VSERVER_HARDCPU + if (!list_empty(&rq->hold_queue)) { + struct list_head *l, *n; + int ret; + + vxi = NULL; + list_for_each_safe(l, n, &rq->hold_queue) { + next = list_entry(l, task_t, run_list); + if (vxi == next->vx_info) + continue; + + vxi = next->vx_info; + ret = vx_tokens_recalc(vxi); + + if (ret > 0) { + vx_unhold_task(vxi, next, rq); + break; + } + if ((ret < 0) && (maxidle < ret)) + maxidle = ret; + } + } + rq->idle_tokens = -maxidle; + +pick_next: +#endif + cpu = smp_processor_id(); if (unlikely(!rq->nr_running)) { go_idle: @@ -2966,6 +3094,22 @@ queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); + vxi = next->vx_info; +#ifdef CONFIG_VSERVER_HARDCPU + if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) { + int ret = vx_tokens_recalc(vxi); + + if (unlikely(ret <= 0)) { + if (ret && (rq->idle_tokens > -ret)) + rq->idle_tokens = -ret; + vx_hold_task(vxi, next, rq); + goto pick_next; + } + } else /* well, looks ugly but not as ugly as the 
ifdef-ed version */ +#endif + if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) + vx_tokens_recalc(vxi); + if (!rt_task(next) && next->activated > 0) { unsigned long long delta = now - next->timestamp; if (unlikely((long long)(now - next->timestamp) < 0)) @@ -3521,7 +3665,7 @@ nice = 19; if (increment < 0 && !can_nice(current, nice)) - return -EPERM; + return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM; retval = security_task_setnice(current, nice); if (retval) @@ -3671,6 +3815,7 @@ oldprio = p->prio; __setscheduler(p, policy, param->sched_priority); if (array) { + vx_activate_task(p); __activate_task(p, rq); /* * Reschedule if we are currently running on this runqueue and @@ -5524,6 +5669,9 @@ INIT_LIST_HEAD(&rq->migration_queue); #endif atomic_set(&rq->nr_iowait, 0); +#ifdef CONFIG_VSERVER_HARDCPU + INIT_LIST_HEAD(&rq->hold_queue); +#endif for (j = 0; j < 2; j++) { array = rq->arrays + j; @@ -5593,6 +5741,7 @@ deactivate_task(p, task_rq(p)); __setscheduler(p, SCHED_NORMAL, 0); if (array) { + vx_activate_task(p); __activate_task(p, task_rq(p)); resched_task(rq->curr); } Index: linux-2.6.14/kernel/signal.c =================================================================== --- linux-2.6.14.orig/kernel/signal.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/signal.c 2005-10-31 11:05:45.000000000 -0600 @@ -646,19 +646,27 @@ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { + int user; int error = -EINVAL; + if (!valid_signal(sig)) return error; + + user = (!info || ((unsigned long)info != 1 && + (unsigned long)info != 2 && SI_FROMUSER(info))); + error = -EPERM; - if ((!info || ((unsigned long)info != 1 && - (unsigned long)info != 2 && SI_FROMUSER(info))) - && ((sig != SIGCONT) || + if (user && ((sig != SIGCONT) || (current->signal->session != t->signal->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) && !capable(CAP_KILL)) return error; + error = 
-ESRCH; + if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT)) + return error; + error = security_task_kill(t, info, sig); if (!error) audit_signal_info(sig, t); /* Let audit system see the signal */ @@ -1928,6 +1936,11 @@ if (current->pid == 1) continue; + /* virtual init is protected against user signals */ + if ((info->si_code == SI_USER) && + vx_current_initpid(current->pid)) + continue; + if (sig_kernel_stop(signr)) { /* * The default action is to stop all threads in Index: linux-2.6.14/kernel/softirq.c =================================================================== --- linux-2.6.14.orig/kernel/softirq.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/softirq.c 2005-10-31 11:05:45.000000000 -0600 @@ -16,6 +16,7 @@ #include #include #include +#include #include /* @@ -73,6 +74,7 @@ asmlinkage void __do_softirq(void) { + struct vx_info_save vxis; struct softirq_action *h; __u32 pending; int max_restart = MAX_SOFTIRQ_RESTART; @@ -81,6 +83,7 @@ pending = local_softirq_pending(); local_bh_disable(); + __enter_vx_admin(&vxis); cpu = smp_processor_id(); restart: /* Reset the pending bitmask before enabling irqs */ @@ -108,6 +111,7 @@ if (pending) wakeup_softirqd(); + __leave_vx_admin(&vxis); __local_bh_enable(); } Index: linux-2.6.14/kernel/sys.c =================================================================== --- linux-2.6.14.orig/kernel/sys.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/sys.c 2005-10-31 11:05:45.000000000 -0600 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include +#include #include #include @@ -231,7 +233,10 @@ goto out; } if (niceval < task_nice(p) && !can_nice(p, niceval)) { - error = -EACCES; + if (vx_flags(VXF_IGNEG_NICE, 0)) + error = 0; + else + error = -EACCES; goto out; } no_nice = security_task_setnice(p, niceval); @@ -283,7 +288,8 @@ if (!who) who = current->uid; else - if ((who != current->uid) && !(user = find_user(who))) + if ((who != 
current->uid) && + !(user = find_user(vx_current_xid(), who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -341,7 +347,8 @@ if (!who) who = current->uid; else - if ((who != current->uid) && !(user = find_user(who))) + if ((who != current->uid) && + !(user = find_user(vx_current_xid(), who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -460,6 +467,8 @@ } EXPORT_SYMBOL_GPL(kernel_power_off); +long vs_reboot(unsigned int, void *); + /* * Reboot system call: for obvious reasons only root may call it, * and even root needs to set up some magic numbers in the registers @@ -484,6 +493,9 @@ magic2 != LINUX_REBOOT_MAGIC2C)) return -EINVAL; + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return vs_reboot(cmd, arg); + lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: @@ -562,7 +574,6 @@ kill_proc(cad_pid, SIGINT, 1); } - /* * Unprivileged users may change the real gid to the effective gid * or vice versa. (BSD-style) @@ -669,7 +680,7 @@ { struct user_struct *new_user; - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(vx_current_xid(), new_ruid); if (!new_user) return -EAGAIN; @@ -1072,14 +1083,17 @@ { struct task_struct *p; int err = -EINVAL; + pid_t rpgid; if (!pid) - pid = current->pid; + pid = vx_map_pid(current->pid); if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; + rpgid = vx_rmap_pid(pgid); + /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. 
-DaveM */ @@ -1114,22 +1128,22 @@ if (pgid != pid) { struct task_struct *p; - do_each_task_pid(pgid, PIDTYPE_PGID, p) { + do_each_task_pid(rpgid, PIDTYPE_PGID, p) { if (p->signal->session == current->signal->session) goto ok_pgid; - } while_each_task_pid(pgid, PIDTYPE_PGID, p); + } while_each_task_pid(rpgid, PIDTYPE_PGID, p); goto out; } ok_pgid: - err = security_task_setpgid(p, pgid); + err = security_task_setpgid(p, rpgid); if (err) goto out; - if (process_group(p) != pgid) { + if (process_group(p) != rpgid) { detach_pid(p, PIDTYPE_PGID); - p->signal->pgrp = pgid; - attach_pid(p, PIDTYPE_PGID, pgid); + p->signal->pgrp = rpgid; + attach_pid(p, PIDTYPE_PGID, rpgid); } err = 0; @@ -1142,7 +1156,7 @@ asmlinkage long sys_getpgid(pid_t pid) { if (!pid) { - return process_group(current); + return vx_rmap_pid(process_group(current)); } else { int retval; struct task_struct *p; @@ -1154,7 +1168,7 @@ if (p) { retval = security_task_getpgid(p); if (!retval) - retval = process_group(p); + retval = vx_rmap_pid(process_group(p)); } read_unlock(&tasklist_lock); return retval; @@ -1492,7 +1506,7 @@ int errno = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (copy_to_user(name, vx_new_utsname(), sizeof *name)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1503,15 +1517,17 @@ int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.nodename, tmp, len); - system_utsname.nodename[len] = 0; + char *ptr = vx_new_uts(nodename); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1523,15 +1539,17 @@ asmlinkage long sys_gethostname(char __user *name, int len) { int i, errno; + char *ptr; if (len < 0) return -EINVAL; down_read(&uts_sem); - i = 1 + strlen(system_utsname.nodename); + 
ptr = vx_new_uts(nodename); + i = 1 + strlen(ptr); if (i > len) i = len; errno = 0; - if (copy_to_user(name, system_utsname.nodename, i)) + if (copy_to_user(name, ptr, i)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1548,7 +1566,7 @@ int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1556,8 +1574,10 @@ down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.domainname, tmp, len); - system_utsname.domainname[len] = 0; + char *ptr = vx_new_uts(domainname); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1614,7 +1634,7 @@ return -EINVAL; old_rlim = current->signal->rlim + resource; if ((new_rlim.rlim_max > old_rlim->rlim_max) && - !capable(CAP_SYS_RESOURCE)) + !capable(CAP_SYS_RESOURCE) && !vx_ccaps(VXC_SET_RLIMIT)) return -EPERM; if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) return -EPERM; Index: linux-2.6.14/kernel/sysctl.c =================================================================== --- linux-2.6.14.orig/kernel/sysctl.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/sysctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,7 @@ #ifdef CONFIG_HOTPLUG extern char hotplug_path[]; #endif +extern char vshelper_path[]; #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif @@ -232,6 +234,7 @@ .maxlen = sizeof(system_utsname.sysname), .mode = 0444, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -241,6 +244,7 @@ .maxlen = sizeof(system_utsname.release), .mode = 0444, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -250,6 +254,7 @@ .maxlen = sizeof(system_utsname.version), .mode = 0444, .proc_handler = 
&proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -259,6 +264,7 @@ .maxlen = sizeof(system_utsname.nodename), .mode = 0644, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -268,6 +274,7 @@ .maxlen = sizeof(system_utsname.domainname), .mode = 0644, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -404,6 +411,15 @@ .strategy = &sysctl_string, }, #endif + { + .ctl_name = KERN_VSHELPER, + .procname = "vshelper", + .data = &vshelper_path, + .maxlen = 256, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, #ifdef CONFIG_CHR_DEV_SG { .ctl_name = KERN_SG_BIG_BUFF, @@ -1419,16 +1435,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - size_t len; + size_t len, maxlen; char __user *p; char c; + void *data; + + data = table->data; + maxlen = table->maxlen; + + if (!data || !maxlen || !*lenp || (*ppos && !write)) + return (*lenp = 0); - if (!table->data || !table->maxlen || !*lenp || - (*ppos && !write)) { - *lenp = 0; - return 0; - } - + if (table->virt_handler) + table->virt_handler(table, write, filp->f_xid, &data, &maxlen); + if (write) { len = 0; p = buffer; @@ -1439,20 +1459,20 @@ break; len++; } - if (len >= table->maxlen) - len = table->maxlen-1; - if(copy_from_user(table->data, buffer, len)) + if (len >= maxlen) + len = maxlen-1; + if(copy_from_user(data, buffer, len)) return -EFAULT; - ((char *) table->data)[len] = 0; + ((char *) data)[len] = 0; *ppos += *lenp; } else { - len = strlen(table->data); - if (len > table->maxlen) - len = table->maxlen; + len = strlen(data); + if (len > maxlen) + len = maxlen; if (len > *lenp) len = *lenp; if (len) - if(copy_to_user(buffer, table->data, len)) + if(copy_to_user(buffer, data, len)) return -EFAULT; if (len < *lenp) { if(put_user('\n', ((char __user *) 
buffer) + len)) Index: linux-2.6.14/kernel/timer.c =================================================================== --- linux-2.6.14.orig/kernel/timer.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/timer.c 2005-10-31 11:05:45.000000000 -0600 @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -710,7 +712,11 @@ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; time_offset += ltemp; + #if SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE > 0 time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + #else + time_adj = -ltemp >> (SHIFT_HZ + SHIFT_UPDATE - SHIFT_SCALE); + #endif } else { ltemp = time_offset; if (!(time_status & STA_FLL)) @@ -718,7 +724,11 @@ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; time_offset -= ltemp; + #if SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE > 0 time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + #else + time_adj = ltemp >> (SHIFT_HZ + SHIFT_UPDATE - SHIFT_SCALE); + #endif } /* @@ -978,12 +988,6 @@ #endif -#ifndef __alpha__ - -/* - * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this - * should be moved into arch/i386 instead? - */ /** * sys_getpid - return the thread group id of the current process @@ -996,7 +1000,7 @@ */ asmlinkage long sys_getpid(void) { - return current->tgid; + return vx_map_tgid(current->tgid); } /* @@ -1040,9 +1044,23 @@ #endif break; } - return pid; + return vx_map_pid(pid); } +#ifdef __alpha__ + +/* + * The Alpha uses getxpid, getxuid, and getxgid instead. + */ + +asmlinkage long do_getxpid(long *ppid) +{ + *ppid = sys_getppid(); + return sys_getpid(); +} + +#else /* _alpha_ */ + asmlinkage long sys_getuid(void) { /* Only we change this so SMP safe */ @@ -1263,6 +1281,8 @@ tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; tp.tv_sec++; } + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&tp, NULL); val.uptime = tp.tv_sec + (tp.tv_nsec ? 
1 : 0); val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); Index: linux-2.6.14/kernel/user.c =================================================================== --- linux-2.6.14.orig/kernel/user.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/kernel/user.c 2005-10-31 11:05:45.000000000 -0600 @@ -22,8 +22,8 @@ #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) +#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) static kmem_cache_t *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; @@ -55,7 +55,7 @@ list_del(&up->uidhash_list); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) { struct list_head *up; @@ -64,7 +64,7 @@ user = list_entry(up, struct user_struct, uidhash_list); - if(user->uid == uid) { + if(user->uid == uid && user->xid == xid) { atomic_inc(&user->__count); return user; } @@ -79,12 +79,12 @@ * * If the user_struct could not be found, return NULL. 
*/ -struct user_struct *find_user(uid_t uid) +struct user_struct *find_user(xid_t xid, uid_t uid) { struct user_struct *ret; spin_lock(&uidhash_lock); - ret = uid_hash_find(uid, uidhashentry(uid)); + ret = uid_hash_find(xid, uid, uidhashentry(xid, uid)); spin_unlock(&uidhash_lock); return ret; } @@ -100,13 +100,13 @@ } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(xid_t xid, uid_t uid) { - struct list_head *hashent = uidhashentry(uid); + struct list_head *hashent = uidhashentry(xid, uid); struct user_struct *up; spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); spin_unlock(&uidhash_lock); if (!up) { @@ -116,6 +116,7 @@ if (!new) return NULL; new->uid = uid; + new->xid = xid; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); @@ -138,7 +139,7 @@ * on adding the same user already.. */ spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); if (up) { key_put(new->uid_keyring); key_put(new->session_keyring); @@ -184,7 +185,7 @@ /* Insert the root user immediately (init already runs as root) */ spin_lock(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0)); + uid_hash_insert(&root_user, uidhashentry(0,0)); spin_unlock(&uidhash_lock); return 0; Index: linux-2.6.14/kernel/vserver/Kconfig =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/Kconfig 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,198 @@ +# +# Linux VServer configuration +# + +config VSERVER + bool + default y + +config VSERVER_SECURITY + bool + depends on SECURITY + default y + select SECURITY_CAPABILITIES + +config VSERVER_LEGACYNET + bool + depends on !VSERVER_NGNET + default y + +menu "Linux VServer" + +config VSERVER_LEGACY + bool "Enable Legacy Kernel API" + default y + help + This enables the legacy API used in vs1.xx, maintaining 
+ compatibility with older vserver tools, and guest images + that are configured using the legacy method. This is + probably a good idea for now, for migration purposes. + + Note that some tools have not yet been altered to use + this API, so disabling this option may reduce some + functionality. + +config VSERVER_LEGACY_VERSION + bool "Show a Legacy Version ID" + depends on VSERVER_LEGACY + default n + help + This shows a special legacy version to very old tools + which do not handle the current version correctly. + + This will probably disable some features of newer tools + so better avoid it, unless you really, really need it + for backwards compatibility. + +config VSERVER_NGNET + bool "Disable Legacy Networking Kernel API" + depends on EXPERIMENTAL + default n + help + This disables the legacy networking API which is required + by the chbind tool. Do not disable it unless you exactly + know what you are doing. + +config VSERVER_COWBL + bool "Enable COW Immutable Link Breaking" + depends on EXPERIMENTAL + default y + help + This enables the COW link break code which will allow to + treat unified files like normal files in regard of writing + to them (which will implicitly break the link and create + a copy of the unified file) + +config VSERVER_PROC_SECURE + bool "Enable Proc Security" + depends on PROC_FS + default y + help + This configures ProcFS security to initially hide + non-process entries for all contexts except the main and + spectator context (i.e. for all guests), which is a secure + default. + + (note: on 1.2x the entries were visible by default) + +config VSERVER_HARDCPU + bool "Enable Hard CPU Limits" + depends on EXPERIMENTAL + default n + help + Activate the Hard CPU Limits + + This will compile in code that allows the Token Bucket + Scheduler to put processes on hold when a context's + tokens are depleted (provided that its per-context + sched_hard flag is set).
+ + Processes belonging to that context will not be able + to consume CPU resources again until a per-context + configured minimum of tokens has been reached. + +config VSERVER_HARDCPU_IDLE + bool "Limit the IDLE task" + depends on VSERVER_HARDCPU + default n + help + Limit the idle slices, so that the next context + will be scheduled as soon as possible. + + This might improve interactivity and latency, but + will also marginally increase scheduling overhead. + +choice + prompt "Persistent Inode Context Tagging" + default INOXID_UGID24 + help + This adds persistent context information to filesystems + mounted with the tagxid option. Tagging is a requirement + for per-context disk limits and per-context quota. + + +config INOXID_NONE + bool "Disabled" + help + do not store per-context information in inodes. + +config INOXID_UID16 + bool "UID16/GID32" + help + reduces UID to 16 bit, but leaves GID at 32 bit. + +config INOXID_GID16 + bool "UID32/GID16" + help + reduces GID to 16 bit, but leaves UID at 32 bit. + +config INOXID_UGID24 + bool "UID24/GID24" + help + uses the upper 8bit from UID and GID for XID tagging + which leaves 24bit for UID/GID each, which should be + more than sufficient for normal use. + +config INOXID_INTERN + bool "UID32/GID32" + help + this uses otherwise reserved inode fields in the on + disk representation, which limits the use to a few + filesystems (currently ext2 and ext3) + +config INOXID_RUNTIME + bool "Runtime" + depends on EXPERIMENTAL + help + inodes are tagged when first accessed, this doesn't + require any persistent information, but might give + funny results for mixed access. + +endchoice + +config XID_TAG_NFSD + bool "Tag NFSD User Auth and Files" + default n + help + Enable this if you do want the in-kernel NFS + Server to use the xid tagging specified above.
+ (will require patched clients too) + +config XID_PROPAGATE + bool "Enable XID Propagation" + default n + depends on EXPERIMENTAL + help + This allows for the xid= mount option to specify + an xid which is to be used for the entire mount + tree. + +config VSERVER_DEBUG + bool "VServer Debugging Code" + default n + help + Set this to yes if you want to be able to activate + debugging output at runtime. It adds a probably small + overhead to all vserver related functions and + increases the kernel size by about 20k. + +config VSERVER_HISTORY + bool "VServer History Tracing" + depends on VSERVER_DEBUG + default n + help + Set this to yes if you want to record the history of + linux-vserver activities, so they can be replayed in + the event of a kernel panic or oops. + +config VSERVER_HISTORY_SIZE + int "Per-CPU History Size (32-65536)" + depends on VSERVER_HISTORY + range 32 65536 + default 64 + help + This allows you to specify the number of entries in + the per-CPU history buffer. + +endmenu + Index: linux-2.6.14/kernel/vserver/Makefile =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/Makefile 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,16 @@ +# +# Makefile for the Linux vserver routines. 
+# + + +obj-y += vserver.o + +vserver-y := switch.o context.o namespace.o sched.o network.o inode.o \ + limit.o cvirt.o signal.o helper.o init.o dlimit.o + +vserver-$(CONFIG_PROC_FS) += proc.o +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o +vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o +vserver-$(CONFIG_VSERVER_LEGACYNET) += legacynet.o +vserver-$(CONFIG_VSERVER_HISTORY) += history.o + Index: linux-2.6.14/kernel/vserver/context.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/context.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,861 @@ +/* + * linux/kernel/vserver/context.c + * + * Virtual Server: Context Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 context helper + * V0.02 vx_ctx_kill syscall command + * V0.03 replaced context_info calls + * V0.04 redesign of struct (de)alloc + * V0.05 rlimit basic implementation + * V0.06 task_xid and info commands + * V0.07 context flags and caps + * V0.08 switch to RCU based hash + * V0.09 revert to non RCU for now + * V0.10 and back to working RCU hash + * V0.11 and back to locking again + * V0.12 referenced context store + * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "cvirt_init.h" +#include "limit_init.h" +#include "sched_init.h" + + +atomic_t vx_global_ctotal = ATOMIC_INIT(0); +atomic_t vx_global_cactive = ATOMIC_INIT(0); + + +/* now inactive context structures */ + +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT; + +static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED; + + +/* __alloc_vx_info() + + * allocate an initialized vx_info struct + * doesn't make it visible (hash) */ + +static struct vx_info *__alloc_vx_info(xid_t xid) +{ + struct vx_info *new = NULL; + + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); + + /* would this benefit from a 
slab cache? */ + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct vx_info)); + new->vx_id = xid; + INIT_HLIST_NODE(&new->vx_hlist); + atomic_set(&new->vx_usecnt, 0); + atomic_set(&new->vx_tasks, 0); + new->vx_parent = NULL; + new->vx_state = 0; + init_waitqueue_head(&new->vx_wait); + + /* rest of init goes here */ + vx_info_init_limit(&new->limit); + vx_info_init_sched(&new->sched); + vx_info_init_cvirt(&new->cvirt); + vx_info_init_cacct(&new->cacct); + + new->vx_flags = VXF_INIT_SET; + new->vx_bcaps = CAP_INIT_EFF_SET; + new->vx_ccaps = 0; + + vxdprintk(VXD_CBIT(xid, 0), + "alloc_vx_info(%d) = %p", xid, new); + vxh_alloc_vx_info(new); + atomic_inc(&vx_global_ctotal); + return new; +} + +/* __dealloc_vx_info() + + * final disposal of vx_info */ + +static void __dealloc_vx_info(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 0), + "dealloc_vx_info(%p)", vxi); + vxh_dealloc_vx_info(vxi); + + vxi->vx_id = -1; + + vx_info_exit_limit(&vxi->limit); + vx_info_exit_sched(&vxi->sched); + vx_info_exit_cvirt(&vxi->cvirt); + vx_info_exit_cacct(&vxi->cacct); + + vxi->vx_state |= VXS_RELEASED; + kfree(vxi); + atomic_dec(&vx_global_ctotal); +} + +static void __shutdown_vx_info(struct vx_info *vxi) +{ + struct namespace *namespace; + struct fs_struct *fs; + + might_sleep(); + + vxi->vx_state |= VXS_SHUTDOWN; + vs_state_change(vxi, VSC_SHUTDOWN); + + namespace = xchg(&vxi->vx_namespace, NULL); + if (namespace) + put_namespace(namespace); + + fs = xchg(&vxi->vx_fs, NULL); + if (fs) + put_fs_struct(fs); +} + +/* exported stuff */ + +void free_vx_info(struct vx_info *vxi) +{ + /* context shutdown is mandatory */ + BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN)); + + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_tasks)); + + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + BUG_ON(vxi->vx_namespace); + BUG_ON(vxi->vx_fs); + + spin_lock(&vx_info_inactive_lock); + hlist_del(&vxi->vx_hlist); + 
spin_unlock(&vx_info_inactive_lock); + + __dealloc_vx_info(vxi); +} + + +/* hash table for vx_info hash */ + +#define VX_HASH_SIZE 13 + +static struct hlist_head vx_info_hash[VX_HASH_SIZE] = + { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT }; + +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(xid_t xid) +{ + return (xid % VX_HASH_SIZE); +} + + + +/* __hash_vx_info() + + * add the vxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_vx_info(struct vx_info *vxi) +{ + struct hlist_head *head; + + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_hash_vx_info(vxi); + + /* context must not be hashed */ + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + vxi->vx_state |= VXS_HASHED; + head = &vx_info_hash[__hashval(vxi->vx_id)]; + hlist_add_head(&vxi->vx_hlist, head); + atomic_inc(&vx_global_cactive); +} + +/* __unhash_vx_info() + + * remove the vxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_vx_info(struct vx_info *vxi) +{ + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_unhash_vx_info(vxi); + + /* context must be hashed */ + BUG_ON(!vx_info_state(vxi, VXS_HASHED)); + + vxi->vx_state &= ~VXS_HASHED; + hlist_del_init(&vxi->vx_hlist); + spin_lock(&vx_info_inactive_lock); + hlist_add_head(&vxi->vx_hlist, &vx_info_inactive); + spin_unlock(&vx_info_inactive_lock); + atomic_dec(&vx_global_cactive); +} + + +/* __lookup_vx_info() + + * requires the hash_lock to be held + * doesn't increment the vx_refcnt */ + +static inline struct vx_info *__lookup_vx_info(xid_t xid) +{ + struct hlist_head *head = &vx_info_hash[__hashval(xid)]; + struct hlist_node *pos; + struct vx_info *vxi; + + vxd_assert_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + vxi = hlist_entry(pos, struct vx_info, 
vx_hlist); + + if (vxi->vx_id == xid) + goto found; + } + vxi = NULL; +found: + vxdprintk(VXD_CBIT(xid, 0), + "__lookup_vx_info(#%u): %p[#%u]", + xid, vxi, vxi?vxi->vx_id:0); + vxh_lookup_vx_info(vxi, xid); + return vxi; +} + + +/* __vx_dynamic_id() + + * find unused dynamic xid + * requires the hash_lock to be held */ + +static inline xid_t __vx_dynamic_id(void) +{ + static xid_t seq = MAX_S_CONTEXT; + xid_t barrier = seq; + + vxd_assert_lock(&vx_info_hash_lock); + do { + if (++seq > MAX_S_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_vx_info(seq)) { + vxdprintk(VXD_CBIT(xid, 4), + "__vx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +#ifdef CONFIG_VSERVER_LEGACY + +/* __loc_vx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct vx_info * __loc_vx_info(int id, int *err) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) { + *err = -ENOMEM; + return NULL; + } + + /* required to make dynamic xids unique */ + spin_lock(&vx_info_hash_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + new->vx_id = id; + } + /* existing context requested */ + else if ((vxi = __lookup_vx_info(id))) { + /* context in setup is not available */ + if (vxi->vx_flags & VXF_STATE_SETUP) { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (not available)", id, vxi); + vxi = NULL; + *err = -EBUSY; + } else { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (found)", id, vxi); + get_vx_info(vxi); + *err = 0; + } + goto out_unlock; + } + + /* new context requested */ + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + *err = 1; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + 
vxh_loc_vx_info(vxi, id); + if (new) + __dealloc_vx_info(new); + return vxi; +} + +#endif + +/* __create_vx_info() + + * create the requested context + * get() and hash it */ + +static struct vx_info * __create_vx_info(int id) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) + return ERR_PTR(-ENOMEM); + + /* required to make dynamic xids unique */ + spin_lock(&vx_info_hash_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + vxi = ERR_PTR(-EAGAIN); + goto out_unlock; + } + new->vx_id = id; + } + /* static context requested */ + else if ((vxi = __lookup_vx_info(id))) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (already there)", id, vxi); + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) + vxi = ERR_PTR(-EBUSY); + else + vxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* dynamic xid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) (dynamic rejected)", id); + vxi = ERR_PTR(-EINVAL); + goto out_unlock; + } + + /* new context */ + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + vxh_create_vx_info(IS_ERR(vxi)?NULL:vxi, id); + if (new) + __dealloc_vx_info(new); + return vxi; +} + + +/* exported stuff */ + + +void unhash_vx_info(struct vx_info *vxi) +{ + __shutdown_vx_info(vxi); + spin_lock(&vx_info_hash_lock); + __unhash_vx_info(vxi); + spin_unlock(&vx_info_hash_lock); + __wakeup_vx_info(vxi); +} + + +/* lookup_vx_info() + + * search for a vx_info and get() it + * negative id means current */ + +struct vx_info *lookup_vx_info(int id) +{ + struct vx_info *vxi = NULL; + + if (id < 0) { + vxi = get_vx_info(current->vx_info); + } else if (id > 1) { + 
spin_lock(&vx_info_hash_lock); + vxi = get_vx_info(__lookup_vx_info(id)); + spin_unlock(&vx_info_hash_lock); + } + return vxi; +} + +/* xid_is_hashed() + + * verify that xid is still hashed */ + +int xid_is_hashed(xid_t xid) +{ + int hashed; + + spin_lock(&vx_info_hash_lock); + hashed = (__lookup_vx_info(xid) != NULL); + spin_unlock(&vx_info_hash_lock); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY + +struct vx_info *lookup_or_create_vx_info(int id) +{ + int err; + + return __loc_vx_info(id, &err); +} + +#endif + +#ifdef CONFIG_PROC_FS + +int get_xid_list(int index, unsigned int *xids, int size) +{ + int hindex, nr_xids = 0; + + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { + struct hlist_head *head = &vx_info_hash[hindex]; + struct hlist_node *pos; + + spin_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + struct vx_info *vxi; + + if (--index > 0) + continue; + + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + xids[nr_xids] = vxi->vx_id; + if (++nr_xids >= size) { + spin_unlock(&vx_info_hash_lock); + goto out; + } + } + /* keep the lock time short */ + spin_unlock(&vx_info_hash_lock); + } +out: + return nr_xids; +} +#endif + + +/* task must me current or locked */ + +void exit_vx_info(struct task_struct *p) +{ + struct vx_info *vxi = p->vx_info; + + if (vxi) { + atomic_dec(&vxi->cvirt.nr_threads); + vx_nproc_dec(p); + release_vx_info(vxi, p); + } +} + + +#ifdef CONFIG_VSERVER_DEBUG + +void dump_vx_info_inactive(int level) +{ + struct hlist_node *entry, *next; + + hlist_for_each_safe(entry, next, &vx_info_inactive) { + struct vx_info *vxi = + list_entry(entry, struct vx_info, vx_hlist); + + dump_vx_info(vxi, level); + } +} + +#endif + +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) +{ + struct user_struct *new_user, *old_user; + + if (!p || !vxi) + BUG(); + new_user = alloc_uid(vxi->vx_id, p->uid); + if (!new_user) + return -ENOMEM; + + old_user = p->user; + if (new_user != old_user) { + atomic_inc(&new_user->processes); + 
atomic_dec(&old_user->processes); + p->user = new_user; + } + free_uid(old_user); + return 0; +} + +void vx_mask_bcaps(struct task_struct *p) +{ + struct vx_info *vxi = p->vx_info; + + p->cap_effective &= vxi->vx_bcaps; + p->cap_inheritable &= vxi->vx_bcaps; + p->cap_permitted &= vxi->vx_bcaps; +} + + +#include + +static int vx_openfd_task(struct task_struct *tsk) +{ + struct files_struct *files = tsk->files; + const unsigned long *bptr; + int count, total; + struct fdtable *fdt; + + spin_lock(&files->file_lock); + rcu_read_lock(); + fdt = files_fdtable(files); + bptr = fdt->open_fds->fds_bits; + count = fdt->max_fds / (sizeof(unsigned long) * 8); + for (total = 0; count > 0; count--) { + if (*bptr) + total += hweight_long(*bptr); + bptr++; + } + rcu_read_unlock(); + spin_unlock(&files->file_lock); + return total; +} + +/* + * migrate task to new context + * gets vxi, puts old_vxi on change + */ + +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) +{ + struct vx_info *old_vxi; + int ret = 0; + + if (!p || !vxi) + BUG(); + + old_vxi = task_get_vx_info(p); + if (old_vxi == vxi) + goto out; + + vxdprintk(VXD_CBIT(xid, 5), + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_usecnt)); + + if (!(ret = vx_migrate_user(p, vxi))) { + int openfd; + + task_lock(p); + openfd = vx_openfd_task(p); + + if (old_vxi) { + atomic_dec(&old_vxi->cvirt.nr_threads); + atomic_dec(&old_vxi->cvirt.nr_running); + atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]); + /* FIXME: what about the struct files here? */ + atomic_sub(openfd, &old_vxi->limit.rcur[VLIMIT_OPENFD]); + } + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.nr_running); + atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]); + /* FIXME: what about the struct files here? 
*/ + atomic_add(openfd, &vxi->limit.rcur[VLIMIT_OPENFD]); + + if (old_vxi) { + release_vx_info(old_vxi, p); + clr_vx_info(&p->vx_info); + } + claim_vx_info(vxi, p); + set_vx_info(&p->vx_info, vxi); + p->xid = vxi->vx_id; + + vxdprintk(VXD_CBIT(xid, 5), + "moved task %p into vxi:%p[#%d]", + p, vxi, vxi->vx_id); + + vx_mask_bcaps(p); + task_unlock(p); + } +out: + put_vx_info(old_vxi); + return ret; +} + +int vx_set_init(struct vx_info *vxi, struct task_struct *p) +{ + if (!vxi) + return -EINVAL; + if (vxi->vx_initpid) + return -EPERM; + + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_init(%p[#%d],%p[#%d,%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); + + vxi->vx_initpid = p->tgid; + return 0; +} + + +/* vserver syscall commands below here */ + +/* taks xid and vx_info functions */ + +#include + + +int vc_task_xid(uint32_t id, void __user *data) +{ + xid_t xid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + xid = (tsk) ? 
tsk->xid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + xid = vx_current_xid(); + return xid; +} + + +int vc_vx_info(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.xid = vxi->vx_id; + vc_data.initpid = vxi->vx_initpid; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* context functions */ + +int vc_ctx_create(uint32_t xid, void __user *data) +{ + struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET }; + struct vx_info *new_vxi; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID)) + return -EINVAL; + if (xid < 2) + return -EINVAL; + + new_vxi = __create_vx_info(xid); + if (IS_ERR(new_vxi)) + return PTR_ERR(new_vxi); + + /* initial flags */ + new_vxi->vx_flags = vc_data.flagword; + + vs_state_change(new_vxi, VSC_STARTUP); + ret = new_vxi->vx_id; + vx_migrate_task(current, new_vxi); + /* if this fails, we might end up with a hashed vx_info */ + put_vx_info(new_vxi); + return ret; +} + + +int vc_ctx_migrate(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* dirty hack until Spectator becomes a cap */ + if (id == 1) { + current->xid = 1; + return 0; + } + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + vx_migrate_task(current, vxi); + put_vx_info(vxi); + return 0; +} + + +int vc_get_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.flagword = vxi->vx_flags; + + /* 
special STATE flag handling */ + vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME); + + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + uint64_t mask, trigger; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + /* special STATE flag handling */ + mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); + trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); + + if (trigger & VXF_STATE_SETUP) + vx_mask_bcaps(current); + if (trigger & VXF_STATE_INIT) + if (vxi == current->vx_info) + vx_set_init(vxi, current); + + vxi->vx_flags = vx_mask_flags(vxi->vx_flags, + vc_data.flagword, mask); + put_vx_info(vxi); + return 0; +} + +int vc_get_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.bcaps = vxi->vx_bcaps; + vc_data.ccaps = vxi->vx_ccaps; + vc_data.cmask = ~0UL; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vxi->vx_bcaps &= vc_data.bcaps; + vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps, + vc_data.ccaps, vc_data.cmask); + put_vx_info(vxi); + return 0; +} + +#include + +EXPORT_SYMBOL_GPL(free_vx_info); + Index: linux-2.6.14/kernel/vserver/cvirt.c =================================================================== --- 
/dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/cvirt.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,260 @@ +/* + * linux/kernel/vserver/cvirt.c + * + * Virtual Server: Context Virtualization + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 broken out from limit.c + * V0.02 added utsname stuff + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) +{ + struct vx_info *vxi = current->vx_info; + + set_normalized_timespec(uptime, + uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec, + uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec); + if (!idle) + return; + set_normalized_timespec(idle, + idle->tv_sec - vxi->cvirt.bias_idle.tv_sec, + idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec); + return; +} + +uint64_t vx_idle_jiffies(void) +{ + return init_task.utime + init_task.stime; +} + + + +static inline uint32_t __update_loadavg(uint32_t load, + int wsize, int delta, int n) +{ + unsigned long long calc, prev; + + /* just set it to n */ + if (unlikely(delta >= wsize)) + return (n << FSHIFT); + + calc = delta * n; + calc <<= FSHIFT; + prev = (wsize - delta); + prev *= load; + calc += prev; + do_div(calc, wsize); + return calc; +} + + +void vx_update_load(struct vx_info *vxi) +{ + uint32_t now, last, delta; + unsigned int nr_running, nr_uninterruptible; + unsigned int total; + + spin_lock(&vxi->cvirt.load_lock); + + now = jiffies; + last = vxi->cvirt.load_last; + delta = now - last; + + if (delta < 5*HZ) + goto out; + + nr_running = atomic_read(&vxi->cvirt.nr_running); + nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible); + total = nr_running + nr_uninterruptible; + + vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0], + 60*HZ, delta, total); + vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1], + 5*60*HZ, delta, total); + vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2], + 
15*60*HZ, delta, total); + + vxi->cvirt.load_last = now; +out: + atomic_inc(&vxi->cvirt.load_updates); + spin_unlock(&vxi->cvirt.load_lock); +} + + +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp) +{ + switch (ctl->ctl_name) { + case KERN_OSTYPE: + *datap = vx_new_uts(sysname); + break; + case KERN_OSRELEASE: + *datap = vx_new_uts(release); + break; + case KERN_VERSION: + *datap = vx_new_uts(version); + break; + case KERN_NODENAME: + *datap = vx_new_uts(nodename); + break; + case KERN_DOMAINNAME: + *datap = vx_new_uts(domainname); + break; + } + + return 0; +} + + + +/* + * Commands to do_syslog: + * + * 0 -- Close the log. Currently a NOP. + * 1 -- Open the log. Currently a NOP. + * 2 -- Read from the log. + * 3 -- Read all messages remaining in the ring buffer. + * 4 -- Read and clear all messages remaining in the ring buffer + * 5 -- Clear ring buffer. + * 6 -- Disable printk's to console + * 7 -- Enable printk's to console + * 8 -- Set level of messages printed to console + * 9 -- Return number of unread characters in the log buffer + * 10 -- Return size of the log buffer + */ +int vx_do_syslog(int type, char __user *buf, int len) +{ + int error = 0; + int do_clear = 0; + struct vx_info *vxi = current->vx_info; + struct _vx_syslog *log; + + if (!vxi) + return -EINVAL; + log = &vxi->cvirt.syslog; + + switch (type) { + case 0: /* Close log */ + case 1: /* Open log */ + break; + case 2: /* Read from log */ + error = wait_event_interruptible(log->log_wait, + (log->log_start - log->log_end)); + if (error) + break; + spin_lock_irq(&log->logbuf_lock); + spin_unlock_irq(&log->logbuf_lock); + break; + case 4: /* Read/clear last kernel messages */ + do_clear = 1; + /* fall through */ + case 3: /* Read last kernel messages */ + return 0; + + case 5: /* Clear ring buffer */ + return 0; + + case 6: /* Disable logging to console */ + case 7: /* Enable logging to console */ + case 8: /* Set level of messages printed to 
console */ + break; + + case 9: /* Number of chars in the log buffer */ + return 0; + case 10: /* Size of the log buffer */ + return 0; + default: + error = -EINVAL; + break; + } + return error; +} + + +/* virtual host info names */ + +static char * vx_vhi_name(struct vx_info *vxi, int id) +{ + switch (id) { + case VHIN_CONTEXT: + return vxi->vx_name; + case VHIN_SYSNAME: + return vxi->cvirt.utsname.sysname; + case VHIN_NODENAME: + return vxi->cvirt.utsname.nodename; + case VHIN_RELEASE: + return vxi->cvirt.utsname.release; + case VHIN_VERSION: + return vxi->cvirt.utsname.version; + case VHIN_MACHINE: + return vxi->cvirt.utsname.machine; + case VHIN_DOMAINNAME: + return vxi->cvirt.utsname.domainname; + default: + return NULL; + } + return NULL; +} + +int vc_set_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vhi_name_v0 vc_data; + char *name; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (name) + memcpy(name, vc_data.name, 65); + put_vx_info(vxi); + return (name ? 0 : -EFAULT); +} + +int vc_get_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vhi_name_v0 vc_data; + char *name; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (!name) + goto out_put; + + memcpy(vc_data.name, name, 65); + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + name = NULL; /* fall into out_put: drops the vxi reference, returns -EFAULT */ +out_put: + put_vx_info(vxi); + return (name ? 
0 : -EFAULT); +} Index: linux-2.6.14/kernel/vserver/cvirt_init.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/cvirt_init.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,78 @@ + +extern uint64_t vx_idle_jiffies(void); + +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt) +{ + uint64_t idle_jiffies = vx_idle_jiffies(); + uint64_t nsuptime; + + do_posix_clock_monotonic_gettime(&cvirt->bias_uptime); + nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec + * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec; + cvirt->bias_clock = nsec_to_clock_t(nsuptime); + + jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle); + atomic_set(&cvirt->nr_threads, 0); + atomic_set(&cvirt->nr_running, 0); + atomic_set(&cvirt->nr_uninterruptible, 0); + atomic_set(&cvirt->nr_onhold, 0); + + down_read(&uts_sem); + cvirt->utsname = system_utsname; + up_read(&uts_sem); + + spin_lock_init(&cvirt->load_lock); + cvirt->load_last = jiffies; + atomic_set(&cvirt->load_updates, 0); + cvirt->load[0] = 0; + cvirt->load[1] = 0; + cvirt->load[2] = 0; + atomic_set(&cvirt->total_forks, 0); + + spin_lock_init(&cvirt->syslog.logbuf_lock); + init_waitqueue_head(&cvirt->syslog.log_wait); + cvirt->syslog.log_start = 0; + cvirt->syslog.log_end = 0; + cvirt->syslog.con_start = 0; + cvirt->syslog.logged_chars = 0; +} + +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt) +{ +#ifdef CONFIG_VSERVER_DEBUG + int value; + + vxwprintk((value = atomic_read(&cvirt->nr_threads)), + "!!! cvirt: %p[nr_threads] = %d on exit.", + cvirt, value); + vxwprintk((value = atomic_read(&cvirt->nr_running)), + "!!! cvirt: %p[nr_running] = %d on exit.", + cvirt, value); + vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)), + "!!! cvirt: %p[nr_uninterruptible] = %d on exit.", + cvirt, value); + vxwprintk((value = atomic_read(&cvirt->nr_onhold)), + "!!! 
cvirt: %p[nr_onhold] = %d on exit.", + cvirt, value); +#endif + return; +} + +static inline void vx_info_init_cacct(struct _vx_cacct *cacct) +{ + int i,j; + + + for (i=0; i<5; i++) { + for (j=0; j<3; j++) { + atomic_set(&cacct->sock[i][j].count, 0); + atomic_set(&cacct->sock[i][j].total, 0); + } + } +} + +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct) +{ + return; +} + Index: linux-2.6.14/kernel/vserver/cvirt_proc.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/cvirt_proc.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,93 @@ +#ifndef _VX_CVIRT_PROC_H +#define _VX_CVIRT_PROC_H + +#include + + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer) +{ + int length = 0; + int a, b, c; + + length += sprintf(buffer + length, + "BiasUptime:\t%lu.%02lu\n", + (unsigned long)cvirt->bias_uptime.tv_sec, + (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100))); + length += sprintf(buffer + length, + "SysName:\t%.*s\n" + "NodeName:\t%.*s\n" + "Release:\t%.*s\n" + "Version:\t%.*s\n" + "Machine:\t%.*s\n" + "DomainName:\t%.*s\n" + ,__NEW_UTS_LEN, cvirt->utsname.sysname + ,__NEW_UTS_LEN, cvirt->utsname.nodename + ,__NEW_UTS_LEN, cvirt->utsname.release + ,__NEW_UTS_LEN, cvirt->utsname.version + ,__NEW_UTS_LEN, cvirt->utsname.machine + ,__NEW_UTS_LEN, cvirt->utsname.domainname + ); + + a = cvirt->load[0] + (FIXED_1/200); + b = cvirt->load[1] + (FIXED_1/200); + c = cvirt->load[2] + (FIXED_1/200); + length += sprintf(buffer + length, + "nr_threads:\t%d\n" + "nr_running:\t%d\n" + "nr_unintr:\t%d\n" + "nr_onhold:\t%d\n" + "load_updates:\t%d\n" + "loadavg:\t%d.%02d %d.%02d %d.%02d\n" + "total_forks:\t%d\n" + ,atomic_read(&cvirt->nr_threads) + ,atomic_read(&cvirt->nr_running) + ,atomic_read(&cvirt->nr_uninterruptible) + ,atomic_read(&cvirt->nr_onhold) + 
,atomic_read(&cvirt->load_updates) + ,LOAD_INT(a), LOAD_FRAC(a) + ,LOAD_INT(b), LOAD_FRAC(b) + ,LOAD_INT(c), LOAD_FRAC(c) + ,atomic_read(&cvirt->total_forks) + ); + return length; +} + + +static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].count); +} + + +static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].total); +} + +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer) +{ + int i,j, length = 0; + static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" }; + + for (i=0; i<5; i++) { + length += sprintf(buffer + length, + "%s:", type[i]); + for (j=0; j<3; j++) { + length += sprintf(buffer + length, + "\t%12lu/%-12lu" + ,vx_sock_count(cacct, i, j) + ,vx_sock_total(cacct, i, j) + ); + } + buffer[length++] = '\n'; + } +/* length += sprintf(buffer + length, + "forks:\t%lu\n", cacct->total_forks); +*/ + return length; +} + +#endif /* _VX_CVIRT_PROC_H */ Index: linux-2.6.14/kernel/vserver/debug.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/debug.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,37 @@ +/* + * kernel/vserver/debug.c + * + * Copyright (C) 2005 Herbert Pötzl + * + * V0.01 vx_info dump support + * + */ + +#include +#include +#include +#include + +#include +#include +#include + + +void dump_vx_info(struct vx_info *vxi, int level) +{ + printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id, + atomic_read(&vxi->vx_usecnt), + atomic_read(&vxi->vx_tasks), + vxi->vx_state); + if (level > 0) { + __dump_vx_limit(&vxi->limit); + __dump_vx_sched(&vxi->sched); + __dump_vx_cvirt(&vxi->cvirt); + __dump_vx_cacct(&vxi->cacct); + } + printk("---\n"); +} + + +EXPORT_SYMBOL_GPL(dump_vx_info); + Index: linux-2.6.14/kernel/vserver/dlimit.c 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/dlimit.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,544 @@ +/* + * linux/kernel/vserver/dlimit.c + * + * Virtual Server: Context Disk Limits + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 initial version + * V0.02 compat32 splitup + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* __alloc_dl_info() + + * allocate an initialized dl_info struct + * doesn't make it visible (hash) */ + +static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *new = NULL; + + vxdprintk(VXD_CBIT(dlim, 5), + "alloc_dl_info(%p,%d)*", sb, xid); + + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct dl_info)); + new->dl_xid = xid; + new->dl_sb = sb; + INIT_RCU_HEAD(&new->dl_rcu); + INIT_HLIST_NODE(&new->dl_hlist); + spin_lock_init(&new->dl_lock); + atomic_set(&new->dl_refcnt, 0); + atomic_set(&new->dl_usecnt, 0); + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(dlim, 4), + "alloc_dl_info(%p,%d) = %p", sb, xid, new); + return new; +} + +/* __dealloc_dl_info() + + * final disposal of dl_info */ + +static void __dealloc_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 4), + "dealloc_dl_info(%p)", dli); + + dli->dl_hlist.next = LIST_POISON1; + dli->dl_xid = -1; + dli->dl_sb = 0; + + BUG_ON(atomic_read(&dli->dl_usecnt)); + BUG_ON(atomic_read(&dli->dl_refcnt)); + + kfree(dli); +} + + +/* hash table for dl_info hash */ + +#define DL_HASH_SIZE 13 + +struct hlist_head dl_info_hash[DL_HASH_SIZE]; + +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(struct super_block *sb, xid_t xid) +{ + return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE); +} + + + 
+/* __hash_dl_info() + + * add the dli to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_dl_info(struct dl_info *dli) +{ + struct hlist_head *head; + + vxdprintk(VXD_CBIT(dlim, 6), + "__hash_dl_info: %p[#%d]", dli, dli->dl_xid); + get_dl_info(dli); + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)]; + hlist_add_head_rcu(&dli->dl_hlist, head); +} + +/* __unhash_dl_info() + + * remove the dli from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 6), + "__unhash_dl_info: %p[#%d]", dli, dli->dl_xid); + hlist_del_rcu(&dli->dl_hlist); + put_dl_info(dli); +} + + +/* __lookup_dl_info() + + * requires the rcu_read_lock() + * doesn't increment the dl_refcnt */ + +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, xid_t xid) +{ + struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct dl_info *dli = + hlist_entry(pos, struct dl_info, dl_hlist); + + if (dli->dl_xid == xid && dli->dl_sb == sb) { + return dli; + } + } + return NULL; +} + + +struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *dli; + + rcu_read_lock(); + dli = get_dl_info(__lookup_dl_info(sb, xid)); + vxdprintk(VXD_CBIT(dlim, 7), + "locate_dl_info(%p,#%d) = %p", sb, xid, dli); + rcu_read_unlock(); + return dli; +} + +void rcu_free_dl_info(struct rcu_head *head) +{ + struct dl_info *dli = container_of(head, struct dl_info, dl_rcu); + int usecnt, refcnt; + + BUG_ON(!dli || !head); + + usecnt = atomic_read(&dli->dl_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&dli->dl_refcnt); + BUG_ON(refcnt < 0); + + vxdprintk(VXD_CBIT(dlim, 3), + "rcu_free_dl_info(%p)", dli); + if (!usecnt) + __dealloc_dl_info(dli); + else + printk("!!! 
rcu didn't free\n"); +} + +/* do_addrem_dlimit(): if add is set, hash a new dl_info for (sb of name, id), + * else unhash the existing one; returns 0 or a negative errno (flags unused) */ + +int do_addrem_dlimit(uint32_t id, const char __user *name, + uint32_t flags, int add) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + if (add) { + dli = __alloc_dl_info(sb, id); + spin_lock(&dl_info_hash_lock); + /* __alloc_dl_info() may fail: never hash a NULL dli */ + ret = dli ? -EEXIST : -ENOMEM; + if (!dli || __lookup_dl_info(sb, id)) + goto out_unlock; + __hash_dl_info(dli); + dli = NULL; + } else { + spin_lock(&dl_info_hash_lock); + dli = __lookup_dl_info(sb, id); + + ret = -ESRCH; + if (!dli) + goto out_unlock; + __unhash_dl_info(dli); + } + ret = 0; + out_unlock: + spin_unlock(&dl_info_hash_lock); + if (add && dli) + __dealloc_dl_info(dli); + out_release: + path_release(&nd); + } + return ret; +} + +int vc_add_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1); +} + +int vc_rem_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0); +} + +#ifdef CONFIG_COMPAT + +int vc_add_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0_x32 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, + compat_ptr(vc_data.name_ptr), vc_data.flags, 1); +} + +int vc_rem_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0_x32 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, 
data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, + compat_ptr(vc_data.name_ptr), vc_data.flags, 0); +} + +#endif /* CONFIG_COMPAT */ + + +static inline +int do_set_dlimit(uint32_t id, const char __user *name, + uint32_t space_used, uint32_t space_total, + uint32_t inodes_used, uint32_t inodes_total, + uint32_t reserved, uint32_t flags) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if ((reserved != (uint32_t)CDLIM_KEEP && + reserved > 100) || + (inodes_used != (uint32_t)CDLIM_KEEP && + inodes_used > inodes_total) || + (space_used != (uint32_t)CDLIM_KEEP && + space_used > space_total)) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + + if (inodes_used != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_used = inodes_used; + if (inodes_total != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_total = inodes_total; + if (space_used != (uint32_t)CDLIM_KEEP) { + dli->dl_space_used = space_used; + dli->dl_space_used <<= 10; + } + if (space_total == (uint32_t)CDLIM_INFINITY) + dli->dl_space_total = (uint64_t)CDLIM_INFINITY; + else if (space_total != (uint32_t)CDLIM_KEEP) { + dli->dl_space_total = space_total; + dli->dl_space_total <<= 10; + } + if (reserved != (uint32_t)CDLIM_KEEP) + dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100; + + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = 0; + + out_release: + path_release(&nd); + } + return ret; +} + +int vc_set_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_dlimit(id, vc_data.name, + vc_data.space_used, vc_data.space_total, + 
vc_data.inodes_used, vc_data.inodes_total, + vc_data.reserved, vc_data.flags); +} + +#ifdef CONFIG_COMPAT + +int vc_set_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0_x32 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_dlimit(id, compat_ptr(vc_data.name_ptr), + vc_data.space_used, vc_data.space_total, + vc_data.inodes_used, vc_data.inodes_total, + vc_data.reserved, vc_data.flags); +} + +#endif /* CONFIG_COMPAT */ + + +static inline +int do_get_dlimit(uint32_t id, const char __user *name, + uint32_t *space_used, uint32_t *space_total, + uint32_t *inodes_used, uint32_t *inodes_total, + uint32_t *reserved, uint32_t *flags) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + *inodes_used = dli->dl_inodes_used; + *inodes_total = dli->dl_inodes_total; + *space_used = dli->dl_space_used >> 10; + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + *space_total = (uint32_t)CDLIM_INFINITY; + else + *space_total = dli->dl_space_total >> 10; + + *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = -EFAULT; + + ret = 0; + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_get_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_dlimit(id, vc_data.name, + &vc_data.space_used, &vc_data.space_total, + &vc_data.inodes_used, &vc_data.inodes_total, + &vc_data.reserved, 
&vc_data.flags); + if (ret) + return ret; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_COMPAT + +int vc_get_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0_x32 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr), + &vc_data.space_used, &vc_data.space_total, + &vc_data.inodes_used, &vc_data.inodes_total, + &vc_data.reserved, &vc_data.flags); + if (ret) + return ret; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#endif /* CONFIG_COMPAT */ + + +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct dl_info *dli; + __u64 blimit, bfree, bavail; + __u32 ifree; + + dli = locate_dl_info(sb, vx_current_xid()); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY) + goto no_ilim; + + /* reduce max inodes available to limit */ + if (buf->f_files > dli->dl_inodes_total) + buf->f_files = dli->dl_inodes_total; + + ifree = dli->dl_inodes_total - dli->dl_inodes_used; + /* reduce free inodes to min */ + if (ifree < buf->f_ffree) + buf->f_ffree = ifree; + +no_ilim: + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + goto no_blim; + + blimit = dli->dl_space_total >> sb->s_blocksize_bits; + + if (dli->dl_space_total < dli->dl_space_used) + bfree = 0; + else + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); + if (bavail < dli->dl_space_used) + bavail = 0; + else + bavail = (bavail - dli->dl_space_used) + >> sb->s_blocksize_bits; + + /* reduce max space available to limit */ + if (buf->f_blocks > blimit) + buf->f_blocks = blimit; + + /* reduce free space to min */ + if (bfree < buf->f_bfree) + buf->f_bfree = bfree; + + /* reduce avail space to 
min */ + if (bavail < buf->f_bavail) + buf->f_bavail = bavail; + +no_blim: + spin_unlock(&dli->dl_lock); + put_dl_info(dli); + + return; +} + +#include + +EXPORT_SYMBOL_GPL(locate_dl_info); +EXPORT_SYMBOL_GPL(rcu_free_dl_info); + Index: linux-2.6.14/kernel/vserver/helper.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/helper.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,200 @@ +/* + * linux/kernel/vserver/helper.c + * + * Virtual Context Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic helper + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +char vshelper_path[255] = "/sbin/vshelper"; + + +int do_vshelper(char *name, char *argv[], char *envp[], int sync) +{ + int ret; + + if ((ret = call_usermodehelper(name, argv, envp, sync))) { + printk( KERN_WARNING + "%s: (%s %s) returned %s with %d\n", + name, argv[1], argv[2], + sync?"sync":"async", ret); + } + vxdprintk(VXD_CBIT(switch, 4), + "%s: (%s %s) returned %s with %d", + name, argv[1], argv[2], sync?"sync":"async", ret); + return ret; +} + +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = action: "restart", "halt", "poweroff", ... 
+ * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_reboot_helper(struct vx_info *vxi, int cmd, void *arg) +{ + char id_buf[8], cmd_buf[16]; + char uid_buf[16], pid_buf[16]; + int ret; + + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + uid_buf, pid_buf, cmd_buf, 0}; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); + + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid); + snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid); + + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + argv[1] = "restart"; + break; + + case LINUX_REBOOT_CMD_HALT: + argv[1] = "halt"; + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + argv[1] = "poweroff"; + break; + + case LINUX_REBOOT_CMD_SW_SUSPEND: + argv[1] = "swsusp"; + break; + + default: + return 0; + } + +#ifndef CONFIG_VSERVER_LEGACY + ret = do_vshelper(vshelper_path, argv, envp, 1); +#else + ret = do_vshelper(vshelper_path, argv, envp, 0); +#endif + return (ret) ? 
-EPERM : 0; +} + + +long vs_reboot(unsigned int cmd, void * arg) +{ + struct vx_info *vxi = current->vx_info; + long ret = 0; + + vxdprintk(VXD_CBIT(misc, 5), + "vs_reboot(%p[#%d],%d)", + vxi, vxi?vxi->vx_id:0, cmd); + if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) { + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + case LINUX_REBOOT_CMD_HALT: + case LINUX_REBOOT_CMD_POWER_OFF: + vx_info_kill(vxi, 0, SIGKILL); + vx_info_kill(vxi, 1, SIGKILL); + default: + break; + } + } else { + ret = vs_reboot_helper(vxi, cmd, arg); + } + return ret; +} + + +/* + * argv [0] = vshelper_path; + * argv [1] = action: "startup", "shutdown" + * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_state_change(struct vx_info *vxi, unsigned int cmd) +{ + char id_buf[8], cmd_buf[16]; + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; + + if (!vx_info_flags(vxi, VXF_SC_HELPER, 0)) + return 0; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + + switch (cmd) { + case VSC_STARTUP: + argv[1] = "startup"; + break; + case VSC_SHUTDOWN: + argv[1] = "shutdown"; + break; + default: + return 0; + } + + do_vshelper(vshelper_path, argv, envp, 1); + return 0; +} + + +/* + * argv [0] = vshelper_path; + * argv [1] = action: "netup", "netdown" + * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_net_change(struct nx_info *nxi, unsigned int cmd) +{ + char id_buf[8], cmd_buf[16]; + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; + + if (!nx_info_flags(nxi, NXF_SC_HELPER, 0)) + return 0; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", nxi->nx_id); + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + + switch (cmd) { + case VSC_NETUP: + argv[1] = "netup"; + break; + case 
VSC_NETDOWN: + argv[1] = "netdown"; + break; + default: + return 0; + } + + do_vshelper(vshelper_path, argv, envp, 1); + return 0; +} + Index: linux-2.6.14/kernel/vserver/history.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/history.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,184 @@ +/* + * kernel/vserver/history.c + * + * Virtual Context History Backtrace + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 hash/unhash and trace + * V0.03 preemption fixes + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + + +#ifdef CONFIG_VSERVER_HISTORY +#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE +#else +#define VXH_SIZE 64 +#endif + +struct _vx_history { + unsigned int counter; + + struct _vx_hist_entry entry[VXH_SIZE+1]; +}; + + +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer); + +unsigned volatile int vxh_active = 1; + +static atomic_t sequence = ATOMIC_INIT(0); + + +/* vxh_advance() + + * requires disabled preemption */ + +struct _vx_hist_entry *vxh_advance(void *loc) +{ + unsigned int cpu = smp_processor_id(); + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); + struct _vx_hist_entry *entry; + unsigned int index; + + index = vxh_active ? 
(hist->counter++ % VXH_SIZE) : VXH_SIZE; + entry = &hist->entry[index]; + + entry->seq = atomic_inc_return(&sequence); + entry->loc = loc; + return entry; +} + + +#define VXH_LOC_FMTS "(#%04x,*%d):%p" + +#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc + + +#define VXH_VXI_FMTS "%p[#%d,%d.%d]" + +#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \ + (e)->vxi.ptr?(e)->vxi.xid:0, \ + (e)->vxi.ptr?(e)->vxi.usecnt:0, \ + (e)->vxi.ptr?(e)->vxi.tasks:0 + +void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu) +{ + switch (e->type) { + case VXH_THROW_OOPS: + printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e)); + break; + + case VXH_GET_VX_INFO: + case VXH_PUT_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_GET_VX_INFO)?"get":"put", + VXH_VXI_ARGS(e)); + break; + + case VXH_INIT_VX_INFO: + case VXH_SET_VX_INFO: + case VXH_CLR_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", + VXH_LOC_ARGS(e), + (e->type==VXH_INIT_VX_INFO)?"init": + ((e->type==VXH_SET_VX_INFO)?"set":"clr"), + VXH_VXI_ARGS(e), e->sc.data); + break; + + case VXH_CLAIM_VX_INFO: + case VXH_RELEASE_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", + VXH_LOC_ARGS(e), + (e->type==VXH_CLAIM_VX_INFO)?"claim":"release", + VXH_VXI_ARGS(e), e->sc.data); + break; + + case VXH_ALLOC_VX_INFO: + case VXH_DEALLOC_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_ALLOC_VX_INFO)?"alloc":"dealloc", + VXH_VXI_ARGS(e)); + break; + + case VXH_HASH_VX_INFO: + case VXH_UNHASH_VX_INFO: + printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_HASH_VX_INFO)?"hash":"unhash", + VXH_VXI_ARGS(e)); + break; + + case VXH_LOC_VX_INFO: + case VXH_LOOKUP_VX_INFO: + case VXH_CREATE_VX_INFO: + printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_CREATE_VX_INFO)?"create": + ((e->type==VXH_LOC_VX_INFO)?"loc":"lookup"), + e->ll.arg, 
VXH_VXI_ARGS(e)); + break; + } +} + +static void __vxh_dump_history(void) +{ + unsigned int i,j; + + printk("History:\tSEQ: %8x\tNR_CPUS: %d\n", + atomic_read(&sequence), NR_CPUS); + + for (i=0; i < VXH_SIZE; i++) { + for (j=0; j < NR_CPUS; j++) { + struct _vx_history *hist = + &per_cpu(vx_history_buffer, j); + unsigned int index = (hist->counter-i) % VXH_SIZE; + struct _vx_hist_entry *entry = &hist->entry[index]; + + vxh_dump_entry(entry, j); + } + } +} + +void vxh_dump_history(void) +{ + vxh_active = 0; +#ifdef CONFIG_SMP + local_irq_enable(); + smp_send_stop(); + local_irq_disable(); +#endif + __vxh_dump_history(); +} + + +/* vserver syscall commands below here */ + + +int vc_dump_history(uint32_t id) +{ + vxh_active = 0; + __vxh_dump_history(); + vxh_active = 1; + + return 0; +} + +EXPORT_SYMBOL_GPL(vxh_advance); + Index: linux-2.6.14/kernel/vserver/init.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/init.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,44 @@ +/* + * linux/kernel/init.c + * + * Virtual Server Init + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +#include + +int vserver_register_sysctl(void); +void vserver_unregister_sysctl(void); + + +static int __init init_vserver(void) +{ + int ret = 0; + +#ifdef CONFIG_VSERVER_DEBUG + vserver_register_sysctl(); +#endif + return ret; +} + + +static void __exit exit_vserver(void) +{ + +#ifdef CONFIG_VSERVER_DEBUG + vserver_unregister_sysctl(); +#endif + return; +} + + +module_init(init_vserver); +module_exit(exit_vserver); + Index: linux-2.6.14/kernel/vserver/inode.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/inode.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,366 @@ +/* + * linux/kernel/vserver/inode.c + * + * Virtual Server: File 
System Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 separated from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + struct proc_dir_entry *entry; + + if (!in || !in->i_sb) + return -ESRCH; + + *flags = IATTR_XID + | (IS_BARRIER(in) ? IATTR_BARRIER : 0) + | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0) + | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0); + *mask = IATTR_IUNLINK | IATTR_IMMUTABLE; + + if (S_ISDIR(in->i_mode)) + *mask |= IATTR_BARRIER; + + if (IS_TAGXID(in)) { + *xid = in->i_xid; + *mask |= IATTR_XID; + } + + switch (in->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + entry = PROC_I(in)->pde; + + /* check for specific inodes? */ + if (entry) + *mask |= IATTR_FLAGS; + if (entry) + *flags |= (entry->vx_flags & IATTR_FLAGS); + else + *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS); + break; + + case DEVPTS_SUPER_MAGIC: + *xid = in->i_xid; + *mask |= IATTR_XID; + break; + + default: + break; + } + return 0; +} + +int vc_get_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 }; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_get_iattr_x32(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1_x32 vc_data = { .xid = -1 }; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return 
-EFAULT; + + ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + + +static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + struct inode *in = de->d_inode; + int error = 0, is_proc = 0, has_xid = 0; + struct iattr attr = { 0 }; + + if (!in || !in->i_sb) + return -ESRCH; + + is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC); + if ((*mask & IATTR_FLAGS) && !is_proc) + return -EINVAL; + + has_xid = IS_TAGXID(in) || + (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC); + if ((*mask & IATTR_XID) && !has_xid) + return -EINVAL; + + down(&in->i_sem); + if (*mask & IATTR_XID) { + attr.ia_xid = *xid; + attr.ia_valid |= ATTR_XID; + } + + if (*mask & IATTR_FLAGS) { + struct proc_dir_entry *entry = PROC_I(in)->pde; + unsigned int iflags = PROC_I(in)->vx_flags; + + iflags = (iflags & ~(*mask & IATTR_FLAGS)) + | (*flags & IATTR_FLAGS); + PROC_I(in)->vx_flags = iflags; + if (entry) + entry->vx_flags = iflags; + } + + if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) { + attr.ia_valid |= ATTR_ATTR_FLAG; + if (*mask & IATTR_IMMUTABLE) { + if (*flags & IATTR_IMMUTABLE) + in->i_flags |= S_IMMUTABLE; + else + in->i_flags &= ~S_IMMUTABLE; + } + if (*mask & IATTR_IUNLINK) { + if (*flags & IATTR_IUNLINK) + in->i_flags |= S_IUNLINK; + else + in->i_flags &= ~S_IUNLINK; + } + if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) { + if (*flags & IATTR_BARRIER) + in->i_flags |= S_BARRIER; + else + in->i_flags &= ~S_BARRIER; + } + } + + if (attr.ia_valid) { + if (in->i_op && in->i_op->setattr) + error = in->i_op->setattr(de, &attr); + else { + error = inode_change_ok(in, &attr); + if (!error) + error = inode_setattr(in, &attr); + } + } + + up(&in->i_sem); + return 0; 
+} + +int vc_set_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data; + int ret; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_set_iattr_x32(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1_x32 vc_data; + int ret; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_VSERVER_LEGACY + +#define PROC_DYNAMIC_FIRST 0xF0000000UL + +int vx_proc_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + struct proc_dir_entry *entry; + int error = 0; + int flags; + + if (inode->i_ino < PROC_DYNAMIC_FIRST) + return -ENOTTY; + + entry = PROC_I(inode)->pde; + if (!entry) + return -ENOTTY; + + switch(cmd) { + case FIOC_GETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + flags = entry->vx_flags; + if (capable(CAP_CONTEXT)) + error = put_user(flags, (int *) arg); + break; + } + case FIOC_SETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -EFAULT; + if (get_user(flags, (int *) arg)) + break; + error = 0; + 
entry->vx_flags = flags; + break; + } + default: + return -ENOTTY; + } + return error; +} +#endif /* CONFIG_VSERVER_LEGACY */ + +#ifdef CONFIG_XID_PROPAGATE + +int vx_parse_xid(char *string, xid_t *xid, int remove) +{ + static match_table_t tokens = { + {1, "xid=%u"}, + {0, NULL} + }; + substring_t args[MAX_OPT_ARGS]; + int token, option = 0; + + if (!string) + return 0; + + token = match_token(string, tokens, args); + if (token && xid && !match_int(args, &option)) + *xid = option; + + vxdprintk(VXD_CBIT(xid, 7), + "vx_parse_xid(»%s«): %d:#%d", + string, token, option); + + if (token && remove) { + char *p = strstr(string, "xid="); + char *q = p; + + if (p) { + while (*q != '\0' && *q != ',') + q++; + while (*q) + *p++ = *q++; + while (*p) + *p++ = '\0'; + } + } + return token; +} + +void __vx_propagate_xid(struct nameidata *nd, struct inode *inode) +{ + xid_t new_xid = 0; + struct vfsmount *mnt; + int propagate; + + if (!nd) + return; + mnt = nd->mnt; + if (!mnt) + return; + + propagate = (mnt->mnt_flags & MNT_XID); + if (propagate) + new_xid = mnt->mnt_xid; + + vxdprintk(VXD_CBIT(xid, 7), + "vx_propagate_xid(%p[#%lu.%d]): %d,%d", + inode, inode->i_ino, inode->i_xid, + new_xid, (propagate)?1:0); + + if (propagate) + inode->i_xid = new_xid; +} + +#include + +EXPORT_SYMBOL_GPL(__vx_propagate_xid); + +#endif /* CONFIG_XID_PROPAGATE */ + Index: linux-2.6.14/kernel/vserver/legacy.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/legacy.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,109 @@ +/* + * linux/kernel/vserver/legacy.c + * + * Virtual Server: Legacy Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext.c V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + + +static int vx_set_initpid(struct vx_info *vxi, int pid) +{ + 
if (vxi->vx_initpid) + return -EPERM; + + vxi->vx_initpid = pid; + return 0; +} + +int vc_new_s_context(uint32_t ctx, void __user *data) +{ + int ret = -ENOMEM; + struct vcmd_new_s_context_v1 vc_data; + struct vx_info *new_vxi; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* legacy hack, will be removed soon */ + if (ctx == -2) { + /* assign flags and initpid */ + if (!current->vx_info) + return -EINVAL; + ret = 0; + if (vc_data.flags & VX_INFO_INIT) + ret = vx_set_initpid(current->vx_info, current->tgid); + if (ret == 0) { + /* We keep the same vx_id, but lower the capabilities */ + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + ret = vx_current_xid(); + current->vx_info->vx_flags |= vc_data.flags; + } + return ret; + } + + if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN) + /* might make sense in the future, or not ... */ + || vx_flags(VX_INFO_LOCK, 0)) + return -EPERM; + + /* ugly hack for Spectator */ + if (ctx == 1) { + current->xid = 1; + return 0; + } + + if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) || + (ctx == 0)) + return -EINVAL; + + if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT)) + new_vxi = lookup_or_create_vx_info(ctx); + else + new_vxi = lookup_vx_info(ctx); + + if (!new_vxi) + return -EINVAL; + + ret = -EPERM; + if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) && + vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0)) + goto out_put; + + new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT); + + ret = vx_migrate_task(current, new_vxi); + if (ret == 0) { + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + new_vxi->vx_flags |= vc_data.flags; + if (vc_data.flags & VX_INFO_INIT) + vx_set_initpid(new_vxi, current->tgid); + if (vc_data.flags & VX_INFO_NAMESPACE) + vx_set_namespace(new_vxi, + current->namespace, current->fs); + if (vc_data.flags & VX_INFO_NPROC) + new_vxi->limit.rlim[RLIMIT_NPROC] = + current->signal->rlim[RLIMIT_NPROC].rlim_max; + ret = new_vxi->vx_id; + } +out_put: + 
put_vx_info(new_vxi); + return ret; +} + Index: linux-2.6.14/kernel/vserver/legacynet.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/legacynet.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,86 @@ + +/* + * linux/kernel/vserver/legacynet.c + * + * Virtual Server: Legacy Network Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from legacy.c + * + */ + +#include +#include +#include +#include +#include +// #include +#include +#include + +#include +#include + + +extern struct nx_info *create_nx_info(void); + +/* set ipv4 root (syscall) */ + +int vc_set_ipv4root(uint32_t nbip, void __user *data) +{ + int i, err = -EPERM; + struct vcmd_set_ipv4root_v3 vc_data; + struct nx_info *new_nxi, *nxi = current->nx_info; + + if (nbip < 0 || nbip > NB_IPV4ROOT) + return -EINVAL; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN)) + /* We are allowed to change everything */ + err = 0; + else if (nxi) { + int found = 0; + + /* We are allowed to select a subset of the currently + installed IP numbers. No new one are allowed + We can't change the broadcast address though */ + for (i=0; inbipv4; j++) { + if (nxip == nxi->ipv4[j]) { + found++; + break; + } + } + } + if ((found == nbip) && + (vc_data.broadcast == nxi->v4_bcast)) + err = 0; + } + if (err) + return err; + + new_nxi = create_nx_info(); + if (IS_ERR(new_nxi)) + return -EINVAL; + + new_nxi->nbipv4 = nbip; + for (i=0; iipv4[i] = vc_data.nx_mask_pair[i].ip; + new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask; + } + new_nxi->v4_bcast = vc_data.broadcast; + if (nxi) + printk("!!! 
switching nx_info %p->%p\n", nxi, new_nxi); + + nx_migrate_task(current, new_nxi); + put_nx_info(new_nxi); + return 0; +} + + Index: linux-2.6.14/kernel/vserver/limit.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/limit.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,178 @@ +/* + * linux/kernel/vserver/limit.c + * + * Virtual Server: Context Limits + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +const char *vlimit_name[NUM_LIMITS] = { + [RLIMIT_CPU] = "CPU", + [RLIMIT_RSS] = "RSS", + [RLIMIT_NPROC] = "NPROC", + [RLIMIT_NOFILE] = "NOFILE", + [RLIMIT_MEMLOCK] = "VML", + [RLIMIT_AS] = "VM", + [RLIMIT_LOCKS] = "LOCKS", + [RLIMIT_SIGPENDING] = "SIGP", + [RLIMIT_MSGQUEUE] = "MSGQ", + + [VLIMIT_NSOCK] = "NSOCK", + [VLIMIT_OPENFD] = "OPENFD", + [VLIMIT_ANON] = "ANON", + [VLIMIT_SHMEM] = "SHMEM", +}; + +EXPORT_SYMBOL_GPL(vlimit_name); + + +static int is_valid_rlimit(int id) +{ + int valid = 0; + + switch (id) { + case RLIMIT_RSS: + case RLIMIT_NPROC: + case RLIMIT_NOFILE: + case RLIMIT_MEMLOCK: + case RLIMIT_AS: + + case VLIMIT_NSOCK: + case VLIMIT_OPENFD: + case VLIMIT_ANON: + case VLIMIT_SHMEM: + valid = 1; + break; + } + return valid; +} + +static inline uint64_t vc_get_rlim(struct vx_info *vxi, int id) +{ + unsigned long limit; + + limit = vxi->limit.rlim[id]; + if (limit == RLIM_INFINITY) + return CRLIM_INFINITY; + return limit; +} + +int vc_get_rlimit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.maximum = vc_get_rlim(vxi, vc_data.id); + vc_data.minimum = CRLIM_UNSET; 
+ vc_data.softlimit = CRLIM_UNSET; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_rlimit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + if (vc_data.maximum != CRLIM_KEEP) + vxi->limit.rlim[vc_data.id] = vc_data.maximum; + put_vx_info(vxi); + + return 0; +} + +int vc_get_rlimit_mask(uint32_t id, void __user *data) +{ + static struct vcmd_ctx_rlimit_mask_v0 mask = { + /* minimum */ + 0 + , /* softlimit */ + 0 + , /* maximum */ + (1 << RLIMIT_RSS) | + (1 << RLIMIT_NPROC) | + (1 << RLIMIT_NOFILE) | + (1 << RLIMIT_MEMLOCK) | + (1 << RLIMIT_LOCKS) | + (1 << RLIMIT_AS) | + (1 << VLIMIT_ANON) | + 0 + }; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_to_user(data, &mask, sizeof(mask))) + return -EFAULT; + return 0; +} + + +void vx_vsi_meminfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long v; + + v = vxi->limit.rlim[RLIMIT_RSS]; + if (v != RLIM_INFINITY) + val->totalram = min(val->totalram, v); + v = atomic_read(&vxi->limit.rcur[RLIMIT_RSS]); + val->freeram = (v < val->totalram) ? val->totalram - v : 0; + val->bufferram = 0; + val->totalhigh = 0; + val->freehigh = 0; + return; +} + +void vx_vsi_swapinfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long v, w; + + v = vxi->limit.rlim[RLIMIT_RSS]; + w = vxi->limit.rlim[RLIMIT_AS]; + if (w != RLIM_INFINITY) + val->totalswap = min(val->totalswap, w - + ((v != RLIM_INFINITY) ? v : 0)); + w = atomic_read(&vxi->limit.rcur[RLIMIT_AS]); + val->freeswap = (w < val->totalswap) ? 
val->totalswap - w : 0; + return; +} + Index: linux-2.6.14/kernel/vserver/limit_init.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/limit_init.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,28 @@ + +static inline void vx_info_init_limit(struct _vx_limit *limit) +{ + int lim; + + for (lim=0; limrlim[lim] = RLIM_INFINITY; + limit->rmax[lim] = 0; + atomic_set(&limit->rcur[lim], 0); + atomic_set(&limit->lhit[lim], 0); + } +} + +static inline void vx_info_exit_limit(struct _vx_limit *limit) +{ +#ifdef CONFIG_VSERVER_DEBUG + unsigned long value; + unsigned int lim; + + for (lim=0; limrcur[lim]); + vxwprintk(value, + "!!! limit: %p[%s,%d] = %ld on exit.", + limit, vlimit_name[lim], lim, value); + } +#endif +} + Index: linux-2.6.14/kernel/vserver/limit_proc.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/limit_proc.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,58 @@ +#ifndef _VX_LIMIT_PROC_H +#define _VX_LIMIT_PROC_H + + +static inline void vx_limit_fixup(struct _vx_limit *limit) +{ + unsigned long value; + unsigned int lim; + + for (lim=0; limrcur[lim]); + if (value > limit->rmax[lim]) + limit->rmax[lim] = value; + if (limit->rmax[lim] > limit->rlim[lim]) + limit->rmax[lim] = limit->rlim[lim]; + } +} + +#define VX_LIMIT_FMT ":\t%10d\t%10ld\t%10ld\t%6d\n" + +#define VX_LIMIT_ARG(r) \ + ,atomic_read(&limit->rcur[r]) \ + ,limit->rmax[r] \ + ,limit->rlim[r] \ + ,atomic_read(&limit->lhit[r]) + +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer) +{ + vx_limit_fixup(limit); + return sprintf(buffer, + "PROC" VX_LIMIT_FMT + "VM" VX_LIMIT_FMT + "VML" VX_LIMIT_FMT + "RSS" VX_LIMIT_FMT + "ANON" VX_LIMIT_FMT + "FILES" VX_LIMIT_FMT + "OFD" VX_LIMIT_FMT + "LOCKS" VX_LIMIT_FMT + "SOCK" VX_LIMIT_FMT + "MSGQ" VX_LIMIT_FMT + "SHM" VX_LIMIT_FMT + 
VX_LIMIT_ARG(RLIMIT_NPROC) + VX_LIMIT_ARG(RLIMIT_AS) + VX_LIMIT_ARG(RLIMIT_MEMLOCK) + VX_LIMIT_ARG(RLIMIT_RSS) + VX_LIMIT_ARG(VLIMIT_ANON) + VX_LIMIT_ARG(RLIMIT_NOFILE) + VX_LIMIT_ARG(VLIMIT_OPENFD) + VX_LIMIT_ARG(RLIMIT_LOCKS) + VX_LIMIT_ARG(VLIMIT_NSOCK) + VX_LIMIT_ARG(RLIMIT_MSGQUEUE) + VX_LIMIT_ARG(VLIMIT_SHMEM) + ); +} + +#endif /* _VX_LIMIT_PROC_H */ + + Index: linux-2.6.14/kernel/vserver/namespace.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/namespace.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,124 @@ +/* + * linux/kernel/vserver/namespace.c + * + * Virtual Server: Context Namespace Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from context.c 0.07 + * V0.02 added task locking for namespace + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* namespace functions */ + +#include + +int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs) +{ + struct fs_struct *fs_copy; + + if (vxi->vx_namespace) + return -EPERM; + if (!ns || !fs) + return -EINVAL; + + fs_copy = copy_fs_struct(fs); + if (!fs_copy) + return -ENOMEM; + + get_namespace(ns); + vxi->vx_namespace = ns; + vxi->vx_fs = fs_copy; + return 0; +} + +int vc_enter_namespace(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct fs_struct *old_fs, *fs; + struct namespace *old_ns; + int ret = 0; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = -EINVAL; + if (!vxi->vx_namespace) + goto out_put; + + ret = -ENOMEM; + fs = copy_fs_struct(vxi->vx_fs); + if (!fs) + goto out_put; + + ret = 0; + task_lock(current); + old_ns = current->namespace; + old_fs = current->fs; + get_namespace(vxi->vx_namespace); + current->namespace = vxi->vx_namespace; + current->fs = fs; + task_unlock(current); + + 
put_namespace(old_ns); + put_fs_struct(old_fs); +out_put: + put_vx_info(vxi); + return ret; +} + +int vc_cleanup_namespace(uint32_t id, void *data) +{ + down_write(¤t->namespace->sem); + spin_lock(&vfsmount_lock); + umount_unused(current->namespace->root, current->fs); + spin_unlock(&vfsmount_lock); + up_write(¤t->namespace->sem); + return 0; +} + +int vc_set_namespace(uint32_t id, void __user *data) +{ + struct fs_struct *fs; + struct namespace *ns; + struct vx_info *vxi; + int ret; + + if (vx_check(0, VX_ADMIN|VX_WATCH)) + return -ENOSYS; + + task_lock(current); + vxi = get_vx_info(current->vx_info); + fs = current->fs; + atomic_inc(&fs->count); + ns = current->namespace; + get_namespace(current->namespace); + task_unlock(current); + + ret = vx_set_namespace(vxi, ns, fs); + + put_namespace(ns); + put_fs_struct(fs); + put_vx_info(vxi); + return ret; +} + Index: linux-2.6.14/kernel/vserver/network.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/network.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,760 @@ +/* + * linux/kernel/vserver/network.c + * + * Virtual Server: Network Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.02 cleaned up implementation + * V0.03 added equiv nx commands + * V0.04 switch to RCU based hash + * V0.05 and back to locking again + * + */ + +#include +#include +#include +#include +#include + +#include + + +/* __alloc_nx_info() + + * allocate an initialized nx_info struct + * doesn't make it visible (hash) */ + +static struct nx_info *__alloc_nx_info(nid_t nid) +{ + struct nx_info *new = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); + + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct nx_info)); + new->nx_id = nid; + INIT_HLIST_NODE(&new->nx_hlist); + atomic_set(&new->nx_usecnt, 0); + atomic_set(&new->nx_tasks, 0); + new->nx_state = 0; + + new->nx_flags = NXF_INIT_SET; + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(nid, 0), + "alloc_nx_info(%d) = %p", nid, new); + return new; +} + +/* __dealloc_nx_info() + + * final disposal of nx_info */ + +static void __dealloc_nx_info(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 0), + "dealloc_nx_info(%p)", nxi); + + nxi->nx_hlist.next = LIST_POISON1; + nxi->nx_id = -1; + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + + nxi->nx_state |= NXS_RELEASED; + kfree(nxi); +} + +static void __shutdown_nx_info(struct nx_info *nxi) +{ + nxi->nx_state |= NXS_SHUTDOWN; + vs_net_change(nxi, VSC_NETDOWN); +} + +/* exported stuff */ + +void free_nx_info(struct nx_info *nxi) +{ + /* context shutdown is mandatory */ + BUG_ON(nxi->nx_state != NXS_SHUTDOWN); + + /* context must not be hashed */ + BUG_ON(nxi->nx_state & NXS_HASHED); + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + + __dealloc_nx_info(nxi); +} + + +/* hash table for nx_info hash */ + +#define NX_HASH_SIZE 13 + +struct hlist_head nx_info_hash[NX_HASH_SIZE]; + +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(nid_t nid) +{ + return (nid % NX_HASH_SIZE); +} + + + +/* __hash_nx_info() + + * add the nxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_nx_info(struct nx_info *nxi) +{ + struct hlist_head *head; + + vxd_assert_lock(&nx_info_hash_lock); + vxdprintk(VXD_CBIT(nid, 4), + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); + + /* context must not be hashed */ + BUG_ON(nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state |= NXS_HASHED; + head = &nx_info_hash[__hashval(nxi->nx_id)]; 
+ hlist_add_head(&nxi->nx_hlist, head); +} + +/* __unhash_nx_info() + + * remove the nxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_nx_info(struct nx_info *nxi) +{ + vxd_assert_lock(&nx_info_hash_lock); + vxdprintk(VXD_CBIT(nid, 4), + "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id); + + /* context must be hashed */ + BUG_ON(!nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state &= ~NXS_HASHED; + hlist_del(&nxi->nx_hlist); +} + + +/* __lookup_nx_info() + + * requires the hash_lock to be held + * doesn't increment the nx_refcnt */ + +static inline struct nx_info *__lookup_nx_info(nid_t nid) +{ + struct hlist_head *head = &nx_info_hash[__hashval(nid)]; + struct hlist_node *pos; + struct nx_info *nxi; + + vxd_assert_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + + if (nxi->nx_id == nid) + goto found; + } + nxi = NULL; +found: + vxdprintk(VXD_CBIT(nid, 0), + "__lookup_nx_info(#%u): %p[#%u]", + nid, nxi, nxi?nxi->nx_id:0); + return nxi; +} + + +/* __nx_dynamic_id() + + * find unused dynamic nid + * requires the hash_lock to be held */ + +static inline nid_t __nx_dynamic_id(void) +{ + static nid_t seq = MAX_N_CONTEXT; + nid_t barrier = seq; + + vxd_assert_lock(&nx_info_hash_lock); + do { + if (++seq > MAX_N_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_nx_info(seq)) { + vxdprintk(VXD_CBIT(nid, 4), + "__nx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +/* __create_nx_info() + + * create the requested context + * get() and hash it */ + +static struct nx_info * __create_nx_info(int id) +{ + struct nx_info *new, *nxi = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id); + + if (!(new = __alloc_nx_info(id))) + return ERR_PTR(-ENOMEM); + + /* required to make dynamic xids unique */ + spin_lock(&nx_info_hash_lock); + + /* dynamic context requested */ + if (id == NX_DYNAMIC_ID) { + id = __nx_dynamic_id(); + if 
(!id) { + printk(KERN_ERR "no dynamic context available.\n"); + nxi = ERR_PTR(-EAGAIN); + goto out_unlock; + } + new->nx_id = id; + } + /* static context requested */ + else if ((nxi = __lookup_nx_info(id))) { + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) = %p (already there)", id, nxi); + if (nx_info_flags(nxi, NXF_STATE_SETUP, 0)) + nxi = ERR_PTR(-EBUSY); + else + nxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* dynamic nid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) (dynamic rejected)", id); + nxi = ERR_PTR(-EINVAL); + goto out_unlock; + } + + /* new context */ + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) = %p (new)", id, new); + __hash_nx_info(get_nx_info(new)); + nxi = new, new = NULL; + +out_unlock: + spin_unlock(&nx_info_hash_lock); + if (new) + __dealloc_nx_info(new); + return nxi; +} + + + +/* exported stuff */ + + +void unhash_nx_info(struct nx_info *nxi) +{ + __shutdown_nx_info(nxi); + spin_lock(&nx_info_hash_lock); + __unhash_nx_info(nxi); + spin_unlock(&nx_info_hash_lock); +} + +#ifdef CONFIG_VSERVER_LEGACYNET + +struct nx_info *create_nx_info(void) +{ + return __create_nx_info(NX_DYNAMIC_ID); +} + +#endif + +/* lookup_nx_info() + + * search for a nx_info and get() it + * negative id means current */ + +struct nx_info *lookup_nx_info(int id) +{ + struct nx_info *nxi = NULL; + + if (id < 0) { + nxi = get_nx_info(current->nx_info); + } else if (id > 1) { + spin_lock(&nx_info_hash_lock); + nxi = get_nx_info(__lookup_nx_info(id)); + spin_unlock(&nx_info_hash_lock); + } + return nxi; +} + +/* nid_is_hashed() + + * verify that nid is still hashed */ + +int nid_is_hashed(nid_t nid) +{ + int hashed; + + spin_lock(&nx_info_hash_lock); + hashed = (__lookup_nx_info(nid) != NULL); + spin_unlock(&nx_info_hash_lock); + return hashed; +} + + +#ifdef CONFIG_PROC_FS + +int get_nid_list(int index, unsigned int *nids, int size) +{ + int hindex, nr_nids = 0; + + for (hindex = 0; hindex < 
NX_HASH_SIZE; hindex++) { + struct hlist_head *head = &nx_info_hash[hindex]; + struct hlist_node *pos; + + spin_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + struct nx_info *nxi; + + if (--index > 0) + continue; + + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + nids[nr_nids] = nxi->nx_id; + if (++nr_nids >= size) { + spin_unlock(&nx_info_hash_lock); + goto out; + } + } + /* keep the lock time short */ + spin_unlock(&nx_info_hash_lock); + } +out: + return nr_nids; +} +#endif + + +/* + * migrate task to new network + * gets nxi, puts old_nxi on change + */ + +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) +{ + struct nx_info *old_nxi; + int ret = 0; + + if (!p || !nxi) + BUG(); + + vxdprintk(VXD_CBIT(nid, 5), + "nx_migrate_task(%p,%p[#%d.%d.%d])", + p, nxi, nxi->nx_id, + atomic_read(&nxi->nx_usecnt), + atomic_read(&nxi->nx_tasks)); + + /* maybe disallow this completely? */ + old_nxi = task_get_nx_info(p); + if (old_nxi == nxi) + goto out; + + task_lock(p); + if (old_nxi) + clr_nx_info(&p->nx_info); + claim_nx_info(nxi, p); + set_nx_info(&p->nx_info, nxi); + p->nid = nxi->nx_id; + task_unlock(p); + + vxdprintk(VXD_CBIT(nid, 5), + "moved task %p into nxi:%p[#%d]", + p, nxi, nxi->nx_id); + + if (old_nxi) + release_nx_info(old_nxi, p); +out: + put_nx_info(old_nxi); + return ret; +} + + +#include +#include + + +int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) +{ + if (!nxi) + return 1; + if (!ifa) + return 0; + return addr_in_nx_info(nxi, ifa->ifa_address); +} + +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) +{ + struct in_device *in_dev; + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + rcu_read_unlock(); + if (!nxi) + return 1; + if (!in_dev) + return 0; + + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { + if (addr_in_nx_info(nxi, ifa->ifa_address)) + return 1; + } + return 0; +} + +/* + * check if 
address is covered by socket + * + * sk: the socket to check against + * addr: the address in question (must be != 0) + */ +static inline int __addr_in_socket(struct sock *sk, uint32_t addr) +{ + struct nx_info *nxi = sk->sk_nx_info; + uint32_t saddr = inet_rcv_saddr(sk); + + vxdprintk(VXD_CBIT(net, 5), + "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx", + sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + if (saddr) { + /* direct address match */ + return (saddr == addr); + } else if (nxi) { + /* match against nx_info */ + return addr_in_nx_info(nxi, addr); + } else { + /* unrestricted any socket */ + return 1; + } +} + + +int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk) +{ + vxdprintk(VXD_CBIT(net, 2), + "nx_addr_conflict(%p,%p) %d.%d,%d.%d", + nxi, sk, VXD_QUAD(addr)); + + if (addr) { + /* check real address */ + return __addr_in_socket(sk, addr); + } else if (nxi) { + /* check against nx_info */ + int i, n = nxi->nbipv4; + + for (i=0; iipv4[i])) + return 1; + return 0; + } else { + /* check against any */ + return 1; + } +} + + +/* vserver syscall commands below here */ + +/* taks nid and nx_info functions */ + +#include + + +int vc_task_nid(uint32_t id, void __user *data) +{ + nid_t nid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + nid = (tsk) ? 
tsk->nid : -ESRCH;
+		read_unlock(&tasklist_lock);
+	}
+	else
+		nid = current->nid;
+	return nid;
+}
+
+
+int vc_nx_info(uint32_t id, void __user *data)
+{
+	struct nx_info *nxi;
+	struct vcmd_nx_info_v0 vc_data;
+
+	if (!vx_check(0, VX_ADMIN))
+		return -ENOSYS;
+	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+
+	nxi = lookup_nx_info(id);
+	if (!nxi)
+		return -ESRCH;
+
+	vc_data.nid = nxi->nx_id;
+	put_nx_info(nxi);
+
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return 0;
+}
+
+
+/* network functions */
+
+int vc_net_create(uint32_t nid, void __user *data)
+{
+	struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
+	struct nx_info *new_nxi;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))	/* NULL data keeps the default above */
+		return -EFAULT;
+
+	if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID))
+		return -EINVAL;
+	if (nid < 2)
+		return -EINVAL;
+
+	new_nxi = __create_nx_info(nid);
+	if (IS_ERR(new_nxi))
+		return PTR_ERR(new_nxi);
+
+	/* initial flags */
+	new_nxi->nx_flags = vc_data.flagword;
+
+	vs_net_change(new_nxi, VSC_NETUP);
+	ret = new_nxi->nx_id;
+	nx_migrate_task(current, new_nxi);
+	/* if this fails, we might end up with a hashed nx_info */
+	put_nx_info(new_nxi);
+	return ret;
+}
+
+
+int vc_net_migrate(uint32_t id, void __user *data)
+{
+	struct nx_info *nxi;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	nxi = lookup_nx_info(id);
+	if (!nxi)
+		return -ESRCH;
+	nx_migrate_task(current, nxi);
+	put_nx_info(nxi);
+	return 0;
+}
+
+int vc_net_add(uint32_t nid, void __user *data)
+{	/* returns the number of addresses added (1 for broadcast) or -errno */
+	struct vcmd_net_addr_v0 vc_data;
+	struct nx_info *nxi;
+	int index, pos, ret = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))	/* NULL data fails here */
+		return -EFAULT;
+
+	switch (vc_data.type) {
+	case NXA_TYPE_IPV4:
+		if ((vc_data.count < 1) || (vc_data.count > 4))
+			return -EINVAL;
+		break;
+
+	default:
+		break;
+	}
+
+	nxi = lookup_nx_info(nid);
+	if (!nxi)
+		return -ESRCH;
+
+	switch (vc_data.type) {
+	case NXA_TYPE_IPV4:
+		index = 0;
+		while ((index < vc_data.count) &&	/* stop when the table is full */
+			((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
+			nxi->ipv4[pos] = vc_data.ip[index];
+			nxi->mask[pos] = vc_data.mask[index];
+			index++;
+			nxi->nbipv4++;
+		}
+		ret = index;
+		break;
+
+	case NXA_TYPE_IPV4|NXA_MOD_BCAST:
+		nxi->v4_bcast = vc_data.ip[0];
+		ret = 1;
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	put_nx_info(nxi);
+	return ret;
+}
+
+int vc_net_remove(uint32_t nid, void __user *data)
+{	/* only NXA_TYPE_ANY (drop all ipv4 entries) is handled; else -EINVAL */
+	struct vcmd_net_addr_v0 vc_data;
+	struct nx_info *nxi;
+	int ret = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))	/* NULL data fails here */
+		return -EFAULT;
+
+	nxi = lookup_nx_info(nid);
+	if (!nxi)
+		return -ESRCH;
+
+	switch (vc_data.type) {
+	case NXA_TYPE_ANY:
+		nxi->nbipv4 = 0;
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	put_nx_info(nxi);
+	return ret;
+}
+
+int vc_get_nflags(uint32_t id, void __user *data)
+{
+	struct nx_info *nxi;
+	struct vcmd_net_flags_v0 vc_data;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	nxi = lookup_nx_info(id);
+	if (!nxi)
+		return -ESRCH;
+
+	vc_data.flagword = nxi->nx_flags;
+
+	/* special STATE flag handling */
+	vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);
+
+	put_nx_info(nxi);
+
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return 0;
+}
+
+int vc_set_nflags(uint32_t id, void __user *data)
+{
+	struct nx_info *nxi;
+	struct vcmd_net_flags_v0 vc_data;
+	uint64_t mask, trigger;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	nxi = lookup_nx_info(id);
+	if (!nxi)
+		return -ESRCH;
+
+	/* special STATE flag handling */
+	mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
+	trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
+
+	nxi->nx_flags =
vx_mask_flags(nxi->nx_flags, + vc_data.flagword, mask); + put_nx_info(nxi); + return 0; +} + +int vc_get_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.ncaps = nxi->nx_ncaps; + vc_data.cmask = ~0UL; + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + + nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps, + vc_data.ncaps, vc_data.cmask); + put_nx_info(nxi); + return 0; +} + + +#include + +EXPORT_SYMBOL_GPL(free_nx_info); +EXPORT_SYMBOL_GPL(unhash_nx_info); + Index: linux-2.6.14/kernel/vserver/proc.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/proc.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,914 @@ +/* + * linux/kernel/vserver/proc.c + * + * Virtual Context Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 adaptation vs1.3.0 + * V0.03 proc permissions + * V0.04 locking/generic + * V0.05 next generation procfs + * V0.06 inode validation + * V0.07 generic rewrite vid + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "cvirt_proc.h" +#include "limit_proc.h" +#include "sched_proc.h" +#include "vci_config.h" + +static struct proc_dir_entry *proc_virtual; + +static struct proc_dir_entry *proc_vnet; + + +enum vid_directory_inos { + PROC_XID_INO = 32, + PROC_XID_INFO, + PROC_XID_STATUS, + PROC_XID_LIMIT, + PROC_XID_SCHED, + 
PROC_XID_CVIRT, + PROC_XID_CACCT, + + PROC_NID_INO = 64, + PROC_NID_INFO, + PROC_NID_STATUS, +}; + +#define PROC_VID_MASK 0x60 + + +/* first the actual feeds */ + + +static int proc_virtual_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + "VCIKernel:\t%08x\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ,vci_kernel_config() + ); +} + +static int proc_virtual_status(int vid, char *buffer) +{ + return sprintf(buffer, + "#CTotal:\t%d\n" + "#CActive:\t%d\n" + ,atomic_read(&vx_global_ctotal) + ,atomic_read(&vx_global_cactive) + ); +} + + +int proc_xid_info (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + "Init:\t%d\n" + ,vxi->vx_id + ,vxi + ,vxi->vx_initpid + ); + put_vx_info(vxi); + return length; +} + +int proc_xid_status (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "UseCnt:\t%d\n" + "Tasks:\t%d\n" + "Flags:\t%016llx\n" + "BCaps:\t%016llx\n" + "CCaps:\t%016llx\n" +// "Ticks:\t%d\n" + ,atomic_read(&vxi->vx_usecnt) + ,atomic_read(&vxi->vx_tasks) + ,(unsigned long long)vxi->vx_flags + ,(unsigned long long)vxi->vx_bcaps + ,(unsigned long long)vxi->vx_ccaps +// ,atomic_read(&vxi->limit.ticks) + ); + put_vx_info(vxi); + return length; +} + +int proc_xid_limit (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_limit(&vxi->limit, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_sched (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_sched(&vxi->sched, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cvirt (int vid, char *buffer) +{ + struct vx_info *vxi; + 
int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + vx_update_load(vxi); + length = vx_info_proc_cvirt(&vxi->cvirt, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cacct (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_cacct(&vxi->cacct, buffer); + put_vx_info(vxi); + return length; +} + + +static int proc_vnet_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ); +} + +#define atoquad(a) \ + (((a)>>0) & 0xff), (((a)>>8) & 0xff), \ + (((a)>>16) & 0xff), (((a)>>24) & 0xff) + +/* proc_nid_info - format the id, the nx_info pointer and every stored IPv4 address/mask pair of network context vid into buffer; returns the byte count, or 0 if the context is not found */ +int proc_nid_info (int vid, char *buffer) +{ + struct nx_info *nxi; + int length, i; + + nxi = lookup_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + ,nxi->nx_id + ,nxi + ); + for (i = 0; i < nxi->nbipv4; i++) { /* one output line per stored address/mask pair */ + length += sprintf(buffer + length, + "%d:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i, + atoquad(nxi->ipv4[i]), + atoquad(nxi->mask[i])); + } + put_nx_info(nxi); + return length; +} + +int proc_nid_status (int vid, char *buffer) +{ + struct nx_info *nxi; + int length; + + nxi = lookup_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "UseCnt:\t%d\n" + "Tasks:\t%d\n" + ,atomic_read(&nxi->nx_usecnt) + ,atomic_read(&nxi->nx_tasks) + ); + put_nx_info(nxi); + return length; +} + +/* here the inode helpers */ + +#define fake_ino(id,nr) (((nr) & 0xFFFF) | \ + (((id) & 0xFFFF) << 16)) + +#define inode_vid(i) (((i)->i_ino >> 16) & 0xFFFF) +#define inode_type(i) ((i)->i_ino & 0xFFFF) + +#define MAX_MULBY10 ((~0U-9)/10) + + +static struct inode *proc_vid_make_inode(struct super_block * sb, + int vid, int ino) +{ + struct inode *inode = new_inode(sb); + + if (!inode) + goto out; + + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(vid, ino); + + inode->i_uid = 0; + 
inode->i_gid = 0; +out: + return inode; +} + +static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode * inode = dentry->d_inode; + int vid, hashed=0; + + vid = inode_vid(inode); + switch (inode_type(inode) & PROC_VID_MASK) { + case PROC_XID_INO: + hashed = xid_is_hashed(vid); + break; + case PROC_NID_INO: + hashed = nid_is_hashed(vid); + break; + } + if (hashed) + return 1; + d_drop(dentry); + return 0; +} + + +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) + +static ssize_t proc_vid_info_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length; + int vid; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + vid = inode_vid(inode); + length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page); + + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, + (char *)page, length); + free_page(page); + return length; +} + + + + + +/* here comes the lower level (vid) */ + +static struct file_operations proc_vid_info_file_operations = { + read: proc_vid_info_read, +}; + +static struct dentry_operations proc_vid_dentry_operations = { + d_revalidate: proc_vid_revalidate, +}; + + +struct vid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct vid_entry vx_base_stuff[] = { + E(PROC_XID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_XID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_XID_LIMIT, "limit", S_IFREG|S_IRUGO), + E(PROC_XID_SCHED, "sched", S_IFREG|S_IRUGO), + E(PROC_XID_CVIRT, "cvirt", S_IFREG|S_IRUGO), + E(PROC_XID_CACCT, "cacct", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + +static struct vid_entry vn_base_stuff[] = { + E(PROC_NID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_NID_STATUS, "status", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + + + +static struct dentry 
*proc_vid_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode; + struct vid_entry *p; + int error; + + error = -ENOENT; + inode = NULL; + + switch (inode_type(dir)) { + case PROC_XID_INO: + p = vx_base_stuff; + break; + case PROC_NID_INO: + p = vn_base_stuff; + break; + default: + goto out; + } + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = -EINVAL; + inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type); + if (!inode) + goto out; + + switch(p->type) { + case PROC_XID_INFO: + PROC_I(inode)->op.proc_vid_read = proc_xid_info; + break; + case PROC_XID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_xid_status; + break; + case PROC_XID_LIMIT: + PROC_I(inode)->op.proc_vid_read = proc_xid_limit; + break; + case PROC_XID_SCHED: + PROC_I(inode)->op.proc_vid_read = proc_xid_sched; + break; + case PROC_XID_CVIRT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt; + break; + case PROC_XID_CACCT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cacct; + break; + + case PROC_NID_INFO: + PROC_I(inode)->op.proc_vid_read = proc_nid_info; + break; + case PROC_NID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_nid_status; + break; + + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); + return ERR_PTR(-EINVAL); + } + inode->i_mode = p->mode; + inode->i_fop = &proc_vid_info_file_operations; + inode->i_nlink = 1; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + error = 0; +out: + return ERR_PTR(error); +} + + +static int proc_vid_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int i, size; + struct inode *inode = filp->f_dentry->d_inode; + struct vid_entry *p; + + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, + inode->i_ino, DT_DIR) < 0) + return 0; + i++; + 
filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, i, + PROC_ROOT_INO, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + switch (inode_type(inode)) { + case PROC_XID_INO: + size = sizeof(vx_base_stuff); + p = vx_base_stuff + i; + break; + case PROC_NID_INO: + size = sizeof(vn_base_stuff); + p = vn_base_stuff + i; + break; + default: + return 1; + } + if (i >= size/sizeof(struct vid_entry)) + return 1; + while (p->name) { + if (filldir(dirent, p->name, p->len, + filp->f_pos, fake_ino(inode_vid(inode), + p->type), p->mode >> 12) < 0) + return 0; + filp->f_pos++; + p++; + } + } + return 1; +} + + + + +/* now the upper level (virtual) */ + +static struct file_operations proc_vid_file_operations = { + read: generic_read_dir, + readdir: proc_vid_readdir, +}; + +static struct inode_operations proc_vid_inode_operations = { + lookup: proc_vid_lookup, +}; + + +/* atovid - parse len characters at str as a decimal id; returns the value, or -1 on a non-digit, on reaching MAX_MULBY10, or when the value is zero / starts with '0' */ +static __inline__ int atovid(const char *str, int len) +{ + int vid, c; + + vid = 0; + while (len-- > 0) { + c = *str - '0'; + str++; + if (c < 0 || c > 9) /* c is signed: characters below '0' must be rejected as well */ + return -1; + if (vid >= MAX_MULBY10) + return -1; + vid *= 10; + vid += c; + if (!vid) + return -1; + } + return vid; +} + +static __inline__ unsigned long atoaddr(const char *str, int len) +{ + unsigned long addr, c; + + addr = 0; + while (len-- > 0) { + c = *str - '0'; + if (c > 9) + c -= 'A'-'0'+10; + if (c > 15) + c -= 'a'-'A'; + if (c > 15) + return -1; + str++; + if (addr >= ((1 << 28) - 1)) + return -1; + addr = (addr << 4) | c; + if (!addr) + return -1; + } + return addr; +} + + +struct dentry *proc_virtual_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int xid, len, ret; + struct vx_info *vxi; + const char *name; + struct inode *inode; + + name = dentry->d_name.name; + len = dentry->d_name.len; + ret = -ENOMEM; + + if (len == 7 && !memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + inode->i_mtime = inode->i_atime = + 
inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_XID_INO); + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_virtual_info; + inode->i_mode = S_IFREG|S_IRUGO; + d_add(dentry, inode); + return NULL; + } + if (len == 6 && !memcmp(name, "status", 6)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_STATUS); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_virtual_status; + inode->i_mode = S_IFREG|S_IRUGO; + d_add(dentry, inode); + return NULL; + } + + ret = -ENOENT; + xid = atovid(name, len); + if (xid < 0) + goto out; + vxi = lookup_vx_info(xid); + if (!vxi) + goto out; + + inode = NULL; + if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + inode = proc_vid_make_inode(dir->i_sb, + vxi->vx_id, PROC_XID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_vx_info(vxi); +out: + return ERR_PTR(ret); +} + + +struct dentry *proc_vnet_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int nid, len, ret; + struct nx_info *nxi; + const char *name; + struct inode *inode; + + name = dentry->d_name.name; + len = dentry->d_name.len; + ret = -ENOMEM; + if (len == 7 && !memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_NID_INO); + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = 
inode->i_gid = 0; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_vnet_info; + inode->i_mode = S_IFREG|S_IRUGO; + d_add(dentry, inode); + return NULL; + } + + ret = -ENOENT; + nid = atovid(name, len); + if (nid < 0) + goto out; + nxi = lookup_nx_info(nid); + if (!nxi) + goto out; + + inode = NULL; + if (1) + inode = proc_vid_make_inode(dir->i_sb, + nxi->nx_id, PROC_NID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_nx_info(nxi); +out: + return ERR_PTR(ret); +} + + + + +#define PROC_NUMBUF 10 +#define PROC_MAXVIDS 32 + +int proc_virtual_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int xid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-3; + unsigned int nr_xids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_XID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_XID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 3: + ino = fake_ino(0, PROC_XID_STATUS); + if (filldir(dirent, "status", 6, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 4: + if (vx_current_xid() > 1) { + ino = fake_ino(1, 
PROC_XID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + } + + nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS); + for (i = 0; i < nr_xids; i++) { + int xid = xid_array[i]; + ino_t ino = fake_ino(xid, PROC_XID_INO); + unsigned int j = PROC_NUMBUF; + + do buf[--j] = '0' + (xid % 10); while (xid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_virtual_dir_operations = { + read: generic_read_dir, + readdir: proc_virtual_readdir, +}; + +static struct inode_operations proc_virtual_dir_inode_operations = { + lookup: proc_virtual_lookup, +}; + + +int proc_vnet_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int nid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-3; + unsigned int nr_nids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_NID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_NID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 3: + if (vx_current_xid() > 1) { + ino = fake_ino(1, PROC_NID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + } + + nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS); + for (i = 0; i < nr_nids; i++) { + int nid = nid_array[i]; + ino_t ino = fake_ino(nid, PROC_NID_INO); + unsigned long j = PROC_NUMBUF; + + do buf[--j] = '0' + (nid % 10); while (nid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + 
filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_vnet_dir_operations = { + read: generic_read_dir, + readdir: proc_vnet_readdir, +}; + +static struct inode_operations proc_vnet_dir_inode_operations = { + lookup: proc_vnet_lookup, +}; + + + +void proc_vx_init(void) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir("virtual", 0); + if (ent) { + ent->proc_fops = &proc_virtual_dir_operations; + ent->proc_iops = &proc_virtual_dir_inode_operations; + } + proc_virtual = ent; + + ent = proc_mkdir("virtnet", 0); + if (ent) { + ent->proc_fops = &proc_vnet_dir_operations; + ent->proc_iops = &proc_vnet_dir_inode_operations; + } + proc_vnet = ent; +} + + + + +/* per pid info */ + + +int proc_pid_vx_info(struct task_struct *p, char *buffer) +{ + struct vx_info *vxi; + char * orig = buffer; + + buffer += sprintf (buffer,"XID:\t%d\n", vx_task_xid(p)); + vxi = task_get_vx_info(p); + if (vxi && !vx_flags(VXF_INFO_HIDE, 0)) { + buffer += sprintf (buffer,"BCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_bcaps); + buffer += sprintf (buffer,"CCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_ccaps); + buffer += sprintf (buffer,"CFlags:\t%016llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"CIPid:\t%d\n" + ,vxi->vx_initpid); + } + put_vx_info(vxi); + return buffer - orig; +} + +/* proc_pid_nx_info - write the NID of task p and, when it has an nx_info and the VXF_INFO_HIDE check does not fire, its IPv4 roots and broadcast address into buffer; returns the byte count */ +int proc_pid_nx_info(struct task_struct *p, char *buffer) +{ + struct nx_info *nxi; + char * orig = buffer; + + buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p)); + nxi = task_get_nx_info(p); + if (nxi && !vx_flags(VXF_INFO_HIDE, 0)) { + int i; + + for (i = 0; i < nxi->nbipv4; i++){ /* one V4Root line per stored address/mask pair */ + buffer += sprintf (buffer, + "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i + ,NIPQUAD(nxi->ipv4[i]) + ,NIPQUAD(nxi->mask[i])); + } + buffer += sprintf (buffer, + "V4Root[bcast]:\t%d.%d.%d.%d\n" + ,NIPQUAD(nxi->v4_bcast)); + } + put_nx_info(nxi); + return buffer - orig; +} + Index: linux-2.6.14/kernel/vserver/sched.c ===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/sched.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,218 @@ +/* + * linux/kernel/vserver/sched.c + * + * Virtual Server: Scheduler Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 adapted Sam Vilains version to 2.6.3 + * V0.02 removed legacy interface + * + */ + +#include +#include +#include +#include +#include + +#include +#include + + +/* + * recalculate the context's scheduling tokens + * + * ret > 0 : number of tokens available + * ret = 0 : context is paused + * ret < 0 : number of jiffies until new tokens arrive + * + */ +int vx_tokens_recalc(struct vx_info *vxi) +{ + long delta, tokens = 0; + + if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0)) + /* we are paused */ + return 0; + + delta = jiffies - vxi->sched.jiffies; + + if (delta >= vxi->sched.interval) { + /* lockdown scheduler info */ + spin_lock(&vxi->sched.tokens_lock); + + /* calc integral token part */ + delta = jiffies - vxi->sched.jiffies; + tokens = delta / vxi->sched.interval; + delta = tokens * vxi->sched.interval; + tokens *= vxi->sched.fill_rate; + + atomic_add(tokens, &vxi->sched.tokens); + vxi->sched.jiffies += delta; + tokens = atomic_read(&vxi->sched.tokens); + + if (tokens > vxi->sched.tokens_max) { + tokens = vxi->sched.tokens_max; + atomic_set(&vxi->sched.tokens, tokens); + } + spin_unlock(&vxi->sched.tokens_lock); + } else { + /* no new tokens */ + tokens = vx_tokens_avail(vxi); + if (tokens <= 0) + vxi->vx_state |= VXS_ONHOLD; + if (tokens < vxi->sched.tokens_min) { + /* enough tokens will be available in */ + if (vxi->sched.tokens_min == 0) + return delta - vxi->sched.interval; + return delta - vxi->sched.interval * + vxi->sched.tokens_min / vxi->sched.fill_rate; + } + } + + /* we have some tokens left */ + if (vx_info_state(vxi, VXS_ONHOLD) && + (tokens >= vxi->sched.tokens_min)) + vxi->vx_state &= ~VXS_ONHOLD; + if (vx_info_state(vxi, VXS_ONHOLD)) + tokens -= vxi->sched.tokens_min; + + 
return tokens; +} + +/* + * effective_prio - return the priority that is based on the static + * priority but is modified by bonuses/penalties. + * + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] + * into a -4 ... 0 ... +4 bonus/penalty range. + * + * Additionally, we scale another amount based on the number of + * CPU tokens currently held by the context, if the process is + * part of a context (and the appropriate SCHED flag is set). + * This ranges from -5 ... 0 ... +15, quadratically. + * + * So, the total bonus is -9 .. 0 .. +19 + * We use ~50% of the full 0...39 priority range so that: + * + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * unless that context is far exceeding its CPU allocation. + * + * Both properties are important to certain workloads. + */ +int vx_effective_vavavoom(struct vx_info *vxi, int max_prio) +{ + int vavavoom, max; + + /* lots of tokens = lots of vavavoom + * no tokens = no vavavoom */ + if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) { + max = vxi->sched.tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = max_prio * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + } else + vavavoom = 0; + + vxi->sched.vavavoom = vavavoom; + return vavavoom; +} + + +int vc_set_sched_v2(uint32_t xid, void __user *data) +{ + struct vcmd_set_sched_v2 vc_data; + struct vx_info *vxi; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(xid); + if (!vxi) + return -EINVAL; + + spin_lock(&vxi->sched.tokens_lock); + + if (vc_data.interval != SCHED_KEEP) + vxi->sched.interval = vc_data.interval; + if (vc_data.fill_rate != SCHED_KEEP) + vxi->sched.fill_rate = vc_data.fill_rate; + if (vc_data.tokens_min != SCHED_KEEP) + vxi->sched.tokens_min = vc_data.tokens_min; + if (vc_data.tokens_max != SCHED_KEEP) + vxi->sched.tokens_max = vc_data.tokens_max; + if (vc_data.tokens != 
SCHED_KEEP) + atomic_set(&vxi->sched.tokens, vc_data.tokens); + + /* Sanity check the resultant values */ + if (vxi->sched.fill_rate <= 0) + vxi->sched.fill_rate = 1; + if (vxi->sched.interval <= 0) + vxi->sched.interval = HZ; + if (vxi->sched.tokens_max == 0) + vxi->sched.tokens_max = 1; + if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max) + atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max); + if (vxi->sched.tokens_min > vxi->sched.tokens_max) + vxi->sched.tokens_min = vxi->sched.tokens_max; + + spin_unlock(&vxi->sched.tokens_lock); + put_vx_info(vxi); + return 0; +} + + +int vc_set_sched(uint32_t xid, void __user *data) +{ + struct vcmd_set_sched_v3 vc_data; + struct vx_info *vxi; + unsigned int set_mask; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(xid); + if (!vxi) + return -EINVAL; + + set_mask = vc_data.set_mask; + + spin_lock(&vxi->sched.tokens_lock); + + if (set_mask & VXSM_FILL_RATE) + vxi->sched.fill_rate = vc_data.fill_rate; + if (set_mask & VXSM_INTERVAL) + vxi->sched.interval = vc_data.interval; + if (set_mask & VXSM_TOKENS) + atomic_set(&vxi->sched.tokens, vc_data.tokens); + if (set_mask & VXSM_TOKENS_MIN) + vxi->sched.tokens_min = vc_data.tokens_min; + if (set_mask & VXSM_TOKENS_MAX) + vxi->sched.tokens_max = vc_data.tokens_max; + if (set_mask & VXSM_PRIO_BIAS) + vxi->sched.priority_bias = vc_data.priority_bias; + + /* Sanity check the resultant values */ + if (vxi->sched.fill_rate <= 0) + vxi->sched.fill_rate = 1; + if (vxi->sched.interval <= 0) + vxi->sched.interval = HZ; + if (vxi->sched.tokens_max == 0) + vxi->sched.tokens_max = 1; + if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max) + atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max); + if (vxi->sched.tokens_min > vxi->sched.tokens_max) + vxi->sched.tokens_min = vxi->sched.tokens_max; + if (vxi->sched.priority_bias > MAX_PRIO_BIAS) + vxi->sched.priority_bias = MAX_PRIO_BIAS; + if (vxi->sched.priority_bias < 
MIN_PRIO_BIAS) + vxi->sched.priority_bias = MIN_PRIO_BIAS; + + spin_unlock(&vxi->sched.tokens_lock); + put_vx_info(vxi); + return 0; +} + Index: linux-2.6.14/kernel/vserver/sched_init.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/sched_init.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,30 @@ + +static inline void vx_info_init_sched(struct _vx_sched *sched) +{ + int i; + + /* scheduling; hard code starting values as constants */ + sched->fill_rate = 1; + sched->interval = 4; + sched->tokens_min = HZ >> 4; + sched->tokens_max = HZ >> 1; + sched->jiffies = jiffies; + sched->tokens_lock = SPIN_LOCK_UNLOCKED; + + atomic_set(&sched->tokens, HZ >> 2); + sched->cpus_allowed = CPU_MASK_ALL; + sched->priority_bias = 0; + sched->vavavoom = 0; + + for_each_cpu(i) { + sched->cpu[i].user_ticks = 0; + sched->cpu[i].sys_ticks = 0; + sched->cpu[i].hold_ticks = 0; + } +} + +static inline void vx_info_exit_sched(struct _vx_sched *sched) +{ + return; +} + Index: linux-2.6.14/kernel/vserver/sched_proc.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/sched_proc.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,40 @@ +#ifndef _VX_SCHED_PROC_H +#define _VX_SCHED_PROC_H + + +static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) +{ + int length = 0; + int i; + + length += sprintf(buffer, + "Token:\t\t%8d\n" + "FillRate:\t%8d\n" + "Interval:\t%8d\n" + "TokensMin:\t%8d\n" + "TokensMax:\t%8d\n" + "PrioBias:\t%8d\n" + "VaVaVoom:\t%8d\n" + ,atomic_read(&sched->tokens) + ,sched->fill_rate + ,sched->interval + ,sched->tokens_min + ,sched->tokens_max + ,sched->priority_bias + ,sched->vavavoom + ); + + for_each_online_cpu(i) { + length += sprintf(buffer + length, + "cpu %d: %lld %lld %lld\n" + ,i + ,(long long)sched->cpu[i].user_ticks + ,(long 
long)sched->cpu[i].sys_ticks + ,(long long)sched->cpu[i].hold_ticks + ); + } + + return length; +} + +#endif /* _VX_SCHED_PROC_H */ Index: linux-2.6.14/kernel/vserver/signal.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/signal.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,133 @@ +/* + * linux/kernel/vserver/signal.c + * + * Virtual Server: Signal Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +#include + +#include +#include + +#include +#include + + +int vx_info_kill(struct vx_info *vxi, int pid, int sig) +{ + int retval, count=0; + struct task_struct *p; + unsigned long priv = 0; + + retval = -ESRCH; + vxdprintk(VXD_CBIT(misc, 4), + "vx_info_kill(%p[#%d],%d,%d)*", + vxi, vxi->vx_id, pid, sig); + read_lock(&tasklist_lock); + switch (pid) { + case 0: + priv = 1; + case -1: + for_each_process(p) { + int err = 0; + + if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 || + (pid && vxi->vx_initpid == p->pid)) + continue; + + err = group_send_sig_info(sig, (void*)priv, p); + ++count; + if (err != -EPERM) + retval = err; + } + break; + + case 1: + if (vxi->vx_initpid) { + pid = vxi->vx_initpid; + priv = 1; + } + /* fallthrough */ + default: + p = find_task_by_real_pid(pid); + if (p) { + if (vx_task_xid(p) == vxi->vx_id) + retval = group_send_sig_info(sig, + (void*)priv, p); + } + break; + } + read_unlock(&tasklist_lock); + vxdprintk(VXD_CBIT(misc, 4), + "vx_info_kill(%p[#%d],%d,%d) = %d", + vxi, vxi->vx_id, pid, sig, retval); + return retval; +} + +int vc_ctx_kill(uint32_t id, void __user *data) +{ + int retval; + struct vcmd_ctx_kill_v0 vc_data; + struct vx_info *vxi; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + retval = vx_info_kill(vxi, vc_data.pid, 
vc_data.sig); + put_vx_info(vxi); + return retval; +} + +/* __wait_exit - sleep interruptibly on vxi->vx_wait until vx_info_state() reports VXS_SHUTDOWN without VXS_HASHED; returns 0, or -ERESTARTSYS when a signal is pending */ +static int __wait_exit(struct vx_info *vxi) +{ + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + add_wait_queue(&vxi->vx_wait, &wait); + +wait: + set_current_state(TASK_INTERRUPTIBLE); /* re-arm on every pass: after a wakeup we are TASK_RUNNING and schedule() would return at once, spinning */ + if (vx_info_state(vxi, VXS_SHUTDOWN|VXS_HASHED) == VXS_SHUTDOWN) + goto out; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + goto out; + } + schedule(); + goto wait; + +out: + set_current_state(TASK_RUNNING); + remove_wait_queue(&vxi->vx_wait, &wait); + return ret; +} + + + +int vc_wait_exit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + int ret; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = __wait_exit(vxi); + put_vx_info(vxi); + return ret; +} + Index: linux-2.6.14/kernel/vserver/switch.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/switch.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,260 @@ +/* + * linux/kernel/vserver/switch.c + * + * Virtual Server: Syscall Switch + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 syscall switch + * V0.02 added signal to context + * V0.03 added rlimit functions + * V0.04 added iattr, task/xid functions + * V0.05 added debug/history stuff + * V0.06 added compat32 layer + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + + +static inline +int vc_get_version(uint32_t id) +{ +#ifdef CONFIG_VSERVER_LEGACY_VERSION + if (id == 63) + return VCI_LEGACY_VERSION; +#endif + return VCI_VERSION; +} + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#ifdef CONFIG_COMPAT +#define __COMPAT(name, id, data, compat) \ + (compat) ? 
name ## _x32 (id, data) : name (id, data) +#else +#define __COMPAT(name, id, data, compat) \ + name (id, data) +#endif + + +static inline +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat) +{ + vxdprintk(VXD_CBIT(switch, 0), + "vc: VCMD_%02d_%d[%d], %d,%p,%d", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), id, data, compat); + +#ifdef CONFIG_VSERVER_LEGACY + if (!capable(CAP_CONTEXT) && + /* dirty hack for capremove */ + !(cmd==VCMD_new_s_context && id==-2)) + return -EPERM; +#else + if (!capable(CAP_CONTEXT)) + return -EPERM; +#endif + + switch (cmd) { + case VCMD_get_version: + return vc_get_version(id); + + case VCMD_dump_history: +#ifdef CONFIG_VSERVER_HISTORY + return vc_dump_history(id); +#else + return -ENOSYS; +#endif + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_new_s_context: + return vc_new_s_context(id, data); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + case VCMD_set_ipv4root: + return vc_set_ipv4root(id, data); +#endif + + case VCMD_task_xid: + return vc_task_xid(id, data); + case VCMD_vx_info: + return vc_vx_info(id, data); + + case VCMD_task_nid: + return vc_task_nid(id, data); + case VCMD_nx_info: + return vc_nx_info(id, data); + + case VCMD_set_namespace: + return vc_set_namespace(id, data); + case VCMD_cleanup_namespace: + return vc_cleanup_namespace(id, data); + } + + /* those are allowed while in setup too */ + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + !vx_flags(VXF_STATE_SETUP,0)) + return -EPERM; + +#ifdef CONFIG_VSERVER_LEGACY + switch (cmd) { + case VCMD_set_cflags: + case VCMD_set_ccaps: + if (vx_check(0, VX_WATCH)) + return 0; + } +#endif + + switch (cmd) { + case VCMD_get_rlimit: + return vc_get_rlimit(id, data); + case VCMD_set_rlimit: + return vc_set_rlimit(id, data); + case VCMD_get_rlimit_mask: + return vc_get_rlimit_mask(id, data); + + case VCMD_get_vhi_name: + return vc_get_vhi_name(id, data); + case VCMD_set_vhi_name: + return vc_set_vhi_name(id, data); + + case VCMD_set_cflags: + return 
vc_set_cflags(id, data); + case VCMD_get_cflags: + return vc_get_cflags(id, data); + + case VCMD_set_ccaps: + return vc_set_ccaps(id, data); + case VCMD_get_ccaps: + return vc_get_ccaps(id, data); + + case VCMD_set_nflags: + return vc_set_nflags(id, data); + case VCMD_get_nflags: + return vc_get_nflags(id, data); + + case VCMD_set_ncaps: + return vc_set_ncaps(id, data); + case VCMD_get_ncaps: + return vc_get_ncaps(id, data); + + case VCMD_set_sched_v2: + return vc_set_sched_v2(id, data); + /* this is version 3 */ + case VCMD_set_sched: + return vc_set_sched(id, data); + + case VCMD_add_dlimit: + return __COMPAT(vc_add_dlimit, id, data, compat); + case VCMD_rem_dlimit: + return __COMPAT(vc_rem_dlimit, id, data, compat); + case VCMD_set_dlimit: + return __COMPAT(vc_set_dlimit, id, data, compat); + case VCMD_get_dlimit: + return __COMPAT(vc_get_dlimit, id, data, compat); + } + + /* below here only with VX_ADMIN */ + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + switch (cmd) { + case VCMD_ctx_kill: + return vc_ctx_kill(id, data); + + case VCMD_wait_exit: + return vc_wait_exit(id, data); + + case VCMD_create_context: +#ifdef CONFIG_VSERVER_LEGACY + return vc_ctx_create(id, NULL); +#else + return -ENOSYS; +#endif + + case VCMD_get_iattr: + return __COMPAT(vc_get_iattr, id, data, compat); + case VCMD_set_iattr: + return __COMPAT(vc_set_iattr, id, data, compat); + + case VCMD_enter_namespace: + return vc_enter_namespace(id, data); + + case VCMD_ctx_create_v0: +#ifdef CONFIG_VSERVER_LEGACY + if (id == 1) { + current->xid = 1; + return 1; + } +#endif + return vc_ctx_create(id, NULL); + case VCMD_ctx_create: + return vc_ctx_create(id, data); + case VCMD_ctx_migrate: + return vc_ctx_migrate(id, data); + + case VCMD_net_create_v0: + return vc_net_create(id, NULL); + case VCMD_net_create: + return vc_net_create(id, data); + case VCMD_net_migrate: + return vc_net_migrate(id, data); + case VCMD_net_add: + return vc_net_add(id, data); + case VCMD_net_remove: + return 
vc_net_remove(id, data); + + } + return -ENOSYS; +} + +extern asmlinkage long +sys_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + long ret = do_vserver(cmd, id, data, 0); + + vxdprintk(VXD_CBIT(switch, 1), + "vc: VCMD_%02d_%d[%d] = %08lx(%ld)", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), ret, ret); + return ret; +} + +#ifdef CONFIG_COMPAT + +extern asmlinkage long +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + long ret = do_vserver(cmd, id, data, 1); + + vxdprintk(VXD_CBIT(switch, 1), + "vc: VCMD_%02d_%d[%d] = %08lx(%ld)", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), ret, ret); + return ret; +} + +#endif /* CONFIG_COMPAT */ Index: linux-2.6.14/kernel/vserver/sysctl.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/sysctl.c 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,286 @@ +/* + * kernel/vserver/sysctl.c + * + * Virtual Context Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#define CTL_VSERVER 4242 /* unused? 
*/ + +enum { + CTL_DEBUG_ERROR = 0, + CTL_DEBUG_SWITCH = 1, + CTL_DEBUG_XID, + CTL_DEBUG_NID, + CTL_DEBUG_NET, + CTL_DEBUG_LIMIT, + CTL_DEBUG_DLIM, + CTL_DEBUG_QUOTA, + CTL_DEBUG_CVIRT, + CTL_DEBUG_MISC, +}; + + +unsigned int vx_debug_switch = 0; +unsigned int vx_debug_xid = 0; +unsigned int vx_debug_nid = 0; +unsigned int vx_debug_net = 0; +unsigned int vx_debug_limit = 0; +unsigned int vx_debug_dlim = 0; +unsigned int vx_debug_quota = 0; +unsigned int vx_debug_cvirt = 0; +unsigned int vx_debug_misc = 0; + + +static struct ctl_table_header *vserver_table_header; +static ctl_table vserver_table[]; + + +void vserver_register_sysctl(void) +{ + if (!vserver_table_header) { + vserver_table_header = register_sysctl_table(vserver_table, 1); + } + +} + +void vserver_unregister_sysctl(void) +{ + if (vserver_table_header) { + unregister_sysctl_table(vserver_table_header); + vserver_table_header = NULL; + } +} + + +static int proc_dodebug(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char tmpbuf[20], *p, c; + unsigned int value; + size_t left, len; + + if ((*ppos && !write) || !*lenp) { + *lenp = 0; + return 0; + } + + left = *lenp; + + if (write) { + if (!access_ok(VERIFY_READ, buffer, left)) + return -EFAULT; + p = (char *) buffer; + while (left && __get_user(c, p) >= 0 && isspace(c)) + left--, p++; + if (!left) + goto done; + + if (left > sizeof(tmpbuf) - 1) + return -EINVAL; + if (copy_from_user(tmpbuf, p, left)) + return -EFAULT; + tmpbuf[left] = '\0'; + + for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) + value = 10 * value + (*p - '0'); + if (*p && !isspace(*p)) + return -EINVAL; + while (left && isspace(*p)) + left--, p++; + *(unsigned int *) table->data = value; + } else { + if (!access_ok(VERIFY_WRITE, buffer, left)) + return -EFAULT; + len = sprintf(tmpbuf, "%u", *(unsigned int *) table->data); + if (len > left) + len = left; + if (__copy_to_user(buffer, tmpbuf, len)) + return -EFAULT; + if 
((left -= len) > 0) { + if (put_user('\n', (char *)buffer + len)) + return -EFAULT; + left--; + } + } + +done: + *lenp -= left; + *ppos += *lenp; + return 0; +} + + + +static ctl_table debug_table[] = { + { + .ctl_name = CTL_DEBUG_SWITCH, + .procname = "debug_switch", + .data = &vx_debug_switch, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_XID, + .procname = "debug_xid", + .data = &vx_debug_xid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NID, + .procname = "debug_nid", + .data = &vx_debug_nid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NET, + .procname = "debug_net", + .data = &vx_debug_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_LIMIT, + .procname = "debug_limit", + .data = &vx_debug_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_DLIM, + .procname = "debug_dlim", + .data = &vx_debug_dlim, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_QUOTA, + .procname = "debug_quota", + .data = &vx_debug_quota, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_CVIRT, + .procname = "debug_cvirt", + .data = &vx_debug_cvirt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_MISC, + .procname = "debug_misc", + .data = &vx_debug_misc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { .ctl_name = 0 } +}; + +static ctl_table vserver_table[] = { + { + .ctl_name = CTL_VSERVER, + .procname = "vserver", + .mode = 0555, + .child = debug_table + }, + { .ctl_name = 0 } +}; + + +static match_table_t tokens = { + { CTL_DEBUG_SWITCH, "switch=%x" }, + { CTL_DEBUG_XID, "xid=%x" }, + { 
CTL_DEBUG_NID, "nid=%x" }, + { CTL_DEBUG_NET, "net=%x" }, + { CTL_DEBUG_LIMIT, "limit=%x" }, + { CTL_DEBUG_DLIM, "dlim=%x" }, + { CTL_DEBUG_QUOTA, "quota=%x" }, + { CTL_DEBUG_CVIRT, "cvirt=%x" }, + { CTL_DEBUG_MISC, "misc=%x" }, + { CTL_DEBUG_ERROR, NULL } +}; + +#define HANDLE_CASE(id, name, val) \ + case CTL_DEBUG_ ## id: \ + vx_debug_ ## name = val; \ + printk("vs_debug_" #name "=0x%x\n", val); \ + break + + +static int __init vs_debug_setup(char *str) +{ + char *p; + int token; + + printk("vs_debug_setup(%s)\n", str); + while ((p = strsep(&str, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + unsigned int value; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + value = (token>0)?simple_strtoul(args[0].from, NULL, 0):0; + + switch (token) { + HANDLE_CASE(SWITCH, switch, value); + HANDLE_CASE(XID, xid, value); + HANDLE_CASE(NID, nid, value); + HANDLE_CASE(NET, net, value); + HANDLE_CASE(LIMIT, limit, value); + HANDLE_CASE(DLIM, dlim, value); + HANDLE_CASE(QUOTA, quota, value); + HANDLE_CASE(CVIRT, cvirt, value); + HANDLE_CASE(MISC, misc, value); + default: + return -EINVAL; + break; + } + } + return 1; +} + +__setup("vsdebug=", vs_debug_setup); + + + +EXPORT_SYMBOL_GPL(vx_debug_switch); +EXPORT_SYMBOL_GPL(vx_debug_xid); +EXPORT_SYMBOL_GPL(vx_debug_nid); +EXPORT_SYMBOL_GPL(vx_debug_net); +EXPORT_SYMBOL_GPL(vx_debug_limit); +EXPORT_SYMBOL_GPL(vx_debug_dlim); +EXPORT_SYMBOL_GPL(vx_debug_quota); +EXPORT_SYMBOL_GPL(vx_debug_cvirt); +EXPORT_SYMBOL_GPL(vx_debug_misc); + Index: linux-2.6.14/kernel/vserver/vci_config.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14/kernel/vserver/vci_config.h 2005-10-31 11:05:45.000000000 -0600 @@ -0,0 +1,74 @@ + +enum { + VCI_KCBIT_LEGACY = 1, + VCI_KCBIT_LEGACYNET, + VCI_KCBIT_NGNET, + + VCI_KCBIT_PROC_SECURE, + VCI_KCBIT_HARDCPU, + VCI_KCBIT_HARDCPU_IDLE, + + VCI_KCBIT_LEGACY_VERSION, + VCI_KCBIT_COWBL, + + VCI_KCBIT_DEBUG = 
16, + VCI_KCBIT_HISTORY = 20, + VCI_KCBIT_TAGXID = 24, +}; + + +static inline uint32_t vci_kernel_config(void) +{ + return + /* various legacy options */ +#ifdef CONFIG_VSERVER_LEGACY + (1 << VCI_KCBIT_LEGACY) | +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + (1 << VCI_KCBIT_LEGACYNET) | +#endif +#ifdef CONFIG_VSERVER_LEGACY_VERSION + (1 << VCI_KCBIT_LEGACY_VERSION) | +#endif + + /* configured features */ +#ifdef CONFIG_VSERVER_PROC_SECURE + (1 << VCI_KCBIT_PROC_SECURE) | +#endif +#ifdef CONFIG_VSERVER_HARDCPU + (1 << VCI_KCBIT_HARDCPU) | +#endif +#ifdef CONFIG_VSERVER_HARDCPU_IDLE + (1 << VCI_KCBIT_HARDCPU_IDLE) | +#endif +#ifdef CONFIG_VSERVER_COWBL + (1 << VCI_KCBIT_COWBL) | +#endif + + /* debug options */ +#ifdef CONFIG_VSERVER_DEBUG + (1 << VCI_KCBIT_DEBUG) | +#endif +#ifdef CONFIG_VSERVER_HISTORY + (1 << VCI_KCBIT_HISTORY) | +#endif + + /* inode xid tagging */ +#if defined(CONFIG_INOXID_NONE) + (0 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_UID16) + (1 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_GID16) + (2 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_UGID24) + (3 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_INTERN) + (4 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_RUNTIME) + (5 << VCI_KCBIT_TAGXID) | +#else + (7 << VCI_KCBIT_TAGXID) | +#endif + 0; +} + Index: linux-2.6.14/mm/filemap.c =================================================================== --- linux-2.6.14.orig/mm/filemap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/filemap.c 2005-10-31 11:05:45.000000000 -0600 @@ -1089,6 +1089,31 @@ return written; } +/* FIXME: It would be as simple as this, if we had a (void __user*) to write. + * We already have a kernel buffer, so it should be even simpler, right? ;) + * + * Yes, sorta. After duplicating the complete path of generic_file_write(), + * at least some special cases could be removed, so the copy is simpler than + * the original. But it remains a copy, so overall complexity increases. 
+ */ +static ssize_t +generic_kernel_file_write(struct file *, const char *, size_t, loff_t *); + +ssize_t generic_file_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more) +{ + ssize_t ret; + char *kaddr; + + kaddr = kmap(page); + ret = generic_kernel_file_write(file, kaddr + offset, size, ppos); + kunmap(page); + + return ret; +} + +EXPORT_SYMBOL(generic_file_sendpage); + ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos, size_t count, read_actor_t actor, void *target) { @@ -1725,6 +1750,19 @@ } EXPORT_SYMBOL(remove_suid); +static inline size_t +filemap_copy_from_kernel(struct page *page, unsigned long offset, + const char *buf, unsigned bytes) +{ + char *kaddr; + + kaddr = kmap(page); + memcpy(kaddr + offset, buf, bytes); + kunmap(page); + + return bytes; +} + size_t __filemap_copy_from_user_iovec(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) @@ -2090,6 +2128,155 @@ } EXPORT_SYMBOL(generic_file_aio_write_nolock); +/* + * TODO: + * This largely tries to copy generic_file_aio_write_nolock(), although it + * doesn't have to be nearly as generic. A real cleanup should either + * merge this into generic_file_aio_write_nolock() as well or keep it special + * and remove as much code as possible. 
+ */ +static ssize_t +generic_kernel_file_aio_write_nolock(struct kiocb *iocb, const struct iovec*iov, + unsigned long nr_segs, loff_t *ppos) +{ + struct file *file = iocb->ki_filp; + struct address_space * mapping = file->f_mapping; + struct address_space_operations *a_ops = mapping->a_ops; + size_t ocount; /* original count */ + size_t count; /* after file limit checks */ + struct inode *inode = mapping->host; + long status = 0; + loff_t pos; + struct page *page; + struct page *cached_page = NULL; + const int isblk = S_ISBLK(inode->i_mode); + ssize_t written; + ssize_t err; + size_t bytes; + struct pagevec lru_pvec; + const struct iovec *cur_iov = iov; /* current iovec */ + size_t iov_base = 0; /* offset in the current iovec */ + unsigned long seg; + char *buf; + + ocount = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + ocount += iv->iov_len; + if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) + return -EINVAL; + } + + count = ocount; + pos = *ppos; + pagevec_init(&lru_pvec, 0); + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + written = 0; + + err = generic_write_checks(file, &pos, &count, isblk); + if (err) + goto out; + + + if (count == 0) + goto out; + + remove_suid(file->f_dentry); + inode_update_time(inode, 1); + + /* There is no sane reason to use O_DIRECT */ + BUG_ON(file->f_flags & O_DIRECT); + + buf = (char *)iov->iov_base; + do { + unsigned long index; + unsigned long offset; + size_t copied; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); + if (!page) { + status = -ENOMEM; + break; + } + + status = a_ops->prepare_write(file, page, offset, 
offset+bytes); + if (unlikely(status)) { + loff_t isize = i_size_read(inode); + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. + */ + unlock_page(page); + page_cache_release(page); + if (pos + bytes > isize) + vmtruncate(inode, isize); + break; + } + + BUG_ON(nr_segs != 1); + copied = filemap_copy_from_kernel(page, offset, buf, bytes); + + flush_dcache_page(page); + status = a_ops->commit_write(file, page, offset, offset+bytes); + if (likely(copied > 0)) { + if (!status) + status = copied; + + if (status >= 0) { + written += status; + count -= status; + pos += status; + buf += status; + if (unlikely(nr_segs > 1)) + filemap_set_next_iovec(&cur_iov, + &iov_base, status); + } + } + if (unlikely(copied != bytes)) + if (status >= 0) + status = -EFAULT; + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + if (status < 0) + break; + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + } while (count); + *ppos = pos; + + if (cached_page) + page_cache_release(cached_page); + + /* + * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC + */ + if (status >= 0) { + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) + status = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + } + + err = written ? 
written : status; +out: + pagevec_lru_add(&lru_pvec); + current->backing_dev_info = 0; + return err; +} + ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) @@ -2139,6 +2326,21 @@ ret = wait_on_sync_kiocb(&kiocb); return ret; } + +static ssize_t +generic_kernel_file_write_nolock(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) +{ + struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, file); + ret = generic_kernel_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + return ret; +} + EXPORT_SYMBOL(generic_file_write_nolock); ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, @@ -2193,6 +2395,21 @@ } EXPORT_SYMBOL(generic_file_write); +static ssize_t generic_kernel_file_write(struct file *file, const char *buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_mapping->host; + ssize_t err; + struct iovec local_iov = {.iov_base = (void __user *)buf, + .iov_len = count }; + + down(&inode->i_sem); + err = generic_kernel_file_write_nolock(file, &local_iov, 1, ppos); + up(&inode->i_sem); + + return err; +} + ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { Index: linux-2.6.14/mm/fremap.c =================================================================== --- linux-2.6.14.orig/mm/fremap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/fremap.c 2005-10-31 11:05:45.000000000 -0600 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -67,6 +68,9 @@ pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); + + if (!vx_rsspages_avail(mm, 1)) + goto err_unlock; pud = pud_alloc(mm, pgd, addr); if (!pud) Index: linux-2.6.14/mm/hugetlb.c =================================================================== --- linux-2.6.14.orig/mm/hugetlb.c 2005-10-27 19:02:08.000000000 -0500 +++ 
linux-2.6.14/mm/hugetlb.c 2005-10-31 11:05:45.000000000 -0600 @@ -15,6 +15,7 @@ #include #include +#include const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; static unsigned long nr_huge_pages, free_huge_pages; Index: linux-2.6.14/mm/memory.c =================================================================== --- linux-2.6.14.orig/mm/memory.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/memory.c 2005-10-31 11:05:45.000000000 -0600 @@ -1694,6 +1694,10 @@ grab_swap_token(); } + if (!vx_rsspages_avail(mm, 1)) { + ret = VM_FAULT_OOM; + goto out; + } mark_page_accessed(page); lock_page(page); @@ -1775,6 +1779,8 @@ pte_unmap(page_table); spin_unlock(&mm->page_table_lock); + if (!vx_rsspages_avail(mm, 1)) + goto no_mem; if (unlikely(anon_vma_prepare(vma))) goto no_mem; page = alloc_zeroed_user_highpage(vma, addr); @@ -1848,6 +1854,9 @@ } retry: cond_resched(); + /* FIXME: is that check useful here? */ + if (!vx_rsspages_avail(mm, 1)) + return VM_FAULT_OOM; new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); /* * No smp_rmb is needed here as long as there's a full Index: linux-2.6.14/mm/mlock.c =================================================================== --- linux-2.6.14.orig/mm/mlock.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/mlock.c 2005-10-31 11:05:45.000000000 -0600 @@ -9,6 +9,7 @@ #include #include #include +#include static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, @@ -64,7 +65,7 @@ ret = make_pages_present(start, end); } - vma->vm_mm->locked_vm -= pages; + vx_vmlocked_sub(vma->vm_mm, pages); out: if (ret == -ENOMEM) ret = -EAGAIN; @@ -122,7 +123,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) { - unsigned long locked; + unsigned long locked, grow; unsigned long lock_limit; int error = -ENOMEM; @@ -133,8 +134,10 @@ len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; - locked = len >> PAGE_SHIFT; - locked += current->mm->locked_vm; + grow = len >> 
PAGE_SHIFT; + if (!vx_vmlocked_avail(current->mm, grow)) + goto out; + locked = current->mm->locked_vm + grow; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; @@ -142,6 +145,7 @@ /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); +out: up_write(&current->mm->mmap_sem); return error; } @@ -201,6 +205,8 @@ lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; + if (!vx_vmlocked_avail(current->mm, current->mm->total_vm)) + goto out; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); Index: linux-2.6.14/mm/mmap.c =================================================================== --- linux-2.6.14.orig/mm/mmap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/mmap.c 2005-10-31 11:05:45.000000000 -0600 @@ -1109,10 +1109,10 @@ kmem_cache_free(vm_area_cachep, vma); } out: - mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); __vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -1472,9 +1472,9 @@ return -ENOMEM; /* Ok, everything looks good - let it rip */ - mm->total_vm += grow; + vx_vmpages_add(mm, grow); if (vma->vm_flags & VM_LOCKED) - mm->locked_vm += grow; + vx_vmlocked_add(mm, grow); __vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow); return 0; } @@ -1614,9 +1614,10 @@ { size_t len = area->vm_end - area->vm_start; - area->vm_mm->total_vm -= len >> PAGE_SHIFT; + vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT); + if (area->vm_flags & VM_LOCKED) - area->vm_mm->locked_vm -= len >> PAGE_SHIFT; + vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT); vm_stat_unaccount(area); remove_vm_struct(area); } @@ -1860,6 +1861,8 @@ lock_limit >>= PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 
return -EAGAIN; + if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT)) + return -ENOMEM; } /* @@ -1914,9 +1917,9 @@ vma->vm_page_prot = protection_map[flags & 0x0f]; vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); if (flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } return addr; @@ -1947,8 +1950,8 @@ mm->mmap = mm->mmap_cache = NULL; mm->mm_rb = RB_ROOT; set_mm_counter(mm, rss, 0); - mm->total_vm = 0; - mm->locked_vm = 0; + vx_vmpages_sub(mm, mm->total_vm); + vx_vmlocked_sub(mm, mm->locked_vm); spin_unlock(&mm->page_table_lock); @@ -1994,7 +1997,8 @@ if (__vma && __vma->vm_start < vma->vm_end) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && - security_vm_enough_memory(vma_pages(vma))) + (security_vm_enough_memory(vma_pages(vma)) || + !vx_vmpages_avail(mm, vma_pages(vma)))) return -ENOMEM; vma_link(mm, vma, prev, rb_link, rb_parent); return 0; @@ -2067,5 +2071,7 @@ if (cur + npages > lim) return 0; + if (!vx_vmpages_avail(mm, npages)) + return 0; return 1; } Index: linux-2.6.14/mm/mremap.c =================================================================== --- linux-2.6.14.orig/mm/mremap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/mremap.c 2005-10-31 11:05:45.000000000 -0600 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -232,7 +233,7 @@ * if we failed to move page tables we still do total_vm increment * since do_munmap() will decrement it by old_len == new_len */ - mm->total_vm += new_len >> PAGE_SHIFT; + vx_vmpages_add(mm, new_len >> PAGE_SHIFT); __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); if (do_munmap(mm, old_addr, old_len) < 0) { @@ -249,7 +250,7 @@ } if (vm_flags & VM_LOCKED) { - mm->locked_vm += new_len >> PAGE_SHIFT; + vx_vmlocked_add(mm, new_len >> PAGE_SHIFT); if (new_len > old_len) make_pages_present(new_addr + old_len, 
new_addr + new_len); @@ -355,6 +356,9 @@ ret = -EAGAIN; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) goto out; + if (!vx_vmlocked_avail(current->mm, + (new_len - old_len) >> PAGE_SHIFT)) + goto out; } if (!may_expand_vm(current->mm, (new_len - old_len) >> PAGE_SHIFT)) { ret = -ENOMEM; @@ -383,11 +387,11 @@ vma_adjust(vma, vma->vm_start, addr + new_len, vma->vm_pgoff, NULL); - current->mm->total_vm += pages; + vx_vmpages_add(current->mm, pages); __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, pages); if (vma->vm_flags & VM_LOCKED) { - current->mm->locked_vm += pages; + vx_vmlocked_add(vma->vm_mm, pages); make_pages_present(addr + old_len, addr + new_len); } Index: linux-2.6.14/mm/nommu.c =================================================================== --- linux-2.6.14.orig/mm/nommu.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/nommu.c 2005-10-31 11:05:45.000000000 -0600 @@ -816,7 +816,7 @@ realalloc += kobjsize(vma); askedalloc += sizeof(*vma); - current->mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(current->mm, len >> PAGE_SHIFT); add_nommu_vma(vma); @@ -931,7 +931,7 @@ realalloc -= kobjsize(vml); askedalloc -= sizeof(*vml); kfree(vml); - mm->total_vm -= len >> PAGE_SHIFT; + vx_vmpages_sub(mm, len >> PAGE_SHIFT); #ifdef DEBUG show_process_blocks(); @@ -950,7 +950,7 @@ printk("Exit_mmap:\n"); #endif - mm->total_vm = 0; + vx_vmpages_sub(mm, mm->total_vm); while ((tmp = mm->context.vmlist)) { mm->context.vmlist = tmp->next; Index: linux-2.6.14/mm/oom_kill.c =================================================================== --- linux-2.6.14.orig/mm/oom_kill.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/oom_kill.c 2005-10-31 11:05:45.000000000 -0600 @@ -55,6 +55,7 @@ * The memory size of the process is the basis for the badness. 
*/ points = p->mm->total_vm; + /* FIXME add vserver badness ;) */ /* * Processes which fork a lot of child processes are likely Index: linux-2.6.14/mm/page_alloc.c =================================================================== --- linux-2.6.14.orig/mm/page_alloc.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/page_alloc.c 2005-10-31 11:05:45.000000000 -0600 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1262,6 +1263,8 @@ val->freehigh = 0; #endif val->mem_unit = PAGE_SIZE; + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_meminfo(val); } EXPORT_SYMBOL(si_meminfo); Index: linux-2.6.14/mm/rmap.c =================================================================== --- linux-2.6.14.orig/mm/rmap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/rmap.c 2005-10-31 11:05:45.000000000 -0600 @@ -52,6 +52,7 @@ #include #include #include +#include #include Index: linux-2.6.14/mm/shmem.c =================================================================== --- linux-2.6.14.orig/mm/shmem.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/shmem.c 2005-10-31 11:05:45.000000000 -0600 @@ -50,7 +50,6 @@ #include /* This magic number is used in glibc for posix shared memory */ -#define TMPFS_MAGIC 0x01021994 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) @@ -1580,7 +1579,7 @@ { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - buf->f_type = TMPFS_MAGIC; + buf->f_type = TMPFS_SUPER_MAGIC; buf->f_bsize = PAGE_CACHE_SIZE; buf->f_namelen = NAME_MAX; spin_lock(&sbinfo->stat_lock); @@ -1993,7 +1992,7 @@ sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = TMPFS_MAGIC; + sb->s_magic = TMPFS_SUPER_MAGIC; sb->s_op = &shmem_ops; inode = shmem_get_inode(sb, S_IFDIR | mode, 0); Index: linux-2.6.14/mm/swapfile.c =================================================================== --- 
linux-2.6.14.orig/mm/swapfile.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/swapfile.c 2005-10-31 11:05:45.000000000 -0600 @@ -30,6 +30,7 @@ #include #include #include +#include DEFINE_SPINLOCK(swap_lock); unsigned int nr_swapfiles; @@ -1601,6 +1602,8 @@ val->freeswap = nr_swap_pages + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_swapinfo(val); } /* Index: linux-2.6.14/mm/vmscan.c =================================================================== --- linux-2.6.14.orig/mm/vmscan.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/mm/vmscan.c 2005-10-31 11:05:45.000000000 -0600 @@ -1326,7 +1326,7 @@ swap_setup(); for_each_pgdat(pgdat) pgdat->kswapd - = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); + = find_task_by_real_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); total_memory = nr_free_pagecache_pages(); hotcpu_notifier(cpu_callback, 0); return 0; Index: linux-2.6.14/net/core/dev.c =================================================================== --- linux-2.6.14.orig/net/core/dev.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/core/dev.c 2005-10-31 11:05:45.000000000 -0600 @@ -113,6 +113,7 @@ #include /* Note : will define WIRELESS_EXT */ #include #endif /* CONFIG_NET_RADIO */ +#include #include /* @@ -1837,6 +1838,9 @@ total = 0; for (dev = dev_base; dev; dev = dev->next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + continue; for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -1897,6 +1901,10 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { + struct nx_info *nxi = current->nx_info; + + if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi)) + return; if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); Index: linux-2.6.14/net/core/rtnetlink.c =================================================================== 
--- linux-2.6.14.orig/net/core/rtnetlink.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/core/rtnetlink.c 2005-10-31 11:05:45.000000000 -0600 @@ -277,6 +277,9 @@ for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; + if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, skb->sk->sk_nx_info)) + continue; if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, @@ -450,6 +453,9 @@ sizeof(struct rtnl_link_ifmap) + sizeof(struct rtnl_link_stats) + 128); + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + return; skb = alloc_skb(size, GFP_KERNEL); if (!skb) return; Index: linux-2.6.14/net/core/sock.c =================================================================== --- linux-2.6.14.orig/net/core/sock.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/core/sock.c 2005-10-31 11:05:45.000000000 -0600 @@ -124,6 +124,9 @@ #include #include +#include +#include +#include #ifdef CONFIG_INET #include @@ -659,6 +662,8 @@ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); } + sock_vx_init(sk); + sock_nx_init(sk); if (security_sk_alloc(sk, family, priority)) goto out_free; @@ -697,6 +702,11 @@ __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; + clr_nx_info(&sk->sk_nx_info); + sk->sk_nid = -1; if (sk->sk_prot_creator->slab != NULL) kmem_cache_free(sk->sk_prot_creator->slab, sk); else @@ -714,6 +724,8 @@ memcpy(newsk, sk, sk->sk_prot->obj_size); /* SANITY */ + sock_vx_init(newsk); + sock_nx_init(newsk); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -754,6 +766,12 @@ newsk->sk_priority = 0; atomic_set(&newsk->sk_refcnt, 2); + set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info); + newsk->sk_xid = sk->sk_xid; + vx_sock_inc(newsk); + set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info); + newsk->sk_nid = sk->sk_nid; + 
/* * Increment the counter in the same struct proto as the master * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that @@ -1319,6 +1337,11 @@ sk->sk_stamp.tv_sec = -1L; sk->sk_stamp.tv_usec = -1L; + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_nid = nx_current_nid(); atomic_set(&sk->sk_refcnt, 1); } Index: linux-2.6.14/net/ipv4/af_inet.c =================================================================== --- linux-2.6.14.orig/net/ipv4/af_inet.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/af_inet.c 2005-10-31 11:05:45.000000000 -0600 @@ -112,6 +112,7 @@ #ifdef CONFIG_IP_MROUTE #include #endif +#include DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; @@ -279,8 +280,11 @@ } err = -EPERM; + if ((protocol == IPPROTO_ICMP) && vx_ccaps(VXC_RAW_ICMP)) + goto override; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; +override: err = -EPROTONOSUPPORT; if (!protocol) goto out_rcu_unlock; @@ -400,6 +404,10 @@ unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* Address used for validation */ + __u32 s_addr1; /* Address used for socket */ + __u32 s_addr2; /* Broadcast address for the socket */ + struct nx_info *nxi = sk->sk_nx_info; /* If the socket has its own bind function then use it. 
(RAW) */ if (sk->sk_prot->bind) { @@ -410,7 +418,40 @@ if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = addr->sin_addr.s_addr; + s_addr1 = s_addr; + s_addr2 = 0xffffffffl; + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d", + sk, sk->sk_nx_info, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0), + VXD_QUAD(s_addr)); + if (nxi) { + __u32 v4_bcast = nxi->v4_bcast; + __u32 ipv4root = nxi->ipv4[0]; + int nbipv4 = nxi->nbipv4; + + if (s_addr == 0) { + /* bind to any for 1-n */ + s_addr = ipv4root; + s_addr1 = (nbipv4 > 1) ? 0 : s_addr; + s_addr2 = v4_bcast; + } else if (s_addr == IPI_LOOPBACK) { + /* rewrite localhost to ipv4root */ + s_addr = ipv4root; + s_addr1 = ipv4root; + } else if (s_addr != v4_bcast) { + /* normal address bind */ + if (!addr_in_nx_info(nxi, s_addr)) + return -EADDRNOTAVAIL; + } + } + chk_addr_ret = inet_addr_type(s_addr); + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d", + sk, VXD_QUAD(s_addr), VXD_QUAD(s_addr1), VXD_QUAD(s_addr2)); /* Not specified by any standard per-se, however it breaks too * many applications when removed. 
It is unfortunate since @@ -422,7 +463,7 @@ err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -447,7 +488,8 @@ if (sk->sk_state != TCP_CLOSE || inet->num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = s_addr1; + inet->rcv_saddr2 = s_addr2; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ Index: linux-2.6.14/net/ipv4/devinet.c =================================================================== --- linux-2.6.14.orig/net/ipv4/devinet.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/devinet.c 2005-10-31 11:05:45.000000000 -0600 @@ -507,6 +507,33 @@ return rc; } +/* + Check that a device is not member of the ipv4root assigned to the process + Return true if this is the case + + If the process is not bound to specific IP, then it returns 0 (all + interface are fine). 
+*/ +static inline int devinet_notiproot (struct in_ifaddr *ifa) +{ + int ret = 0; + struct nx_info *nxi; + + if ((nxi = current->nx_info)) { + int i; + int nbip = nxi->nbipv4; + __u32 addr = ifa->ifa_local; + ret = 1; + for (i = 0; i < nbip; i++) { + if (nxi->ipv4[i] == addr) { + ret = 0; + break; + } + } + } + return ret; +} + int devinet_ioctl(unsigned int cmd, void __user *arg) { @@ -614,6 +641,9 @@ ret = -EADDRNOTAVAIL; if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) goto done; + if (vx_flags(VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, current->nx_info)) + goto done; switch(cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -757,6 +787,9 @@ goto out; for (; ifa; ifa = ifa->ifa_next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, current->nx_info)) + continue; if (!buf) { done += sizeof(ifr); continue; @@ -1068,6 +1101,7 @@ struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; + struct sock *sk = skb->sk; int s_ip_idx, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; @@ -1085,6 +1119,9 @@ for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, sk->sk_nx_info)) + continue; if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, Index: linux-2.6.14/net/ipv4/fib_hash.c =================================================================== --- linux-2.6.14.orig/net/ipv4/fib_hash.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/fib_hash.c 2005-10-31 11:05:45.000000000 -0600 @@ -988,6 +988,8 @@ return flags; } +extern int dev_in_nx_info(struct net_device *, struct nx_info *); + /* * This outputs /proc/net/route. 
* @@ -1018,7 +1020,8 @@ prefix = f->fn_key; mask = FZ_MASK(iter->zone); flags = fib_flag_trans(fa->fa_type, mask, fi); - if (fi) + if (fi && (!vx_flags(VXF_HIDE_NETIF, 0) || + dev_in_nx_info(fi->fib_dev, current->nx_info))) snprintf(bf, sizeof(bf), "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, Index: linux-2.6.14/net/ipv4/netfilter/ip_conntrack_proto_sctp.c =================================================================== --- linux-2.6.14.orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/netfilter/ip_conntrack_proto_sctp.c 2005-10-31 11:05:45.000000000 -0600 @@ -60,7 +60,7 @@ static unsigned long ip_ct_sctp_timeout_closed = 10 SECS; static unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS; static unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS; -static unsigned long ip_ct_sctp_timeout_established = 5 DAYS; +static unsigned long ip_ct_sctp_timeout_established = 2 DAYS; static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; Index: linux-2.6.14/net/ipv4/netfilter/ip_conntrack_proto_tcp.c =================================================================== --- linux-2.6.14.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2005-10-31 11:05:45.000000000 -0600 @@ -87,7 +87,7 @@ unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS; unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned long ip_ct_tcp_timeout_established = 5 DAYS; +unsigned long ip_ct_tcp_timeout_established = 2 DAYS; unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS; unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS; unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS; Index: linux-2.6.14/net/ipv4/raw.c 
=================================================================== --- linux-2.6.14.orig/net/ipv4/raw.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/raw.c 2005-10-31 11:05:45.000000000 -0600 @@ -102,6 +102,27 @@ write_unlock_bh(&raw_v4_lock); } + +/* + * Check if a given address matches for a socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr/baddr: socket addresses + */ +static inline int raw_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr, + uint32_t baddr) +{ + if (addr && (saddr == addr || baddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif) @@ -113,7 +134,8 @@ if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + raw_addr_match(sk->sk_nx_info, laddr, + inet->rcv_saddr, inet->rcv_saddr2) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -311,6 +333,10 @@ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } + err = -EPERM; + if (!vx_check(0, VX_ADMIN) && !capable(CAP_NET_RAW) + && (!addr_in_nx_info(sk->sk_nx_info, iph->saddr))) + goto error; err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); @@ -483,6 +509,12 @@ if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + if (sk->sk_nx_info) { + err = ip_find_src(sk->sk_nx_info, &rt, &fl); + + if (err) + goto done; + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) @@ -752,7 +784,8 @@ struct hlist_node *node; sk_for_each(sk, node, &raw_v4_htable[state->bucket]) - if (sk->sk_family == PF_INET) + if (sk->sk_family == PF_INET && + vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) goto found; } sk = NULL; @@ -768,7 +801,8 @@ sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != PF_INET); + } while (sk && 
(sk->sk_family != PF_INET || + !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); Index: linux-2.6.14/net/ipv4/tcp_ipv4.c =================================================================== --- linux-2.6.14.orig/net/ipv4/tcp_ipv4.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/tcp_ipv4.c 2005-10-31 11:05:45.000000000 -0600 @@ -76,6 +76,7 @@ #include #include #include +#include int sysctl_tcp_tw_reuse; int sysctl_tcp_low_latency; @@ -1525,6 +1526,12 @@ req = req->dl_next; while (1) { while (req) { + vxdprintk(VXD_CBIT(net, 6), "sk,req: %p [#%d] (from %d)", + req->sk, (req->sk)?req->sk->sk_xid:0, vx_current_xid()); + if (req->sk && !vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) { + req = req->dl_next; + continue; + } if (req->rsk_ops->family == st->family) { cur = req; goto out; @@ -1549,6 +1556,10 @@ } get_sk: sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", + sk, sk->sk_xid, vx_current_xid()); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) { cur = sk; goto out; @@ -1600,18 +1611,26 @@ read_lock(&tcp_hashinfo.ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if (sk->sk_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egf: %p [#%d] (from %d)", + sk, sk->sk_xid, vx_current_xid()); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; + if (sk->sk_family != st->family) continue; - } rc = sk; goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; inet_twsk_for_each(tw, node, &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { - if (tw->tw_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "tw: %p [#%d] (from %d)", + tw, tw->tw_xid, vx_current_xid()); + if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) + continue; + if (tw->tw_family != st->family) continue; - } rc = tw; goto out; } @@ -1635,7 +1654,8 @@ tw = cur; tw = tw_next(tw); get_tw: - 
while (tw && tw->tw_family != st->family) { + while (tw && (tw->tw_family != st->family || + !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) { tw = tw_next(tw); } if (tw) { @@ -1659,6 +1679,11 @@ sk = sk_next(sk); sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egn: %p [#%d] (from %d)", + sk, sk->sk_xid, vx_current_xid()); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) goto found; } Index: linux-2.6.14/net/ipv4/tcp_minisocks.c =================================================================== --- linux-2.6.14.orig/net/ipv4/tcp_minisocks.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/tcp_minisocks.c 2005-10-31 11:05:45.000000000 -0600 @@ -29,6 +29,10 @@ #include #include +#include +#include +#include + #ifdef CONFIG_SYSCTL #define SYNC_INIT 0 /* let the user enable it */ #else @@ -295,6 +299,11 @@ tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tw->tw_xid = sk->sk_xid; + tw->tw_vx_info = NULL; + tw->tw_nid = sk->sk_nid; + tw->tw_nx_info = NULL; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); Index: linux-2.6.14/net/ipv4/udp.c =================================================================== --- linux-2.6.14.orig/net/ipv4/udp.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/udp.c 2005-10-31 11:05:45.000000000 -0600 @@ -175,14 +175,12 @@ struct inet_sock *inet2 = inet_sk(sk2); if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && + sk2 != sk && !ipv6_only_sock(sk2) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && + nx_addr_conflict(sk->sk_nx_info, + inet_rcv_saddr(sk), sk2) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } @@ -217,6 +215,17 @@ write_unlock_bh(&udp_hash_lock); } +static inline 
int udp_in_list(struct nx_info *nx_info, u32 addr) +{ + int n = nx_info->nbipv4; + int i; + + for (i = 0; i < n; i++) + if (nx_info->ipv4[i] == addr) + return 1; + return 0; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ @@ -237,6 +246,11 @@ if (inet->rcv_saddr != daddr) continue; score+=2; + } else if (sk->sk_nx_info) { + if (udp_in_list(sk->sk_nx_info, daddr)) + score+=2; + else + continue; } if (inet->daddr) { if (inet->daddr != saddr) @@ -293,7 +307,8 @@ if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr && + inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -603,6 +618,15 @@ .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct nx_info *nxi = sk->sk_nx_info; + + if (nxi) { + err = ip_find_src(nxi, &rt, &fl); + if (err) + goto out; + if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + daddr = fl.fl4_dst = nxi->ipv4[0]; + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -1385,8 +1409,10 @@ for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; + sk_for_each(sk, node, &udp_hash[state->bucket]) { - if (sk->sk_family == state->family) + if (sk->sk_family == state->family && + vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) goto found; } } @@ -1403,7 +1429,8 @@ sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != state->family); + } while (sk && (sk->sk_family != state->family || + !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(&udp_hash[state->bucket]); Index: linux-2.6.14/net/ipv6/addrconf.c =================================================================== --- linux-2.6.14.orig/net/ipv6/addrconf.c 
2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv6/addrconf.c 2005-10-31 11:05:45.000000000 -0600 @@ -2408,7 +2408,10 @@ static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, + + /* no ipv6 inside a vserver for now */ + if (vx_check(0, VX_ADMIN|VX_WATCH)) + seq_printf(seq, "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n", NIP6(ifp->addr), ifp->idev->dev->ifindex, @@ -2761,6 +2764,10 @@ struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); @@ -2967,6 +2974,10 @@ struct net_device *dev; struct inet6_dev *idev; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + read_lock(&dev_base_lock); for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) Index: linux-2.6.14/net/netlink/af_netlink.c =================================================================== --- linux-2.6.14.orig/net/netlink/af_netlink.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/netlink/af_netlink.c 2005-10-31 11:05:45.000000000 -0600 @@ -55,6 +55,9 @@ #include #include #include +#include +#include +#include #include #include Index: linux-2.6.14/net/socket.c =================================================================== --- linux-2.6.14.orig/net/socket.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/socket.c 2005-10-31 11:05:45.000000000 -0600 @@ -96,6 +96,7 @@ #include #include +#include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, @@ -536,7 +537,7 @@ struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; + int err, len; si->sock = sock; si->scm = NULL; @@ -547,7 +548,21 @@ if (err) return err; - return 
sock->ops->sendmsg(iocb, sock, msg, size); + len = sock->ops->sendmsg(iocb, sock, msg, size); + if (sock->sk) { + if (len == size) + vx_sock_send(sock->sk, size); + else + vx_sock_fail(sock->sk, size); + } + vxdprintk(VXD_CBIT(net, 7), + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (unsigned int)size, len); + return len; } int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -585,7 +600,7 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - int err; + int err, len; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -598,7 +613,17 @@ if (err) return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + len = sock->ops->recvmsg(iocb, sock, msg, size, flags); + if ((len >= 0) && sock->sk) + vx_sock_recv(sock->sk, len); + vxdprintk(VXD_CBIT(net, 7), + "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (unsigned int)size, len); + return len; } int sock_recvmsg(struct socket *sock, struct msghdr *msg, @@ -1085,6 +1110,10 @@ if (type < 0 || type >= SOCK_MAX) return -EINVAL; + /* disable IPv6 inside vservers for now */ + if (family == PF_INET6 && !vx_check(0, VX_ADMIN)) + return -EAFNOSUPPORT; + /* Compatibility. 
This uglymoron is moved from INET layer to here to avoid @@ -1195,6 +1224,7 @@ if (retval < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock->flags); retval = sock_map_fd(sock); if (retval < 0) goto out_release; @@ -1225,10 +1255,12 @@ err = sock_create(family, type, protocol, &sock1); if (err < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock1->flags); err = sock_create(family, type, protocol, &sock2); if (err < 0) goto out_release_1; + set_bit(SOCK_USER_SOCKET, &sock2->flags); err = sock1->ops->socketpair(sock1, sock2); if (err < 0) Index: linux-2.6.14/net/sunrpc/auth.c =================================================================== --- linux-2.6.14.orig/net/sunrpc/auth.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/sunrpc/auth.c 2005-10-31 11:05:45.000000000 -0600 @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -244,6 +245,7 @@ struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, + .xid = vx_current_xid(), .group_info = current->group_info, }; struct rpc_cred *ret; @@ -263,6 +265,7 @@ struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, + .xid = vx_current_xid(), .group_info = current->group_info, }; struct rpc_cred *ret; Index: linux-2.6.14/net/sunrpc/auth_unix.c =================================================================== --- linux-2.6.14.orig/net/sunrpc/auth_unix.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/sunrpc/auth_unix.c 2005-10-31 11:05:45.000000000 -0600 @@ -13,12 +13,14 @@ #include #include #include +#include #define NFS_NGROUPS 16 struct unx_cred { struct rpc_cred uc_base; gid_t uc_gid; + xid_t uc_xid; gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -80,6 +82,7 @@ if (flags & RPC_TASK_ROOTCREDS) { cred->uc_uid = 0; cred->uc_gid = 0; + cred->uc_xid = vx_current_xid(); cred->uc_gids[0] = NOGROUP; } else { int groups = acred->group_info->ngroups; @@ -88,6 +91,7 @@ cred->uc_uid = acred->uid; cred->uc_gid 
= acred->gid; + cred->uc_xid = acred->xid; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) @@ -119,7 +123,8 @@ int groups; if (cred->uc_uid != acred->uid - || cred->uc_gid != acred->gid) + || cred->uc_gid != acred->gid + || cred->uc_xid != acred->xid) return 0; groups = acred->group_info->ngroups; @@ -145,7 +150,7 @@ struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; u32 *base, *hold; - int i; + int i, tagxid; *p++ = htonl(RPC_AUTH_UNIX); base = p++; @@ -155,9 +160,12 @@ * Copy the UTS nodename captured when the client was created. */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); + tagxid = task->tk_client->cl_tagxid; - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); + *p++ = htonl((u32) XIDINO_UID(tagxid, + cred->uc_uid, cred->uc_xid)); + *p++ = htonl((u32) XIDINO_GID(tagxid, + cred->uc_gid, cred->uc_xid)); hold = p++; for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) *p++ = htonl((u32) cred->uc_gids[i]); Index: linux-2.6.14/net/unix/af_unix.c =================================================================== --- linux-2.6.14.orig/net/unix/af_unix.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/unix/af_unix.c 2005-10-31 11:05:45.000000000 -0600 @@ -117,6 +117,9 @@ #include #include #include +#include +#include +#include int sysctl_unix_max_dgram_qlen = 10; @@ -781,7 +784,7 @@ */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current->fs->umask); - err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); + err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL); if (err) goto out_mknod_dput; up(&nd.dentry->d_inode->i_sem); Index: linux-2.6.14/net/x25/af_x25.c =================================================================== --- linux-2.6.14.orig/net/x25/af_x25.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/x25/af_x25.c 2005-10-31 11:05:45.000000000 -0600 @@ -490,7 +490,10 @@ x25 = 
x25_sk(sk); - sock_init_data(sock, sk); + sk->sk_socket = sock; + sk->sk_type = sock->type; + sk->sk_sleep = &sock->wait; + sock->sk = sk; x25_init_timers(sk); Index: linux-2.6.14/security/commoncap.c =================================================================== --- linux-2.6.14.orig/security/commoncap.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/security/commoncap.c 2005-10-31 11:05:45.000000000 -0600 @@ -142,7 +142,7 @@ /* Derived from fs/exec.c:compute_creds. */ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); + new_permitted = cap_intersect (bprm->cap_permitted, vx_current_bcaps()); working = cap_intersect (bprm->cap_inheritable, current->cap_inheritable); new_permitted = cap_combine (new_permitted, working); @@ -311,7 +311,8 @@ int cap_syslog (int type) { - if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) + if ((type != 3 && type != 10) && + !capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SYSLOG)) return -EPERM; return 0; } Index: linux-2.6.14/security/dummy.c =================================================================== --- linux-2.6.14.orig/security/dummy.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/security/dummy.c 2005-10-31 11:05:45.000000000 -0600 @@ -84,7 +84,7 @@ return 0; } -static int dummy_quotactl (int cmds, int type, int id, struct super_block *sb) +static int dummy_quotactl (int cmds, int type, int id, struct dqhash *hash) { return 0; } Index: linux-2.6.14/security/security.c =================================================================== --- linux-2.6.14.orig/security/security.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/security/security.c 2005-10-31 11:05:45.000000000 -0600 @@ -185,6 +185,8 @@ */ int capable(int cap) { + if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap)) + return 0; if (security_ops->capable(current, cap)) { /* capability denied */ return 0; @@ -195,9 +197,24 @@ return 1; } +int vx_capable(int cap, int ccap) +{ + if 
(security_ops->capable(current, cap)) { + /* capability denied */ + return 0; + } + if (!vx_ccaps(ccap)) + return 0; + + /* capability granted */ + current->flags |= PF_SUPERPRIV; + return 1; +} + EXPORT_SYMBOL_GPL(register_security); EXPORT_SYMBOL_GPL(unregister_security); EXPORT_SYMBOL_GPL(mod_reg_security); EXPORT_SYMBOL_GPL(mod_unreg_security); EXPORT_SYMBOL(capable); +EXPORT_SYMBOL(vx_capable); EXPORT_SYMBOL(security_ops); Index: linux-2.6.14/include/net/inet_hashtables.h =================================================================== --- linux-2.6.14.orig/include/net/inet_hashtables.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/net/inet_hashtables.h 2005-10-31 11:05:45.000000000 -0600 @@ -293,6 +293,25 @@ return ((struct rtable *)skb->dst)->rt_iif; } +/* + * Check if a given address matches for a tcp socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr: socket addresses + */ +static inline int tcp_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr) +{ + if (addr && (saddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, const unsigned short hnum, @@ -313,7 +332,7 @@ const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + tcp_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if) goto sherry_cache; Index: linux-2.6.14/include/net/inet_timewait_sock.h =================================================================== --- linux-2.6.14.orig/include/net/inet_timewait_sock.h 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/include/net/inet_timewait_sock.h 2005-10-31 11:05:45.000000000 -0600 @@ -38,8 +38,8 @@ * If time > 4sec, it is "slow" path, no recycling is required, * so 
that we select tick to get range about 4 seconds. */ -#if HZ <= 16 || HZ > 4096 -# error Unsupported: HZ <= 16 or HZ > 4096 +#if HZ <= 16 || HZ > 32768 +# error Unsupported: HZ <= 16 or HZ > 32768 #elif HZ <= 32 # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) #elif HZ <= 64 @@ -54,8 +54,14 @@ # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) #elif HZ <= 2048 # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#else +#elif HZ <= 4096 # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 8192 +# define INET_TWDR_RECYCLE_TICK (13 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 16384 +# define INET_TWDR_RECYCLE_TICK (14 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#else +# define INET_TWDR_RECYCLE_TICK (15 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) #endif /* TIME_WAIT reaping mechanism. */ @@ -115,6 +121,10 @@ #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot +#define tw_xid __tw_common.skc_xid +#define tw_vx_info __tw_common.skc_vx_info +#define tw_nid __tw_common.skc_nid +#define tw_nx_info __tw_common.skc_nx_info volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ unsigned char tw_rcv_wscale; Index: linux-2.6.14/net/ipv4/inet_connection_sock.c =================================================================== --- linux-2.6.14.orig/net/ipv4/inet_connection_sock.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/inet_connection_sock.c 2005-10-31 11:05:45.000000000 -0600 @@ -39,7 +39,6 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -52,9 +51,8 @@ sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == 
sk_rcv_saddr) + if (nx_addr_conflict(sk->sk_nx_info, + inet_rcv_saddr(sk), sk2)) break; } } Index: linux-2.6.14/net/ipv4/inet_hashtables.c =================================================================== --- linux-2.6.14.orig/net/ipv4/inet_hashtables.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/net/ipv4/inet_hashtables.c 2005-10-31 11:05:45.000000000 -0600 @@ -121,6 +121,7 @@ EXPORT_SYMBOL(inet_listen_wlock); + /* * Don't inline this cruft. Here are some nice properties to exploit here. The * BSD API does not allow a listening sock to specify the remote port nor the @@ -141,11 +142,10 @@ const __u32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (tcp_addr_match(sk->sk_nx_info, daddr, rcv_saddr)) score += 2; - } + else + continue; if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) continue; Index: linux-2.6.14/security/selinux/hooks.c =================================================================== --- linux-2.6.14.orig/security/selinux/hooks.c 2005-10-27 19:02:08.000000000 -0500 +++ linux-2.6.14/security/selinux/hooks.c 2005-10-31 11:05:45.000000000 -0600 @@ -1381,9 +1381,10 @@ return error; } -static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) +static int selinux_quotactl(int cmds, int type, int id, struct dqhash *hash) { int rc = 0; + struct super_block *sb = hash->dqh_sb; if (!sb) return 0;