diff -NurpP --minimal linux-2.4.22/Documentation/Configure.help linux-2.4.22-vs1.21/Documentation/Configure.help --- linux-2.4.22/Documentation/Configure.help Mon Aug 25 13:44:39 2003 +++ linux-2.4.22-vs1.21/Documentation/Configure.help Thu Dec 11 00:49:10 2003 @@ -529,6 +529,11 @@ CONFIG_BLK_DEV_LOOP Most users will answer N here. +Virtual Root device support +CONFIG_BLK_DEV_VROOT + Saying Y here will allow you to use quota/fs ioctls on a shared + partition within a virtual server without compromising security. + Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL) CONFIG_BLK_DEV_UMEM Saying Y here will include support for the MM5415 family of diff -NurpP --minimal linux-2.4.22/Makefile linux-2.4.22-vs1.21/Makefile --- linux-2.4.22/Makefile Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/Makefile Thu Dec 11 00:49:06 2003 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 22 -EXTRAVERSION = +EXTRAVERSION = -vs1.21 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -NurpP --minimal linux-2.4.22/arch/alpha/kernel/entry.S linux-2.4.22-vs1.21/arch/alpha/kernel/entry.S --- linux-2.4.22/arch/alpha/kernel/entry.S Fri Jun 13 16:51:29 2003 +++ linux-2.4.22-vs1.21/arch/alpha/kernel/entry.S Thu Dec 11 00:49:06 2003 @@ -1044,7 +1044,7 @@ sys_call_table: .quad alpha_ni_syscall /* 270 */ .quad alpha_ni_syscall .quad alpha_ni_syscall - .quad alpha_ni_syscall + .quad sys_vserver /* 273 sys_vserver */ .quad alpha_ni_syscall .quad alpha_ni_syscall /* 275 */ .quad alpha_ni_syscall diff -NurpP --minimal linux-2.4.22/arch/i386/kernel/entry.S linux-2.4.22-vs1.21/arch/i386/kernel/entry.S --- linux-2.4.22/arch/i386/kernel/entry.S Fri Jun 13 16:51:29 2003 +++ linux-2.4.22-vs1.21/arch/i386/kernel/entry.S Thu Dec 11 00:49:06 2003 @@ -659,10 +659,25 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* sys_exit_group */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_lookup_dcookie */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_create */ - .long 
SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_ctl 255 */ + .long SYMBOL_NAME(sys_ni_syscall) /* 255 sys_epoll_ctl */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_wait */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_remap_file_pages */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_tid_address */ + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) /* 260 */ + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) /* 265 */ + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) /* 270 */ + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_vserver) /* 273 sys_vserver */ .rept NR_syscalls-(.-sys_call_table)/4 .long SYMBOL_NAME(sys_ni_syscall) diff -NurpP --minimal linux-2.4.22/arch/i386/kernel/ptrace.c linux-2.4.22-vs1.21/arch/i386/kernel/ptrace.c --- linux-2.4.22/arch/i386/kernel/ptrace.c Sat Aug 3 02:39:42 2002 +++ linux-2.4.22-vs1.21/arch/i386/kernel/ptrace.c Thu Dec 11 00:49:06 2003 @@ -170,7 +170,7 @@ asmlinkage int sys_ptrace(long request, if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(child->vx_id, VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.4.22/arch/parisc/kernel/syscall.S linux-2.4.22-vs1.21/arch/parisc/kernel/syscall.S --- linux-2.4.22/arch/parisc/kernel/syscall.S Fri Jun 13 16:51:31 2003 +++ linux-2.4.22-vs1.21/arch/parisc/kernel/syscall.S Thu Dec 11 00:49:06 2003 @@ -605,6 +605,71 @@ sys_call_table: ENTRY_SAME(gettid) ENTRY_SAME(readahead) ENTRY_SAME(tkill) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 210 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 215 */ + ENTRY_SAME(ni_syscall) 
+ ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 220 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 225 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 230 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 235 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 240 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 245 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 250 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 255 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 260 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 265 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) /* 270 */ + ENTRY_SAME(ni_syscall) + ENTRY_SAME(ni_syscall) + ENTRY_SAME(vserver) /* 273 sys_vserver */ .end diff -NurpP --minimal linux-2.4.22/arch/ppc/kernel/misc.S linux-2.4.22-vs1.21/arch/ppc/kernel/misc.S --- linux-2.4.22/arch/ppc/kernel/misc.S Mon Aug 25 13:44:40 2003 +++ linux-2.4.22-vs1.21/arch/ppc/kernel/misc.S Thu Dec 11 00:49:06 2003 @@ -1243,6 +1243,48 @@ _GLOBAL(sys_call_table) .long sys_ni_syscall /* reserved for sys_io_getevents */ .long sys_ni_syscall /* 230 reserved for sys_io_submit */ .long sys_ni_syscall /* reserved for 
sys_io_cancel */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 235 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 240 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 245 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 250 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 255 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 260 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 265 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 270 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vserver /* 273 sys_vserver */ .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff -NurpP --minimal linux-2.4.22/arch/ppc/kernel/ptrace.c linux-2.4.22-vs1.21/arch/ppc/kernel/ptrace.c --- linux-2.4.22/arch/ppc/kernel/ptrace.c Mon Aug 25 13:44:40 2003 +++ linux-2.4.22-vs1.21/arch/ppc/kernel/ptrace.c Thu Dec 11 00:49:06 2003 @@ -188,7 +188,7 @@ int sys_ptrace(long request, long pid, l if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(child->vx_id, VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.4.22/arch/ppc64/kernel/misc.S linux-2.4.22-vs1.21/arch/ppc64/kernel/misc.S --- linux-2.4.22/arch/ppc64/kernel/misc.S Mon Aug 25 13:44:40 2003 +++ linux-2.4.22-vs1.21/arch/ppc64/kernel/misc.S Thu Dec 11 00:49:06 2003 @@ -803,24 +803,74 @@ _GLOBAL(sys_call_table32) .llong .sys_madvise /* 205 */ .llong .sys_mincore /* 206 */ .llong .sys_gettid /* 207 */ -#if 
0 /* Reserved syscalls */ - .llong .sys_tkill /* 208 */ - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr - .llong .sys_getxattr - .llong .sys_lgetxattr - .llong .sys_fgetxattr - .llong .sys_listxattr /* 215 */ - .llong .sys_llistxattr - .llong .sys_flistxattr - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ - .llong .sys_futex -#endif - .llong .sys_perfmonctl /* Put this here for now ... */ - .rept NR_syscalls-222 + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 210 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 215 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 220 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 225 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 230 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 235 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 240 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 245 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 250 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 255 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 260 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong 
.sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 265 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 270 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vserver /* 273 sys_vserver */ + + .rept NR_syscalls-273 .llong .sys_ni_syscall .endr #endif @@ -1034,23 +1084,73 @@ _GLOBAL(sys_call_table) .llong .sys_madvise /* 205 */ .llong .sys_mincore /* 206 */ .llong .sys_gettid /* 207 */ -#if 0 /* Reserved syscalls */ - .llong .sys_tkill /* 208 */ - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr - .llong .sys_getxattr - .llong .sys_lgetxattr - .llong .sys_fgetxattr - .llong .sys_listxattr /* 215 */ - .llong .sys_llistxattr - .llong .sys_flistxattr - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ - .llong .sys_futex -#endif - .llong .sys_perfmonctl /* Put this here for now ... */ - .rept NR_syscalls-222 + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 210 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 215 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 220 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 225 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 230 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 235 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 240 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + 
.llong .sys_ni_syscall /* 245 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 250 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 255 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 260 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 265 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 270 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vserver /* 273 sys_vserver */ + + .rept NR_syscalls-273 .llong .sys_ni_syscall .endr diff -NurpP --minimal linux-2.4.22/arch/ppc64/kernel/ptrace.c linux-2.4.22-vs1.21/arch/ppc64/kernel/ptrace.c --- linux-2.4.22/arch/ppc64/kernel/ptrace.c Fri Jun 13 16:51:32 2003 +++ linux-2.4.22-vs1.21/arch/ppc64/kernel/ptrace.c Thu Dec 11 00:49:06 2003 @@ -115,7 +115,7 @@ int sys_ptrace(long request, long pid, l if (child) get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) + if (!child || !vx_check(child->vx_id, VX_WATCH|VX_IDENT)) goto out; ret = -EPERM; diff -NurpP --minimal linux-2.4.22/arch/sparc/kernel/systbls.S linux-2.4.22-vs1.21/arch/sparc/kernel/systbls.S --- linux-2.4.22/arch/sparc/kernel/systbls.S Fri Jun 13 16:51:32 2003 +++ linux-2.4.22-vs1.21/arch/sparc/kernel/systbls.S Thu Dec 11 00:49:06 2003 @@ -70,7 +70,10 @@ sys_call_table: /*240*/ .long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler /*245*/ .long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl -/*255*/ .long sys_nis_syscall, 
sys_nis_syscall +/*255*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*260*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*265*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*270*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_vserver, sys_nis_syscall #ifdef CONFIG_SUNOS_EMUL /* Now the SunOS syscall table. */ diff -NurpP --minimal linux-2.4.22/arch/sparc64/kernel/entry.S linux-2.4.22-vs1.21/arch/sparc64/kernel/entry.S --- linux-2.4.22/arch/sparc64/kernel/entry.S Fri Jun 13 16:51:32 2003 +++ linux-2.4.22-vs1.21/arch/sparc64/kernel/entry.S Thu Dec 11 00:49:06 2003 @@ -26,7 +26,7 @@ #define curptr g6 -#define NR_SYSCALLS 256 /* Each OS is different... */ +#define NR_SYSCALLS 274 /* Each OS is different... */ .text .align 32 diff -NurpP --minimal linux-2.4.22/arch/sparc64/kernel/ptrace.c linux-2.4.22-vs1.21/arch/sparc64/kernel/ptrace.c --- linux-2.4.22/arch/sparc64/kernel/ptrace.c Fri Nov 29 00:53:12 2002 +++ linux-2.4.22-vs1.21/arch/sparc64/kernel/ptrace.c Thu Dec 11 00:49:06 2003 @@ -156,7 +156,7 @@ asmlinkage void do_ptrace(struct pt_regs get_task_struct(child); read_unlock(&tasklist_lock); - if (!child) { + if (!child || !vx_check(child->vx_id, VX_WATCH|VX_IDENT)) { pt_error_return(regs, ESRCH); goto out; } diff -NurpP --minimal linux-2.4.22/arch/sparc64/kernel/systbls.S linux-2.4.22-vs1.21/arch/sparc64/kernel/systbls.S --- linux-2.4.22/arch/sparc64/kernel/systbls.S Fri Jun 13 16:51:32 2003 +++ linux-2.4.22-vs1.21/arch/sparc64/kernel/systbls.S Thu Dec 11 00:49:06 2003 @@ -70,7 +70,10 @@ sys_call_table32: /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys32_sched_rr_get_interval, sys32_nanosleep /*250*/ .word sys32_mremap, sys32_sysctl, sys_getsid, sys_fdatasync, 
sys32_nfsservctl - .word sys_aplib + .word sys_aplib, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*260*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall + .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*270*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_vserver, sys_nis_syscall /* Now the 64-bit native Linux syscall table. */ @@ -129,7 +132,10 @@ sys_call_table: /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep /*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl - .word sys_aplib + .word sys_aplib, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*260*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall + .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*270*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_vserver, sys_nis_syscall #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \ defined(CONFIG_SOLARIS_EMUL_MODULE) @@ -225,5 +231,12 @@ sunos_sys_table: .word sunos_nosys, sunos_nosys /*250*/ .word sunos_nosys, sunos_nosys, sunos_nosys .word sunos_nosys, sunos_nosys, sys_aplib + .word sunos_nosys, sunos_nosys, sunos_nosys + .word sunos_nosys, sunos_nosys, sunos_nosys + .word sunos_nosys, sunos_nosys, sunos_nosys + .word sunos_nosys, sunos_nosys, sunos_nosys + .word sunos_nosys, sunos_nosys, sunos_nosys + .word sunos_nosys, sunos_nosys, sunos_nosys + .word sunos_nosys #endif diff -NurpP --minimal linux-2.4.22/arch/x86_64/ia32/ia32entry.S linux-2.4.22-vs1.21/arch/x86_64/ia32/ia32entry.S --- linux-2.4.22/arch/x86_64/ia32/ia32entry.S Fri Jun 13 16:51:32 2003 +++ 
linux-2.4.22-vs1.21/arch/x86_64/ia32/ia32entry.S Thu Dec 11 00:49:06 2003 @@ -369,9 +369,41 @@ ia32_sys_call_table: .quad quiet_ni_syscall /* fremovexattr - 237 */ .quad sys_tkill .quad sys_sendfile64 - .quad quiet_ni_syscall /* futex */ + .quad quiet_ni_syscall /* 240 futex */ .quad quiet_ni_syscall /* sched_setaffinity */ .quad quiet_ni_syscall /* sched_getaffinity */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 245 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 250 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 255 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 260 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 265 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 270 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad sys_vserver /* 273 sys_vserver */ + ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall diff -NurpP --minimal linux-2.4.22/arch/x86_64/kernel/sys_x86_64.c linux-2.4.22-vs1.21/arch/x86_64/kernel/sys_x86_64.c --- linux-2.4.22/arch/x86_64/kernel/sys_x86_64.c Fri Jun 13 16:51:32 2003 +++ linux-2.4.22-vs1.21/arch/x86_64/kernel/sys_x86_64.c Thu Dec 11 00:49:06 2003 @@ -108,8 +108,18 @@ unsigned long arch_get_unmapped_area(str asmlinkage long sys_uname(struct new_utsname * name) { int err; + struct new_utsname tmp, *pttmp; + down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + if (current->s_info) { + tmp = system_utsname; + strcpy (tmp.nodename, current->s_info->nodename); + strcpy (tmp.domainname, current->s_info->domainname); 
+ pttmp = &tmp; + } + else + pttmp = &system_utsname; + err=copy_to_user(name, pttmp, sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err = copy_to_user(name->machine, "i686", 5); diff -NurpP --minimal linux-2.4.22/drivers/block/Config.in linux-2.4.22-vs1.21/drivers/block/Config.in --- linux-2.4.22/drivers/block/Config.in Fri Nov 29 00:53:12 2002 +++ linux-2.4.22-vs1.21/drivers/block/Config.in Thu Dec 11 00:49:10 2003 @@ -40,6 +40,7 @@ dep_tristate 'Mylex DAC960/DAC1100 PCI R dep_tristate 'Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)' CONFIG_BLK_DEV_UMEM $CONFIG_PCI $CONFIG_EXPERIMENTAL tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +tristate 'Virtual Root device support' CONFIG_BLK_DEV_VROOT dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET tristate 'RAM disk support' CONFIG_BLK_DEV_RAM diff -NurpP --minimal linux-2.4.22/drivers/block/Makefile linux-2.4.22-vs1.21/drivers/block/Makefile --- linux-2.4.22/drivers/block/Makefile Fri Jun 13 16:51:32 2003 +++ linux-2.4.22-vs1.21/drivers/block/Makefile Thu Dec 11 00:49:10 2003 @@ -31,6 +31,7 @@ obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss. obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o +obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o subdir-$(CONFIG_PARIDE) += paride diff -NurpP --minimal linux-2.4.22/drivers/block/vroot.c linux-2.4.22-vs1.21/drivers/block/vroot.c --- linux-2.4.22/drivers/block/vroot.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.22-vs1.21/drivers/block/vroot.c Thu Dec 11 00:49:10 2003 @@ -0,0 +1,329 @@ +/* + * linux/drivers/block/vroot.c + * + * Written by Herbert Pötzl, 9/11/2002 + * + * based on the loop.c code by Theodore Ts'o. + * + * Copyright 2002-2003 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. 
+ * + */ + +#define MAJOR_NR VROOT_MAJOR + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "vroot.h" + +static int max_vroot = MAX_VROOT_DEFAULT; +static struct vroot_device *vroot_dev; +static devfs_handle_t devfs_handle; /* For the directory */ + +#ifdef MODULE +typedef kdev_t (*vroot_get_dev_f)(int dev); + +extern int register_vroot_get_dev(vroot_get_dev_f); +extern int unregister_vroot_get_dev(vroot_get_dev_f); + +static kdev_t _vroot_get_dev(int dev) +#else + kdev_t vroot_get_dev(int dev) +#endif +{ + struct vroot_device *vr; + + if (dev >= max_vroot) + return NODEV; + + vr = &vroot_dev[dev]; + if (vr->vr_state != Vr_bound) + return NODEV; + dprintk(KERN_INFO "vroot[%d]_get_dev: dev(%d,%d)\n", + dev, MAJOR(vr->vr_device), MINOR(vr->vr_device)); + return vr->vr_device; +} + +static int vroot_set_dev( + struct vroot_device *vr, + struct file *vr_file, + kdev_t dev, + unsigned int arg) +{ + struct file *file; + struct inode *inode; + int error; + + MOD_INC_USE_COUNT; + + error = -EBUSY; + if (vr->vr_state != Vr_unbound) + goto out; + + error = -EBADF; + file = fget(arg); + if (!file) + goto out; + + error = -EINVAL; + inode = file->f_dentry->d_inode; + + if (S_ISBLK(inode->i_mode)) { + vr->vr_device = inode->i_rdev; + if (vr->vr_device == dev) { + error = -EBUSY; + goto out_fput; + } + } else + goto out_fput; + + dprintk(KERN_INFO "vroot[%d]_set_dev: dev(%d,%d)\n", + vr->vr_number, + MAJOR(inode->i_rdev), MINOR(inode->i_rdev)); + + vr->vr_state = Vr_bound; + fput(file); + return 0; + out_fput: + fput(file); + out: + MOD_DEC_USE_COUNT; + return error; +} + +static int vroot_clr_dev( + struct vroot_device *vr, + struct file *vr_file, + kdev_t dev) +{ + if (vr->vr_state != Vr_bound) + return -ENXIO; + if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */ + return -EBUSY; + + dprintk(KERN_INFO "vroot[%d]_clr_dev: 
dev(%d,%d)\n", + vr->vr_number, + MAJOR(vr->vr_device), MINOR(vr->vr_device)); + + vr->vr_state = Vr_unbound; + vr->vr_device = NODEV; + MOD_DEC_USE_COUNT; + return 0; +} + +static int vroot_make_request( + request_queue_t *q, + int rw, + struct buffer_head *rbh) +{ + if (!buffer_locked(rbh)) + BUG(); + + if (MINOR(rbh->b_rdev) >= max_vroot) + goto out; + + dprintk(KERN_WARNING "vroot[%d]_make_request: denied.\n", + MINOR(rbh->b_rdev)); + out: + buffer_IO_error(rbh); + return 0; +} + +static int vr_ioctl( + struct inode * inode, + struct file * file, + unsigned int cmd, + unsigned long arg) +{ + struct vroot_device *vr; + int dev, err; + + if (!inode) + return -EINVAL; + if (MAJOR(inode->i_rdev) != MAJOR_NR) { + dprintk(KERN_WARNING "vr_ioctl: pseudo-major != %d\n", + MAJOR_NR); + return -ENODEV; + } + dev = MINOR(inode->i_rdev); + if (dev >= max_vroot) + return -ENODEV; + vr = &vroot_dev[dev]; + down(&vr->vr_ctl_mutex); + switch (cmd) { + case VROOT_SET_DEV: + err = vroot_set_dev(vr, file, inode->i_rdev, arg); + break; + case VROOT_CLR_DEV: + err = vroot_clr_dev(vr, file, inode->i_rdev); + break; + default: + err = -EINVAL; + break; + } + up(&vr->vr_ctl_mutex); + return err; +} + +static int vr_open( + struct inode *inode, + struct file *file) +{ + struct vroot_device *vr; + int dev; + + if (!inode) + return -EINVAL; + if (MAJOR(inode->i_rdev) != MAJOR_NR) { + dprintk(KERN_WARNING "vr_open: pseudo-major != %d\n", MAJOR_NR); + return -ENODEV; + } + dev = MINOR(inode->i_rdev); + if (dev >= max_vroot) + return -ENODEV; + + vr = &vroot_dev[dev]; + MOD_INC_USE_COUNT; + down(&vr->vr_ctl_mutex); + + vr->vr_refcnt++; + up(&vr->vr_ctl_mutex); + return 0; +} + +static int vr_release( + struct inode *inode, + struct file *file) +{ + struct vroot_device *vr; + int dev; + + if (!inode) + return 0; + if (MAJOR(inode->i_rdev) != MAJOR_NR) { + dprintk(KERN_WARNING "vr_release: pseudo-major != %d\n", + MAJOR_NR); + return 0; + } + dev = MINOR(inode->i_rdev); + if (dev >= 
max_vroot) + return 0; + + vr = &vroot_dev[dev]; + down(&vr->vr_ctl_mutex); + + vr->vr_refcnt--; + up(&vr->vr_ctl_mutex); + MOD_DEC_USE_COUNT; + return 0; +} + +static struct block_device_operations vr_fops = { + owner: THIS_MODULE, + open: vr_open, + release: vr_release, + ioctl: vr_ioctl, +}; + +/* + * And now the modules code and kernel interface. + */ +MODULE_PARM(max_vroot, "i"); +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)"); +MODULE_LICENSE("GPL"); + +MODULE_AUTHOR ("Herbert Pötzl"); +MODULE_DESCRIPTION ("Virtual Root Device Mapper"); + + +int __init vroot_init(void) +{ + int i; + + if ((max_vroot < 1) || (max_vroot > 256)) { + printk(KERN_WARNING "vroot: invalid max_vroot (must be between" + " 1 and 256), using default (%d)\n", + MAX_VROOT_DEFAULT); + max_vroot = MAX_VROOT_DEFAULT; + } + + if (devfs_register_blkdev(MAJOR_NR, "vroot", &vr_fops)) { + printk(KERN_WARNING "Unable to get major number %d for vroot" + " device\n", MAJOR_NR); + return -EIO; + } + + devfs_handle = devfs_mk_dir(NULL, "vroot", NULL); + devfs_register_series(devfs_handle, "%u", max_vroot, + DEVFS_FL_DEFAULT, MAJOR_NR, 0, + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, + &vr_fops, NULL); + + vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL); + if (!vroot_dev) + return -ENOMEM; + + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), vroot_make_request); + + for (i = 0; i < max_vroot; i++) { + struct vroot_device *vr = &vroot_dev[i]; + memset(vr, 0, sizeof(struct vroot_device)); + init_MUTEX(&vr->vr_ctl_mutex); + vr->vr_number = i; + vr->vr_state = Vr_unbound; + } + + for (i = 0; i < max_vroot; i++) + register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &vr_fops, 0); + +#ifdef MODULE + register_vroot_get_dev(_vroot_get_dev); +#endif + printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot); + return 0; +} + +void vroot_exit(void) +{ +#ifdef MODULE + unregister_vroot_get_dev(_vroot_get_dev); +#endif + devfs_unregister(devfs_handle); + if 
(devfs_unregister_blkdev(MAJOR_NR, "vroot")) + printk(KERN_WARNING "vroot: cannot unregister blkdev\n"); + + kfree(vroot_dev); +} + +module_init(vroot_init); +module_exit(vroot_exit); + +#ifndef MODULE +static int __init max_vroot_setup(char *str) +{ + max_vroot = simple_strtol(str, NULL, 0); + return 1; +} + +__setup("max_vroot=", max_vroot_setup); + +#endif diff -NurpP --minimal linux-2.4.22/drivers/block/vroot.h linux-2.4.22-vs1.21/drivers/block/vroot.h --- linux-2.4.22/drivers/block/vroot.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.22-vs1.21/drivers/block/vroot.h Thu Dec 11 00:49:10 2003 @@ -0,0 +1,46 @@ +#ifndef _LINUX_VROOT_H +#define _LINUX_VROOT_H + +#include + +/* + * linux/drivers/block/vroot.h + * + * Written by Herbert Pötzl, 9/11/2002 + * + * Copyright 2002-2003 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. + */ + +#ifdef __KERNEL__ + +/* Possible states of device */ +enum { + Vr_unbound, + Vr_bound, +}; + +struct vroot_device { + int vr_number; + int vr_refcnt; + + struct semaphore vr_ctl_mutex; + kdev_t vr_device; + int vr_state; +}; + +#define dprintk(...) 
/* printk(__VA_ARGS__) */ + +#endif /* __KERNEL__ */ + +#define MAX_VROOT_DEFAULT 8 + +/* + * IOCTL commands --- we will commandeer 0x56 ('V') + */ + +#define VROOT_SET_DEV 0x5600 +#define VROOT_CLR_DEV 0x5601 + +#endif diff -NurpP --minimal linux-2.4.22/fs/Makefile linux-2.4.22-vs1.21/fs/Makefile --- linux-2.4.22/fs/Makefile Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/Makefile Thu Dec 11 00:49:10 2003 @@ -7,7 +7,7 @@ O_TARGET := fs.o -export-objs := filesystems.o open.o dcache.o buffer.o dquot.o +export-objs := filesystems.o open.o dcache.o buffer.o dquot.o quota.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ diff -NurpP --minimal linux-2.4.22/fs/devpts/inode.c linux-2.4.22-vs1.21/fs/devpts/inode.c --- linux-2.4.22/fs/devpts/inode.c Thu Oct 25 09:02:26 2001 +++ linux-2.4.22-vs1.21/fs/devpts/inode.c Thu Dec 11 00:49:09 2003 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -180,6 +181,18 @@ static int devpts_statfs(struct super_bl static DECLARE_FSTYPE(devpts_fs_type, "devpts", devpts_read_super, FS_SINGLE); +static int devpts_tty_permission(struct inode *inode, int mask) +{ + int ret = -EACCES; + if (vx_check(inode->u.devpts_i.vx_id, VX_IDENT)) + ret = vfs_permission(inode, mask); + return ret; +} + +struct inode_operations devpts_tty_inode_operations = { + permission: devpts_tty_permission, +}; + void devpts_pty_new(int number, kdev_t device) { struct super_block *sb = devpts_mnt->mnt_sb; @@ -198,6 +211,8 @@ void devpts_pty_new(int number, kdev_t d inode->i_uid = sbi->setuid ? sbi->uid : current->fsuid; inode->i_gid = sbi->setgid ? 
sbi->gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->u.devpts_i.vx_id = current->vx_id; + inode->i_op = &devpts_tty_inode_operations; init_special_inode(inode, S_IFCHR|sbi->mode, kdev_t_to_nr(device)); if ( sbi->inodes[number] ) { diff -NurpP --minimal linux-2.4.22/fs/devpts/root.c linux-2.4.22-vs1.21/fs/devpts/root.c --- linux-2.4.22/fs/devpts/root.c Fri Dec 21 18:41:55 2001 +++ linux-2.4.22-vs1.21/fs/devpts/root.c Thu Dec 11 00:49:09 2003 @@ -14,6 +14,7 @@ #include #include #include +#include #include "devpts_i.h" static int devpts_root_readdir(struct file *,void *,filldir_t); @@ -64,7 +65,9 @@ static int devpts_root_readdir(struct fi default: while ( nr - 2 < sbi->max_ptys ) { int ptynr = nr - 2; - if ( sbi->inodes[ptynr] ) { + struct inode *inode = sbi->inodes[ptynr]; + if (inode && vx_check(inode->u.devpts_i.vx_id, + VX_WATCH|VX_IDENT)) { genptsname(numbuf, ptynr); if ( filldir(dirent, numbuf, strlen(numbuf), nr, nr, DT_CHR) < 0 ) return 0; @@ -100,6 +103,7 @@ static struct dentry *devpts_root_lookup unsigned int entry; int i; const char *p; + struct inode *inode; dentry->d_op = &devpts_dentry_operations; @@ -126,10 +130,15 @@ static struct dentry *devpts_root_lookup if ( entry >= sbi->max_ptys ) return NULL; - if ( sbi->inodes[entry] ) - atomic_inc(&sbi->inodes[entry]->i_count); + inode = sbi->inodes[entry]; + if (inode && vx_check(inode->u.devpts_i.vx_id, VX_IDENT)) + atomic_inc(&inode->i_count); + else + inode = NULL; - d_add(dentry, sbi->inodes[entry]); + d_add(dentry, inode); return NULL; } + + diff -NurpP --minimal linux-2.4.22/fs/exec.c linux-2.4.22-vs1.21/fs/exec.c --- linux-2.4.22/fs/exec.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/exec.c Thu Dec 11 00:49:06 2003 @@ -733,7 +733,7 @@ void compute_creds(struct linux_binprm * kernel_cap_t new_permitted, working; int do_unlock = 0; - new_permitted = cap_intersect(bprm->cap_permitted, cap_bset); + new_permitted = cap_intersect(bprm->cap_permitted, 
current->cap_bset); working = cap_intersect(bprm->cap_inheritable, current->cap_inheritable); new_permitted = cap_combine(new_permitted, working); diff -NurpP --minimal linux-2.4.22/fs/ext2/ialloc.c linux-2.4.22-vs1.21/fs/ext2/ialloc.c --- linux-2.4.22/fs/ext2/ialloc.c Fri Jun 13 16:51:37 2003 +++ linux-2.4.22-vs1.21/fs/ext2/ialloc.c Thu Dec 11 00:49:10 2003 @@ -388,7 +388,7 @@ repeat: inode->u.ext2_i.i_new_inode = 1; inode->u.ext2_i.i_flags = dir->u.ext2_i.i_flags & ~EXT2_BTREE_FL; if (S_ISLNK(mode)) - inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); + inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FILE_FL|EXT2_IMMUTABLE_LINK_FL|EXT2_APPEND_FL); inode->u.ext2_i.i_block_group = group; ext2_set_inode_flags(inode); insert_inode_hash(inode); diff -NurpP --minimal linux-2.4.22/fs/ext2/inode.c linux-2.4.22-vs1.21/fs/ext2/inode.c --- linux-2.4.22/fs/ext2/inode.c Fri Jun 13 16:51:37 2003 +++ linux-2.4.22-vs1.21/fs/ext2/inode.c Thu Dec 11 00:49:10 2003 @@ -46,6 +46,8 @@ void ext2_put_inode (struct inode * inod ext2_discard_prealloc (inode); } +static void ext2_truncate_nocheck (struct inode * inode); + /* * Called at the last iput() if i_nlink is zero. 
*/ @@ -62,7 +64,7 @@ void ext2_delete_inode (struct inode * i ext2_update_inode(inode, IS_SYNC(inode)); inode->i_size = 0; if (inode->i_blocks) - ext2_truncate (inode); + ext2_truncate_nocheck(inode); ext2_free_inode (inode); unlock_kernel(); @@ -786,7 +788,7 @@ static void ext2_free_branches(struct in ext2_free_data(inode, p, q); } -void ext2_truncate (struct inode * inode) +static void ext2_truncate_nocheck(struct inode * inode) { u32 *i_data = inode->u.ext2_i.i_data; int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); @@ -801,8 +803,6 @@ void ext2_truncate (struct inode * inode if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext2_discard_prealloc(inode); @@ -877,17 +877,26 @@ do_indirects: } } +void ext2_truncate (struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) + return; + ext2_truncate_nocheck(inode); +} + void ext2_set_inode_flags(struct inode *inode) { unsigned int flags = inode->u.ext2_i.i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE_FILE|S_IMMUTABLE_LINK|S_NOATIME); if (flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT2_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT2_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IMMUTABLE_FILE_FL) + inode->i_flags |= S_IMMUTABLE_FILE; + if (flags & EXT2_IMMUTABLE_LINK_FL) + inode->i_flags |= S_IMMUTABLE_LINK; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; } diff -NurpP --minimal linux-2.4.22/fs/ext2/ioctl.c linux-2.4.22-vs1.21/fs/ext2/ioctl.c --- linux-2.4.22/fs/ext2/ioctl.c Fri Jun 13 16:51:37 2003 +++ linux-2.4.22-vs1.21/fs/ext2/ioctl.c Thu Dec 11 00:49:10 2003 @@ -39,12 +39,12 @@ int ext2_ioctl (struct inode * inode, st oldflags = inode->u.ext2_i.i_flags; /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. 
+ * The IMMUTABLE_* and APPEND_ONLY flags can only be changed + * by the relevant capability. * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FILE_FL | EXT2_IMMUTABLE_LINK_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff -NurpP --minimal linux-2.4.22/fs/ext3/ialloc.c linux-2.4.22-vs1.21/fs/ext3/ialloc.c --- linux-2.4.22/fs/ext3/ialloc.c Fri Jun 13 16:51:37 2003 +++ linux-2.4.22-vs1.21/fs/ext3/ialloc.c Thu Dec 11 00:49:10 2003 @@ -485,7 +485,7 @@ repeat: inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; if (S_ISLNK(mode)) - inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); + inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FILE_FL|EXT3_IMMUTABLE_LINK_FL|EXT3_APPEND_FL); #ifdef EXT3_FRAGMENTS inode->u.ext3_i.i_faddr = 0; inode->u.ext3_i.i_frag_no = 0; diff -NurpP --minimal linux-2.4.22/fs/ext3/inode.c linux-2.4.22-vs1.21/fs/ext3/inode.c --- linux-2.4.22/fs/ext3/inode.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/ext3/inode.c Thu Dec 11 00:49:10 2003 @@ -172,6 +172,7 @@ void ext3_put_inode (struct inode * inod ext3_discard_prealloc (inode); } +static void ext3_truncate_nocheck (struct inode *inode); /* * Called at the last iput() if i_nlink is zero. */ @@ -201,7 +202,7 @@ void ext3_delete_inode (struct inode * i handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) - ext3_truncate(inode); + ext3_truncate_nocheck(inode); /* * Kill off the orphan record which ext3_truncate created. * AKPM: I think this can be inside the above `if'. @@ -1854,7 +1855,7 @@ static void ext3_free_branches(handle_t * ext3_truncate() run will find them and release them. 
*/ -void ext3_truncate(struct inode * inode) +static void ext3_truncate_nocheck(struct inode * inode) { handle_t *handle; u32 *i_data = inode->u.ext3_i.i_data; @@ -1870,8 +1871,6 @@ void ext3_truncate(struct inode * inode) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext3_discard_prealloc(inode); @@ -2001,6 +2000,13 @@ out_stop: ext3_journal_stop(handle, inode); } +void ext3_truncate(struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) + return; + ext3_truncate_nocheck(inode); +} + /* * ext3_get_inode_loc returns with an extra refcount against the * inode's underlying buffer_head on success. @@ -2072,13 +2078,15 @@ void ext3_set_inode_flags(struct inode * { unsigned int flags = inode->u.ext3_i.i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE_FILE|S_IMMUTABLE_LINK|S_NOATIME); if (flags & EXT3_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT3_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT3_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_IMMUTABLE_FILE_FL) + inode->i_flags |= S_IMMUTABLE_FILE; + if (flags & EXT3_IMMUTABLE_LINK_FL) + inode->i_flags |= S_IMMUTABLE_LINK; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; } diff -NurpP --minimal linux-2.4.22/fs/ext3/ioctl.c linux-2.4.22-vs1.21/fs/ext3/ioctl.c --- linux-2.4.22/fs/ext3/ioctl.c Fri Jun 13 16:51:37 2003 +++ linux-2.4.22-vs1.21/fs/ext3/ioctl.c Thu Dec 11 00:49:10 2003 @@ -48,12 +48,12 @@ int ext3_ioctl (struct inode * inode, st jflag = flags & EXT3_JOURNAL_DATA_FL; /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. + * The IMMUTABLE_* and APPEND_ONLY flags can only be changed + * by the relevant capability. * * This test looks nicer. 
Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FILE_FL | EXT3_IMMUTABLE_LINK_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff -NurpP --minimal linux-2.4.22/fs/fat/file.c linux-2.4.22-vs1.21/fs/fat/file.c --- linux-2.4.22/fs/fat/file.c Sun Aug 12 19:56:56 2001 +++ linux-2.4.22-vs1.21/fs/fat/file.c Thu Dec 11 00:49:10 2003 @@ -119,7 +119,7 @@ void fat_truncate(struct inode *inode) /* Why no return value? Surely the disk could fail... */ if (IS_RDONLY (inode)) return /* -EPERM */; - if (IS_IMMUTABLE(inode)) + if (IS_IMMUTABLE_FILE(inode)) return /* -EPERM */; cluster = 1 << sbi->cluster_bits; /* diff -NurpP --minimal linux-2.4.22/fs/fat/inode.c linux-2.4.22-vs1.21/fs/fat/inode.c --- linux-2.4.22/fs/fat/inode.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/fat/inode.c Thu Dec 11 00:49:10 2003 @@ -950,7 +950,7 @@ static void fat_fill_inode(struct inode } if(de->attr & ATTR_SYS) if (sbi->options.sys_immutable) - inode->i_flags |= S_IMMUTABLE; + inode->i_flags |= S_IMMUTABLE_FILE; MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; /* this is as close to the truth as we can get ... 
*/ inode->i_blksize = 1 << sbi->cluster_bits; diff -NurpP --minimal linux-2.4.22/fs/hpfs/file.c linux-2.4.22-vs1.21/fs/hpfs/file.c --- linux-2.4.22/fs/hpfs/file.c Mon Aug 13 02:37:53 2001 +++ linux-2.4.22-vs1.21/fs/hpfs/file.c Thu Dec 11 00:49:10 2003 @@ -60,7 +60,7 @@ secno hpfs_bmap(struct inode *inode, uns void hpfs_truncate(struct inode *i) { - if (IS_IMMUTABLE(i)) return /*-EPERM*/; + if (IS_IMMUTABLE_FILE(i)) return /*-EPERM*/; i->i_hpfs_n_secs = 0; i->i_blocks = 1 + ((i->i_size + 511) >> 9); i->u.hpfs_i.mmu_private = i->i_size; diff -NurpP --minimal linux-2.4.22/fs/intermezzo/vfs.c linux-2.4.22-vs1.21/fs/intermezzo/vfs.c --- linux-2.4.22/fs/intermezzo/vfs.c Fri Jun 13 16:51:37 2003 +++ linux-2.4.22-vs1.21/fs/intermezzo/vfs.c Thu Dec 11 00:49:10 2003 @@ -140,7 +140,7 @@ static inline int may_delete(struct inod if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IMMUTABLE_LINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -262,7 +262,7 @@ int presto_settime(struct presto_file_se return -EROFS; } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) { EXIT; return -EPERM; } @@ -377,7 +377,7 @@ int presto_do_setattr(struct presto_file return -EROFS; } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) { EXIT; return -EPERM; } @@ -772,7 +772,7 @@ int presto_do_link(struct presto_file_se * A link to an append-only or immutable file cannot be created. 
*/ error = -EPERM; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { + if (IS_APPEND(inode) || IS_IMMUTABLE_LINK(inode)) { EXIT; goto exit_lock; } @@ -2362,7 +2362,7 @@ int presto_do_set_ext_attr(struct presto return -EROFS; } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) { EXIT; return -EPERM; } diff -NurpP --minimal linux-2.4.22/fs/jfs/xattr.c linux-2.4.22-vs1.21/fs/jfs/xattr.c --- linux-2.4.22/fs/jfs/xattr.c Fri Nov 29 00:53:15 2002 +++ linux-2.4.22-vs1.21/fs/jfs/xattr.c Thu Dec 11 00:49:10 2003 @@ -646,7 +646,7 @@ static int can_set_xattr(struct inode *i if (IS_RDONLY(inode)) return -EROFS; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode)) + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode)) return -EPERM; if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) && diff -NurpP --minimal linux-2.4.22/fs/namei.c linux-2.4.22-vs1.21/fs/namei.c --- linux-2.4.22/fs/namei.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/namei.c Thu Dec 11 00:49:10 2003 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -152,6 +153,15 @@ int vfs_permission(struct inode * inode, { umode_t mode = inode->i_mode; + /* + A dir with all permission bits set to 0 is a dead zone for + processes running in a vserver. By doing + chmod 000 /vservers + you fix the "escape from chroot" bug. + */ + if ((mode & 0777) == 0 && S_ISDIR(mode) + && !vx_check(0, VX_ADMIN)) + return -EACCES; if (mask & MAY_WRITE) { /* * Nobody gets write access to a read-only fs. @@ -163,7 +173,7 @@ int vfs_permission(struct inode * inode, /* * Nobody gets write access to an immutable file.
*/ - if (IS_IMMUTABLE(inode)) + if (IS_IMMUTABLE_FILE(inode)) return -EACCES; } @@ -904,8 +914,7 @@ static inline int may_delete(struct inod return error; if (IS_APPEND(dir)) return -EPERM; - if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||IS_IMMUTABLE_LINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1618,7 +1627,7 @@ int vfs_link(struct dentry *old_dentry, * A link to an append-only or immutable file cannot be created. */ error = -EPERM; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IMMUTABLE_LINK(inode)) goto exit_lock; if (!dir->i_op || !dir->i_op->link) goto exit_lock; diff -NurpP --minimal linux-2.4.22/fs/nfsd/vfs.c linux-2.4.22-vs1.21/fs/nfsd/vfs.c --- linux-2.4.22/fs/nfsd/vfs.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/nfsd/vfs.c Thu Dec 11 00:49:10 2003 @@ -1480,7 +1480,7 @@ nfsd_permission(struct svc_export *exp, if (acc == MAY_NOP) return 0; #if 0 - dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", + dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s%s\n", acc, (acc & MAY_READ)? " read" : "", (acc & MAY_WRITE)? " write" : "", @@ -1490,7 +1490,8 @@ nfsd_permission(struct svc_export *exp, (acc & MAY_LOCK)? " lock" : "", (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "", inode->i_mode, - IS_IMMUTABLE(inode)? " immut" : "", + IS_IMMUTABLE_FILE(inode)? " immut(F)" : "", + IS_IMMUTABLE_LINK(inode)? " immut(L)" : "", IS_APPEND(inode)? " append" : "", IS_RDONLY(inode)? 
" ro" : ""); dprintk(" owner %d/%d user %d/%d\n", @@ -1509,7 +1510,7 @@ nfsd_permission(struct svc_export *exp, && (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC))) { if (EX_RDONLY(exp) || IS_RDONLY(inode)) return nfserr_rofs; - if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) + if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE_FILE(inode)) return nfserr_perm; } if ((acc & MAY_TRUNC) && IS_APPEND(inode)) diff -NurpP --minimal linux-2.4.22/fs/open.c linux-2.4.22-vs1.21/fs/open.c --- linux-2.4.22/fs/open.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/open.c Thu Dec 11 00:49:10 2003 @@ -148,7 +148,7 @@ static inline long do_sys_truncate(const goto dput_and_out; error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) goto dput_and_out; /* @@ -480,7 +480,7 @@ asmlinkage long sys_fchmod(unsigned int if (IS_RDONLY(inode)) goto out_putf; err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) goto out_putf; if (mode == (mode_t) -1) mode = inode->i_mode; @@ -511,7 +511,7 @@ asmlinkage long sys_chmod(const char * f goto dput_and_out; error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) goto dput_and_out; if (mode == (mode_t) -1) @@ -541,7 +541,7 @@ static int chown_common(struct dentry * if (IS_RDONLY(inode)) goto out; error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) goto out; if (user == (uid_t) -1) user = inode->i_uid; diff -NurpP --minimal linux-2.4.22/fs/proc/array.c linux-2.4.22-vs1.21/fs/proc/array.c --- linux-2.4.22/fs/proc/array.c Fri Jun 13 16:51:37 2003 +++ linux-2.4.22-vs1.21/fs/proc/array.c Thu Dec 11 00:49:10 2003 @@ -75,6 +75,7 @@ #include #include #include +#include /* Gcc optimizes away "strlen(x)" for constant x */ #define ADDBUF(buffer, string) \ @@ -147,8 +148,13 @@ static inline const char * get_task_stat static 
inline char * task_state(struct task_struct *p, char *buffer) { int g; - + pid_t ppid; read_lock(&tasklist_lock); + ppid = p->p_opptr->pid; + if (ppid != 0 + && current->s_info + && current->s_info->initpid == ppid) + ppid = 1; buffer += sprintf(buffer, "State:\t%s\n" "Tgid:\t%d\n" @@ -158,7 +164,7 @@ static inline char * task_state(struct t "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), p->tgid, - p->pid, p->pid ? p->p_opptr->pid : 0, 0, + p->pid, p->pid ? ppid : 0, 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); @@ -266,10 +272,12 @@ static inline char *task_cap(struct task { return buffer + sprintf(buffer, "CapInh:\t%016x\n" "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", + "CapEff:\t%016x\n" + "CapBset:\t%016x\n", cap_t(p->cap_inheritable), cap_t(p->cap_permitted), - cap_t(p->cap_effective)); + cap_t(p->cap_effective), + cap_t(p->cap_bset)); } @@ -291,6 +299,51 @@ int proc_pid_status(struct task_struct * } buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); + if (task->s_info) { + int i; + + buffer += sprintf (buffer,"s_context: %d [", task->vx_id); + for (i=0; i<NB_S_CONTEXT; i++){ + int ctx = task->s_info->vx_id[i]; + + if (ctx == 0) + break; + buffer += sprintf (buffer," %d",ctx); + } + *buffer++ = ']'; + *buffer++ = '\n'; + buffer += sprintf (buffer,"ctxticks: %d %ld %d\n" + ,atomic_read(&task->s_info->ticks) + ,task->counter + ,atomic_read(&task->s_info->refcount)); + buffer += sprintf (buffer,"ctxflags: %d\n" + ,task->s_info->flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,task->s_info->initpid); + } else { + buffer += sprintf (buffer,"s_context: %d\n", task->vx_id); + buffer += sprintf (buffer,"ctxticks: none\n"); + buffer += sprintf (buffer,"ctxflags: none\n"); + buffer += sprintf (buffer,"initpid: none\n"); + } + if (task->ip_info) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; i<task->ip_info->nbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,task->ip_info->ipv4[i] +
,task->ip_info->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" + ,task->ip_info->v4_bcast); + buffer += sprintf (buffer,"ipv4root_refcnt: %d\n" + ,atomic_read(&task->ip_info->refcount)); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); + } #if defined(CONFIG_ARCH_S390) buffer = task_show_regs(task, buffer); #endif @@ -344,6 +397,8 @@ int proc_pid_stat(struct task_struct *ta read_lock(&tasklist_lock); ppid = task->pid ? task->p_opptr->pid : 0; + if (current->s_info && current->s_info->initpid == ppid) + ppid = 1; read_unlock(&tasklist_lock); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \ diff -NurpP --minimal linux-2.4.22/fs/proc/base.c linux-2.4.22-vs1.21/fs/proc/base.c --- linux-2.4.22/fs/proc/base.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/proc/base.c Thu Dec 11 00:49:10 2003 @@ -1056,6 +1056,10 @@ struct dentry *proc_pid_lookup(struct in if (!task) goto out; + if (pid != 1 && !vx_check(task->vx_id, VX_WATCH|VX_IDENT)) { + free_task_struct(task); + goto out; + } inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO); free_task_struct(task); @@ -1066,7 +1070,7 @@ struct dentry *proc_pid_lookup(struct in inode->i_op = &proc_base_inode_operations; inode->i_fop = &proc_base_operations; inode->i_nlink = 3; - inode->i_flags|=S_IMMUTABLE; + inode->i_flags|=S_IMMUTABLE_FILE; dentry->d_op = &pid_base_dentry_operations; d_add(dentry, inode); @@ -1101,6 +1105,18 @@ static int get_pid_list(int index, unsig for_each_task(p) { int pid = p->pid; if (!pid) + continue; + /* Even if the pid 1 is not part of the security context */ + /* we show it anyway. 
This makes the security box */ + /* more standard (and helps pstree do its job) */ + /* So current process "knows" pid 1 exists anyway and can't */ + /* send it any signal either */ + + /* A process with security context 1 can see all processes */ + if (pid != 1 && !vx_check(p->vx_id, VX_WATCH|VX_IDENT)) + continue; + /* We hide the fakeinit process since we show it as process 1 */ + if (current->s_info && current->s_info->initpid == pid) continue; if (--index >= 0) continue; diff -NurpP --minimal linux-2.4.22/fs/quota.c linux-2.4.22-vs1.21/fs/quota.c --- linux-2.4.22/fs/quota.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/quota.c Thu Dec 11 00:49:10 2003 @@ -14,6 +14,10 @@ #include #include +#include +#include +#include + struct dqstats dqstats; /* Check validity of quotactl */ @@ -95,15 +99,60 @@ static int check_quotactl_valid(struct s if (cmd == Q_GETQUOTA || cmd == Q_XGETQUOTA) { if (((type == USRQUOTA && current->euid != id) || (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !capable(CAP_SYS_ADMIN) && !capable(CAP_QUOTACTL)) return -EPERM; } else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO && cmd != Q_XGETQSTAT) - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_QUOTACTL)) return -EPERM; return 0; } +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) +#if defined(CONFIG_BLK_DEV_VROOT_MODULE) + +static rwlock_t dquot_vroot_lock = RW_LOCK_UNLOCKED; + +typedef kdev_t (*vroot_get_dev_f)(int dev); + +static vroot_get_dev_f vroot_get_dev = NULL; + +int register_vroot_get_dev(vroot_get_dev_f func) +{ + int ret = -EBUSY; + + write_lock(&dquot_vroot_lock); + if (!vroot_get_dev) { + vroot_get_dev = func; + ret = 0; + } + write_unlock(&dquot_vroot_lock); + return ret; +} + +int unregister_vroot_get_dev(vroot_get_dev_f func) +{ + int ret = -EINVAL; + + write_lock(&dquot_vroot_lock); + if (vroot_get_dev == func) { + vroot_get_dev = NULL; + ret = 0; + } +
write_unlock(&dquot_vroot_lock); + return ret; +} + +EXPORT_SYMBOL(register_vroot_get_dev); +EXPORT_SYMBOL(unregister_vroot_get_dev); + +#else /* CONFIG_BLK_DEV_VROOT */ + +extern kdev_t vroot_get_dev(int dev); + +#endif +#endif + /* Resolve device pathname to superblock */ static struct super_block *resolve_dev(const char *path) { @@ -124,6 +173,21 @@ static struct super_block *resolve_dev(c ret = -ENOTBLK; if (!S_ISBLK(mode)) goto out; + +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) + if (MAJOR(dev) == VROOT_MAJOR) { + ret = -ENODEV; +#ifdef CONFIG_BLK_DEV_VROOT_MODULE + read_lock(&dquot_vroot_lock); + dev = (vroot_get_dev) ? vroot_get_dev(MINOR(dev)) : NODEV; + read_unlock(&dquot_vroot_lock); +#else + dev = vroot_get_dev(MINOR(dev)); +#endif + if (dev == NODEV) + goto out; + } +#endif ret = -ENODEV; sb = get_super(dev); if (!sb) @@ -308,11 +372,11 @@ static int check_compat_quotactl_valid(s if (cmd == Q_V1_GETQUOTA || cmd == Q_V2_GETQUOTA) { if (((type == USRQUOTA && current->euid != id) || (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !capable(CAP_SYS_ADMIN) && !capable(CAP_QUOTACTL)) return -EPERM; } else if (cmd != Q_V1_GETSTATS && cmd != Q_V2_GETSTATS && cmd != Q_V2_GETINFO && cmd != Q_COMP_SYNC) - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_QUOTACTL)) return -EPERM; return 0; } diff -NurpP --minimal linux-2.4.22/fs/reiserfs/inode.c linux-2.4.22-vs1.21/fs/reiserfs/inode.c --- linux-2.4.22/fs/reiserfs/inode.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/reiserfs/inode.c Thu Dec 11 00:49:10 2003 @@ -1574,7 +1574,7 @@ int reiserfs_new_inode (struct reiserfs_ /* symlink cannot be immutable or append only, right? 
*/ if( S_ISLNK( inode -> i_mode ) ) - inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND ); + inode -> i_flags &= ~ ( S_IMMUTABLE_FILE | S_APPEND ); /* item head of new item */ ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid; @@ -2177,10 +2177,14 @@ void sd_attrs_to_i_attrs( __u16 sd_attrs inode -> i_flags |= S_SYNC; else inode -> i_flags &= ~S_SYNC; - if( sd_attrs & REISERFS_IMMUTABLE_FL ) - inode -> i_flags |= S_IMMUTABLE; + if( sd_attrs & REISERFS_IMMUTABLE_FILE_FL ) + inode -> i_flags |= S_IMMUTABLE_FILE; else - inode -> i_flags &= ~S_IMMUTABLE; + inode -> i_flags &= ~S_IMMUTABLE_FILE; + if( sd_attrs & REISERFS_IMMUTABLE_LINK_FL ) + inode -> i_flags |= S_IMMUTABLE_LINK; + else + inode -> i_flags &= ~S_IMMUTABLE_LINK; if( sd_attrs & REISERFS_APPEND_FL ) inode -> i_flags |= S_APPEND; else @@ -2199,10 +2203,14 @@ void sd_attrs_to_i_attrs( __u16 sd_attrs void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ) { if( reiserfs_attrs( inode -> i_sb ) ) { - if( inode -> i_flags & S_IMMUTABLE ) - *sd_attrs |= REISERFS_IMMUTABLE_FL; + if( inode -> i_flags & S_IMMUTABLE_FILE ) + *sd_attrs |= REISERFS_IMMUTABLE_FILE_FL; else - *sd_attrs &= ~REISERFS_IMMUTABLE_FL; + *sd_attrs &= ~REISERFS_IMMUTABLE_FILE_FL; + if( inode -> i_flags & S_IMMUTABLE_LINK ) + *sd_attrs |= REISERFS_IMMUTABLE_LINK_FL; + else + *sd_attrs &= ~REISERFS_IMMUTABLE_LINK_FL; if( inode -> i_flags & S_SYNC ) *sd_attrs |= REISERFS_SYNC_FL; else diff -NurpP --minimal linux-2.4.22/fs/reiserfs/ioctl.c linux-2.4.22-vs1.21/fs/reiserfs/ioctl.c --- linux-2.4.22/fs/reiserfs/ioctl.c Mon Aug 25 13:44:43 2003 +++ linux-2.4.22-vs1.21/fs/reiserfs/ioctl.c Thu Dec 11 00:49:10 2003 @@ -51,7 +51,8 @@ int reiserfs_ioctl (struct inode * inode if (get_user(flags, (int *) arg)) return -EFAULT; - if ( ( ( flags ^ inode->u.reiserfs_i.i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) && + if ( ( ( flags ^ inode->u.reiserfs_i.i_attrs) & + ( REISERFS_IMMUTABLE_FILE_FL | REISERFS_IMMUTABLE_LINK_FL | REISERFS_APPEND_FL )) 
&& !capable( CAP_LINUX_IMMUTABLE ) ) return -EPERM; diff -NurpP --minimal linux-2.4.22/fs/udf/inode.c linux-2.4.22-vs1.21/fs/udf/inode.c --- linux-2.4.22/fs/udf/inode.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.22-vs1.21/fs/udf/inode.c Thu Dec 11 00:49:10 2003 @@ -860,7 +860,7 @@ void udf_truncate(struct inode * inode) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) return; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) diff -NurpP --minimal linux-2.4.22/fs/ufs/truncate.c linux-2.4.22-vs1.21/fs/ufs/truncate.c --- linux-2.4.22/fs/ufs/truncate.c Fri Nov 29 00:53:15 2002 +++ linux-2.4.22-vs1.21/fs/ufs/truncate.c Thu Dec 11 00:49:10 2003 @@ -434,7 +434,7 @@ void ufs_truncate (struct inode * inode) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) return; while (1) { retry = ufs_trunc_direct(inode); diff -NurpP --minimal linux-2.4.22/include/asm-alpha/unistd.h linux-2.4.22-vs1.21/include/asm-alpha/unistd.h --- linux-2.4.22/include/asm-alpha/unistd.h Sat Aug 3 02:39:45 2002 +++ linux-2.4.22-vs1.21/include/asm-alpha/unistd.h Thu Dec 11 00:49:06 2003 @@ -233,6 +233,7 @@ #define __NR_osf_memcntl 260 /* not implemented */ #define __NR_osf_fdatasync 261 /* not implemented */ +#define __NR_vserver 273 /* * Linux-specific system calls begin at 300 diff -NurpP --minimal linux-2.4.22/include/asm-i386/unistd.h linux-2.4.22-vs1.21/include/asm-i386/unistd.h --- linux-2.4.22/include/asm-i386/unistd.h Fri Nov 29 00:53:15 2002 +++ linux-2.4.22-vs1.21/include/asm-i386/unistd.h Thu Dec 11 00:49:06 2003 @@ -258,6 +258,8 @@ #define __NR_free_hugepages 251 #define __NR_exit_group 252 +#define __NR_vserver 273 + /* user-visible error numbers are in the range -1 - -124: see */ #define __syscall_return(type, res) \ 
diff -NurpP --minimal linux-2.4.22/include/asm-parisc/unistd.h linux-2.4.22-vs1.21/include/asm-parisc/unistd.h --- linux-2.4.22/include/asm-parisc/unistd.h Fri Jun 13 16:51:38 2003 +++ linux-2.4.22-vs1.21/include/asm-parisc/unistd.h Thu Dec 11 00:49:06 2003 @@ -702,7 +702,9 @@ #define __NR_readahead (__NR_Linux + 207) #define __NR_tkill (__NR_Linux + 208) -#define __NR_Linux_syscalls 208 +#define __NR_vserver (__NR_Linux + 273) + +#define __NR_Linux_syscalls 274 #define HPUX_GATEWAY_ADDR 0xC0000004 #define LINUX_GATEWAY_ADDR 0x100 diff -NurpP --minimal linux-2.4.22/include/asm-ppc/unistd.h linux-2.4.22-vs1.21/include/asm-ppc/unistd.h --- linux-2.4.22/include/asm-ppc/unistd.h Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/include/asm-ppc/unistd.h Thu Dec 11 00:49:06 2003 @@ -239,6 +239,8 @@ #define __NR_io_cancel 231 #endif +#define __NR_vserver 273 + #define __NR(n) #n /* On powerpc a system call basically clobbers the same registers like a diff -NurpP --minimal linux-2.4.22/include/asm-ppc64/unistd.h linux-2.4.22-vs1.21/include/asm-ppc64/unistd.h --- linux-2.4.22/include/asm-ppc64/unistd.h Fri Jun 13 16:51:38 2003 +++ linux-2.4.22-vs1.21/include/asm-ppc64/unistd.h Thu Dec 11 00:49:06 2003 @@ -244,6 +244,7 @@ #define __NR_alloc_hugepages 232 #define __NR_free_hugepages 233 #define __NR_exit_group 234 +#define __NR_vserver 273 #define __NR(n) #n diff -NurpP --minimal linux-2.4.22/include/asm-sparc/unistd.h linux-2.4.22-vs1.21/include/asm-sparc/unistd.h --- linux-2.4.22/include/asm-sparc/unistd.h Fri Jun 13 16:51:38 2003 +++ linux-2.4.22-vs1.21/include/asm-sparc/unistd.h Thu Dec 11 00:49:06 2003 @@ -272,6 +272,8 @@ #define __NR_nfsservctl 254 #define __NR_aplib 255 +#define __NR_vserver 273 + #define _syscall0(type,name) \ type name(void) \ { \ diff -NurpP --minimal linux-2.4.22/include/asm-sparc64/unistd.h linux-2.4.22-vs1.21/include/asm-sparc64/unistd.h --- linux-2.4.22/include/asm-sparc64/unistd.h Fri Jun 13 16:51:38 2003 +++ 
linux-2.4.22-vs1.21/include/asm-sparc64/unistd.h Thu Dec 11 00:49:06 2003 @@ -274,6 +274,8 @@ #define __NR_nfsservctl 254 #define __NR_aplib 255 +#define __NR_vserver 273 + #define _syscall0(type,name) \ type name(void) \ { \ diff -NurpP --minimal linux-2.4.22/include/asm-x86_64/ia32_unistd.h linux-2.4.22-vs1.21/include/asm-x86_64/ia32_unistd.h --- linux-2.4.22/include/asm-x86_64/ia32_unistd.h Fri Jun 13 16:51:38 2003 +++ linux-2.4.22-vs1.21/include/asm-x86_64/ia32_unistd.h Thu Dec 11 00:49:06 2003 @@ -250,6 +250,8 @@ #define __NR_ia32_sched_setaffinity 241 #define __NR_ia32_sched_getaffinity 242 -#define IA32_NR_syscalls 245 +#define __NR_ia32_vserver 273 + +#define IA32_NR_syscalls 274 #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff -NurpP --minimal linux-2.4.22/include/asm-x86_64/unistd.h linux-2.4.22-vs1.21/include/asm-x86_64/unistd.h --- linux-2.4.22/include/asm-x86_64/unistd.h Fri Jun 13 16:51:38 2003 +++ linux-2.4.22-vs1.21/include/asm-x86_64/unistd.h Thu Dec 11 00:49:06 2003 @@ -496,7 +496,10 @@ __SYSCALL(__NR_remap_file_pages, sys_ni_ #define __NR_getdents64 217 __SYSCALL(__NR_getdents64, sys_getdents64) -#define __NR_syscall_max __NR_getdents64 +#define __NR_vserver 273 +__SYSCALL(__NR_vserver, sys_vserver) + +#define __NR_syscall_max __NR_vserver #ifndef __NO_STUBS diff -NurpP --minimal linux-2.4.22/include/linux/capability.h linux-2.4.22-vs1.21/include/linux/capability.h --- linux-2.4.22/include/linux/capability.h Wed Nov 19 18:34:54 2003 +++ linux-2.4.22-vs1.21/include/linux/capability.h Thu Dec 11 00:49:44 2003 @@ -130,7 +130,8 @@ typedef __u32 kernel_cap_t; #define CAP_SETPCAP 8 -/* Allow modification of S_IMMUTABLE and S_APPEND file attributes */ +/* Allow modification of S_IMMUTABLE_* and S_APPEND file + attributes */ #define CAP_LINUX_IMMUTABLE 9 @@ -231,6 +232,7 @@ typedef __u32 kernel_cap_t; /* Allow enabling/disabling tagged queuing on SCSI controllers and sending arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ 
+/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -278,6 +280,10 @@ typedef __u32 kernel_cap_t; /* Allow taking of leases on files */ #define CAP_LEASE 28 + +/* Allow quotactl */ + +#define CAP_QUOTACTL 29 #ifdef __KERNEL__ /* diff -NurpP --minimal linux-2.4.22/include/linux/devpts_fs_info.h linux-2.4.22-vs1.21/include/linux/devpts_fs_info.h --- linux-2.4.22/include/linux/devpts_fs_info.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.22-vs1.21/include/linux/devpts_fs_info.h Thu Dec 11 00:49:06 2003 @@ -0,0 +1,4 @@ +struct devpts_inode_info { + int vx_id; +}; + diff -NurpP --minimal linux-2.4.22/include/linux/ext2_fs.h linux-2.4.22-vs1.21/include/linux/ext2_fs.h --- linux-2.4.22/include/linux/ext2_fs.h Wed Nov 19 18:36:05 2003 +++ linux-2.4.22-vs1.21/include/linux/ext2_fs.h Thu Dec 11 00:50:54 2003 @@ -187,7 +187,7 @@ struct ext2_group_desc #define EXT2_UNRM_FL 0x00000002 /* Undelete */ #define EXT2_COMPR_FL 0x00000004 /* Compress file */ #define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT2_IMMUTABLE_FILE_FL 0x00000010 /* Immutable file */ #define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ #define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ #define EXT2_NOATIME_FL 0x00000080 /* do not update atime */ @@ -198,10 +198,11 @@ struct ext2_group_desc #define EXT2_ECOMPR_FL 0x00000800 /* Compression error */ /* End compression flags --- maybe not all used */ #define EXT2_BTREE_FL 0x00001000 /* btree format dir */ +#define EXT2_IMMUTABLE_LINK_FL 0x00008000 /* Immutable link */ #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ -#define EXT2_FL_USER_VISIBLE 0x00001FFF /* User visible flags */ -#define EXT2_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ +#define EXT2_FL_USER_VISIBLE 0x00009FFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x000080FF /* User modifiable flags */ /* * ioctl commands diff -NurpP --minimal 
linux-2.4.22/include/linux/ext3_fs.h linux-2.4.22-vs1.21/include/linux/ext3_fs.h --- linux-2.4.22/include/linux/ext3_fs.h Wed Nov 19 18:36:12 2003 +++ linux-2.4.22-vs1.21/include/linux/ext3_fs.h Thu Dec 11 00:51:00 2003 @@ -190,7 +190,7 @@ struct ext3_group_desc #define EXT3_UNRM_FL 0x00000002 /* Undelete */ #define EXT3_COMPR_FL 0x00000004 /* Compress file */ #define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT3_IMMUTABLE_FILE_FL 0x00000010 /* Immutable file */ #define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */ #define EXT3_NODUMP_FL 0x00000040 /* do not dump file */ #define EXT3_NOATIME_FL 0x00000080 /* do not update atime */ @@ -203,10 +203,11 @@ struct ext3_group_desc #define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT3_IMMUTABLE_LINK_FL 0x00008000 /* Immutable link */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -#define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ -#define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ +#define EXT3_FL_USER_VISIBLE 0x0000DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x000080FF /* User modifiable flags */ /* * Inode dynamic state flags diff -NurpP --minimal linux-2.4.22/include/linux/fs.h linux-2.4.22-vs1.21/include/linux/fs.h --- linux-2.4.22/include/linux/fs.h Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/include/linux/fs.h Thu Dec 11 00:49:44 2003 @@ -132,9 +132,10 @@ extern int leases_enable, dir_notify_ena #define S_NOATIME 2 /* Do not update access times */ #define S_QUOTA 4 /* Quota initialized for file */ #define S_APPEND 8 /* Append-only file */ -#define S_IMMUTABLE 16 /* Immutable file */ +#define S_IMMUTABLE_FILE 16 /* Immutable file */ #define S_DEAD 32 /* removed, but still open directory */ #define 
S_NOQUOTA 64 /* Inode is not counted to quota */ +#define S_IMMUTABLE_LINK 128 /* Immutable links */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -158,7 +159,8 @@ extern int leases_enable, dir_notify_ena #define IS_QUOTAINIT(inode) ((inode)->i_flags & S_QUOTA) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) -#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IMMUTABLE_FILE(inode) ((inode)->i_flags & S_IMMUTABLE_FILE) +#define IS_IMMUTABLE_LINK(inode) ((((inode)->i_flags & S_IMMUTABLE_FILE) << 3) ^ ((inode)->i_flags & S_IMMUTABLE_LINK) ) #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) @@ -322,6 +324,7 @@ extern void set_bh_page(struct buffer_he #include #include #include +#include /* * Attribute flags. These should be or-ed together to figure out what @@ -366,8 +369,9 @@ struct iattr { #define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ #define ATTR_FLAG_NOATIME 2 /* Don't update atime */ #define ATTR_FLAG_APPEND 4 /* Append-only file */ -#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ +#define ATTR_FLAG_IMMUTABLE_FILE 8 /* Immutable file */ #define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ +#define ATTR_FLAG_IMMUTABLE_LINK 32 /* Immutable link */ /* * Includes for diskquotas and mount structures.
@@ -517,6 +521,7 @@ struct inode { struct socket socket_i; struct usbdev_inode_info usbdev_i; struct jffs2_inode_info jffs2_i; + struct devpts_inode_info devpts_i; void *generic_ip; } u; }; diff -NurpP --minimal linux-2.4.22/include/linux/major.h linux-2.4.22-vs1.21/include/linux/major.h --- linux-2.4.22/include/linux/major.h Fri Jun 13 16:51:38 2003 +++ linux-2.4.22-vs1.21/include/linux/major.h Thu Dec 11 00:49:10 2003 @@ -24,6 +24,7 @@ #define PTY_SLAVE_MAJOR 3 #define HD_MAJOR IDE0_MAJOR #define TTY_MAJOR 4 +#define VROOT_MAJOR 4 #define TTYAUX_MAJOR 5 #define LP_MAJOR 6 #define VCS_MAJOR 7 diff -NurpP --minimal linux-2.4.22/include/linux/reiserfs_fs.h linux-2.4.22-vs1.21/include/linux/reiserfs_fs.h --- linux-2.4.22/include/linux/reiserfs_fs.h Wed Nov 19 18:37:13 2003 +++ linux-2.4.22-vs1.21/include/linux/reiserfs_fs.h Thu Dec 11 00:51:59 2003 @@ -866,7 +866,8 @@ struct stat_data_v1 /* we want common flags to have the same values as in ext2, so chattr(1) will work without problems */ -#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL +#define REISERFS_IMMUTABLE_FILE_FL EXT2_IMMUTABLE_FILE_FL +#define REISERFS_IMMUTABLE_LINK_FL EXT2_IMMUTABLE_LINK_FL #define REISERFS_APPEND_FL EXT2_APPEND_FL #define REISERFS_SYNC_FL EXT2_SYNC_FL #define REISERFS_NOATIME_FL EXT2_NOATIME_FL @@ -883,7 +884,8 @@ struct stat_data_v1 #define REISERFS_NOTAIL_FL (0x00008000) /* EXT2_NOTAIL_FL */ /* persistent flags that file inherits from the parent directory */ -#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ +#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FILE_FL | \ + REISERFS_IMMUTABLE_LINK_FL | \ REISERFS_SYNC_FL | \ REISERFS_NOATIME_FL | \ REISERFS_NODUMP_FL | \ diff -NurpP --minimal linux-2.4.22/include/linux/sched.h linux-2.4.22-vs1.21/include/linux/sched.h --- linux-2.4.22/include/linux/sched.h Mon Nov 17 19:49:11 2003 +++ linux-2.4.22-vs1.21/include/linux/sched.h Thu Dec 11 00:49:44 2003 @@ -85,6 +85,7 @@ extern int last_pid; #endif #include +#include #define 
TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 @@ -270,6 +271,7 @@ struct user_struct { /* Hash table maintenance information */ struct user_struct *next, **pprev; uid_t uid; + int vx_id; }; #define get_current_user() ({ \ @@ -277,6 +279,10 @@ struct user_struct { atomic_inc(&__user->__count); \ __user; }) + +struct context_info; +struct iproot_info; + extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -404,6 +410,11 @@ struct task_struct { unsigned long sas_ss_sp; size_t sas_ss_size; int (*notifier)(void *priv); + int vx_id; /* Process can only deal with other processes */ + /* with the same vx_id */ + __u32 cap_bset; /* Maximum capability of this process and children */ + struct context_info *s_info; + struct iproot_info *ip_info; void *notifier_data; sigset_t *notifier_mask; @@ -509,6 +520,7 @@ extern struct exec_domain default_exec_d blocked: {{0}}, \ alloc_lock: SPIN_LOCK_UNLOCKED, \ journal_info: NULL, \ + cap_bset: CAP_INIT_EFF_SET, \ } @@ -573,7 +585,7 @@ static inline void task_release_cpu(stru } /* per-UID process charging. */ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(int, uid_t); extern void free_uid(struct user_struct *); #include diff -NurpP --minimal linux-2.4.22/include/linux/sys.h linux-2.4.22-vs1.21/include/linux/sys.h --- linux-2.4.22/include/linux/sys.h Fri Jun 13 16:51:39 2003 +++ linux-2.4.22-vs1.21/include/linux/sys.h Thu Dec 11 00:49:06 2003 @@ -4,7 +4,7 @@ /* * system call entry points ... 
but not all are defined */ -#define NR_syscalls 270 +#define NR_syscalls 274 /* * These are system calls that will be removed at some time diff -NurpP --minimal linux-2.4.22/include/linux/sysctl.h linux-2.4.22-vs1.21/include/linux/sysctl.h --- linux-2.4.22/include/linux/sysctl.h Mon Dec 8 22:53:18 2003 +++ linux-2.4.22-vs1.21/include/linux/sysctl.h Thu Dec 11 00:49:46 2003 @@ -127,6 +127,7 @@ enum KERN_CORE_PATTERN=56, /* string: pattern for core-files */ KERN_PPC_L3CR=57, /* l3cr register on PPC */ KERN_EXCEPTION_TRACE=58, /* boolean: exception trace */ + KERN_VSHELPER=59, /* string: path to vshelper policy agent */ }; diff -NurpP --minimal linux-2.4.22/include/linux/vcontext.h linux-2.4.22-vs1.21/include/linux/vcontext.h --- linux-2.4.22/include/linux/vcontext.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.22-vs1.21/include/linux/vcontext.h Thu Dec 11 00:49:44 2003 @@ -0,0 +1,114 @@ +#ifndef _VX_CONTEXT_H +#define _VX_CONTEXT_H + +/* + We may have a different domainname and nodename for each security + context. 
By default, a security context share the same as its + parent, potentially the information in system_utsname +*/ +#define VX_INFO_LOCK 1 /* Can't request a new vx_id */ +#define VX_INFO_SCHED 2 /* All process in the vx_id */ + /* Contribute to the schedular */ +#define VX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define VX_INFO_PRIVATE 8 /* Noone can join this security context */ +#define VX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ +#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */ +#define VX_INFO_ULIMIT 64 /* Use ulimit of the current process */ + /* to become the global limits */ + /* of the context */ + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ + +#define NB_S_CONTEXT 16 + +#define NB_IPV4ROOT 16 + +#include +#include + +struct context_info { + atomic_t refcount; + short int vx_id[NB_S_CONTEXT];/* root is allowed to switch the current */ + /* security context using any in this table */ + unsigned long rlim[RLIM_NLIMITS]; /* Per context limit */ + atomic_t res[RLIM_NLIMITS]; /* Current value */ + struct proc_dir_entry *procent; + char nodename[65]; + char domainname[65]; + int flags; /* VX_INFO_xxx */ + atomic_t ticks; /* Number of ticks used by all process */ + /* in the vx_id */ + int initpid; /* PID of the logical process 1 of the */ + /* of the context */ + int nr_threads; + unsigned long total_forks; + unsigned int bias_cswtch; + long bias_jiffies; + long bias_idle; + void *data1; + void *data2; + void *data3; + void *data4; +}; + +struct iproot_info { + unsigned long mark; /* Special signature for debugging */ + atomic_t refcount; + int nbipv4; + __u32 ipv4[NB_IPV4ROOT];/* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and for bind any service */ + /* The other must be used explicity when */ + /* binding */ + __u32 mask[NB_IPV4ROOT];/* Netmask for 
each ipv4 */ + /* Used to select the proper source address */ + /* for sockets */ + __u32 v4_bcast; /* Broadcast address used to receive UDP packets */ +}; + + +#define VX_ADMIN 0x0001 +#define VX_WATCH 0x0002 + +#define VX_IDENT 0x0010 +#define VX_EQUIV 0x0020 +#define VX_PARENT 0x0040 +#define VX_CHILD 0x0080 + +#define VX_ARG_MASK 0x00F0 + +#include + +/* required to resolve recursive dependancies */ +#define vx_check(c,m) __vx_check(current->vx_id,c,m) + +/* + * check current context for ADMIN/WATCH and + * optionally agains supplied argument + */ +static inline int __vx_check(int cctx, int ctx, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && (ctx == cctx)) + return 1; + if ((mode & VX_EQUIV) && (ctx == cctx)) + return 1; + } + return (((mode & VX_ADMIN) && (cctx == 0)) || + ((mode & VX_WATCH) && (cctx == 1))); +} + + +void vx_assign_info(struct task_struct *); +void vx_release_info(struct task_struct *); + +void vx_assign_ip_info(struct iproot_info *); +void vx_release_ip_info(struct iproot_info *); + +int vc_new_s_context(uint32_t, void *); +int vc_set_ipv4root(uint32_t, void *); + +#endif diff -NurpP --minimal linux-2.4.22/include/linux/vswitch.h linux-2.4.22-vs1.21/include/linux/vswitch.h --- linux-2.4.22/include/linux/vswitch.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.22-vs1.21/include/linux/vswitch.h Thu Dec 11 00:49:54 2003 @@ -0,0 +1,122 @@ +#ifndef _LINUX_VIRTUAL_H +#define _LINUX_VIRTUAL_H + +#include +#include + +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) +#define VC_VERSION(c) ((c) & 0xFFF) + +#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) + +/* + + Syscall Matrix V2.3 + + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | + |INFO |SETUP | |MOVE | | | | | | + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SYSTEM |VERSION| | | | | 
| |DEVICES| | + HOST | 00| 01| 02| 03| 04| 05| | 06| 07| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + CPU | | | | | | | |SCHED. | | + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + MEMORY | | | | | | | |SWAP | | + | 16| 17| 18| 19| 20| 21| | 22| 23| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + NETWORK| | | | | | | |SERIAL | | + | 24| 25| 26| 27| 28| 29| | 30| 31| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + DISK | | | | | | | | | | + VFS | 32| 33| 34| 35| 36| 37| | 38| 39| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + OTHER | | | | | | | | | | + | 40| 41| 42| 43| 44| 45| | 46| 47| + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ + SPECIAL| | | | | | | | | | + | 48| 49| 50| 51| 52| 53| | 54| 55| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SPECIAL| | | | |RLIMIT |SYSCALL| | |COMPAT | + | 56| 57| 58| 59| 60|TEST 61| | 62| 63| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + +*/ + +#define VC_CAT_VERSION 0 + +#define VC_CAT_PROCTRL 12 + +#define VC_CAT_RLIMIT 60 + +#define VC_CAT_SYSTEST 61 +#define VC_CAT_COMPAT 63 + +/* interface version */ + +#define VCI_VERSION 0x00010004 + + + +/* query version */ + +#define VCMD_get_version VC_CMD(VERSION, 0, 0) + + +/* compatibiliy vserver commands */ + +#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1) +#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3) + +/* compatibiliy vserver arguments */ + +struct vcmd_new_s_context_v1 { + uint32_t remove_cap; + uint32_t flags; +}; + +struct vcmd_set_ipv4root_v3 { + /* number of pairs in id */ + uint32_t broadcast; + struct { + uint32_t ip; + uint32_t mask; + } ip_mask_pair[NB_IPV4ROOT]; +}; + +/* context signalling */ + +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) + +struct vcmd_ctx_kill_v0 { + 
int32_t pid; + int32_t sig; +}; + +/* rlimit vserver commands */ + +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) + +struct vcmd_ctx_rlimit_v0 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +}; + +struct vcmd_ctx_rlimit_mask_v0 { + uint32_t minimum; + uint32_t softlimit; + uint32_t maximum; +}; + +#define CRLIM_INFINITY (~0ULL) +#define CRLIM_KEEP (~1ULL) + + +#endif /* _LINUX_VIRTUAL_H */ diff -NurpP --minimal linux-2.4.22/include/net/ip.h linux-2.4.22-vs1.21/include/net/ip.h --- linux-2.4.22/include/net/ip.h Mon Dec 8 22:55:21 2003 +++ linux-2.4.22-vs1.21/include/net/ip.h Thu Dec 11 00:51:50 2003 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.4.22/include/net/route.h linux-2.4.22-vs1.21/include/net/route.h --- linux-2.4.22/include/net/route.h Wed Nov 19 18:37:04 2003 +++ linux-2.4.22-vs1.21/include/net/route.h Thu Dec 11 00:51:50 2003 @@ -32,6 +32,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. @@ -166,6 +167,44 @@ static inline char rt_tos2priority(u8 to static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif) { int err; + struct iproot_info *ip_info = current->ip_info; + if (ip_info != NULL) { + __u32 ipv4root = ip_info->ipv4[0]; + if (ipv4root != 0) { + int n = ip_info->nbipv4; + if (src == 0) { + if (n > 1) { + u32 foundsrc; + int i; + err = ip_route_output(rp, dst, src, tos, oif); + if (err) return err; + foundsrc = (*rp)->rt_src; + ip_rt_put(*rp); + for (i=0; i<n; i++) { + u32 mask = ip_info->mask[i]; + u32 ipv4 = ip_info->ipv4[i]; + u32 netipv4 = ipv4 & mask; + if ((foundsrc & mask) == netipv4) { + src = ipv4; + break; + } + } + } + if (src == 0) + src = dst == 0x0100007f + ?
0x0100007f: ipv4root; + } else { + int i; + for (i=0; i<n; i++) { + if (ip_info->ipv4[i] == src) break; + } + if (i == n) + return -EPERM; + } + if (dst == 0x0100007f && !vx_check(0, VX_ADMIN)) + dst = ipv4root; + } + } err = ip_route_output(rp, dst, src, tos, oif); if (err || (dst && src)) return err; diff -NurpP --minimal linux-2.4.22/include/net/sock.h linux-2.4.22-vs1.21/include/net/sock.h --- linux-2.4.22/include/net/sock.h Mon Nov 17 19:49:47 2003 +++ linux-2.4.22-vs1.21/include/net/sock.h Thu Dec 11 00:50:31 2003 @@ -498,6 +498,7 @@ do { spin_lock_init(&((__sk)->lock.slock } while(0) struct sock { + /* See tcp.h comment on tcp_tw_bucket */ /* Socket demultiplex comparisons on incoming packets. */ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ @@ -519,6 +520,8 @@ struct sock { unsigned char reuse; /* SO_REUSEADDR setting */ unsigned char shutdown; atomic_t refcnt; /* Reference count */ + struct iproot_info *ip_info; + /* End of common section with tcp_tw_bucket */ socket_lock_t lock; /* Synchronizer... */ int rcvbuf; /* Size of receive buffer in bytes */ @@ -536,6 +539,7 @@ struct sock { __u32 saddr; /* Sending source */ unsigned int allocation; /* Allocation mode */ int sndbuf; /* Size of send buffer in bytes */ + __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */ struct sock *prev; /* Not all are volatile, but some are, so we might as well say they all are.
@@ -680,6 +684,9 @@ struct sock { /* RPC layer private data */ void *user_data; + /* Context of process creating this socket */ + int vx_id; + /* Callbacks */ void (*state_change)(struct sock *sk); void (*data_ready)(struct sock *sk,int bytes); diff -NurpP --minimal linux-2.4.22/include/net/tcp.h linux-2.4.22-vs1.21/include/net/tcp.h --- linux-2.4.22/include/net/tcp.h Mon Nov 17 19:52:27 2003 +++ linux-2.4.22-vs1.21/include/net/tcp.h Thu Dec 11 00:52:49 2003 @@ -173,6 +173,7 @@ struct tcp_tw_bucket { unsigned char reuse, rcv_wscale; /* It is also TW bucket specific */ atomic_t refcnt; + struct iproot_info *ip_info; /* And these are ours. */ int hashent; @@ -191,6 +192,7 @@ struct tcp_tw_bucket { struct in6_addr v6_daddr; struct in6_addr v6_rcv_saddr; #endif + int vx_id; }; extern kmem_cache_t *tcp_timewait_cachep; diff -NurpP --minimal linux-2.4.22/ipc/util.c linux-2.4.22-vs1.21/ipc/util.c --- linux-2.4.22/ipc/util.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/ipc/util.c Thu Dec 11 00:49:10 2003 @@ -93,6 +93,8 @@ int ipc_findkey(struct ipc_ids* ids, key struct kern_ipc_perm* p; for (id = 0; id <= ids->max_id; id++) { + if (!vx_check(ids->entries[id].vx_id, VX_IDENT)) + continue; p = ids->entries[id].p; if(p==NULL) continue; @@ -167,6 +169,7 @@ found: spin_lock(&ids->ary); ids->entries[id].p = new; + ids->entries[id].vx_id = current->vx_id; return id; } diff -NurpP --minimal linux-2.4.22/ipc/util.h linux-2.4.22-vs1.21/ipc/util.h --- linux-2.4.22/ipc/util.h Fri Nov 29 00:53:15 2002 +++ linux-2.4.22-vs1.21/ipc/util.h Thu Dec 11 00:54:05 2003 @@ -5,6 +5,8 @@ * ipc helper functions (c) 1999 Manfred Spraul */ +#include + #define USHRT_MAX 0xffff #define SEQ_MULTIPLIER (IPCMNI) @@ -25,6 +27,7 @@ struct ipc_ids { struct ipc_id { struct kern_ipc_perm* p; + int vx_id; // Context owning this ID }; @@ -74,8 +77,11 @@ extern inline struct kern_ipc_perm* ipc_ spin_lock(&ids->ary); out = ids->entries[lid].p; - if(out==NULL) + if (out==NULL || +
!vx_check(ids->entries[lid].vx_id, VX_WATCH|VX_IDENT)) { spin_unlock(&ids->ary); + out = NULL; + } return out; } diff -NurpP --minimal linux-2.4.22/kernel/Makefile linux-2.4.22-vs1.21/kernel/Makefile --- linux-2.4.22/kernel/Makefile Mon Sep 17 06:22:40 2001 +++ linux-2.4.22-vs1.21/kernel/Makefile Thu Dec 11 00:49:06 2003 @@ -14,7 +14,7 @@ export-objs = signal.o sys.o kmod.o cont obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ sysctl.o acct.o capability.o ptrace.o timer.o user.o \ - signal.o sys.o kmod.o context.o + signal.o sys.o kmod.o context.o vswitch.o vcontext.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += ksyms.o diff -NurpP --minimal linux-2.4.22/kernel/exit.c linux-2.4.22-vs1.21/kernel/exit.c --- linux-2.4.22/kernel/exit.c Fri Nov 29 00:53:15 2002 +++ linux-2.4.22-vs1.21/kernel/exit.c Thu Dec 11 00:49:10 2003 @@ -16,6 +16,7 @@ #ifdef CONFIG_BSD_PROCESS_ACCT #include #endif +#include #include #include @@ -66,6 +67,8 @@ static void release_task(struct task_str current->counter += p->counter; if (current->counter >= MAX_COUNTER) current->counter = MAX_COUNTER; + vx_release_info(p); + vx_release_ip_info(p->ip_info); p->pid = 0; free_task_struct(p); } else { @@ -159,8 +162,18 @@ static inline int has_stopped_jobs(int p static inline void forget_original_parent(struct task_struct * father) { struct task_struct * p; + struct task_struct *vchild_reaper = child_reaper; read_lock(&tasklist_lock); + if (father->s_info) { + pid_t initpid = father->s_info->initpid; + if ((initpid != 0) && (father->pid != initpid)) { + struct task_struct *r = find_task_by_pid(initpid); + + if (r != NULL) + vchild_reaper = r; + } + } for_each_task(p) { if (p->p_opptr == father) { @@ -169,7 +182,7 @@ static inline void forget_original_paren p->self_exec_id++; /* Make sure we're not reparenting to ourselves */ - p->p_opptr = child_reaper; + p->p_opptr = vchild_reaper; if (p->pdeath_signal) 
send_sig(p->pdeath_signal, p, 0); } diff -NurpP --minimal linux-2.4.22/kernel/fork.c linux-2.4.22-vs1.21/kernel/fork.c --- linux-2.4.22/kernel/fork.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/kernel/fork.c Thu Dec 11 00:49:10 2003 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -661,6 +662,10 @@ int do_fork(unsigned long clone_flags, u *p = *current; retval = -EAGAIN; + if (p->s_info && (p->s_info->flags & VX_INFO_NPROC)) { + if (atomic_read(&p->s_info->refcount) >= p->rlim[RLIMIT_NPROC].rlim_max) + goto bad_fork_free; + } /* * Check if we are over our maximum process limit, but be sure to * exclude root. This is needed to make it possible for login and @@ -670,6 +675,9 @@ int do_fork(unsigned long clone_flags, u if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE)) goto bad_fork_free; + + vx_assign_info(p); + vx_assign_ip_info(p->ip_info); atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); diff -NurpP --minimal linux-2.4.22/kernel/printk.c linux-2.4.22-vs1.21/kernel/printk.c --- linux-2.4.22/kernel/printk.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/kernel/printk.c Thu Dec 11 00:49:10 2003 @@ -26,6 +26,7 @@ #include #include /* For in_interrupt() */ #include +#include #include @@ -171,6 +172,9 @@ int do_syslog(int type, char * buf, int int do_clear = 0; char c; int error = 0; + + if (!capable(CAP_SYS_ADMIN) && !vx_check(0, VX_ADMIN)) + return -EPERM; switch (type) { case 0: /* Close log */ diff -NurpP --minimal linux-2.4.22/kernel/sched.c linux-2.4.22-vs1.21/kernel/sched.c --- linux-2.4.22/kernel/sched.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/kernel/sched.c Thu Dec 11 00:49:10 2003 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -165,7 +166,12 @@ static inline int goodness(struct task_s * Don't do any other calculations if the time slice is * over.. 
*/ - weight = p->counter; + if (p->s_info && (p->s_info->flags & VX_INFO_SCHED)) { + weight = atomic_read(&p->s_info->ticks) / + atomic_read(&p->s_info->refcount); + weight = (weight+p->counter) >> 1; + } else + weight = p->counter; if (!weight) goto out; @@ -618,8 +624,19 @@ repeat_schedule: spin_unlock_irq(&runqueue_lock); read_lock(&tasklist_lock); - for_each_task(p) + /* + Reset the s_info->ticks to the sum off all + member processes p->counter + */ + for_each_task(p) { + if (p->s_info && (p->s_info->flags & VX_INFO_SCHED)) + atomic_set(&p->s_info->ticks, 0); + } + for_each_task(p) { p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); + if (p->s_info && (p->s_info->flags & VX_INFO_SCHED)) + atomic_add(p->counter, &p->s_info->ticks); + } read_unlock(&tasklist_lock); spin_lock_irq(&runqueue_lock); goto repeat_schedule; diff -NurpP --minimal linux-2.4.22/kernel/signal.c linux-2.4.22-vs1.21/kernel/signal.c --- linux-2.4.22/kernel/signal.c Fri Jun 13 16:51:39 2003 +++ linux-2.4.22-vs1.21/kernel/signal.c Thu Dec 11 00:49:10 2003 @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -621,7 +622,9 @@ kill_pg_info(int sig, struct siginfo *in retval = -ESRCH; read_lock(&tasklist_lock); for_each_task(p) { - if (p->pgrp == pgrp && thread_group_leader(p)) { + if (p->pgrp == pgrp && thread_group_leader(p) && + ((long)info == 1 || + vx_check(p->vx_id, VX_IDENT))) { int err = send_sig_info(sig, info, p); if (retval) retval = err; @@ -675,7 +678,20 @@ kill_proc_info(int sig, struct siginfo * if (tg) p = tg; } - error = send_sig_info(sig, info, p); + switch ((unsigned long)info) { + case 0: + if (vx_check(p->vx_id, VX_IDENT)) + error = send_sig_info(sig, info, p); + break; + case 1: + error = send_sig_info(sig, info, p); + break; + default: + if ((info->si_code == SI_KERNEL) + || vx_check(p->vx_id, VX_IDENT)) + error = send_sig_info(sig, info, p); + break; + } } read_unlock(&tasklist_lock); return error; @@ -699,7 +715,9 @@ static int kill_something_info(int 
sig, read_lock(&tasklist_lock); for_each_task(p) { - if (p->pid > 1 && p != current && thread_group_leader(p)) { + if (p->pid > 1 && p != current && + thread_group_leader(p) && + vx_check(p->vx_id, VX_IDENT)) { int err = send_sig_info(sig, info, p); ++count; if (err != -EPERM) diff -NurpP --minimal linux-2.4.22/kernel/sys.c linux-2.4.22-vs1.21/kernel/sys.c --- linux-2.4.22/kernel/sys.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/kernel/sys.c Thu Dec 11 00:49:10 2003 @@ -6,14 +6,17 @@ #include #include +#include #include #include #include #include +#include #include #include #include #include +#include #include #include @@ -277,6 +280,67 @@ asmlinkage long sys_getpriority(int whic return retval; } +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = context identifier + * argv [2] = "restart", "halt", "poweroff", ... + * argv [3] = additional argument (restart2) + * + * envp [*] = type-specific parameters + */ +char vshelper_path[256] = "/sbin/vshelper"; /* sized to the 256-byte maxlen of the KERN_VSHELPER sysctl entry */ + +long vs_reboot(unsigned int cmd, void * arg) +{ + char id_buf[8], cmd_buf[32]; + char uid_buf[32], pid_buf[32]; + char buffer[256] = ""; /* zero-filled: strncpy_from_user() does not terminate a truncated copy */ + + char *argv[] = {vshelper_path, id_buf, NULL, NULL, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + uid_buf, pid_buf, cmd_buf, 0}; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", current->vx_id); + + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid); + snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid); + + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + argv[2] = "restart"; + break; + + case LINUX_REBOOT_CMD_HALT: + argv[2] = "halt"; + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + argv[2] = "poweroff"; + break; + + case LINUX_REBOOT_CMD_RESTART2: + if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) + return -EFAULT; +
argv[3] = buffer; + default: + argv[2] = "restart2"; + break; + } + + if (call_usermodehelper(*argv, argv, envp)) { + printk( KERN_WARNING + "vs_reboot(): failed to exec (%s %s %s %s)\n", + vshelper_path, argv[1], argv[2], argv[3]); + return -EPERM; + } + return 0; +} /* * Reboot system call: for obvious reasons only root may call it, @@ -300,6 +364,9 @@ asmlinkage long sys_reboot(int magic1, i magic2 != LINUX_REBOOT_MAGIC2B)) return -EINVAL; + if (!vx_check(0, VX_ADMIN | VX_WATCH)) + return vs_reboot(cmd, arg); + lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: @@ -518,7 +585,7 @@ static int set_user(uid_t new_ruid, int * cheaply with the new uid cache, so if it matters * we should be checking for it. -DaveM */ - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(current->vx_id, new_ruid); if (!new_user) return -EAGAIN; old_user = current->user; @@ -1034,9 +1101,18 @@ DECLARE_RWSEM(uts_sem); asmlinkage long sys_newuname(struct new_utsname * name) { int errno = 0; + struct new_utsname tmp,*pttmp; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (current->s_info) { + tmp = system_utsname; + strcpy (tmp.nodename,current->s_info->nodename); + strcpy (tmp.domainname,current->s_info->domainname); + pttmp = &tmp; + } + else + pttmp = &system_utsname; + if (copy_to_user(name,pttmp,sizeof *name)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1045,6 +1121,7 @@ asmlinkage long sys_newuname(struct new_ asmlinkage long sys_sethostname(char *name, int len) { int errno; + char *nodename; char tmp[__NEW_UTS_LEN]; if (!capable(CAP_SYS_ADMIN)) @@ -1053,9 +1130,12 @@ asmlinkage long sys_sethostname(char *na return -EINVAL; down_write(&uts_sem); errno = -EFAULT; + nodename = system_utsname.nodename; + if (current->s_info) + nodename = current->s_info->nodename; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.nodename, tmp, len); - system_utsname.nodename[len] = 0; + memcpy(nodename, tmp, len); + nodename[len] = 0; 
errno = 0; } up_write(&uts_sem); @@ -1065,15 +1145,19 @@ asmlinkage long sys_sethostname(char *na asmlinkage long sys_gethostname(char *name, int len) { int i, errno; + char *nodename; if (len < 0) return -EINVAL; down_read(&uts_sem); - i = 1 + strlen(system_utsname.nodename); + nodename = system_utsname.nodename; + if (current->s_info) + nodename = current->s_info->nodename; + i = 1 + strlen(nodename); if (i > len) i = len; errno = 0; - if (copy_to_user(name, system_utsname.nodename, i)) + if (copy_to_user(name, nodename, i)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1086,6 +1170,7 @@ asmlinkage long sys_gethostname(char *na asmlinkage long sys_setdomainname(char *name, int len) { int errno; + char *domainname; char tmp[__NEW_UTS_LEN]; if (!capable(CAP_SYS_ADMIN)) @@ -1094,10 +1179,13 @@ asmlinkage long sys_setdomainname(char * return -EINVAL; down_write(&uts_sem); + domainname = system_utsname.domainname; + if (current->s_info) + domainname = current->s_info->domainname; errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.domainname, tmp, len); - system_utsname.domainname[len] = 0; + memcpy(domainname, tmp, len); + domainname[len] = 0; errno = 0; } up_write(&uts_sem); diff -NurpP --minimal linux-2.4.22/kernel/sysctl.c linux-2.4.22-vs1.21/kernel/sysctl.c --- linux-2.4.22/kernel/sysctl.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/kernel/sysctl.c Thu Dec 11 00:49:10 2003 @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -76,6 +77,7 @@ extern int sem_ctls[]; #endif extern int exception_trace; +extern char vshelper_path[]; #ifdef __sparc__ extern char reboot_command []; @@ -269,6 +271,8 @@ static ctl_table kern_table[] = { {KERN_EXCEPTION_TRACE,"exception-trace", &exception_trace,sizeof(int),0644,NULL,&proc_dointvec}, #endif + {KERN_VSHELPER,"vshelper", + &vshelper_path,256,0644,NULL,&proc_dostring,&sysctl_string}, {0} }; @@ -392,6 +396,8 @@ extern asmlinkage long sys_sysctl(struct static int test_perm(int 
mode, int op) { + if (!capable(CAP_SYS_ADMIN)) + mode &= ~(0222); if (!current->euid) mode >>= 6; else if (in_egroup_p(0)) @@ -807,7 +813,18 @@ static int proc_doutsstring(ctl_table *t void *buffer, size_t *lenp) { int r; + ctl_table tmp; + /* HACK for per context hostname and domainname */ + if (current->s_info) { + tmp = *table; + table = &tmp; + + if (table->data == (void*)&system_utsname.nodename) + tmp.data = &current->s_info->nodename; + else if (table->data == (void*)&system_utsname.domainname) + tmp.data = &current->s_info->domainname; + } if (!write) { down_read(&uts_sem); r=proc_dostring(table,0,filp,buffer,lenp); diff -NurpP --minimal linux-2.4.22/kernel/timer.c linux-2.4.22-vs1.21/kernel/timer.c --- linux-2.4.22/kernel/timer.c Fri Nov 29 00:53:15 2002 +++ linux-2.4.22-vs1.21/kernel/timer.c Thu Dec 11 00:49:10 2003 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -599,6 +600,8 @@ void update_process_times(int user_tick) update_one_process(p, user_tick, system, cpu); if (p->pid) { + if (p->s_info && (p->s_info->flags & VX_INFO_SCHED)) + atomic_dec (&p->s_info->ticks); if (--p->counter <= 0) { p->counter = 0; /* @@ -752,6 +755,10 @@ asmlinkage unsigned long sys_alarm(unsig */ asmlinkage long sys_getpid(void) { + if (current->s_info && + current->s_info->initpid == current->tgid) + /* We are faking process 1 for this security context */ + return 1; return current->tgid; } @@ -798,6 +805,10 @@ asmlinkage long sys_getppid(void) #endif break; } + if (pid && current->s_info + && current->s_info->initpid == pid) + /* We are faking process 1 for this security context */ + pid = 1; return pid; } diff -NurpP --minimal linux-2.4.22/kernel/user.c linux-2.4.22-vs1.21/kernel/user.c --- linux-2.4.22/kernel/user.c Wed Nov 29 07:43:39 2000 +++ linux-2.4.22-vs1.21/kernel/user.c Thu Dec 11 00:49:06 2003 @@ -6,6 +6,19 @@ * We have a per-user structure to keep track of how many * processes, files etc the user has claimed, in order to be * able to have per-user limits
for system resources. + * + * For the vserver project, the key is extended from UID to (SC,UID), + * with SC being the security context ID. Thus, each security context + * has independant per-UID resource usage counters. + * + * As a consequence, even if two UIDs are the same, the 'struct user *' + * in their task_struct could be different. I don't think any code cares. + * + * (vserver modifications done Sun Jan 13 08:48:45 CET 2002 by bof@bof.de) + * + * NOTE: For now, the hash function is unmodified: the same uid in several + * security contexts, will always sit on the same hash chain. This could + * be changed easily. */ #include @@ -56,7 +69,7 @@ static inline void uid_hash_remove(struc *pprev = next; } -static inline struct user_struct *uid_hash_find(uid_t uid, struct user_struct **hashent) +static inline struct user_struct *uid_hash_find(int vx_id, uid_t uid, struct user_struct **hashent) { struct user_struct *next; @@ -65,7 +78,7 @@ static inline struct user_struct *uid_ha struct user_struct *up = next; if (next) { next = up->next; - if (up->uid != uid) + if (up->uid != uid || up->vx_id != vx_id) continue; atomic_inc(&up->__count); } @@ -82,13 +95,13 @@ void free_uid(struct user_struct *up) } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(int vx_id, uid_t uid) { struct user_struct **hashent = uidhashentry(uid); struct user_struct *up; spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(vx_id, uid, hashent); spin_unlock(&uidhash_lock); if (!up) { @@ -98,6 +111,7 @@ struct user_struct * alloc_uid(uid_t uid if (!new) return NULL; new->uid = uid; + new->vx_id = vx_id; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); @@ -107,7 +121,7 @@ struct user_struct * alloc_uid(uid_t uid * on adding the same user already.. 
*/ spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(vx_id, uid, hashent); if (up) { kmem_cache_free(uid_cachep, new); } else {
diff -NurpP --minimal linux-2.4.22/kernel/vcontext.c linux-2.4.22-vs1.21/kernel/vcontext.c
--- linux-2.4.22/kernel/vcontext.c Thu Jan 1 01:00:00 1970
+++ linux-2.4.22-vs1.21/kernel/vcontext.c Thu Dec 11 00:49:10 2003
@@ -0,0 +1,444 @@
+/*
+ * linux/kernel/vcontext.c
+ *
+ * Virtual Context Support
+ *
+ * Copyright (C) 2003 Herbert Pötzl
+ *
+ * V0.01 context helper
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+
+/*
+ * Send signal vc_data.sig to processes of security context 'id'.
+ * Only callers passing vx_check(0, VX_ADMIN) are allowed (else -EPERM).
+ *
+ * vc_data.pid ==  0: every thread group leader with pid > 1 in the context
+ * vc_data.pid == -1: the same, except the process whose pid equals the
+ *                    s_info->initpid found for that context
+ * any other pid:     the thread group leader of that pid, if it is in
+ *                    context 'id' or 'id' is (uint32_t)-1
+ *
+ * Returns -EFAULT if 'data' cannot be read.  Otherwise the result
+ * defaults to -ESRCH and is replaced by what send_sig_info() returned
+ * (context-wide: by the last return value other than -EPERM).
+ */
+int vc_ctx_kill(uint32_t id, void *data)
+{
+	int retval;
+	struct vcmd_ctx_kill_v0 vc_data;
+	struct siginfo info;
+	struct task_struct *p;
+	pid_t initpid = 0;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+	if (!vx_check(0, VX_ADMIN))
+		return -EPERM;
+
+	info.si_signo = vc_data.sig;
+	info.si_errno = 0;
+	info.si_code = SI_USER;
+	info.si_pid = current->pid;
+	info.si_uid = current->uid;
+
+	retval = -ESRCH;
+	read_lock(&tasklist_lock);
+	switch (vc_data.pid) {
+	case -1:
+	case 0:
+		for_each_task(p) {
+			if (!initpid && p->vx_id == id && p->s_info)
+				initpid = p->s_info->initpid;
+			if (p->vx_id == id && p->pid > 1
+				&& (!vc_data.pid || initpid != p->pid)
+				&& thread_group_leader(p)) {
+				int err = send_sig_info(vc_data.sig, &info, p);
+
+				if (err != -EPERM)
+					retval = err;
+			}
+		}
+		break;
+
+	default:
+		p = find_task_by_pid(vc_data.pid);
+		if (p) {
+			if (!thread_group_leader(p)) {
+				struct task_struct *tg;
+
+				tg = find_task_by_pid(p->tgid);
+				if (tg)
+					p = tg;
+			}
+			if ((id == -1) || (p->vx_id == id))
+				retval = send_sig_info(vc_data.sig, &info, p);
+		}
+		break;
+	}
+	read_unlock(&tasklist_lock);
+	return retval;
+}
+
+int vc_get_rlimit(uint32_t id, void *data)
+{
+	return -ENOSYS;
+}
+
+int vc_set_rlimit(uint32_t id, void *data)
+{
+	return -ENOSYS;
+}
+
+int vc_get_rlimit_mask(uint32_t id, void *data)
+{
+	return -ENOSYS;
+}
+
+
+
+/* system functions */
+
+
+/*
+ * Alloc a new s_info to the current process and release
+ * the one currently owned by the current process.
+ * If kmalloc() fails, the current s_info is silently kept.
+ */
+static void vx_alloc_info(void)
+{
+	struct context_info *s_info;
+
+	s_info = kmalloc(sizeof(struct context_info), GFP_KERNEL);
+	if (s_info) {
+		int i;
+		memset (s_info, 0, sizeof(*s_info));
+		s_info->vx_id[0] = current->vx_id;
+		atomic_set(&s_info->refcount, 1);
+		atomic_set(&s_info->ticks, current->counter);
+		s_info->flags = 0;
+		s_info->initpid = 0;
+		s_info->nr_threads = 1;
+		s_info->total_forks = 0;
+		s_info->bias_cswtch = kstat.context_swtch;
+		s_info->bias_jiffies = jiffies;
+		s_info->bias_idle = init_tasks[0]->times.tms_utime +
+			init_tasks[0]->times.tms_stime;
+		for (i=0; i<RLIM_NLIMITS; i++) {
+			s_info->rlim[i] = 0xffffffff;
+			atomic_set(&s_info->res[i], 0);
+		}
+		down_read(&uts_sem);
+		if (current->s_info) {
+			strcpy(s_info->nodename, current->s_info->nodename);
+			strcpy(s_info->domainname, current->s_info->domainname);
+		} else {
+			strcpy(s_info->nodename, system_utsname.nodename);
+			strcpy(s_info->domainname, system_utsname.domainname);
+		}
+		up_read(&uts_sem);
+		vx_release_info(current);
+		current->s_info = s_info;
+		/*
+			The current process is switching to a new context
+			so we preset the open file counter with
+			the file currently open by that process.
+			Some of those files may have been opened by
+			a parent, so do not strictly belong to this
+			process, so we kind of over bill the current process
+			but it is minimal.
+		*/
+		/* NOTE(review): files->count is presumably the files_struct
+		   reference count, not the number of open files - confirm. */
+		atomic_set(&s_info->res[RLIMIT_NOFILE],
+			atomic_read(&current->files->count));
+	}
+}
+
+/*
+ * Increase the reference count on the context_info member of a task
+ */
+void vx_assign_info (struct task_struct *p)
+{
+	down_write (&uts_sem);
+	if (p->s_info)
+		atomic_inc(&p->s_info->refcount);
+	up_write (&uts_sem);
+}
+
+/*
+ * Decrease the reference count on the context_info member of a task
+ * Free the struct if the reference count reach 0.
+ */
+void vx_release_info (struct task_struct *p)
+{
+	down_write (&uts_sem);
+	if (p->s_info) {
+		if (atomic_dec_and_test(&p->s_info->refcount)) {
+			kfree(p->s_info);
+			p->s_info = NULL;
+		}
+	}
+	up_write (&uts_sem);
+}
+
+/*
+ * Alloc a new ip_info to the current process and release
+ * the one currently owned by the current process.
+ * If kmalloc() fails, the current ip_info is left in place.
+ */
+static void vx_alloc_ip_info(void)
+{
+	struct iproot_info *ip_info =
+		kmalloc(sizeof(struct iproot_info), GFP_KERNEL);
+
+	if (!ip_info)	/* allocation failed: nothing to install */
+		return;
+	memset(ip_info, 0, sizeof(*ip_info));
+	ip_info->mark = 0xdeadbeef;
+	atomic_set(&ip_info->refcount, 1);
+	vx_release_ip_info(current->ip_info);
+	current->ip_info = ip_info;
+}
+
+/*
+ * Increase the reference count on the ip_info member of a task
+ */
+void vx_assign_ip_info (struct iproot_info *ip_info)
+{
+	if (ip_info) {
+		atomic_inc(&ip_info->refcount);
+		if (ip_info->mark != 0xdeadbeef)
+			printk("vx_assign_ip_info: broken signature %08lx\n", ip_info->mark);
+	}
+}
+
+/*
+ * Decrease the reference count on the ip_info struct
+ * Free the struct if the reference count reach 0.
+ */ +void vx_release_ip_info (struct iproot_info *ip_info) +{ + if (ip_info) + if (atomic_dec_and_test(&ip_info->refcount)) + kfree(ip_info); +} + + +static int vx_switch_user_struct(int new_context) +{ + struct user_struct *new_user; + + new_user = alloc_uid(new_context, current->uid); + if (!new_user) + return -ENOMEM; + + if (new_user != current->user) { + struct user_struct *old_user = current->user; + + atomic_inc(&new_user->processes); + atomic_dec(&old_user->processes); + current->user = new_user; + free_uid(old_user); + } else + free_uid(new_user); + return 0; +} + +static int vx_set_initpid(int flags) +{ + int ret = 0; + if (flags & VX_INFO_INIT) { + if (current->s_info == NULL) + ret = -EINVAL; + else if (current->s_info->initpid != 0) + ret = -EPERM; + else + current->s_info->initpid = current->tgid; + } + return ret; +} + + +/* new security context (syscall) */ + +/* + * Change to a new security context and reduce the capability + * basic set of the current process + */ +int vc_new_s_context(uint32_t ctx, void *data) +{ + int ret = -EPERM; + struct vcmd_new_s_context_v1 vc_data; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (ctx == -1) { + if (current->s_info == NULL + || !(current->s_info->flags & VX_INFO_LOCK)) { + /* Ok we allocate a new context. For now, we just increase */ + /* it. 
Wrap around possible, so we loop */ + static int new_xid = MAX_S_CONTEXT; + static spinlock_t alloc_ctx_lock = SPIN_LOCK_UNLOCKED; + int old_xid = current->vx_id; + int barrier = new_xid; + int valid = 0; + + spin_lock(&alloc_ctx_lock); + do { + struct task_struct *p; + + valid = 1; + if (++new_xid > MAX_S_CONTEXT) + new_xid = MIN_D_CONTEXT; + + /* Check if in use */ + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->vx_id == new_xid) { + valid = 0; + break; + } + } + read_unlock(&tasklist_lock); + + if (valid) { + current->vx_id = new_xid; + break; + } + } while (barrier != new_xid); + spin_unlock(&alloc_ctx_lock); + + if (!valid) + return -EDEADLK; + + ret = vx_switch_user_struct(new_xid); + if (ret == 0) { + current->cap_bset &= (~vc_data.remove_cap); + ret = new_xid; + vx_alloc_info(); + if (current->s_info) { + vx_set_initpid(vc_data.flags); + current->s_info->flags |= vc_data.flags; + } + } else + current->vx_id = old_xid; + } + } else if (ctx == -2) { + ret = vx_set_initpid(vc_data.flags); + if (ret == 0) { + /* We keep the same vx_id, but lower the capabilities */ + current->cap_bset &= (~vc_data.remove_cap); + ret = current->vx_id; + if (current->s_info) { + if (vc_data.flags & VX_INFO_INIT) + current->s_info->initpid = current->tgid; + current->s_info->flags |= vc_data.flags; + } + } + } else if (ctx <= 0 || ctx > MAX_S_CONTEXT) { + ret = -EINVAL; + } else if (vx_check(0, VX_ADMIN) + && capable(CAP_SYS_ADMIN) + && (current->s_info == NULL + ||(current->s_info->flags & VX_INFO_LOCK) == 0)) { + /* The root context can become any context it wants */ + int found = 0; + struct task_struct *p; + + /* Check if in use so we reuse the same context_info */ + read_lock(&tasklist_lock); + ret = ctx; + for_each_task(p) { + if (p->vx_id == ctx) { + found = 1; + if (p->s_info == NULL + || !(p->s_info->flags & VX_INFO_PRIVATE)) { + vx_release_info(current); + vx_assign_info (p); + current->s_info = p->s_info; + } + else + ret = -EPERM; + break; + } + } + 
read_unlock(&tasklist_lock); + if (ret == ctx) { + ret = vx_switch_user_struct(ctx); + if (ret == 0) { + current->vx_id = ctx; + current->cap_bset &= (~vc_data.remove_cap); + if (!found) + vx_alloc_info(); + if (current->s_info) + current->s_info->flags |= vc_data.flags; + } + } + } + return ret; +} + + +/* set ipv4 root (syscall) */ + +int vc_set_ipv4root(uint32_t nbip, void *data) +{ + int ret = -EPERM; + struct vcmd_set_ipv4root_v3 vc_data; + struct iproot_info *ip_info = current->ip_info; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if (nbip < 0 || nbip > NB_IPV4ROOT) + ret = -EINVAL; + if (!ip_info || ip_info->ipv4[0] == 0 || capable(CAP_NET_ADMIN)) + // We are allowed to change everything + ret = 0; + else if (current->ip_info) { + // We are allowed to select a subset of the currently + // installed IP numbers. No new one allowed + // We can't change the broadcast address though + int i; + int found = 0; + for (i=0; inbipv4; j++) { + if (ipi == ip_info->ipv4[j]) { + found++; + break; + } + } + } + if (found == nbip && vc_data.broadcast == ip_info->v4_bcast) + ret = 0; + } + if (ret == 0) { + int i; + + vx_alloc_ip_info(); /* release existing? 
*/ + ip_info = current->ip_info; + ip_info->nbipv4 = nbip; + for (i=0; iipv4[i] = vc_data.ip_mask_pair[i].ip; + ip_info->mask[i] = vc_data.ip_mask_pair[i].mask; + } + ip_info->v4_bcast = vc_data.broadcast; + } + return ret; +} diff -NurpP --minimal linux-2.4.22/kernel/vswitch.c linux-2.4.22-vs1.21/kernel/vswitch.c --- linux-2.4.22/kernel/vswitch.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.22-vs1.21/kernel/vswitch.c Thu Dec 11 00:49:06 2003 @@ -0,0 +1,71 @@ +/* + * linux/kernel/vswitch.c + * + * Virtual Context Support + * + * Copyright (C) 2003 Herbert Pötzl + * + * V0.01 syscall switch + * V0.02 added signal to context + * + */ + +#include +#include +#include + +#include + + +static inline int +vc_get_version(uint32_t id) +{ + return VCI_VERSION; +} + + +extern int vc_new_s_context(uint32_t, void *); +extern int vc_set_ipv4root(uint32_t, void *); + +extern int vc_get_rlimit(uint32_t, void *); +extern int vc_set_rlimit(uint32_t, void *); +extern int vc_get_rlimit_mask(uint32_t, void *); + +extern int vc_ctx_kill(uint32_t, void *); + + +asmlinkage int +sys_vserver(uint32_t cmd, uint32_t id, void *data) +{ + int ret = -EINVAL; + + switch (cmd) { + case VCMD_get_version: + ret = vc_get_version(id); + break; + + case VCMD_new_s_context: + ret = vc_new_s_context(id, data); + break; + case VCMD_set_ipv4root: + ret = vc_set_ipv4root(id, data); + break; + + case VCMD_get_rlimit: + ret = vc_get_rlimit(id, data); + break; + case VCMD_set_rlimit: + ret = vc_set_rlimit(id, data); + break; + case VCMD_get_rlimit_mask: + ret = vc_get_rlimit_mask(id, data); + break; + + case VCMD_ctx_kill: + ret = vc_ctx_kill(id, data); + break; + + } + return ret; +} + diff -NurpP --minimal linux-2.4.22/net/ipv4/af_inet.c linux-2.4.22-vs1.21/net/ipv4/af_inet.c --- linux-2.4.22/net/ipv4/af_inet.c Fri Jun 13 16:51:39 2003 +++ linux-2.4.22-vs1.21/net/ipv4/af_inet.c Thu Dec 11 00:49:10 2003 @@ -177,6 +177,8 @@ void inet_sock_destruct(struct sock *sk) if (sk->protinfo.af_inet.opt) 
kfree(sk->protinfo.af_inet.opt); + vx_release_ip_info(sk->ip_info); + sk->ip_info = NULL; dst_release(sk->dst_cache); #ifdef INET_REFCNT_DEBUG atomic_dec(&inet_sock_nr); @@ -393,6 +395,9 @@ static int inet_create(struct socket *so sk->protinfo.af_inet.mc_index = 0; sk->protinfo.af_inet.mc_list = NULL; + sk->vx_id = current->vx_id; + sk->ip_info = NULL; + #ifdef INET_REFCNT_DEBUG atomic_inc(&inet_sock_nr); #endif @@ -477,6 +482,11 @@ static int inet_bind(struct socket *sock unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* Address used for validation */ + __u32 s_addr1; + __u32 s_addr2 = 0xffffffffl; /* Optional address of the socket */ + /* bcast in ipv4root world */ + struct iproot_info *ip_info; /* If the socket has its own bind function then use it. (RAW) */ if(sk->prot->bind) @@ -485,7 +495,37 @@ static int inet_bind(struct socket *sock if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = s_addr1 = addr->sin_addr.s_addr; + ip_info = current->ip_info; + if (ip_info) { + __u32 v4_bcast = ip_info->v4_bcast; + __u32 ipv4root = ip_info->ipv4[0]; + int nbipv4 = ip_info->nbipv4; + if (s_addr == 0) { + s_addr = ipv4root; + if (nbipv4 > 1) + s_addr1 = 0; + else { + s_addr1 = ipv4root; + ip_info = NULL; + } + s_addr2 = v4_bcast; + } else if (s_addr == 0x0100007f) { + s_addr = s_addr1 = ipv4root; + ip_info = NULL; + } else if (s_addr != v4_bcast + && s_addr != ipv4root) { + int i; + for (i=0; iipv4[i]) + break; + } + if (i == nbipv4) + return -EADDRNOTAVAIL; + ip_info = NULL; + } + } + chk_addr_ret = inet_addr_type(s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. 
It is unfortunate since @@ -496,7 +536,7 @@ static int inet_bind(struct socket *sock */ if (sysctl_ip_nonlocal_bind == 0 && sk->protinfo.af_inet.freebind == 0 && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -521,13 +561,19 @@ static int inet_bind(struct socket *sock (sk->num != 0)) goto out; - sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; + sk->rcv_saddr = sk->saddr = s_addr1; + sk->rcv_saddr2 = s_addr2; + sk->ip_info = ip_info; + if (ip_info) + vx_assign_ip_info(ip_info); if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) sk->saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ if (sk->prot->get_port(sk, snum) != 0) { sk->saddr = sk->rcv_saddr = 0; + sk->ip_info = NULL; + vx_release_ip_info(ip_info); err = -EADDRINUSE; goto out; } diff -NurpP --minimal linux-2.4.22/net/ipv4/devinet.c linux-2.4.22-vs1.21/net/ipv4/devinet.c --- linux-2.4.22/net/ipv4/devinet.c Fri Jun 13 16:51:39 2003 +++ linux-2.4.22-vs1.21/net/ipv4/devinet.c Thu Dec 11 00:49:10 2003 @@ -455,6 +455,33 @@ static __inline__ int inet_abc_len(u32 a return -1; } +/* + Check that a device is not member of the ipv4root assigned to the process + Return true if this is the case + + If the process is not bound to specific IP, then it returns 0 (all + interface are fine). 
+*/ +static int devinet_notiproot (struct in_ifaddr *ifa) +{ + int ret = 0; + struct iproot_info *info = current->ip_info; + + if (info && !vx_check(0, VX_ADMIN)) { + int i; + int nbip = info->nbipv4; + __u32 addr = ifa->ifa_local; + ret = 1; + for (i=0; iipv4[i] == addr) { + ret = 0; + break; + } + } + } + return ret; +} + int devinet_ioctl(unsigned int cmd, void *arg) { @@ -558,7 +585,10 @@ int devinet_ioctl(unsigned int cmd, void ret = -EADDRNOTAVAIL; goto done; } - + if (ifa != NULL && devinet_notiproot(ifa)) { + ret = -EADDRNOTAVAIL; + goto done; + } switch(cmd) { case SIOCGIFADDR: /* Get interface address */ sin->sin_addr.s_addr = ifa->ifa_local; @@ -691,6 +721,9 @@ inet_gifconf(struct net_device *dev, cha return 0; for ( ; ifa; ifa = ifa->ifa_next) { + // We do not show other IP devices to vservers + if (devinet_notiproot(ifa)) + continue; if (!buf) { done += sizeof(ifr); continue; @@ -932,6 +965,8 @@ static int inet_dump_ifaddr(struct sk_bu read_lock(&in_dev->lock); for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + if (devinet_notiproot(ifa)) + continue; if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, diff -NurpP --minimal linux-2.4.22/net/ipv4/raw.c linux-2.4.22-vs1.21/net/ipv4/raw.c --- linux-2.4.22/net/ipv4/raw.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/net/ipv4/raw.c Thu Dec 11 00:49:10 2003 @@ -96,16 +96,47 @@ static void raw_v4_unhash(struct sock *s write_unlock_bh(&raw_v4_lock); } + +/* + Check if an address is in the list +*/ +static inline int raw_addr_in_list ( + u32 rcv_saddr1, + u32 rcv_saddr2, + u32 loc_addr, + struct iproot_info *ip_info) +{ + int ret = 0; + if (loc_addr != 0 && + (rcv_saddr1 == loc_addr || rcv_saddr2 == loc_addr)) + ret = 1; + else if (rcv_saddr1 == 0) { + /* Accept any address or only the one in the list */ + if (ip_info == NULL) + ret = 1; + else { + int n = ip_info->nbipv4; + int i; + for (i=0; iipv4[i] == loc_addr) { + ret = 1; + break; + } + 
} + } + } + return ret; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif) { struct sock *s = sk; - for (s = sk; s; s = s->next) { if (s->num == num && !(s->daddr && s->daddr != raddr) && - !(s->rcv_saddr && s->rcv_saddr != laddr) && + raw_addr_in_list(s->rcv_saddr,s->rcv_saddr2,laddr,s->ip_info) && !(s->bound_dev_if && s->bound_dev_if != dif)) break; /* gotcha */ } @@ -657,7 +688,8 @@ int raw_get_info(char *buffer, char **st struct sock *sk; for (sk = raw_v4_htable[i]; sk; sk = sk->next, num++) { - if (sk->family != PF_INET) + if (sk->family != PF_INET || + !vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) continue; pos += 128; if (pos <= offset) diff -NurpP --minimal linux-2.4.22/net/ipv4/tcp_ipv4.c linux-2.4.22-vs1.21/net/ipv4/tcp_ipv4.c --- linux-2.4.22/net/ipv4/tcp_ipv4.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/net/ipv4/tcp_ipv4.c Thu Dec 11 00:49:10 2003 @@ -180,6 +180,49 @@ static inline void tcp_bind_hash(struct sk->prev = (struct sock *) tb; } +/* + Return 1 if addr match the socket IP list + or the socket is INADDR_ANY +*/ +static inline int tcp_in_list (struct sock *sk, u32 addr) +{ + struct iproot_info *ip_info = sk->ip_info; + + if (ip_info) { + int n = ip_info->nbipv4; + int i; + + for (i=0; iipv4[i] == addr) + return 1; + } + else if (!sk->rcv_saddr || sk->rcv_saddr == addr) + return 1; + return 0; +} + +/* + Check if the addresses in sk1 conflict with those in sk2 +*/ +int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2) +{ + if (sk1->rcv_saddr) { + /* Bind to one address only */ + return tcp_in_list (sk2,sk1->rcv_saddr); + } else if (sk1->ip_info) { + /* A restricted bind(any) */ + struct iproot_info *ip_info = sk1->ip_info; + int n = ip_info->nbipv4; + int i; + + for (i=0; iipv4[i])) + return 1; + } else /* A bind(any) do not allow other bind on the same port */ + return 1; + return 0; +} + static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket 
*tb) { struct sock *sk2 = tb->owners; @@ -193,9 +236,7 @@ static inline int tcp_bind_conflict(stru if (!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) { - if (!sk2->rcv_saddr || - !sk->rcv_saddr || - (sk2->rcv_saddr == sk->rcv_saddr)) + if (tcp_ipv4_addr_conflict(sk,sk2)) break; } } @@ -414,6 +455,34 @@ void tcp_unhash(struct sock *sk) wake_up(&tcp_lhash_wait); } +/* + Check if an address is in the list +*/ +static inline int tcp_addr_in_list ( + u32 rcv_saddr, + u32 daddr, + struct iproot_info *ip_info) +{ + if (rcv_saddr == daddr) + return 1; + else if (rcv_saddr == 0) { + /* Accept any address or check the list */ + if (!ip_info) + return 1; + else { + int n = ip_info->nbipv4; + int i; + + for (i=0; iipv4[i] == daddr) + return 1; + } + } + return 0; +} + + + /* Don't inline this cruft. Here are some nice properties to * exploit here. The BSD API does not allow a listening TCP * to specify the remote port nor the remote address for the @@ -435,11 +504,10 @@ static struct sock *__tcp_v4_lookup_list #else score = 1; #endif - if(rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (tcp_addr_in_list(rcv_saddr,daddr,sk->ip_info)) score+=2; - } + else + continue; if (sk->bound_dev_if) { if (sk->bound_dev_if != dif) continue; @@ -466,8 +534,8 @@ inline struct sock *tcp_v4_lookup_listen if (sk) { if (sk->num == hnum && sk->next == NULL && - (!sk->rcv_saddr || sk->rcv_saddr == daddr) && (sk->family == PF_INET || !ipv6_only_sock(sk)) && + tcp_addr_in_list(sk->rcv_saddr,daddr,sk->ip_info) && !sk->bound_dev_if) goto sherry_cache; sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif); @@ -2189,6 +2257,9 @@ int tcp_get_info(char *buffer, char **st int uid; struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (!vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) + continue; + if (!TCP_INET_FAMILY(sk->family)) goto skip_listen; @@ -2242,7 +2313,8 @@ skip_listen: read_lock(&head->lock); for(sk = head->chain; sk; sk = sk->next, num++) { - if (!TCP_INET_FAMILY(sk->family)) + if 
(!TCP_INET_FAMILY(sk->family) || + !vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) continue; pos += TMPSZ; if (pos <= offset) @@ -2257,7 +2329,8 @@ skip_listen: for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain; tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, num++) { - if (!TCP_INET_FAMILY(tw->family)) + if (!TCP_INET_FAMILY(tw->family) || + !vx_check(tw->vx_id, VX_WATCH|VX_IDENT)) continue; pos += TMPSZ; if (pos <= offset) diff -NurpP --minimal linux-2.4.22/net/ipv4/tcp_minisocks.c linux-2.4.22-vs1.21/net/ipv4/tcp_minisocks.c --- linux-2.4.22/net/ipv4/tcp_minisocks.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/net/ipv4/tcp_minisocks.c Thu Dec 11 00:49:10 2003 @@ -380,6 +380,9 @@ void tcp_time_wait(struct sock *sk, int tw->ts_recent_stamp= tp->ts_recent_stamp; tw->pprev_death = NULL; + tw->vx_id = sk->vx_id; + tw->ip_info = NULL; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if(tw->family == PF_INET6) { memcpy(&tw->v6_daddr, @@ -651,6 +654,7 @@ struct sock *tcp_create_openreq_child(st #endif memcpy(newsk, sk, sizeof(*newsk)); + vx_assign_ip_info(newsk->ip_info); newsk->state = TCP_SYN_RECV; /* SANITY */ diff -NurpP --minimal linux-2.4.22/net/ipv4/udp.c linux-2.4.22-vs1.21/net/ipv4/udp.c --- linux-2.4.22/net/ipv4/udp.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/net/ipv4/udp.c Thu Dec 11 00:49:10 2003 @@ -110,6 +110,9 @@ rwlock_t udp_hash_lock = RW_LOCK_UNLOCKE /* Shared by v4/v6 udp. 
*/ int udp_port_rover; +int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2); + + static int udp_v4_get_port(struct sock *sk, unsigned short snum) { write_lock_bh(&udp_hash_lock); @@ -165,9 +168,7 @@ gotit: sk2 != sk && !ipv6_only_sock(sk2) && sk2->bound_dev_if == sk->bound_dev_if && - (!sk2->rcv_saddr || - !sk->rcv_saddr || - sk2->rcv_saddr == sk->rcv_saddr) && + tcp_ipv4_addr_conflict (sk2,sk) && (!sk2->reuse || !sk->reuse)) goto fail; } @@ -210,6 +211,17 @@ static void udp_v4_unhash(struct sock *s write_unlock_bh(&udp_hash_lock); } +static int udp_in_list (struct iproot_info *ip_info, u32 addr) +{ + int n = ip_info->nbipv4; + int i; + + for (i=0; iipv4[i] == addr) + return 1; + return 0; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ @@ -231,6 +243,11 @@ struct sock *udp_v4_lookup_longway(u32 s if(sk->rcv_saddr != daddr) continue; score+=2; + } else if (sk->ip_info) { + if (udp_in_list (sk->ip_info,daddr)) + score+=2; + else + continue; } if(sk->daddr) { if(sk->daddr != saddr) @@ -284,7 +301,7 @@ static inline struct sock *udp_v4_mcast_ if ((s->num != hnum) || (s->daddr && s->daddr!=rmt_addr) || (s->dport != rmt_port && s->dport != 0) || - (s->rcv_saddr && s->rcv_saddr != loc_addr) || + (s->rcv_saddr && s->rcv_saddr != loc_addr && s->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->bound_dev_if && s->bound_dev_if != dif)) continue; @@ -532,6 +549,18 @@ int udp_sendmsg(struct sock *sk, struct rt = (struct rtable*)sk_dst_check(sk, 0); if (rt == NULL) { + struct iproot_info *ip_info = current->ip_info; + + if (ip_info != NULL) { + __u32 ipv4root = ip_info->ipv4[0]; + if (ipv4root) { + if (daddr == 0x0100007f && + !vx_check(0, VX_ADMIN)) + daddr = ipv4root; + if (ufh.saddr == 0) + ufh.saddr = ipv4root; + } + } err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif); if (err) goto out; @@ -1019,7 +1048,8 @@ int udp_get_info(char *buffer, char **st struct sock *sk; for (sk = udp_hash[i]; 
sk; sk = sk->next, num++) { - if (sk->family != PF_INET) + if (sk->family != PF_INET || + !vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) continue; pos += 128; if (pos <= offset) diff -NurpP --minimal linux-2.4.22/net/ipv6/raw.c linux-2.4.22-vs1.21/net/ipv6/raw.c --- linux-2.4.22/net/ipv6/raw.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/net/ipv6/raw.c Thu Dec 11 00:49:10 2003 @@ -879,7 +879,8 @@ int raw6_get_info(char *buffer, char **s struct sock *sk; for (sk = raw_v6_htable[i]; sk; sk = sk->next, num++) { - if (sk->family != PF_INET6) + if (sk->family != PF_INET6 || + !vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) continue; pos += LINE_LEN+1; if (pos <= offset) diff -NurpP --minimal linux-2.4.22/net/ipv6/tcp_ipv6.c linux-2.4.22-vs1.21/net/ipv6/tcp_ipv6.c --- linux-2.4.22/net/ipv6/tcp_ipv6.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/net/ipv6/tcp_ipv6.c Thu Dec 11 00:49:10 2003 @@ -2027,7 +2027,8 @@ int tcp6_get_info(char *buffer, char **s int uid; struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - if (sk->family != PF_INET6) + if (sk->family != PF_INET6 || + !vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) continue; pos += LINE_LEN+1; if (pos >= offset) { @@ -2077,7 +2078,8 @@ int tcp6_get_info(char *buffer, char **s read_lock(&head->lock); for(sk = head->chain; sk; sk = sk->next, num++) { - if (sk->family != PF_INET6) + if (sk->family != PF_INET6 || + !vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) continue; pos += LINE_LEN+1; if (pos <= offset) @@ -2092,7 +2094,8 @@ int tcp6_get_info(char *buffer, char **s for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain; tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, num++) { - if (tw->family != PF_INET6) + if (tw->family != PF_INET6 || + !vx_check(tw->vx_id, VX_WATCH|VX_IDENT)) continue; pos += LINE_LEN+1; if (pos <= offset) diff -NurpP --minimal linux-2.4.22/net/ipv6/udp.c linux-2.4.22-vs1.21/net/ipv6/udp.c --- linux-2.4.22/net/ipv6/udp.c Mon Aug 25 13:44:44 2003 +++ linux-2.4.22-vs1.21/net/ipv6/udp.c Thu Dec 11 00:49:10 
2003 @@ -979,7 +979,8 @@ int udp6_get_info(char *buffer, char **s struct sock *sk; for (sk = udp_hash[i]; sk; sk = sk->next, num++) { - if (sk->family != PF_INET6) + if (sk->family != PF_INET6 || + !vx_check(sk->vx_id, VX_WATCH|VX_IDENT)) continue; pos += LINE_LEN+1; if (pos <= offset) diff -NurpP --minimal linux-2.4.22/net/unix/af_unix.c linux-2.4.22-vs1.21/net/unix/af_unix.c --- linux-2.4.22/net/unix/af_unix.c Fri Nov 29 00:53:16 2002 +++ linux-2.4.22-vs1.21/net/unix/af_unix.c Thu Dec 11 00:49:10 2003 @@ -479,6 +479,8 @@ static struct sock * unix_create1(struct sk->write_space = unix_write_space; + sk->vx_id = current->vx_id; + sk->max_ack_backlog = sysctl_unix_max_dgram_qlen; sk->destruct = unix_sock_destructor; sk->protinfo.af_unix.dentry=NULL; @@ -1756,6 +1758,9 @@ static int unix_read_proc(char *buffer, read_lock(&unix_table_lock); forall_unix_sockets (i,s) { + if (!vx_check(s->vx_id, VX_WATCH|VX_IDENT)) + continue; + unix_state_rlock(s); len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5lu",