diff -NurpP --minimal linux-2.6.10-rc1/Documentation/vserver/debug.txt linux-2.6.10-rc1-vs1.9.3/Documentation/vserver/debug.txt --- linux-2.6.10-rc1/Documentation/vserver/debug.txt 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/Documentation/vserver/debug.txt 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,171 @@ + +debug_switch: + + 0 1 + + 1 2 + + 2 4 + + 3 8 + + 4 16 + + 5 32 + + 6 64 + + 7 128 + + +debug_xid: + + 0 1 "alloc_vx_info(%d) = %p\n" + "dealloc_vx_info(%p)" + "loc_vx_info(%d) = %p (not available)" + "loc_vx_info(%d) = %p (found)" + "loc_vx_info(%d) = %p (new)" + + 1 2 "alloc_vx_info(%d)*" + "loc_vx_info(%d)*" + "locate_vx_info(%d)" + + 2 4 "get_vx_info(%p[#%d.%d])" + "put_vx_info(%p[#%d.%d])" + + 3 8 "set_vx_info(%p[#%d.%d.%d])" + "clr_vx_info(%p[#%d.%d.%d])" + "rcu_free_vx_info(%p): uc=%d" + + 4 16 "__hash_vx_info: %p[#%d]" + "__unhash_vx_info: %p[#%d]" + "__vx_dynamic_id: [#%d]" + + 5 32 "vx_migrate_task(%p,%p[#%d.%d])" + "task_get_vx_info(%p)" + + 6 64 "vx_set_init(%p[#%d],%p[#%d,%d,%d])" + + 7 128 + + +debug_nid: + + 0 1 "alloc_nx_info() = %p" + "dealloc_nx_info(%p)" + "loc_nx_info(%d) = %p (not available)" + "loc_nx_info(%d) = %p (found)" + "loc_nx_info(%d) = %p (new)" + + 1 2 "alloc_nx_info(%d)*" + "loc_nx_info(%d)*" + + 2 4 "get_nx_info(%p[#%d.%d])" + "put_nx_info(%p[#%d.%d])" + + 3 8 "set_nx_info(%p[#%d.%d.%d])" + "clr_nx_info(%p[#%d.%d.%d])" + "rcu_free_nx_info(%p): uc=%d" + + 4 16 "__hash_nx_info: %p[#%d]" + "__unhash_nx_info: %p[#%d]" + "__nx_dynamic_id: [#%d]" + + 5 32 "nx_migrate_task(%p,%p[#%d.%d])" + "task_get_nx_info(%p)" + "create_nx_info()" + + 6 64 + + 7 128 + + +debug_dlim: + + 0 1 "alloc_dl_info(%p,%d) = %p" + "dealloc_dl_info(%p)" + "locate_dl_info(%p,#%d) = %p" + + 1 2 "alloc_dl_info(%p,%d)*" + + 2 4 "get_dl_info(%p[#%d.%d])" + "put_dl_info(%p[#%d.%d])" + + 3 8 "rcu_free_dl_info(%p)" + "__hash_dl_info: %p[#%d]" + "__unhash_dl_info: %p[#%d]" + + + 4 16 "ALLOC (%p,#%d)%c inode (%d)" + "FREE (%p,#%d)%c inode" + + 5 
32 "ALLOC (%p,#%d)%c %lld bytes (%d)" + "FREE (%p,#%d)%c %lld bytes" + + 6 64 "ADJUST: %lld,%lld on %d,%d [mult=%d]" + + 7 128 "ext3_has_free_blocks(%p): free=%u, root=%u" + "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d" + + + +debug_cvirt: + + + 0 1 + + 1 2 + + 2 4 "vx_map_tgid: %p/%llx: %d -> %d" + "vx_rmap_tgid: %p/%llx: %d -> %d" + + 3 8 + + 4 16 + + 5 32 + + 6 64 + + 7 128 + + + +debug_net: + + + 0 1 + + 1 2 + + 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d" + + 3 8 "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d" + "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d" + + 4 16 "ip_route_connect(%p) %p,%p;%lx" + + 5 32 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx" + + 6 64 "sk: %p [#%d] (from %d)" + "sk,req: %p [#%d] (from %d)" + "sk,egf: %p [#%d] (from %d)" + "sk,egn: %p [#%d] (from %d)" + "tw: %p [#%d] (from %d)" + + 7 128 "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d" + "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d" + + + + +debug_limit: + + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s" + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d" + + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s" + "vx_acc_pages[%5d,%s,%2d]: %5d += %5d" + "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d" + + diff -NurpP --minimal linux-2.6.10-rc1/Makefile linux-2.6.10-rc1-vs1.9.3/Makefile --- linux-2.6.10-rc1/Makefile 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/Makefile 2004-10-31 00:41:27.000000000 +0200 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 10 -EXTRAVERSION =-rc1 +EXTRAVERSION =-rc1-vs1.9.3 NAME=Woozy Numbat # *DOCUMENTATION* diff -NurpP --minimal linux-2.6.10-rc1/arch/alpha/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/alpha/Kconfig --- linux-2.6.10-rc1/arch/alpha/Kconfig 2004-10-18 23:55:37.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/alpha/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -594,6 +594,8 @@ source "arch/alpha/oprofile/Kconfig" source "arch/alpha/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal 
linux-2.6.10-rc1/arch/alpha/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/alpha/kernel/ptrace.c --- linux-2.6.10-rc1/arch/alpha/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/alpha/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -288,6 +288,8 @@ do_sys_ptrace(long request, long pid, lo read_unlock(&tasklist_lock); if (!child) goto out_notsk; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out; if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff -NurpP --minimal linux-2.6.10-rc1/arch/alpha/kernel/systbls.S linux-2.6.10-rc1-vs1.9.3/arch/alpha/kernel/systbls.S --- linux-2.6.10-rc1/arch/alpha/kernel/systbls.S 2004-10-18 23:54:37.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/alpha/kernel/systbls.S 2004-10-31 00:41:27.000000000 +0200 @@ -291,7 +291,7 @@ sys_call_table: .quad alpha_ni_syscall /* 270 */ .quad alpha_ni_syscall .quad alpha_ni_syscall - .quad alpha_ni_syscall + .quad sys_vserver /* 273 sys_vserver */ .quad alpha_ni_syscall .quad alpha_ni_syscall /* 275 */ .quad alpha_ni_syscall diff -NurpP --minimal linux-2.6.10-rc1/arch/arm/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/arm/Kconfig --- linux-2.6.10-rc1/arch/arm/Kconfig 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/arm/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -660,6 +660,8 @@ source "drivers/mmc/Kconfig" source "arch/arm/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/arm/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/arm/kernel/ptrace.c --- linux-2.6.10-rc1/arch/arm/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/arm/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -754,6 +754,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* 
you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/arm26/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/arm26/Kconfig --- linux-2.6.10-rc1/arch/arm26/Kconfig 2004-10-18 23:54:32.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/arm26/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -216,6 +216,8 @@ source "drivers/usb/Kconfig" source "arch/arm26/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/arm26/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/arm26/kernel/ptrace.c --- linux-2.6.10-rc1/arch/arm26/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/arm26/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -691,6 +691,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/cris/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/cris/Kconfig --- linux-2.6.10-rc1/arch/cris/Kconfig 2004-10-18 23:55:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/cris/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -169,6 +169,8 @@ source "drivers/usb/Kconfig" source "arch/cris/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/h8300/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/h8300/Kconfig --- linux-2.6.10-rc1/arch/h8300/Kconfig 2004-10-18 23:54:55.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/h8300/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -183,6 +183,8 @@ source "fs/Kconfig" source "arch/h8300/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/h8300/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/h8300/kernel/ptrace.c --- 
linux-2.6.10-rc1/arch/h8300/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/h8300/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -80,6 +80,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/i386/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/i386/Kconfig --- linux-2.6.10-rc1/arch/i386/Kconfig 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/i386/Kconfig 2004-11-04 19:52:50.688736126 +0100 @@ -720,6 +720,46 @@ config HIGHMEM64G endchoice +choice + prompt "Memory Split User Space" + default SPLIT_3GB + help + A different Userspace/Kernel split allows you to + utilize up to almost 3GB of RAM without the requirement + for HIGHMEM. It also increases the available lowmem. + +config SPLIT_3GB + bool "3.0GB/1.0GB Kernel (Default)" + help + This is the default split of 3GB userspace to 1GB kernel + space, which will result in about 860MB of lowmem. + +config SPLIT_25GB + bool "2.5GB/1.5GB Kernel" + help + This split provides 2.5GB userspace and 1.5GB kernel + space, which will result in about 1370MB of lowmem. + +config SPLIT_2GB + bool "2.0GB/2.0GB Kernel" + help + This split provides 2GB userspace and 2GB kernel + space, which will result in about 1880MB of lowmem. + +config SPLIT_15GB + bool "1.5GB/2.5GB Kernel" + help + This split provides 1.5GB userspace and 2.5GB kernel + space, which will result in about 2390MB of lowmem. + +config SPLIT_1GB + bool "1.0GB/3.0GB Kernel" + help + This split provides 1GB userspace and 3GB kernel + space, which will result in about 2900MB of lowmem. 
+ +endchoice + config HIGHMEM bool depends on HIGHMEM64G || HIGHMEM4G @@ -1194,6 +1234,8 @@ source "arch/i386/oprofile/Kconfig" source "arch/i386/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/i386/kernel/entry.S linux-2.6.10-rc1-vs1.9.3/arch/i386/kernel/entry.S --- linux-2.6.10-rc1/arch/i386/kernel/entry.S 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/i386/kernel/entry.S 2004-10-31 00:41:27.000000000 +0200 @@ -855,7 +855,7 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver .long sys_mbind .long sys_get_mempolicy .long sys_set_mempolicy diff -NurpP --minimal linux-2.6.10-rc1/arch/i386/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/i386/kernel/ptrace.c --- linux-2.6.10-rc1/arch/i386/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/i386/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -260,6 +260,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/i386/kernel/sys_i386.c linux-2.6.10-rc1-vs1.9.3/arch/i386/kernel/sys_i386.c --- linux-2.6.10-rc1/arch/i386/kernel/sys_i386.c 2004-10-18 23:55:28.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/i386/kernel/sys_i386.c 2004-10-31 00:41:27.000000000 +0200 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -217,7 +218,7 @@ asmlinkage int sys_uname(struct old_utsn if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -225,6 +226,7 @@ asmlinkage int sys_uname(struct 
old_utsn asmlinkage int sys_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -233,15 +235,16 @@ asmlinkage int sys_olduname(struct oldol down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); diff -NurpP --minimal linux-2.6.10-rc1/arch/i386/mm/hugetlbpage.c linux-2.6.10-rc1-vs1.9.3/arch/i386/mm/hugetlbpage.c --- linux-2.6.10-rc1/arch/i386/mm/hugetlbpage.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/i386/mm/hugetlbpage.c 2004-10-31 00:41:27.000000000 +0200 @@ -42,7 +42,8 @@ static void set_huge_pte(struct mm_struc { pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) { entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); @@ -82,7 +83,8 @@ int copy_hugetlb_page_range(struct mm_st ptepage = pte_page(entry); get_page(ptepage); set_pte(dst_pte, 
entry); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -218,7 +220,8 @@ void unmap_hugepage_range(struct vm_area page = pte_page(pte); put_page(page); } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff -NurpP --minimal linux-2.6.10-rc1/arch/ia64/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/ia64/Kconfig --- linux-2.6.10-rc1/arch/ia64/Kconfig 2004-10-18 23:55:27.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ia64/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -387,6 +387,8 @@ source "arch/ia64/oprofile/Kconfig" source "arch/ia64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/ia64/ia32/binfmt_elf32.c linux-2.6.10-rc1-vs1.9.3/arch/ia64/ia32/binfmt_elf32.c --- linux-2.6.10-rc1/arch/ia64/ia32/binfmt_elf32.c 2004-10-18 23:55:18.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ia64/ia32/binfmt_elf32.c 2004-11-04 19:52:50.706733282 +0100 @@ -187,7 +187,7 @@ ia64_elf32_init (struct pt_regs *regs) int ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i; @@ -204,8 +204,10 @@ ia32_setup_arg_pages (struct linux_binpr if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) - >> PAGE_SHIFT)) { + grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) + >> PAGE_SHIFT; + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(mm, grow)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -226,7 +228,9 @@ ia32_setup_arg_pages (struct linux_binpr mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)? 
PAGE_COPY_EXEC: PAGE_COPY; insert_vm_struct(current->mm, mpnt); - current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + // current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(current->mm, current->mm->total_vm - vma_pages(mpnt)); + current->mm->stack_vm = current->mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.10-rc1/arch/ia64/kernel/entry.S linux-2.6.10-rc1-vs1.9.3/arch/ia64/kernel/entry.S --- linux-2.6.10-rc1/arch/ia64/kernel/entry.S 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ia64/kernel/entry.S 2004-10-31 00:41:27.000000000 +0200 @@ -1526,7 +1526,7 @@ sys_call_table: data8 sys_mq_notify data8 sys_mq_getsetattr data8 sys_ni_syscall // reserved for kexec_load - data8 sys_ni_syscall + data8 sys_vserver data8 sys_setaltroot // 1270 data8 sys_ni_syscall data8 sys_ni_syscall diff -NurpP --minimal linux-2.6.10-rc1/arch/ia64/kernel/perfmon.c linux-2.6.10-rc1-vs1.9.3/arch/ia64/kernel/perfmon.c --- linux-2.6.10-rc1/arch/ia64/kernel/perfmon.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ia64/kernel/perfmon.c 2004-11-04 19:54:53.296358186 +0100 @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include @@ -2344,7 +2346,8 @@ pfm_smpl_buffer_alloc(struct task_struct */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; + // mm->total_vm += size >> PAGE_SHIFT; + vx_vmpages_add(mm, size >> PAGE_SHIFT); vm_stat_account(vma); up_write(&task->mm->mmap_sem); diff -NurpP --minimal linux-2.6.10-rc1/arch/ia64/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/ia64/kernel/ptrace.c --- linux-2.6.10-rc1/arch/ia64/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ia64/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -1339,6 +1339,9 @@ sys_ptrace (long request, pid_t pid, uns read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + 
goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; diff -NurpP --minimal linux-2.6.10-rc1/arch/ia64/mm/fault.c linux-2.6.10-rc1-vs1.9.3/arch/ia64/mm/fault.c --- linux-2.6.10-rc1/arch/ia64/mm/fault.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ia64/mm/fault.c 2004-11-04 19:52:50.730729490 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -33,12 +34,19 @@ expand_backing_store (struct vm_area_str grow = PAGE_SIZE >> PAGE_SHIFT; if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur - || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur)) + || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > + current->signal->rlim[RLIMIT_AS].rlim_cur)) + return -ENOMEM; + if (!vx_vmpages_avail(vma->vm_mm, grow) || + ((vma->vm_flags & VM_LOCKED) && + !vx_vmlocked_avail(vma->vm_mm, grow))) return -ENOMEM; vma->vm_end += PAGE_SIZE; - vma->vm_mm->total_vm += grow; + // vma->vm_mm->total_vm += grow; + vx_vmpages_add(vma->vm_mm, grow); if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; + // vma->vm_mm->locked_vm += grow; + vx_vmlocked_add(vma->vm_mm, grow); __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow); return 0; } diff -NurpP --minimal linux-2.6.10-rc1/arch/ia64/mm/hugetlbpage.c linux-2.6.10-rc1-vs1.9.3/arch/ia64/mm/hugetlbpage.c --- linux-2.6.10-rc1/arch/ia64/mm/hugetlbpage.c 2004-10-18 23:54:27.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ia64/mm/hugetlbpage.c 2004-10-31 00:41:27.000000000 +0200 @@ -65,7 +65,8 @@ set_huge_pte (struct mm_struct *mm, stru { pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) { entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); @@ -108,7 +109,8 @@ int copy_hugetlb_page_range(struct mm_st ptepage = pte_page(entry); get_page(ptepage); 
set_pte(dst_pte, entry); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -249,7 +251,8 @@ void unmap_hugepage_range(struct vm_area put_page(page); pte_clear(pte); } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff -NurpP --minimal linux-2.6.10-rc1/arch/m68k/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/m68k/Kconfig --- linux-2.6.10-rc1/arch/m68k/Kconfig 2004-10-18 23:54:32.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/m68k/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -650,6 +650,8 @@ source "fs/Kconfig" source "arch/m68k/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/m68k/atari/stram.c linux-2.6.10-rc1-vs1.9.3/arch/m68k/atari/stram.c --- linux-2.6.10-rc1/arch/m68k/atari/stram.c 2004-10-18 23:54:54.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/m68k/atari/stram.c 2004-10-31 00:41:27.000000000 +0200 @@ -635,7 +635,8 @@ static inline void unswap_pte(struct vm_ set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); swap_free(entry); get_page(page); - ++vma->vm_mm->rss; + // ++vma->vm_mm->rss; + vx_rsspages_inc(vma->vm_mm); } static inline void unswap_pmd(struct vm_area_struct * vma, pmd_t *dir, diff -NurpP --minimal linux-2.6.10-rc1/arch/m68k/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/m68k/kernel/ptrace.c --- linux-2.6.10-rc1/arch/m68k/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/m68k/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -140,6 +140,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff 
-NurpP --minimal linux-2.6.10-rc1/arch/m68knommu/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/m68knommu/Kconfig --- linux-2.6.10-rc1/arch/m68knommu/Kconfig 2004-10-18 23:54:39.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/m68knommu/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -515,6 +515,8 @@ source "fs/Kconfig" source "arch/m68knommu/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/m68knommu/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/m68knommu/kernel/ptrace.c --- linux-2.6.10-rc1/arch/m68knommu/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/m68knommu/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -124,6 +124,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/mips/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/mips/Kconfig --- linux-2.6.10-rc1/arch/mips/Kconfig 2004-10-18 23:54:08.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/mips/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -1558,6 +1558,8 @@ source "fs/Kconfig" source "arch/mips/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/mips/kernel/irixelf.c linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/irixelf.c --- linux-2.6.10-rc1/arch/mips/kernel/irixelf.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/irixelf.c 2004-10-31 00:41:27.000000000 +0200 @@ -686,7 +686,8 @@ static int load_irix_binary(struct linux /* Do this so that we can load the interpreter, if need be. We will * change some of these later. 
*/ - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); setup_arg_pages(bprm, EXSTACK_DEFAULT); current->mm->start_stack = bprm->p; diff -NurpP --minimal linux-2.6.10-rc1/arch/mips/kernel/linux32.c linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/linux32.c --- linux-2.6.10-rc1/arch/mips/kernel/linux32.c 2004-10-18 23:54:37.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/linux32.c 2004-10-31 00:41:27.000000000 +0200 @@ -1208,7 +1208,7 @@ asmlinkage long sys32_newuname(struct ne int ret = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (copy_to_user(name, vx_new_utsname(), sizeof *name)) ret = -EFAULT; up_read(&uts_sem); diff -NurpP --minimal linux-2.6.10-rc1/arch/mips/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/ptrace.c --- linux-2.6.10-rc1/arch/mips/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -76,6 +76,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/mips/kernel/syscall.c linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/syscall.c --- linux-2.6.10-rc1/arch/mips/kernel/syscall.c 2004-10-18 23:54:55.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/syscall.c 2004-10-31 00:41:27.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -209,7 +210,7 @@ out: */ asmlinkage int sys_uname(struct old_utsname * name) { - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) return 0; return -EFAULT; } @@ -220,21 +221,23 @@ asmlinkage int sys_uname(struct old_utsn asmlinkage int sys_olduname(struct oldold_utsname * name) { int error; + 
struct new_utsname *ptr; if (!name) return -EFAULT; if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) return -EFAULT; - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); error = error ? 
-EFAULT : 0; @@ -260,10 +263,10 @@ asmlinkage int _sys_sysmips(int cmd, lon return -EFAULT; down_write(&uts_sem); - strncpy(system_utsname.nodename, nodename, len); + strncpy(vx_new_uts(nodename), nodename, len); nodename[__NEW_UTS_LEN] = '\0'; - strlcpy(system_utsname.nodename, nodename, - sizeof(system_utsname.nodename)); + strlcpy(vx_new_uts(nodename), nodename, + sizeof(vx_new_uts(nodename))); up_write(&uts_sem); return 0; } diff -NurpP --minimal linux-2.6.10-rc1/arch/mips/kernel/sysirix.c linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/sysirix.c --- linux-2.6.10-rc1/arch/mips/kernel/sysirix.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/mips/kernel/sysirix.c 2004-10-31 00:41:27.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -582,7 +583,8 @@ asmlinkage int irix_brk(unsigned long br /* * Check if we have enough memory.. */ - if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { + if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT) || + !vx_vmpages_avail(mm, (newbrk-oldbrk) >> PAGE_SHIFT)) { ret = -ENOMEM; goto out; } diff -NurpP --minimal linux-2.6.10-rc1/arch/parisc/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/parisc/Kconfig --- linux-2.6.10-rc1/arch/parisc/Kconfig 2004-10-18 23:54:37.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/parisc/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -190,6 +190,8 @@ source "arch/parisc/oprofile/Kconfig" source "arch/parisc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/parisc/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/parisc/kernel/ptrace.c --- linux-2.6.10-rc1/arch/parisc/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/parisc/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -110,6 +110,9 @@ long sys_ptrace(long request, pid_t pid, read_unlock(&tasklist_lock); if (!child) goto out; + if 
(!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; diff -NurpP --minimal linux-2.6.10-rc1/arch/parisc/kernel/sys_parisc32.c linux-2.6.10-rc1-vs1.9.3/arch/parisc/kernel/sys_parisc32.c --- linux-2.6.10-rc1/arch/parisc/kernel/sys_parisc32.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/parisc/kernel/sys_parisc32.c 2004-10-31 00:41:27.000000000 +0200 @@ -680,6 +680,7 @@ asmlinkage int sys32_sysinfo(struct sysi do { seq = read_seqbegin(&xtime_lock); + /* requires vx virtualization */ val.uptime = jiffies / HZ; val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/ppc/Kconfig --- linux-2.6.10-rc1/arch/ppc/Kconfig 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -1232,6 +1232,8 @@ source "arch/ppc/oprofile/Kconfig" source "arch/ppc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc/kernel/misc.S linux-2.6.10-rc1-vs1.9.3/arch/ppc/kernel/misc.S --- linux-2.6.10-rc1/arch/ppc/kernel/misc.S 2004-10-18 23:55:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc/kernel/misc.S 2004-10-31 00:41:27.000000000 +0200 @@ -1435,7 +1435,7 @@ _GLOBAL(sys_call_table) .long ppc_fadvise64_64 .long sys_ni_syscall /* 255 - rtas (used on ppc64) */ .long sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .long sys_ni_syscall /* 257 reserved for vserver */ + .long sys_vserver .long sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ .long sys_ni_syscall /* 259 reserved for new sys_mbind */ .long sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/ppc/kernel/ptrace.c --- 
linux-2.6.10-rc1/arch/ppc/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -261,6 +261,8 @@ int sys_ptrace(long request, long pid, l read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc/kernel/syscalls.c linux-2.6.10-rc1-vs1.9.3/arch/ppc/kernel/syscalls.c --- linux-2.6.10-rc1/arch/ppc/kernel/syscalls.c 2004-10-18 23:55:28.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc/kernel/syscalls.c 2004-10-31 00:41:27.000000000 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -229,7 +230,7 @@ int sys_uname(struct old_utsname __user int err = -EFAULT; down_read(&uts_sem); - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) err = 0; up_read(&uts_sem); return err; @@ -238,6 +239,7 @@ int sys_uname(struct old_utsname __user int sys_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -245,15 +247,16 @@ int sys_olduname(struct oldold_utsname _ return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= 
__copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc64/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/ppc64/Kconfig --- linux-2.6.10-rc1/arch/ppc64/Kconfig 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc64/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -354,6 +354,8 @@ source "arch/ppc64/oprofile/Kconfig" source "arch/ppc64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc64/kernel/misc.S linux-2.6.10-rc1-vs1.9.3/arch/ppc64/kernel/misc.S --- linux-2.6.10-rc1/arch/ppc64/kernel/misc.S 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc64/kernel/misc.S 2004-10-31 00:41:27.000000000 +0200 @@ -951,7 +951,7 @@ _GLOBAL(sys_call_table32) .llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ .llong .ppc_rtas /* 255 */ .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 reserved for vserver */ + .llong .sys_vserver .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ .llong .compat_mbind .llong .compat_get_mempolicy /* 260 */ diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc64/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/ppc64/kernel/ptrace.c --- linux-2.6.10-rc1/arch/ppc64/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc64/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -76,6 +76,8 @@ int sys_ptrace(long request, long pid, l read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), 
VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc64/kernel/sys_ppc32.c linux-2.6.10-rc1-vs1.9.3/arch/ppc64/kernel/sys_ppc32.c --- linux-2.6.10-rc1/arch/ppc64/kernel/sys_ppc32.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc64/kernel/sys_ppc32.c 2004-10-31 00:41:27.000000000 +0200 @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -1173,6 +1174,7 @@ asmlinkage long sys32_time(compat_time_t asmlinkage int sys32_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -1180,15 +1182,16 @@ asmlinkage int sys32_olduname(struct old return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); diff -NurpP --minimal linux-2.6.10-rc1/arch/ppc64/mm/hugetlbpage.c linux-2.6.10-rc1-vs1.9.3/arch/ppc64/mm/hugetlbpage.c --- 
linux-2.6.10-rc1/arch/ppc64/mm/hugetlbpage.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/ppc64/mm/hugetlbpage.c 2004-10-31 00:41:27.000000000 +0200 @@ -125,7 +125,8 @@ static void setup_huge_pte(struct mm_str hugepte_t entry; int i; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); entry = mk_hugepte(page, write_access); for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) set_hugepte(ptep+i, entry); @@ -288,7 +289,8 @@ int copy_hugetlb_page_range(struct mm_st /* This is the first hugepte in a batch */ ptepage = hugepte_page(entry); get_page(ptepage); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); } set_hugepte(dst_pte, entry); @@ -411,7 +413,8 @@ void unmap_hugepage_range(struct vm_area } put_cpu(); - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); } int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) diff -NurpP --minimal linux-2.6.10-rc1/arch/s390/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/s390/Kconfig --- linux-2.6.10-rc1/arch/s390/Kconfig 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/s390/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -461,6 +461,8 @@ source "arch/s390/oprofile/Kconfig" source "arch/s390/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/s390/kernel/compat_exec.c linux-2.6.10-rc1-vs1.9.3/arch/s390/kernel/compat_exec.c --- linux-2.6.10-rc1/arch/s390/kernel/compat_exec.c 2004-10-18 23:55:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/s390/kernel/compat_exec.c 2004-10-31 00:41:27.000000000 +0200 @@ -36,7 +36,7 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, 
grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i; @@ -53,7 +53,10 @@ int setup_arg_pages32(struct linux_binpr if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + grow = (STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) + >> PAGE_SHIFT; + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(mm, grow)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -69,7 +72,9 @@ int setup_arg_pages32(struct linux_binpr mpnt->vm_page_prot = PAGE_COPY; mpnt->vm_flags = VM_STACK_FLAGS; insert_vm_struct(mm, mpnt); - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + // mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.10-rc1/arch/s390/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/s390/kernel/ptrace.c --- linux-2.6.10-rc1/arch/s390/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/s390/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -695,9 +695,11 @@ sys_ptrace(long request, long pid, long read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = do_ptrace(child, request, addr, data); - +out_tsk: put_task_struct(child); out: unlock_kernel(); diff -NurpP --minimal linux-2.6.10-rc1/arch/s390/kernel/syscalls.S linux-2.6.10-rc1-vs1.9.3/arch/s390/kernel/syscalls.S --- linux-2.6.10-rc1/arch/s390/kernel/syscalls.S 2004-10-18 23:53:13.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/s390/kernel/syscalls.S 2004-10-31 00:41:27.000000000 +0200 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) 
SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys_vserver) SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) diff -NurpP --minimal linux-2.6.10-rc1/arch/sh/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/sh/Kconfig --- linux-2.6.10-rc1/arch/sh/Kconfig 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sh/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -794,6 +794,8 @@ source "arch/sh/oprofile/Kconfig" source "arch/sh/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/sh/kernel/kgdb_stub.c linux-2.6.10-rc1-vs1.9.3/arch/sh/kernel/kgdb_stub.c --- linux-2.6.10-rc1/arch/sh/kernel/kgdb_stub.c 2004-10-18 23:53:43.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sh/kernel/kgdb_stub.c 2004-10-31 00:41:27.000000000 +0200 @@ -412,7 +412,7 @@ static struct task_struct *get_thread(in if (pid == PID_MAX) pid = 0; /* First check via PID */ - thread = find_task_by_pid(pid); + thread = find_task_by_real_pid(pid); if (thread) return thread; diff -NurpP --minimal linux-2.6.10-rc1/arch/sh/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/sh/kernel/ptrace.c --- linux-2.6.10-rc1/arch/sh/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sh/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -108,6 +108,8 @@ asmlinkage int sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/sh/mm/hugetlbpage.c linux-2.6.10-rc1-vs1.9.3/arch/sh/mm/hugetlbpage.c --- linux-2.6.10-rc1/arch/sh/mm/hugetlbpage.c 2004-10-18 
23:54:32.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sh/mm/hugetlbpage.c 2004-10-31 00:41:27.000000000 +0200 @@ -62,7 +62,8 @@ static void set_huge_pte(struct mm_struc unsigned long i; pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) entry = pte_mkwrite(pte_mkdirty(mk_pte(page, @@ -115,7 +116,8 @@ int copy_hugetlb_page_range(struct mm_st pte_val(entry) += PAGE_SIZE; dst_pte++; } - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -206,7 +208,8 @@ void unmap_hugepage_range(struct vm_area pte++; } } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/sparc/Kconfig --- linux-2.6.10-rc1/arch/sparc/Kconfig 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -382,6 +382,8 @@ source "drivers/char/watchdog/Kconfig" source "arch/sparc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/sparc/kernel/ptrace.c --- linux-2.6.10-rc1/arch/sparc/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -321,6 +321,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, ESRCH); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { 
diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc/kernel/sys_sparc.c linux-2.6.10-rc1-vs1.9.3/arch/sparc/kernel/sys_sparc.c --- linux-2.6.10-rc1/arch/sparc/kernel/sys_sparc.c 2004-10-18 23:53:50.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc/kernel/sys_sparc.c 2004-10-31 00:41:27.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -471,13 +472,13 @@ asmlinkage int sys_getdomainname(char __ down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if (copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc/kernel/systbls.S linux-2.6.10-rc1-vs1.9.3/arch/sparc/kernel/systbls.S --- linux-2.6.10-rc1/arch/sparc/kernel/systbls.S 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc/kernel/systbls.S 2004-10-31 00:41:27.000000000 +0200 @@ -72,7 +72,7 @@ sys_call_table: /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl /*255*/ .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .long sys_setaltroot, sys_add_key, sys_request_key, sys_keyctl diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc64/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/sparc64/Kconfig --- linux-2.6.10-rc1/arch/sparc64/Kconfig 
2004-10-18 23:55:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc64/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -608,6 +608,8 @@ source "arch/sparc64/oprofile/Kconfig" source "arch/sparc64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/binfmt_aout32.c --- linux-2.6.10-rc1/arch/sparc64/kernel/binfmt_aout32.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/binfmt_aout32.c 2004-10-31 00:41:27.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -239,7 +240,8 @@ static int load_aout32_binary(struct lin current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc64/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/ptrace.c --- linux-2.6.10-rc1/arch/sparc64/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -172,6 +172,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, ESRCH); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc64/kernel/sys_sparc.c linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/sys_sparc.c --- linux-2.6.10-rc1/arch/sparc64/kernel/sys_sparc.c 2004-10-18 23:54:31.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/sys_sparc.c 2004-10-31 00:41:27.000000000 +0200 @@ -25,6 
+25,7 @@ #include #include #include +#include #include #include @@ -465,13 +466,13 @@ asmlinkage long sys_getdomainname(char _ down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if (copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc64/kernel/systbls.S linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/systbls.S --- linux-2.6.10-rc1/arch/sparc64/kernel/systbls.S 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc64/kernel/systbls.S 2004-10-31 00:41:27.000000000 +0200 @@ -73,7 +73,7 @@ sys_call_table32: /*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl .word sys_ni_syscall, sys32_clock_settime, compat_clock_gettime, compat_clock_getres, sys32_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys32_timer_create, sys_vserver, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys_setaltroot, sys_add_key, sys_request_key, sys_keyctl @@ -139,7 +139,7 @@ sys_call_table: /*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl .word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, 
sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_setaltroot, sys_add_key, sys_request_key, sys_keyctl diff -NurpP --minimal linux-2.6.10-rc1/arch/sparc64/mm/hugetlbpage.c linux-2.6.10-rc1-vs1.9.3/arch/sparc64/mm/hugetlbpage.c --- linux-2.6.10-rc1/arch/sparc64/mm/hugetlbpage.c 2004-10-18 23:54:38.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/sparc64/mm/hugetlbpage.c 2004-10-31 00:41:27.000000000 +0200 @@ -59,7 +59,8 @@ static void set_huge_pte(struct mm_struc unsigned long i; pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) entry = pte_mkwrite(pte_mkdirty(mk_pte(page, @@ -112,7 +113,8 @@ int copy_hugetlb_page_range(struct mm_st pte_val(entry) += PAGE_SIZE; dst_pte++; } - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -203,7 +205,8 @@ void unmap_hugepage_range(struct vm_area pte++; } } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff -NurpP --minimal linux-2.6.10-rc1/arch/um/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/um/Kconfig --- linux-2.6.10-rc1/arch/um/Kconfig 2004-10-18 23:54:08.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/um/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -219,6 +219,8 @@ source "net/Kconfig" source "fs/Kconfig" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/um/kernel/process_kern.c linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/process_kern.c --- 
linux-2.6.10-rc1/arch/um/kernel/process_kern.c 2004-10-18 23:54:30.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/process_kern.c 2004-11-04 19:52:50.731729331 +0100 @@ -18,6 +18,8 @@ #include "linux/capability.h" #include "linux/vmalloc.h" #include "linux/spinlock.h" +#include "linux/vs_cvirt.h" + #include "asm/unistd.h" #include "asm/mman.h" #include "asm/segment.h" diff -NurpP --minimal linux-2.6.10-rc1/arch/um/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/ptrace.c --- linux-2.6.10-rc1/arch/um/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -53,6 +53,8 @@ int sys_ptrace(long request, long pid, l read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/um/kernel/sys_call_table.c linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/sys_call_table.c --- linux-2.6.10-rc1/arch/um/kernel/sys_call_table.c 2004-10-18 23:54:55.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/sys_call_table.c 2004-11-04 19:52:50.741727751 +0100 @@ -51,6 +51,7 @@ extern syscall_handler_t old_mmap_i386; extern syscall_handler_t old_select; extern syscall_handler_t sys_modify_ldt; extern syscall_handler_t sys_rt_sigsuspend; +extern syscall_handler_t sys_vserver; syscall_handler_t *sys_call_table[] = { [ __NR_restart_syscall ] = (syscall_handler_t *) sys_restart_syscall, @@ -305,7 +306,7 @@ syscall_handler_t *sys_call_table[] = { [ __NR_tgkill ] (syscall_handler_t *) sys_tgkill, [ __NR_utimes ] (syscall_handler_t *) sys_utimes, [ __NR_fadvise64_64 ] (syscall_handler_t *) sys_fadvise64_64, - [ __NR_vserver ] (syscall_handler_t *) sys_ni_syscall, + [ __NR_vserver ] (syscall_handler_t *) sys_vserver, ARCH_SYSCALLS [ LAST_SYSCALL + 1 ... 
NR_syscalls ] = diff -NurpP --minimal linux-2.6.10-rc1/arch/um/kernel/syscall_kern.c linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/syscall_kern.c --- linux-2.6.10-rc1/arch/um/kernel/syscall_kern.c 2004-10-18 23:54:39.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/um/kernel/syscall_kern.c 2004-10-31 00:41:27.000000000 +0200 @@ -15,6 +15,8 @@ #include "linux/unistd.h" #include "linux/slab.h" #include "linux/utime.h" +#include + #include "asm/mman.h" #include "asm/uaccess.h" #include "asm/ipc.h" @@ -224,7 +226,7 @@ int sys_uname(struct old_utsname * name) if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -232,6 +234,7 @@ int sys_uname(struct old_utsname * name) int sys_olduname(struct oldold_utsname * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -240,19 +243,20 @@ int sys_olduname(struct oldold_utsname * down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname, + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname, __OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename, + error |= __copy_to_user(&name->nodename,ptr->nodename, __OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release, + error |= __copy_to_user(&name->release,ptr->release, __OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version, + error |= __copy_to_user(&name->version,ptr->version, __OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine, + error |= __copy_to_user(&name->machine,ptr->machine, __OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); 
diff -NurpP --minimal linux-2.6.10-rc1/arch/v850/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/v850/Kconfig --- linux-2.6.10-rc1/arch/v850/Kconfig 2004-10-18 23:54:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/v850/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -304,6 +304,8 @@ source "drivers/usb/Kconfig" source "arch/v850/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/v850/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/v850/kernel/ptrace.c --- linux-2.6.10-rc1/arch/v850/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/v850/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -138,6 +138,8 @@ int sys_ptrace(long request, long pid, l read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; rval = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/x86_64/Kconfig linux-2.6.10-rc1-vs1.9.3/arch/x86_64/Kconfig --- linux-2.6.10-rc1/arch/x86_64/Kconfig 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/x86_64/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -439,6 +439,8 @@ source "arch/x86_64/oprofile/Kconfig" source "arch/x86_64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.10-rc1/arch/x86_64/ia32/ia32_aout.c linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/ia32_aout.c --- linux-2.6.10-rc1/arch/x86_64/ia32/ia32_aout.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/ia32_aout.c 2004-10-31 00:41:27.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -309,7 +310,8 @@ static int load_aout_binary(struct linux (current->mm->start_brk = N_BSSADDR(ex)); current->mm->free_area_cache = TASK_UNMAPPED_BASE; - current->mm->rss = 0; + // current->mm->rss = 0; + 
vx_rsspages_sub(current->mm, current->mm->rss); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff -NurpP --minimal linux-2.6.10-rc1/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/ia32_binfmt.c --- linux-2.6.10-rc1/arch/x86_64/ia32/ia32_binfmt.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/ia32_binfmt.c 2004-10-31 00:41:27.000000000 +0200 @@ -331,7 +331,7 @@ static void elf32_init(struct pt_regs *r int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i; @@ -348,7 +348,10 @@ int setup_arg_pages(struct linux_binprm if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) + >> PAGE_SHIFT; + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(mm, grow)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -369,7 +372,9 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
PAGE_COPY_EXEC : PAGE_COPY; insert_vm_struct(mm, mpnt); - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + // mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.10-rc1/arch/x86_64/ia32/ia32entry.S linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/ia32entry.S --- linux-2.6.10-rc1/arch/x86_64/ia32/ia32entry.S 2004-10-18 23:54:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/ia32entry.S 2004-10-31 00:41:27.000000000 +0200 @@ -575,7 +575,7 @@ ia32_sys_call_table: .quad sys_tgkill /* 270 */ .quad compat_sys_utimes .quad sys32_fadvise64_64 - .quad quiet_ni_syscall /* sys_vserver */ + .quad sys_vserver .quad sys_mbind .quad compat_get_mempolicy /* 275 */ .quad sys_set_mempolicy diff -NurpP --minimal linux-2.6.10-rc1/arch/x86_64/ia32/sys_ia32.c linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/sys_ia32.c --- linux-2.6.10-rc1/arch/x86_64/ia32/sys_ia32.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/x86_64/ia32/sys_ia32.c 2004-10-31 00:41:27.000000000 +0200 @@ -1051,6 +1051,7 @@ asmlinkage long sys32_mmap2(unsigned lon asmlinkage long sys32_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -1059,13 +1060,14 @@ asmlinkage long sys32_olduname(struct ol down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); __put_user(0,name->sysname+__OLD_UTS_LEN); - __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); __put_user(0,name->nodename+__OLD_UTS_LEN); - __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); __put_user(0,name->release+__OLD_UTS_LEN); 
- __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); __put_user(0,name->version+__OLD_UTS_LEN); { char *arch = "x86_64"; @@ -1088,7 +1090,7 @@ long sys32_uname(struct old_utsname __us if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); diff -NurpP --minimal linux-2.6.10-rc1/arch/x86_64/kernel/ptrace.c linux-2.6.10-rc1-vs1.9.3/arch/x86_64/kernel/ptrace.c --- linux-2.6.10-rc1/arch/x86_64/kernel/ptrace.c 2004-10-31 00:41:01.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/x86_64/kernel/ptrace.c 2004-10-31 00:41:27.000000000 +0200 @@ -209,6 +209,8 @@ asmlinkage long sys_ptrace(long request, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff -NurpP --minimal linux-2.6.10-rc1/arch/x86_64/kernel/sys_x86_64.c linux-2.6.10-rc1-vs1.9.3/arch/x86_64/kernel/sys_x86_64.c --- linux-2.6.10-rc1/arch/x86_64/kernel/sys_x86_64.c 2004-10-18 23:54:38.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/arch/x86_64/kernel/sys_x86_64.c 2004-10-31 00:41:27.000000000 +0200 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -146,7 +147,7 @@ asmlinkage long sys_uname(struct new_uts { int err; down_read(&uts_sem); - err = copy_to_user(name, &system_utsname, sizeof (*name)); + err = copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); diff -NurpP --minimal linux-2.6.10-rc1/drivers/char/tty_io.c linux-2.6.10-rc1-vs1.9.3/drivers/char/tty_io.c --- linux-2.6.10-rc1/drivers/char/tty_io.c 2004-10-31 00:41:01.000000000 
+0200 +++ linux-2.6.10-rc1-vs1.9.3/drivers/char/tty_io.c 2004-10-31 00:41:27.000000000 +0200 @@ -102,6 +102,7 @@ #include #include #include +#include #include @@ -2092,13 +2093,16 @@ static int tiocsctty(struct tty_struct * static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { + pid_t pgrp; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; - return put_user(real_tty->pgrp, p); + + pgrp = vx_map_pid(real_tty->pgrp); + return put_user(pgrp, p); } static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -2116,6 +2120,8 @@ static int tiocspgrp(struct tty_struct * return -ENOTTY; if (get_user(pgrp, p)) return -EFAULT; + + pgrp = vx_rmap_pid(pgrp); if (pgrp < 0) return -EINVAL; if (session_of_pgrp(pgrp) != current->signal->session) diff -NurpP --minimal linux-2.6.10-rc1/fs/attr.c linux-2.6.10-rc1-vs1.9.3/fs/attr.c --- linux-2.6.10-rc1/fs/attr.c 2004-10-18 23:53:21.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/attr.c 2004-10-31 00:41:27.000000000 +0200 @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include /* Taken over from the old code... 
*/ @@ -55,6 +58,31 @@ int inode_change_ok(struct inode *inode, if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) goto error; } + + /* Check for evil vserver activity */ + if (vx_check(0, VX_ADMIN)) + goto fine; + + if (IS_BARRIER(inode)) { + printk(KERN_WARNING + "VSW: xid=%d messing with the barrier.\n", + vx_current_xid()); + goto error; + } + switch (inode->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + printk(KERN_WARNING + "VSW: xid=%d messing with the procfs.\n", + vx_current_xid()); + goto error; + case DEVPTS_SUPER_MAGIC: + if (vx_check(inode->i_xid, VX_IDENT)) + goto fine; + printk(KERN_WARNING + "VSW: xid=%d messing with the devpts.\n", + vx_current_xid()); + goto error; + } fine: retval = 0; error: @@ -63,6 +91,24 @@ error: EXPORT_SYMBOL(inode_change_ok); +int inode_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = inode->i_flags; + newflags = oldflags & ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= S_IMMUTABLE; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= S_IUNLINK; + if (flags & ATTR_FLAG_BARRIER) + newflags |= S_BARRIER; + + if (oldflags ^ newflags) + inode->i_flags = newflags; + return 0; +} + int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; @@ -86,6 +132,8 @@ int inode_setattr(struct inode * inode, inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if (ia_valid & ATTR_XID) + inode->i_xid = attr->ia_xid; if (ia_valid & ATTR_ATIME) inode->i_atime = attr->ia_atime; if (ia_valid & ATTR_MTIME) @@ -99,6 +147,8 @@ int inode_setattr(struct inode * inode, mode &= ~S_ISGID; inode->i_mode = mode; } + if (ia_valid & ATTR_ATTR_FLAG) + inode_setattr_flags(inode, attr->ia_attr_flags); mark_inode_dirty(inode); out: return error; @@ -114,6 +164,8 @@ int setattr_mask(unsigned int ia_valid) dn_mask |= DN_ATTRIB; if (ia_valid & ATTR_GID) dn_mask |= DN_ATTRIB; + if (ia_valid 
& ATTR_XID) + dn_mask |= DN_ATTRIB; if (ia_valid & ATTR_SIZE) dn_mask |= DN_MODIFY; /* both times implies a utime(s) call */ @@ -177,7 +229,8 @@ int notify_change(struct dentry * dentry error = security_inode_setattr(dentry, attr); if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; if (!error) error = inode_setattr(inode, attr); diff -NurpP --minimal linux-2.6.10-rc1/fs/binfmt_aout.c linux-2.6.10-rc1-vs1.9.3/fs/binfmt_aout.c --- linux-2.6.10-rc1/fs/binfmt_aout.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/binfmt_aout.c 2004-10-31 00:41:27.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -309,7 +310,8 @@ static int load_aout_binary(struct linux (current->mm->start_brk = N_BSSADDR(ex)); current->mm->free_area_cache = current->mm->mmap_base; - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff -NurpP --minimal linux-2.6.10-rc1/fs/binfmt_elf.c linux-2.6.10-rc1-vs1.9.3/fs/binfmt_elf.c --- linux-2.6.10-rc1/fs/binfmt_elf.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/binfmt_elf.c 2004-10-31 00:41:27.000000000 +0200 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -716,7 +717,8 @@ static int load_elf_binary(struct linux_ /* Do this so that we can load the interpreter, if need be. 
We will change some of these later */ - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); current->mm->free_area_cache = current->mm->mmap_base; retval = setup_arg_pages(bprm, executable_stack); if (retval < 0) { diff -NurpP --minimal linux-2.6.10-rc1/fs/binfmt_flat.c linux-2.6.10-rc1-vs1.9.3/fs/binfmt_flat.c --- linux-2.6.10-rc1/fs/binfmt_flat.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/binfmt_flat.c 2004-10-31 00:41:27.000000000 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -650,7 +651,8 @@ static int load_flat_file(struct linux_b current->mm->start_brk = datapos + data_len + bss_len; current->mm->brk = (current->mm->start_brk + 3) & ~3; current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); } if (flags & FLAT_FLAG_KTRACE) diff -NurpP --minimal linux-2.6.10-rc1/fs/binfmt_som.c linux-2.6.10-rc1-vs1.9.3/fs/binfmt_som.c --- linux-2.6.10-rc1/fs/binfmt_som.c 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/binfmt_som.c 2004-10-31 00:41:27.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -259,7 +260,8 @@ load_som_binary(struct linux_binprm * bp create_som_tables(bprm); current->mm->start_stack = bprm->p; - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); #if 0 printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk); diff -NurpP --minimal linux-2.6.10-rc1/fs/devpts/inode.c linux-2.6.10-rc1-vs1.9.3/fs/devpts/inode.c --- linux-2.6.10-rc1/fs/devpts/inode.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/devpts/inode.c 2004-10-31 00:41:27.000000000 +0200 @@ -18,10 +18,9 @@ #include #include #include +#include #include -#define DEVPTS_SUPER_MAGIC 0x1cd1 - extern struct xattr_handler 
devpts_xattr_security_handler; static struct xattr_handler *devpts_xattr_handlers[] = { @@ -31,6 +30,15 @@ static struct xattr_handler *devpts_xatt NULL }; +static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + int ret = -EACCES; + + if (vx_check(inode->i_xid, VX_IDENT)) + ret = generic_permission(inode, mask, NULL); + return ret; +} + struct inode_operations devpts_file_inode_operations = { #ifdef CONFIG_DEVPTS_FS_XATTR .setxattr = generic_setxattr, @@ -38,6 +46,7 @@ struct inode_operations devpts_file_inod .listxattr = generic_listxattr, .removexattr = generic_removexattr, #endif + .permission = devpts_permission, }; static struct vfsmount *devpts_mnt; @@ -88,6 +97,68 @@ static int devpts_remount(struct super_b return 0; } +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct dentry *cursor = filp->private_data; + struct list_head *p, *q = &cursor->d_child; + ino_t ino; + int i = filp->f_pos; + + switch (i) { + case 0: + ino = dentry->d_inode->i_ino; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + default: + spin_lock(&dcache_lock); + if (filp->f_pos == 2) { + list_del(q); + list_add(q, &dentry->d_subdirs); + } + for (p=q->next; p != &dentry->d_subdirs; p=p->next) { + struct dentry *next; + next = list_entry(p, struct dentry, d_child); + if (d_unhashed(next) || !next->d_inode) + continue; + if (!vx_check(next->d_inode->i_xid, VX_IDENT)) + continue; + + spin_unlock(&dcache_lock); + if (filldir(dirent, next->d_name.name, + next->d_name.len, filp->f_pos, + next->d_inode->i_ino, DT_CHR) < 0) + return 0; + spin_lock(&dcache_lock); + /* next is still alive */ + list_del(q); + list_add(q, p); + p = q; + filp->f_pos++; + } + spin_unlock(&dcache_lock); + } + 
return 0; +} + +static struct file_operations devpts_dir_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .readdir = devpts_readdir, +}; + static struct super_operations devpts_sops = { .statfs = simple_statfs, .remount_fs = devpts_remount, @@ -114,8 +185,9 @@ devpts_fill_super(struct super_block *s, inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = &devpts_dir_operations; inode->i_nlink = 2; + inode->i_xid = vx_current_xid(); devpts_root = s->s_root = d_alloc_root(inode); if (s->s_root) @@ -174,6 +246,7 @@ int devpts_pty_new(struct tty_struct *tt inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); + inode->i_xid = vx_current_xid(); inode->i_op = &devpts_file_inode_operations; inode->u.generic_ip = tty; diff -NurpP --minimal linux-2.6.10-rc1/fs/exec.c linux-2.6.10-rc1-vs1.9.3/fs/exec.c --- linux-2.6.10-rc1/fs/exec.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/exec.c 2004-10-31 00:41:27.000000000 +0200 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -320,7 +321,8 @@ void install_arg_page(struct vm_area_str pte_unmap(pte); goto out; } - mm->rss++; + // mm->rss++; + vx_rsspages_inc(mm); lru_cache_add_active(page); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte( page, vma->vm_page_prot)))); @@ -404,7 +406,8 @@ int setup_arg_pages(struct linux_binprm if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) { + if (security_vm_enough_memory(arg_size >> PAGE_SHIFT) || + !vx_vmpages_avail(mm, arg_size >> PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -434,7 +437,9 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_flags |= 
mm->def_flags; mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; insert_vm_struct(mm, mpnt); - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + // mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.10-rc1/fs/ext2/balloc.c linux-2.6.10-rc1-vs1.9.3/fs/ext2/balloc.c --- linux-2.6.10-rc1/fs/ext2/balloc.c 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext2/balloc.c 2004-10-31 00:41:27.000000000 +0200 @@ -16,6 +16,8 @@ #include #include #include +#include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -108,6 +110,8 @@ static int reserve_blocks(struct super_b free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); + DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks); + if (free_blocks < count) count = free_blocks; @@ -258,6 +262,7 @@ do_more: } error_return: brelse(bitmap_bh); + DLIMIT_FREE_BLOCK(sb, inode->i_xid, freed); release_blocks(sb, freed); DQUOT_FREE_BLOCK(inode, freed); } @@ -361,6 +366,10 @@ int ext2_new_block(struct inode *inode, *err = -ENOSPC; goto out_dquot; } + if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, es_alloc)) { + *err = -ENOSPC; + goto out_dlimit; + } ext2_debug ("goal=%lu.\n", goal); @@ -508,6 +517,8 @@ got_block: *err = 0; out_release: group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + DLIMIT_FREE_BLOCK(sb, inode->i_xid, es_alloc); +out_dlimit: release_blocks(sb, es_alloc); out_dquot: DQUOT_FREE_BLOCK(inode, dq_alloc); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext2/ialloc.c linux-2.6.10-rc1-vs1.9.3/fs/ext2/ialloc.c --- linux-2.6.10-rc1/fs/ext2/ialloc.c 2004-10-18 23:54:19.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext2/ialloc.c 2004-10-31 00:41:27.000000000 +0200 @@ -18,6 +18,9 @@ #include #include #include +#include +#include + 
#include "ext2.h" #include "xattr.h" #include "acl.h" @@ -124,6 +127,7 @@ void ext2_free_inode (struct inode * ino if (!is_bad_inode(inode)) { /* Quota is already initialized in iput() */ ext2_xattr_delete_inode(inode); + DLIMIT_FREE_INODE(sb, inode->i_xid); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); } @@ -465,6 +469,15 @@ struct inode *ext2_new_inode(struct inod if (!inode) return ERR_PTR(-ENOMEM); + if (sb->s_flags & MS_TAGXID) + inode->i_xid = current->xid; + else + inode->i_xid = 0; + + if (DLIMIT_ALLOC_INODE(sb, inode->i_xid)) { + err = -ENOSPC; + goto fail_dlim; + } ei = EXT2_I(inode); sbi = EXT2_SB(sb); es = sbi->s_es; @@ -579,7 +592,8 @@ got: inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + ei->i_flags = EXT2_I(dir)->i_flags & + ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); /* dirsync is only applied to directories */ @@ -620,12 +634,15 @@ got: return inode; fail2: + DLIMIT_FREE_INODE(sb, inode->i_xid); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); return ERR_PTR(err); fail: + DLIMIT_FREE_INODE(sb, inode->i_xid); +fail_dlim: make_bad_inode(inode); iput(inode); return ERR_PTR(err); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext2/inode.c linux-2.6.10-rc1-vs1.9.3/fs/ext2/inode.c --- linux-2.6.10-rc1/fs/ext2/inode.c 2004-10-18 23:53:11.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext2/inode.c 2004-10-31 00:41:27.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" @@ -65,6 +66,8 @@ void ext2_put_inode(struct inode *inode) ext2_discard_prealloc(inode); } +static void ext2_truncate_nocheck (struct inode * inode); + /* * Called at the last iput() if i_nlink is zero. 
*/ @@ -78,7 +81,7 @@ void ext2_delete_inode (struct inode * i inode->i_size = 0; if (inode->i_blocks) - ext2_truncate (inode); + ext2_truncate_nocheck(inode); ext2_free_inode (inode); return; @@ -878,7 +881,7 @@ static void ext2_free_branches(struct in ext2_free_data(inode, p, q); } -void ext2_truncate (struct inode * inode) +static void ext2_truncate_nocheck(struct inode * inode) { __le32 *i_data = EXT2_I(inode)->i_data; int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); @@ -895,8 +898,6 @@ void ext2_truncate (struct inode * inode return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext2_discard_prealloc(inode); @@ -1018,6 +1019,13 @@ Egdp: return ERR_PTR(-EIO); } +void ext2_truncate (struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext2_truncate_nocheck(inode); +} + void ext2_set_inode_flags(struct inode *inode) { unsigned int flags = EXT2_I(inode)->i_flags; @@ -1029,6 +1037,10 @@ void ext2_set_inode_flags(struct inode * inode->i_flags |= S_APPEND; if (flags & EXT2_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT2_BARRIER_FL) + inode->i_flags |= S_BARRIER; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) @@ -1041,6 +1053,8 @@ void ext2_read_inode (struct inode * ino ino_t ino = inode->i_ino; struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + uid_t uid; + gid_t gid; int n; #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -1051,12 +1065,17 @@ void ext2_read_inode (struct inode * ino goto bad_inode; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= 
le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -1149,8 +1168,8 @@ static int ext2_update_inode(struct inod struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1185,6 +1204,9 @@ static int ext2_update_inode(struct inod raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_GID32 + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(inode->i_size); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -1262,6 +1284,27 @@ int ext2_sync_inode(struct inode *inode) return sync_inode(inode, &wbc); } +int ext2_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = EXT2_I(inode)->i_flags; + newflags = oldflags & + ~(EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | EXT2_BARRIER_FL); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= EXT2_IMMUTABLE_FL; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= EXT2_IUNLINK_FL; + if (flags & ATTR_FLAG_BARRIER) + newflags |= EXT2_BARRIER_FL; + + if (oldflags ^ newflags) { + EXT2_I(inode)->i_flags = newflags; + 
inode->i_ctime = CURRENT_TIME; + } + return 0; +} + int ext2_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; @@ -1271,11 +1314,15 @@ int ext2_setattr(struct dentry *dentry, if (error) return error; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_XID && iattr->ia_xid != inode->i_xid)) { error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0; if (error) return error; } + if (iattr->ia_valid & ATTR_ATTR_FLAG) + ext2_setattr_flags(inode, iattr->ia_attr_flags); + error = inode_setattr(inode, iattr); if (!error && (iattr->ia_valid & ATTR_MODE)) error = ext2_acl_chmod(inode); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext2/ioctl.c linux-2.6.10-rc1-vs1.9.3/fs/ext2/ioctl.c --- linux-2.6.10-rc1/fs/ext2/ioctl.c 2004-10-18 23:55:36.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext2/ioctl.c 2004-10-31 00:41:27.000000000 +0200 @@ -49,7 +49,9 @@ int ext2_ioctl (struct inode * inode, st * * This test looks nicer. 
Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if ((oldflags & EXT2_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff -NurpP --minimal linux-2.6.10-rc1/fs/ext2/super.c linux-2.6.10-rc1-vs1.9.3/fs/ext2/super.c --- linux-2.6.10-rc1/fs/ext2/super.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext2/super.c 2004-10-31 00:41:27.000000000 +0200 @@ -249,7 +249,7 @@ enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_tagxid, Opt_ignore, Opt_err, }; @@ -278,6 +278,7 @@ static match_table_t tokens = { {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_tagxid, "tagxid"}, {Opt_ignore, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, @@ -341,6 +342,11 @@ static int parse_options (char * options case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + set_opt (sbi->s_mount_opt, TAG_XID); + break; +#endif case Opt_check: #ifdef CONFIG_EXT2_CHECK set_opt (sbi->s_mount_opt, CHECK); @@ -626,6 +632,8 @@ static int ext2_fill_super(struct super_ if (!parse_options ((char *) data, sbi)) goto failed_mount; + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAG_XID) + sb->s_flags |= MS_TAGXID; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext2/xattr.c linux-2.6.10-rc1-vs1.9.3/fs/ext2/xattr.c --- linux-2.6.10-rc1/fs/ext2/xattr.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext2/xattr.c 2004-10-31 00:41:27.000000000 +0200 @@ -60,6 +60,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -648,8 +649,15 @@ ext2_xattr_set2(struct inode *inode, str the inode. */ ea_bdebug(new_bh, "reusing block"); + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1)) { + /* mirror the DQUOT failure path below: unlock new_bh before bailing out */ + unlock_buffer(new_bh); + goto cleanup; + } error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) { + DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); unlock_buffer(new_bh); goto cleanup; } @@ -729,6 +737,7 @@ ext2_xattr_set2(struct inode *inode, str /* Decrement the refcount only. */ HDR(old_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(old_bh)->h_refcount) - 1); + DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", @@ -784,6 +793,7 @@ ext2_xattr_delete_inode(struct inode *in mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); + DLIMIT_FREE_BLOCK(inode->i_sb, inode->i_xid, 1); DQUOT_FREE_BLOCK(inode, 1); } ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext3/balloc.c linux-2.6.10-rc1-vs1.9.3/fs/ext3/balloc.c --- linux-2.6.10-rc1/fs/ext3/balloc.c 2004-10-18 23:53:10.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext3/balloc.c 2004-10-31 00:41:27.000000000 +0200 @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -275,8 +277,10 @@ do_more: error_return: brelse(bitmap_bh); ext3_std_error(sb, err); - if (dquot_freed_blocks) + if (dquot_freed_blocks) { + DLIMIT_FREE_BLOCK(sb, inode->i_xid, dquot_freed_blocks); DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + } return; } @@ -465,18 +469,32 @@ fail: return -1; } -static int
ext3_has_free_blocks(struct ext3_sb_info *sbi) +static int ext3_has_free_blocks(struct super_block *sb) { - int free_blocks, root_blocks; + struct ext3_sb_info *sbi = EXT3_SB(sb); + int free_blocks, root_blocks, cond; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): free=%u, root=%u", + sb, free_blocks, root_blocks); + + DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks); + + cond = (free_blocks < root_blocks + 1 && + !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))); + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d", + sb, free_blocks, root_blocks, + !capable(CAP_SYS_RESOURCE)?'1':'0', + sbi->s_resuid, current->fsuid, cond?0:1); + + return (cond ? 
0 : 1); } /* @@ -487,7 +505,7 @@ static int ext3_has_free_blocks(struct e */ int ext3_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) + if (!ext3_has_free_blocks(sb) || (*retries)++ > 3) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); @@ -537,12 +555,14 @@ ext3_new_block(handle_t *handle, struct *errp = -EDQUOT; return 0; } + if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1)) + goto out_dlimit; sbi = EXT3_SB(sb); es = EXT3_SB(sb)->s_es; ext3_debug("goal=%lu.\n", goal); - if (!ext3_has_free_blocks(sbi)) { + if (!ext3_has_free_blocks(sb)) { *errp = -ENOSPC; goto out; } @@ -697,6 +717,9 @@ allocated: io_error: *errp = -EIO; out: + if (!performed_allocation) + DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); +out_dlimit: if (fatal) { *errp = fatal; ext3_std_error(sb, fatal); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext3/ialloc.c linux-2.6.10-rc1-vs1.9.3/fs/ext3/ialloc.c --- linux-2.6.10-rc1/fs/ext3/ialloc.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext3/ialloc.c 2004-10-31 00:41:27.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -125,6 +126,7 @@ void ext3_free_inode (handle_t *handle, */ DQUOT_INIT(inode); ext3_xattr_delete_inode(handle, inode); + DLIMIT_FREE_INODE(sb, inode->i_xid); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); @@ -445,6 +447,16 @@ struct inode *ext3_new_inode(handle_t *h inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); + + if (sb->s_flags & MS_TAGXID) + inode->i_xid = current->xid; + else + inode->i_xid = 0; + + if (DLIMIT_ALLOC_INODE(sb, inode->i_xid)) { + err = -ENOSPC; + goto out; + } ei = EXT3_I(inode); sbi = EXT3_SB(sb); @@ -568,7 +580,8 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + ei->i_flags = EXT3_I(dir)->i_flags & + ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= 
~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); /* dirsync only applies to directories */ @@ -619,6 +632,7 @@ got: ext3_debug("allocating inode %lu\n", inode->i_ino); goto really_out; fail: + DLIMIT_FREE_INODE(sb, inode->i_xid); ext3_std_error(sb, err); out: iput(inode); @@ -628,6 +642,7 @@ really_out: return ret; fail2: + DLIMIT_FREE_INODE(sb, inode->i_xid); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext3/inode.c linux-2.6.10-rc1-vs1.9.3/fs/ext3/inode.c --- linux-2.6.10-rc1/fs/ext3/inode.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext3/inode.c 2004-10-31 00:41:27.000000000 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -191,6 +192,8 @@ void ext3_put_inode(struct inode *inode) ext3_discard_prealloc(inode); } +static void ext3_truncate_nocheck (struct inode *inode); + /* * Called at the last iput() if i_nlink is zero. */ @@ -214,7 +217,7 @@ void ext3_delete_inode (struct inode * i handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) - ext3_truncate(inode); + ext3_truncate_nocheck(inode); /* * Kill off the orphan record which ext3_truncate created. * AKPM: I think this can be inside the above `if'. @@ -2146,7 +2149,7 @@ static void ext3_free_branches(handle_t * ext3_truncate() run will find them and release them. 
*/ -void ext3_truncate(struct inode * inode) +void ext3_truncate_nocheck(struct inode * inode) { handle_t *handle; struct ext3_inode_info *ei = EXT3_I(inode); @@ -2167,8 +2170,6 @@ void ext3_truncate(struct inode * inode) return; if (ext3_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext3_discard_prealloc(inode); @@ -2475,6 +2476,13 @@ has_buffer: return 0; } +void ext3_truncate(struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext3_truncate_nocheck(inode); +} + void ext3_set_inode_flags(struct inode *inode) { unsigned int flags = EXT3_I(inode)->i_flags; @@ -2486,6 +2494,10 @@ void ext3_set_inode_flags(struct inode * inode->i_flags |= S_APPEND; if (flags & EXT3_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT3_BARRIER_FL) + inode->i_flags |= S_BARRIER; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT3_DIRSYNC_FL) @@ -2499,6 +2511,8 @@ void ext3_read_inode(struct inode * inod struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh; int block; + uid_t uid; + gid_t gid; #ifdef CONFIG_EXT3_FS_POSIX_ACL ei->i_acl = EXT3_ACL_NOT_CACHED; @@ -2509,12 +2523,17 @@ void ext3_read_inode(struct inode * inod bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + 
inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -2622,6 +2641,8 @@ static int ext3_do_update_inode(handle_t struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2631,29 +2652,32 @@ static int ext3_do_update_inode(handle_t raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); /* * Fix up interoperability with old kernels. 
Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_GID32 + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -2775,6 +2799,44 @@ int ext3_write_inode(struct inode *inode return ext3_force_commit(inode->i_sb); } +int ext3_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + int err = 0; + + oldflags = EXT3_I(inode)->i_flags; + newflags = oldflags & + ~(EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | EXT3_BARRIER_FL); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= EXT3_IMMUTABLE_FL; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= EXT3_IUNLINK_FL; + if (flags & ATTR_FLAG_BARRIER) + newflags |= EXT3_BARRIER_FL; + + if (oldflags ^ newflags) { + handle_t *handle; + struct ext3_iloc iloc; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + EXT3_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + flags_err: + ext3_journal_stop(handle); + } + return err; +} + /* * ext3_setattr() * @@ -2803,7 +2865,8 @@ int ext3_setattr(struct dentry 
*dentry, return error; if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, @@ -2824,6 +2887,10 @@ int ext3_setattr(struct dentry *dentry, inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) + && inode->i_sb + && (inode->i_sb->s_flags & MS_TAGXID)) + inode->i_xid = attr->ia_xid; error = ext3_mark_inode_dirty(handle, inode); ext3_journal_stop(handle); } @@ -2846,6 +2913,12 @@ int ext3_setattr(struct dentry *dentry, ext3_journal_stop(handle); } + if (ia_valid & ATTR_ATTR_FLAG) { + rc = ext3_setattr_flags(inode, attr->ia_attr_flags); + if (!error) + error = rc; + } + rc = inode_setattr(inode, attr); /* If inode_setattr's call to ext3_truncate failed to get a diff -NurpP --minimal linux-2.6.10-rc1/fs/ext3/ioctl.c linux-2.6.10-rc1-vs1.9.3/fs/ext3/ioctl.c --- linux-2.6.10-rc1/fs/ext3/ioctl.c 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext3/ioctl.c 2004-10-31 00:41:27.000000000 +0200 @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -57,7 +58,9 @@ int ext3_ioctl (struct inode * inode, st * * This test looks nicer. 
Thanks to Pauline Middelink
 		 */
-		if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
+		if ((oldflags & EXT3_IMMUTABLE_FL) ||
+			((flags ^ oldflags) & (EXT3_APPEND_FL |
+			EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) {
 			if (!capable(CAP_LINUX_IMMUTABLE))
 				return -EPERM;
 		}
@@ -151,6 +154,42 @@ flags_err:
 		return ret;
 	}
 #endif
+#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
+	/* tag the inode with a context id (xid) read from user space;
+	 * only the low 16 bits of the supplied value are stored */
+	case EXT3_IOC_SETXID: {
+		handle_t *handle;
+		struct ext3_iloc iloc;
+		int xid;
+		int err;
+
+		/* fixme: if stealth, return -ENOTTY */
+		if (!capable(CAP_CONTEXT))
+			return -EPERM;
+		if (IS_RDONLY(inode))
+			return -EROFS;
+		if (!(inode->i_sb->s_flags & MS_TAGXID))
+			return -ENOSYS;
+		if (get_user(xid, (int *) arg))
+			return -EFAULT;
+
+		handle = ext3_journal_start(inode, 1);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+		err = ext3_reserve_inode_write(handle, inode, &iloc);
+		/* the running handle has to be stopped on failure too */
+		if (err)
+			goto setxid_err;
+
+		inode->i_xid = (xid & 0xFFFF);
+		inode->i_ctime = CURRENT_TIME;
+
+		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+	setxid_err:
+		ext3_journal_stop(handle);
+		return err;
+	}
+#endif
 	default:
 		return -ENOTTY;
 	}
diff -NurpP --minimal linux-2.6.10-rc1/fs/ext3/super.c linux-2.6.10-rc1-vs1.9.3/fs/ext3/super.c
--- linux-2.6.10-rc1/fs/ext3/super.c	2004-10-31 00:41:02.000000000 +0200
+++ linux-2.6.10-rc1-vs1.9.3/fs/ext3/super.c	2004-10-31 00:41:27.000000000 +0200
@@ -584,7 +584,7 @@ enum {
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
-	Opt_ignore, Opt_barrier, Opt_err,
+	Opt_tagxid, Opt_barrier, Opt_ignore, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -625,6 +625,7 @@ static match_table_t tokens = {
 	{Opt_grpjquota, "grpjquota=%s"},
 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
+	{Opt_tagxid, "tagxid"},
 	{Opt_ignore, "grpquota"},
 	{Opt_ignore, "noquota"},
 	{Opt_ignore, "quota"},
@@ -719,6 +720,16 @@ static int parse_options (char * options
 		case
Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + if (is_remount) { + printk(KERN_ERR "EXT3-fs: cannot specify " + "tagxid on remount\n"); + return 0; + } + set_opt (sbi->s_mount_opt, TAG_XID); + break; +#endif case Opt_check: #ifdef CONFIG_EXT3_CHECK set_opt (sbi->s_mount_opt, CHECK); @@ -1297,6 +1308,8 @@ static int ext3_fill_super (struct super if (!parse_options ((char *) data, sb, &journal_inum, 0)) goto failed_mount; + if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAG_XID) + sb->s_flags |= MS_TAGXID; sb->s_flags |= MS_ONE_SECOND; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); diff -NurpP --minimal linux-2.6.10-rc1/fs/ext3/xattr.c linux-2.6.10-rc1-vs1.9.3/fs/ext3/xattr.c --- linux-2.6.10-rc1/fs/ext3/xattr.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ext3/xattr.c 2004-10-31 00:41:27.000000000 +0200 @@ -61,6 +61,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -661,8 +662,12 @@ ext3_xattr_set_handle2(handle_t *handle, the inode. */ ea_bdebug(new_bh, "reusing block"); + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1)) + goto cleanup; error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) { + DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); unlock_buffer(new_bh); journal_release_buffer(handle, new_bh, credits); @@ -748,6 +753,7 @@ getblk_failed: /* Decrement the refcount only. 
*/ HDR(old_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(old_bh)->h_refcount) - 1); + DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); DQUOT_FREE_BLOCK(inode, 1); ext3_journal_dirty_metadata(handle, old_bh); ea_bdebug(old_bh, "refcount now=%d", @@ -839,6 +845,7 @@ ext3_xattr_delete_inode(handle_t *handle ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; + DLIMIT_FREE_BLOCK(inode->i_sb, inode->i_xid, 1); DQUOT_FREE_BLOCK(inode, 1); } ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); diff -NurpP --minimal linux-2.6.10-rc1/fs/fcntl.c linux-2.6.10-rc1-vs1.9.3/fs/fcntl.c --- linux-2.6.10-rc1/fs/fcntl.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/fcntl.c 2004-10-31 00:41:27.000000000 +0200 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -108,6 +109,8 @@ repeat: error = -EMFILE; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; + if (!vx_files_avail(1)) + goto out; error = expand_files(files, newfd); if (error < 0) @@ -140,6 +143,7 @@ static int dupfd(struct file *file, unsi FD_SET(fd, files->open_fds); FD_CLR(fd, files->close_on_exec); spin_unlock(&files->file_lock); + // vx_openfd_inc(fd); fd_install(fd, file); } else { spin_unlock(&files->file_lock); @@ -187,6 +191,7 @@ asmlinkage long sys_dup2(unsigned int ol FD_SET(newfd, files->open_fds); FD_CLR(newfd, files->close_on_exec); spin_unlock(&files->file_lock); + // vx_openfd_inc(newfd); if (tofree) filp_close(tofree, files); @@ -491,7 +496,7 @@ void send_sigio(struct fown_struct *fown read_lock(&tasklist_lock); if (pid > 0) { - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (p) { send_sigio_to_task(p, fown, fd, band); } @@ -526,7 +531,7 @@ int send_sigurg(struct fown_struct *fown read_lock(&tasklist_lock); if (pid > 0) { - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (p) { send_sigurg_to_task(p, fown); } diff -NurpP --minimal linux-2.6.10-rc1/fs/file_table.c 
linux-2.6.10-rc1-vs1.9.3/fs/file_table.c --- linux-2.6.10-rc1/fs/file_table.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/file_table.c 2004-11-04 19:52:50.750726329 +0100 @@ -16,6 +16,8 @@ #include #include #include +#include +#include /* sysctl tunables... */ struct files_stat_struct files_stat = { @@ -85,6 +87,9 @@ static int old_max; f->f_owner.lock = RW_LOCK_UNLOCKED; /* f->f_version: 0 */ INIT_LIST_HEAD(&f->f_list); + // set_vx_info(&f->f_vx_info, current->vx_info); + f->f_xid = current->xid; + vx_files_inc(f); return f; } } @@ -137,6 +142,8 @@ void fastcall __fput(struct file *file) fops_put(file->f_op); if (file->f_mode & FMODE_WRITE) put_write_access(inode); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file->f_dentry = NULL; file->f_vfsmnt = NULL; @@ -192,6 +199,8 @@ void put_filp(struct file *file) { if (atomic_dec_and_test(&file->f_count)) { security_file_free(file); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file_free(file); } diff -NurpP --minimal linux-2.6.10-rc1/fs/inode.c linux-2.6.10-rc1-vs1.9.3/fs/inode.c --- linux-2.6.10-rc1/fs/inode.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/inode.c 2004-10-31 00:41:27.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include /* * This is needed for the following functions: @@ -115,6 +116,10 @@ static struct inode *alloc_inode(struct struct address_space * const mapping = &inode->i_data; inode->i_sb = sb; + // inode->i_dqh = dqhget(sb->s_dqh); + + /* important because of inode slab reuse */ + inode->i_xid = 0; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic_set(&inode->i_count, 1); diff -NurpP --minimal linux-2.6.10-rc1/fs/ioctl.c linux-2.6.10-rc1-vs1.9.3/fs/ioctl.c --- linux-2.6.10-rc1/fs/ioctl.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/ioctl.c 2004-10-31 00:41:27.000000000 +0200 @@ -12,10 +12,18 @@ #include #include #include +#include +#include +#include #include #include 
+#ifdef CONFIG_VSERVER_LEGACY +extern int vx_proc_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); +#endif + static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg) { int error; @@ -123,6 +131,48 @@ asmlinkage long sys_ioctl(unsigned int f else error = -ENOTTY; break; +#ifdef CONFIG_VSERVER_LEGACY +#ifndef CONFIG_INOXID_NONE + case FIOC_GETXID: { + struct inode *inode = filp->f_dentry->d_inode; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (capable(CAP_CONTEXT)) + error = put_user(inode->i_xid, (int *) arg); + break; + } + case FIOC_SETXID: { + struct inode *inode = filp->f_dentry->d_inode; + int xid; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -ENOSYS; + if (!(inode->i_sb->s_flags & MS_TAGXID)) + break; + error = -EFAULT; + if (get_user(xid, (int *) arg)) + break; + error = 0; + inode->i_xid = (xid & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + break; + } +#endif + case FIOC_GETXFLG: + case FIOC_SETXFLG: + error = -ENOTTY; + if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC) + error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + break; +#endif default: error = -ENOTTY; if (S_ISREG(filp->f_dentry->d_inode->i_mode)) diff -NurpP --minimal linux-2.6.10-rc1/fs/jfs/jfs_imap.c linux-2.6.10-rc1-vs1.9.3/fs/jfs/jfs_imap.c --- linux-2.6.10-rc1/fs/jfs/jfs_imap.c 2004-10-18 23:53:22.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/jfs/jfs_imap.c 2004-10-31 00:41:27.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -3091,14 +3092,21 @@ static void duplicateIXtree(struct super static int copy_from_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + uid_t uid; + gid_t gid; jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = 
le32_to_cpu(dip->di_mode); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; ip->i_nlink = le32_to_cpu(dip->di_nlink); - ip->i_uid = le32_to_cpu(dip->di_uid); - ip->i_gid = le32_to_cpu(dip->di_gid); + + uid = le32_to_cpu(dip->di_uid); + gid = le32_to_cpu(dip->di_gid); + ip->i_uid = INOXID_UID(XID_TAG(ip), uid, gid); + ip->i_gid = INOXID_GID(XID_TAG(ip), uid, gid); + ip->i_xid = INOXID_XID(XID_TAG(ip), uid, gid, 0); + ip->i_size = le64_to_cpu(dip->di_size); ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); @@ -3149,6 +3157,8 @@ static int copy_from_dinode(struct dinod static void copy_to_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + uid_t uid; + gid_t gid; dip->di_fileset = cpu_to_le32(jfs_ip->fileset); dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); @@ -3157,8 +3167,11 @@ static void copy_to_dinode(struct dinode dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - dip->di_uid = cpu_to_le32(ip->i_uid); - dip->di_gid = cpu_to_le32(ip->i_gid); + + uid = XIDINO_UID(XID_TAG(ip), ip->i_uid, ip->i_xid); + gid = XIDINO_GID(XID_TAG(ip), ip->i_gid, ip->i_xid); + dip->di_uid = cpu_to_le32(uid); + dip->di_gid = cpu_to_le32(gid); /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones diff -NurpP --minimal linux-2.6.10-rc1/fs/namei.c linux-2.6.10-rc1-vs1.9.3/fs/namei.c --- linux-2.6.10-rc1/fs/namei.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/namei.c 2004-10-31 00:41:27.000000000 +0200 @@ -28,6 +28,8 @@ #include #include #include +#include + #include #include @@ -224,6 +226,20 @@ int generic_permission(struct inode *ino return -EACCES; } +static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (inode->i_xid == 0) + return 0; + if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + return 0; +/* + printk("VSW: xid=%d denied access to %p[#%d,%lu] »%*s«.\n", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + nd->dentry->d_name.len, nd->dentry->d_name.name); +*/ + return -EACCES; +} + int permission(struct inode * inode,int mask, struct nameidata *nd) { int retval; @@ -232,6 +248,8 @@ int permission(struct inode * inode,int /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; + if ((retval = xid_permission(inode, mask, nd))) + return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -1175,7 +1193,7 @@ static inline int may_delete(struct inod if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXORUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1981,7 +1999,7 @@ int vfs_link(struct dentry *old_dentry, /* * A link to an append-only or immutable file cannot be created. 
*/ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; diff -NurpP --minimal linux-2.6.10-rc1/fs/namespace.c linux-2.6.10-rc1-vs1.9.3/fs/namespace.c --- linux-2.6.10-rc1/fs/namespace.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/namespace.c 2004-10-31 00:41:27.000000000 +0200 @@ -22,6 +22,9 @@ #include #include #include +#include +#include + #include #include @@ -230,6 +233,7 @@ static int show_vfsmnt(struct seq_file * { MS_MANDLOCK, ",mand" }, { MS_NOATIME, ",noatime" }, { MS_NODIRATIME, ",nodiratime" }, + { MS_TAGXID, ",tagxid" }, { 0, NULL } }; static struct proc_fs_info mnt_info[] = { @@ -240,6 +244,11 @@ static int show_vfsmnt(struct seq_file * }; struct proc_fs_info *fs_infop; + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!vx_check_vfsmount(current->vx_info, mnt)) + return 0; + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); seq_putc(m, ' '); seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); @@ -339,18 +348,10 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -void umount_tree(struct vfsmount *mnt) +static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill) { - struct vfsmount *p; - LIST_HEAD(kill); - - for (p = mnt; p; p = next_mnt(p, mnt)) { - list_del(&p->mnt_list); - list_add(&p->mnt_list, &kill); - } - - while (!list_empty(&kill)) { - mnt = list_entry(kill.next, struct vfsmount, mnt_list); + while (!list_empty(kill)) { + mnt = list_entry(kill->next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); list_del_init(&mnt->mnt_fslink); if (mnt->mnt_parent == mnt) { @@ -366,6 +367,32 @@ void umount_tree(struct vfsmount *mnt) } } +void umount_tree(struct vfsmount *mnt) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + } + __umount_tree(mnt, &kill); +} + +void 
umount_unused(struct vfsmount *mnt, struct fs_struct *fs) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + if (p == fs->rootmnt || p == fs->pwdmnt) + continue; + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + } + __umount_tree(mnt, &kill); +} + static int do_umount(struct vfsmount *mnt, int flags) { struct super_block * sb = mnt->mnt_sb; @@ -481,7 +508,7 @@ asmlinkage long sys_umount(char __user * goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) goto dput_and_out; retval = do_umount(nd.mnt, flags); @@ -508,6 +535,8 @@ static int mount_is_safe(struct nameidat { if (capable(CAP_SYS_ADMIN)) return 0; + if (vx_ccaps(VXC_SECURE_MOUNT)) + return 0; return -EPERM; #ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) @@ -674,7 +703,7 @@ static int do_remount(struct nameidata * int err; struct super_block * sb = nd->mnt->mnt_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_REMOUNT)) return -EPERM; if (!check_mnt(nd->mnt)) @@ -683,6 +712,8 @@ static int do_remount(struct nameidata * if (nd->dentry != nd->mnt->mnt_root) return -EINVAL; + if (vx_ccaps(VXC_SECURE_REMOUNT)) + mnt_flags |= MNT_NODEV; down_write(&sb->s_umount); err = do_remount_sb(sb, flags, data, 0); if (!err) @@ -698,7 +729,7 @@ static int do_move_mount(struct nameidat struct nameidata old_nd, parent_nd; struct vfsmount *p; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -770,7 +801,7 @@ static int do_new_mount(struct nameidata return -EINVAL; /* we need capabilities... 
*/ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); @@ -1036,6 +1067,9 @@ long do_mount(char * dev_name, char * di mnt_flags |= MNT_NOEXEC; flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); + if (vx_ccaps(VXC_SECURE_MOUNT)) + mnt_flags |= MNT_NODEV; + /* ... and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); if (retval) @@ -1076,7 +1110,7 @@ int copy_namespace(int flags, struct tas if (!(flags & CLONE_NEWNS)) return 0; - if (!capable(CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) { put_namespace(namespace); return -EPERM; } diff -NurpP --minimal linux-2.6.10-rc1/fs/nfs/inode.c linux-2.6.10-rc1-vs1.9.3/fs/nfs/inode.c --- linux-2.6.10-rc1/fs/nfs/inode.c 2004-10-18 23:54:55.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/nfs/inode.c 2004-11-04 19:52:50.754725697 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -313,6 +314,9 @@ nfs_sb_init(struct super_block *sb, rpc_ } server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + if (server->flags & NFS_MOUNT_TAGXID) + sb->s_flags |= MS_TAGXID; + sb->s_maxbytes = fsinfo.maxfilesize; if (sb->s_maxbytes > MAX_LFS_FILESIZE) sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -367,6 +371,7 @@ nfs_create_client(struct nfs_server *ser clnt->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; clnt->cl_droppriv = (server->flags & NFS_MOUNT_BROKEN_SUID) ? 1 : 0; + clnt->cl_tagxid = (server->flags & NFS_MOUNT_TAGXID) ? 
1 : 0; clnt->cl_chatty = 1; return clnt; @@ -524,6 +529,7 @@ static int nfs_show_options(struct seq_f { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, + { NFS_MOUNT_TAGXID, ",tagxid", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -688,8 +694,10 @@ nfs_fhget(struct super_block *sb, struct nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + /* maybe fattr->xid someday */ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* * report the blocks in 512byte units @@ -715,7 +723,12 @@ nfs_fhget(struct super_block *sb, struct out: return inode; - +/* +fail_dlim: + make_bad_inode(inode); + iput(inode); + inode = NULL; +*/ out_no_inode: printk("nfs_fhget: iget failed\n"); goto out; @@ -761,6 +774,8 @@ nfs_setattr(struct dentry *dentry, struc inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) != 0) + inode->i_xid = attr->ia_xid; if ((attr->ia_valid & ATTR_SIZE) != 0) { inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); @@ -1114,6 +1129,9 @@ int nfs_refresh_inode(struct inode *inod struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; int data_unstable; + uid_t uid; + gid_t gid; + xid_t xid = 0; /* Do we hold a delegation? 
*/ if (nfs_have_delegation(inode, FMODE_READ)) @@ -1157,10 +1175,15 @@ int nfs_refresh_inode(struct inode *inod } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) nfsi->flags |= NFS_INO_INVALID_ATTR; + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) - || inode->i_uid != fattr->uid - || inode->i_gid != fattr->gid) + || inode->i_uid != uid + || inode->i_gid != gid + || inode->i_xid != xid) nfsi->flags |= NFS_INO_INVALID_ATTR; /* Has the link count changed? */ @@ -1194,6 +1217,9 @@ static int nfs_update_inode(struct inode unsigned int invalid = 0; loff_t cur_isize; int data_unstable; + uid_t uid; + gid_t gid; + xid_t xid = 0; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -1276,9 +1302,14 @@ static int nfs_update_inode(struct inode memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || - inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) { + inode->i_uid != uid || + inode->i_gid != gid || + inode->i_xid != xid) { struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; if (*cred) { put_rpccred(*cred); @@ -1289,8 +1320,9 @@ static int nfs_update_inode(struct inode inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_xid = xid; if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* diff -NurpP --minimal 
linux-2.6.10-rc1/fs/nfs/nfs3xdr.c linux-2.6.10-rc1-vs1.9.3/fs/nfs/nfs3xdr.c --- linux-2.6.10-rc1/fs/nfs/nfs3xdr.c 2004-10-18 23:55:36.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/nfs/nfs3xdr.c 2004-10-31 00:41:27.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_XDR @@ -173,7 +174,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt } static inline u32 * -xdr_encode_sattr(u32 *p, struct iattr *attr) +xdr_encode_sattr(u32 *p, struct iattr *attr, int tagxid) { if (attr->ia_valid & ATTR_MODE) { *p++ = xdr_one; @@ -181,15 +182,17 @@ xdr_encode_sattr(u32 *p, struct iattr *a } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_UID) { + if (attr->ia_valid & ATTR_UID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_uid); + *p++ = htonl(XIDINO_UID(tagxid, attr->ia_uid, attr->ia_xid)); } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_GID) { + if (attr->ia_valid & ATTR_GID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_gid); + *p++ = htonl(XIDINO_GID(tagxid, attr->ia_gid, attr->ia_xid)); } else { *p++ = xdr_zero; } @@ -274,7 +277,8 @@ static int nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); *p++ = htonl(args->guard); if (args->guard) p = xdr_encode_time3(p, &args->guardtime); @@ -365,7 +369,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req *p++ = args->verifier[0]; *p++ = args->verifier[1]; } else - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -379,7 +384,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req, { p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); + 
p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -392,7 +398,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *re { p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); p = xdr_encode_array(p, args->topath, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -407,7 +414,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); *p++ = htonl(args->type); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); if (args->type == NF3CHR || args->type == NF3BLK) { *p++ = htonl(MAJOR(args->rdev)); *p++ = htonl(MINOR(args->rdev)); diff -NurpP --minimal linux-2.6.10-rc1/fs/nfs/nfsroot.c linux-2.6.10-rc1-vs1.9.3/fs/nfs/nfsroot.c --- linux-2.6.10-rc1/fs/nfs/nfsroot.c 2004-10-18 23:54:40.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/nfs/nfsroot.c 2004-10-31 00:41:27.000000000 +0200 @@ -87,6 +87,7 @@ #include #include #include +#include /* Define this to allow debugging output */ #undef NFSROOT_DEBUG @@ -124,7 +125,7 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, - Opt_broken_suid, + Opt_broken_suid, Opt_tagxid, /* Error token */ Opt_err }; @@ -160,6 +161,7 @@ static match_table_t __initdata tokens = {Opt_tcp, "proto=tcp"}, {Opt_tcp, "tcp"}, {Opt_broken_suid, "broken_suid"}, + {Opt_tagxid, "tagxid"}, {Opt_err, NULL} }; @@ -271,6 +273,9 @@ static int __init root_nfs_parse(char *n case Opt_broken_suid: nfs_data.flags |= NFS_MOUNT_BROKEN_SUID; break; + case Opt_tagxid: + nfs_data.flags |= NFS_MOUNT_TAGXID; + break; default : return 0; } @@ -306,7 +311,7 @@ 
static int __init root_nfs_name(char *na /* Override them by options set on kernel command-line */ root_nfs_parse(name, buf); - cp = system_utsname.nodename; + cp = vx_new_uts(nodename); if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); return -1; diff -NurpP --minimal linux-2.6.10-rc1/fs/nfsd/auth.c linux-2.6.10-rc1-vs1.9.3/fs/nfsd/auth.c --- linux-2.6.10-rc1/fs/nfsd/auth.c 2004-10-18 23:55:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/nfsd/auth.c 2004-10-31 00:41:27.000000000 +0200 @@ -9,6 +9,7 @@ #include #include #include +#include #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) @@ -42,18 +43,20 @@ int nfsd_setuser(struct svc_rqst *rqstp, } if (cred->cr_uid != (uid_t) -1) - current->fsuid = cred->cr_uid; + current->fsuid = INOXID_UID(1, cred->cr_uid, cred->cr_gid); else current->fsuid = exp->ex_anon_uid; if (cred->cr_gid != (gid_t) -1) - current->fsgid = cred->cr_gid; + current->fsgid = INOXID_GID(1, cred->cr_uid, cred->cr_gid); else current->fsgid = exp->ex_anon_gid; + current->xid = INOXID_XID(1, cred->cr_uid, cred->cr_gid, 0); + if (!cred->cr_group_info) return -ENOMEM; ret = set_current_groups(cred->cr_group_info); - if ((cred->cr_uid)) { + if (INOXID_UID(1, cred->cr_uid, cred->cr_gid)) { cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; } else { cap_t(current->cap_effective) |= (CAP_NFSD_MASK & diff -NurpP --minimal linux-2.6.10-rc1/fs/nfsd/nfs3xdr.c linux-2.6.10-rc1-vs1.9.3/fs/nfsd/nfs3xdr.c --- linux-2.6.10-rc1/fs/nfsd/nfs3xdr.c 2004-10-18 23:53:05.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/nfsd/nfs3xdr.c 2004-10-31 00:41:27.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -121,6 +122,8 @@ static inline u32 * decode_sattr3(u32 *p, struct iattr *iap) { u32 tmp; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -130,12 +133,15 @@ decode_sattr3(u32 *p, struct iattr *iap) } if (*p++) { 
iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + uid = ntohl(*p++); } if (*p++) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + gid = ntohl(*p++); } + iap->ia_uid = INOXID_UID(1, uid, gid); + iap->ia_gid = INOXID_GID(1, uid, gid); + iap->ia_xid = INOXID_XID(1, uid, gid, 0); if (*p++) { u64 newsize; @@ -176,8 +182,10 @@ encode_fattr3(struct svc_rqst *rqstp, u3 *p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]); *p++ = htonl((u32) stat.mode); *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), stat.uid, stat.xid))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), stat.gid, stat.xid))); if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { diff -NurpP --minimal linux-2.6.10-rc1/fs/nfsd/nfs4xdr.c linux-2.6.10-rc1-vs1.9.3/fs/nfsd/nfs4xdr.c --- linux-2.6.10-rc1/fs/nfsd/nfs4xdr.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/nfsd/nfs4xdr.c 2004-10-31 00:41:27.000000000 +0200 @@ -57,6 +57,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -1660,14 +1661,18 @@ out_acl: WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + status = nfsd4_encode_user(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), + stat.uid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + status = nfsd4_encode_group(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), + stat.gid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) diff -NurpP --minimal linux-2.6.10-rc1/fs/nfsd/nfsxdr.c linux-2.6.10-rc1-vs1.9.3/fs/nfsd/nfsxdr.c --- 
linux-2.6.10-rc1/fs/nfsd/nfsxdr.c 2004-10-18 23:53:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/nfsd/nfsxdr.c 2004-10-31 00:41:27.000000000 +0200 @@ -15,6 +15,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -96,6 +97,8 @@ static inline u32 * decode_sattr(u32 *p, struct iattr *iap) { u32 tmp, tmp1; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -109,12 +112,15 @@ decode_sattr(u32 *p, struct iattr *iap) } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + uid = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + gid = tmp; } + iap->ia_uid = INOXID_UID(1, uid, gid); + iap->ia_gid = INOXID_GID(1, uid, gid); + iap->ia_xid = INOXID_XID(1, uid, gid, 0); if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; iap->ia_size = tmp; @@ -160,8 +166,10 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat.mode); *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), stat.uid, stat.xid))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), stat.gid, stat.xid))); if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); diff -NurpP --minimal linux-2.6.10-rc1/fs/open.c linux-2.6.10-rc1-vs1.9.3/fs/open.c --- linux-2.6.10-rc1/fs/open.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/open.c 2004-10-31 00:41:27.000000000 +0200 @@ -23,6 +23,10 @@ #include #include #include +#include +#include +#include +#include #include @@ -41,6 +45,8 @@ int vfs_statfs(struct super_block *sb, s if (retval == 0 && buf->f_frsize == 0) buf->f_frsize = buf->f_bsize; } + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + vx_vsi_statfs(sb, buf); } return retval; } @@ -679,14 +685,15 @@ static int 
chown_common(struct dentry * error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; + newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; - newattrs.ia_uid = user; + newattrs.ia_uid = vx_map_uid(user); } if (group != (gid_t) -1) { newattrs.ia_valid |= ATTR_GID; - newattrs.ia_gid = group; + newattrs.ia_gid = vx_map_gid(group); } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; @@ -881,6 +888,7 @@ repeat: FD_SET(fd, files->open_fds); FD_CLR(fd, files->close_on_exec); files->next_fd = fd + 1; + // vx_openfd_inc(fd); #if 1 /* Sanity check */ if (files->fd[fd] != NULL) { @@ -902,6 +910,7 @@ static inline void __put_unused_fd(struc __FD_CLR(fd, files->open_fds); if (fd < files->next_fd) files->next_fd = fd; + // vx_openfd_dec(fd); } void fastcall put_unused_fd(unsigned int fd) diff -NurpP --minimal linux-2.6.10-rc1/fs/proc/array.c linux-2.6.10-rc1-vs1.9.3/fs/proc/array.c --- linux-2.6.10-rc1/fs/proc/array.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/proc/array.c 2004-10-31 00:41:27.000000000 +0200 @@ -73,6 +73,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -132,7 +136,8 @@ static const char *task_state_array[] = "T (stopped)", /* 4 */ "T (tracing stop)", /* 8 */ "Z (zombie)", /* 16 */ - "X (dead)" /* 32 */ + "X (dead)", /* 32 */ + "H (on hold)" /* 64 */ }; static inline const char * get_task_state(struct task_struct *tsk) @@ -141,7 +146,8 @@ static inline const char * get_task_stat TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE | TASK_STOPPED | - TASK_TRACED)) | + TASK_TRACED | + TASK_ONHOLD)) | (tsk->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)); const char **p = &task_state_array[0]; @@ -157,8 +163,13 @@ static inline char * task_state(struct t { struct group_info *group_info; int g; + pid_t pid, ptgid, tppid, tgid; read_lock(&tasklist_lock); + tgid = vx_map_tgid(p->tgid); + pid = vx_map_pid(p->pid); + ptgid = 
vx_map_pid(p->group_leader->real_parent->tgid); + tppid = vx_map_pid(p->parent->pid); buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -170,9 +181,8 @@ static inline char * task_state(struct t "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), - p->tgid, - p->pid, p->pid ? p->group_leader->real_parent->tgid : 0, - p->pid && p->ptrace ? p->parent->pid : 0, + tgid, pid, (pid > 1) ? ptgid : 0, + p->pid && p->ptrace ? tppid : 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); @@ -283,6 +293,10 @@ static inline char *task_cap(struct task int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; +#ifdef CONFIG_VSERVER_LEGACY + struct vx_info *vxi; + struct nx_info *nxi; +#endif struct mm_struct *mm = get_task_mm(task); buffer = task_name(task, buffer); @@ -294,6 +308,39 @@ int proc_pid_status(struct task_struct * } buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); + +#ifdef CONFIG_VSERVER_LEGACY + buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task)); + vxi = task_get_vx_info(task); + if (vxi) { + buffer += sprintf (buffer,"ctxflags: %08llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,vxi->vx_initpid); + } else { + buffer += sprintf (buffer,"ctxflags: none\n"); + buffer += sprintf (buffer,"initpid: none\n"); + } + put_vx_info(vxi); + nxi = task_get_nx_info(task); + if (nxi) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; i<nxi->nbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,nxi->ipv4[i] + ,nxi->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" + ,nxi->v4_bcast); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); + } + put_nx_info(nxi); +#endif #if defined(CONFIG_ARCH_S390) buffer = task_show_regs(task, buffer); #endif @@ -304,11 +351,12 @@ static int 
do_task_stat(struct task_stru { unsigned long vsize, eip, esp, wchan = ~0UL; long priority, nice; + unsigned long long bias_uptime = 0; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; char state; int res; - pid_t ppid, pgid = -1, sid = -1; + pid_t pid, ppid, pgid = -1, sid = -1; int num_threads = 0; struct mm_struct *mm; unsigned long long start_time; @@ -370,7 +418,15 @@ static int do_task_stat(struct task_stru stime += task->signal->stime; } } - ppid = task->pid ? task->group_leader->real_parent->tgid : 0; + pid = vx_info_map_pid(task->vx_info, task->pid); + ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info, + task->group_leader->real_parent->tgid); + pgid = vx_info_map_pid(task->vx_info, pgid); + + if (task_vx_flags(task, VXF_VIRT_UPTIME, 0)) { + bias_uptime = task->vx_info->cvirt.bias_uptime.tv_sec * NSEC_PER_SEC + + task->vx_info->cvirt.bias_uptime.tv_nsec; + } read_unlock(&tasklist_lock); if (!whole || num_threads<2) @@ -392,12 +448,12 @@ static int do_task_stat(struct task_stru start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC + task->start_time.tv_nsec; /* convert nsec -> ticks */ - start_time = nsec_to_clock_t(start_time); + start_time = nsec_to_clock_t(start_time - bias_uptime); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", - task->pid, + pid, tcomm, state, ppid, diff -NurpP --minimal linux-2.6.10-rc1/fs/proc/base.c linux-2.6.10-rc1-vs1.9.3/fs/proc/base.c --- linux-2.6.10-rc1/fs/proc/base.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/proc/base.c 2004-11-04 19:52:50.771723011 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include /* * For hysterical raisins we keep the same inumbers as in the old procfs. 
@@ -70,6 +71,8 @@ enum pid_directory_inos { PROC_TGID_ATTR_EXEC, PROC_TGID_ATTR_FSCREATE, #endif + PROC_TGID_VX_INFO, + PROC_TGID_IP_INFO, PROC_TGID_FD_DIR, PROC_TID_INO, PROC_TID_STATUS, @@ -96,6 +99,8 @@ enum pid_directory_inos { PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, #endif + PROC_TID_VX_INFO, + PROC_TID_IP_INFO, PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -132,6 +137,8 @@ static struct pid_entry tgid_base_stuff[ #ifdef CONFIG_SCHEDSTATS E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), #endif + E(PROC_TGID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TGID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), {0,0,NULL,0} }; static struct pid_entry tid_base_stuff[] = { @@ -157,6 +164,8 @@ static struct pid_entry tid_base_stuff[] #ifdef CONFIG_SCHEDSTATS E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), #endif + E(PROC_TID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), {0,0,NULL,0} }; @@ -966,6 +975,7 @@ static struct inode *proc_pid_make_inode inode->i_uid = task->euid; inode->i_gid = task->egid; } + inode->i_xid = vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -991,6 +1001,11 @@ static int pid_revalidate(struct dentry { struct inode *inode = dentry->d_inode; struct task_struct *task = proc_task(inode); + + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out_drop; + /* discard wrong fakeinit */ + if (pid_alive(task)) { if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { inode->i_uid = task->euid; @@ -1002,6 +1017,7 @@ static int pid_revalidate(struct dentry security_task_to_inode(task, inode); return 1; } +out_drop: d_drop(dentry); return 0; } @@ -1382,6 +1398,16 @@ static struct dentry *proc_pident_lookup ei->op.proc_read = proc_pid_schedstat; break; #endif + case PROC_TID_VX_INFO: + case PROC_TGID_VX_INFO: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_vx_info; + break; + case PROC_TID_IP_INFO: + case 
PROC_TGID_IP_INFO: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_nx_info; + break; default: printk("procfs: impossible type (%d)",p->type); iput(inode); @@ -1474,14 +1500,14 @@ static int proc_self_readlink(struct den int buflen) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_pid(current->tgid)); return vfs_readlink(dentry,buffer,buflen,tmp); } static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_pid(current->tgid)); return vfs_follow_link(nd,tmp); } @@ -1576,13 +1602,13 @@ struct dentry *proc_pid_lookup(struct in if (!task) goto out; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + if (!inode) + goto out_drop_task; - if (!inode) { - put_task_struct(task); - goto out; - } inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; @@ -1607,6 +1633,8 @@ struct dentry *proc_pid_lookup(struct in goto out; } return NULL; +out_drop_task: + put_task_struct(task); out: return ERR_PTR(-ENOENT); } @@ -1622,6 +1650,8 @@ static struct dentry *proc_task_lookup(s tid = name_to_int(dentry); if (tid == ~0U) goto out; + if (vx_current_initpid(tid)) + goto out; read_lock(&tasklist_lock); task = find_task_by_pid(tid); @@ -1633,11 +1663,13 @@ static struct dentry *proc_task_lookup(s if (leader->tgid != task->tgid) goto out_drop_task; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); - + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); if (!inode) goto out_drop_task; + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; 
@@ -1673,7 +1705,7 @@ static int get_tgid_list(int index, unsi read_lock(&tasklist_lock); p = NULL; if (version) { - p = find_task_by_pid(version); + p = find_task_by_real_pid(version); if (p && !thread_group_leader(p)) p = NULL; } @@ -1685,11 +1717,14 @@ static int get_tgid_list(int index, unsi for ( ; p != &init_task; p = next_task(p)) { int tgid = p->pid; + if (!pid_alive(p)) continue; + if (!vx_check(vx_task_xid(p), VX_WATCH|VX_IDENT)) + continue; if (--index >= 0) continue; - tgids[nr_tgids] = tgid; + tgids[nr_tgids] = vx_map_tgid(tgid); nr_tgids++; if (nr_tgids >= PROC_MAXPIDS) break; @@ -1719,9 +1754,11 @@ static int get_tid_list(int index, unsig if (pid_alive(task)) do { int tid = task->pid; + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + continue; if (--index >= 0) continue; - tids[nr_tids] = tid; + tids[nr_tids] = vx_map_pid(tid); nr_tids++; if (nr_tids >= PROC_MAXPIDS) break; @@ -1797,11 +1834,14 @@ static int proc_task_readdir(struct file unsigned int nr_tids, i; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct task_struct *task = proc_task(inode); int retval = -ENOENT; ino_t ino; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ - if (!pid_alive(proc_task(inode))) + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out; + if (!pid_alive(task)) goto out; retval = 0; diff -NurpP --minimal linux-2.6.10-rc1/fs/proc/generic.c linux-2.6.10-rc1-vs1.9.3/fs/proc/generic.c --- linux-2.6.10-rc1/fs/proc/generic.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/proc/generic.c 2004-10-31 00:41:27.000000000 +0200 @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include static ssize_t proc_file_read(struct file *file, char __user *buf, @@ -351,8 +353,15 @@ static int proc_delete_dentry(struct den return 1; } +static int proc_revalidate_dentry(struct dentry *de, struct nameidata *nd) +{ + /* maybe add a check if it's really necessary? 
*/ + return 0; +} + static struct dentry_operations proc_dentry_operations = { + .d_revalidate = proc_revalidate_dentry, .d_delete = proc_delete_dentry, }; @@ -372,11 +381,14 @@ struct dentry *proc_lookup(struct inode for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; + if (!vx_hide_check(0, de->vx_flags)) + continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); + inode->i_xid = vx_current_xid(); break; } } @@ -448,9 +460,12 @@ int proc_readdir(struct file * filp, } do { + if (!vx_hide_check(0, de->vx_flags)) + goto skip; if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; + skip: filp->f_pos++; de = de->next; } while (de); @@ -562,6 +577,7 @@ static struct proc_dir_entry *proc_creat ent->namelen = len; ent->mode = mode; ent->nlink = nlink; + ent->vx_flags = IATTR_PROC_DEFAULT; out: return ent; } @@ -582,7 +598,8 @@ struct proc_dir_entry *proc_symlink(cons kfree(ent->data); kfree(ent); ent = NULL; - } + } else + ent->vx_flags = IATTR_PROC_SYMLINK; } else { kfree(ent); ent = NULL; diff -NurpP --minimal linux-2.6.10-rc1/fs/proc/inode.c linux-2.6.10-rc1-vs1.9.3/fs/proc/inode.c --- linux-2.6.10-rc1/fs/proc/inode.c 2004-10-18 23:55:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/proc/inode.c 2004-10-31 00:41:27.000000000 +0200 @@ -211,6 +211,8 @@ struct inode *proc_get_inode(struct supe inode->i_uid = de->uid; inode->i_gid = de->gid; } + if (de->vx_flags) + PROC_I(inode)->vx_flags = de->vx_flags; if (de->size) inode->i_size = de->size; if (de->nlink) diff -NurpP --minimal linux-2.6.10-rc1/fs/proc/proc_misc.c linux-2.6.10-rc1-vs1.9.3/fs/proc/proc_misc.c --- linux-2.6.10-rc1/fs/proc/proc_misc.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/proc/proc_misc.c 2004-10-31 00:41:27.000000000 +0200 @@ -44,6 +44,9 @@ #include #include #include +#include 
+#include + #include #include #include @@ -81,17 +84,32 @@ static int proc_calc_metrics(char *page, static int loadavg_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { + unsigned int running, threads; int a, b, c; int len; - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + if (vx_flags(VXF_VIRT_LOAD, 0)) { + struct vx_info *vxi = current->vx_info; + + a = vxi->cvirt.load[0] + (FIXED_1/200); + b = vxi->cvirt.load[1] + (FIXED_1/200); + c = vxi->cvirt.load[2] + (FIXED_1/200); + + running = atomic_read(&vxi->cvirt.nr_running); + threads = atomic_read(&vxi->cvirt.nr_threads); + } else { + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + + running = nr_running(); + threads = nr_threads; + } + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, last_pid); + running, threads, last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -140,6 +158,9 @@ static int uptime_read_proc(char *page, do_posix_clock_monotonic_gettime(&uptime); jiffies_to_timespec(idle_jiffies, &idle); + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&uptime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), diff -NurpP --minimal linux-2.6.10-rc1/fs/proc/root.c linux-2.6.10-rc1-vs1.9.3/fs/proc/root.c --- linux-2.6.10-rc1/fs/proc/root.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/proc/root.c 2004-10-31 00:41:27.000000000 +0200 @@ -23,6 +23,9 @@ struct proc_dir_entry *proc_net, *proc_n #ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; #endif +struct proc_dir_entry *proc_virtual; + +extern void proc_vx_init(void); static struct super_block *proc_get_sb(struct 
file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -77,6 +80,7 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); + proc_vx_init(); } static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) diff -NurpP --minimal linux-2.6.10-rc1/fs/reiserfs/inode.c linux-2.6.10-rc1-vs1.9.3/fs/reiserfs/inode.c --- linux-2.6.10-rc1/fs/reiserfs/inode.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/reiserfs/inode.c 2004-10-31 00:41:27.000000000 +0200 @@ -17,6 +17,7 @@ #include #include #include +#include extern int reiserfs_default_io_size; /* default io size devuned in super.c */ @@ -1049,6 +1050,8 @@ static void init_inode (struct inode * i struct buffer_head * bh; struct item_head * ih; __u32 rdev; + uid_t uid; + gid_t gid; //int version = ITEM_VERSION_1; bh = PATH_PLAST_BUFFER (path); @@ -1072,12 +1075,13 @@ static void init_inode (struct inode * i struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih); unsigned long blocks; + uid = sd_v1_uid(sd); + gid = sd_v1_gid(sd); + set_inode_item_key_version (inode, KEY_FORMAT_3_5); set_inode_sd_version (inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); inode->i_nlink = sd_v1_nlink(sd); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); inode->i_size = sd_v1_size(sd); inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); @@ -1117,11 +1121,12 @@ static void init_inode (struct inode * i // (directories and symlinks) struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih); + uid = sd_v2_uid(sd); + gid = sd_v2_gid(sd); + inode->i_mode = sd_v2_mode(sd); inode->i_nlink = sd_v2_nlink(sd); - inode->i_uid = sd_v2_uid(sd); inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); inode->i_ctime.tv_sec = sd_v2_ctime(sd); @@ -1148,6 +1153,9 @@ static void 
init_inode (struct inode * i REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd ); sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode ); } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, 0); pathrelse (path); if (S_ISREG (inode->i_mode)) { @@ -1172,13 +1180,15 @@ static void init_inode (struct inode * i static void inode2sd (void * sd, struct inode * inode, loff_t size) { struct stat_data * sd_v2 = (struct stat_data *)sd; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); __u16 flags; + set_sd_v2_uid(sd_v2, uid ); + set_sd_v2_gid(sd_v2, gid ); set_sd_v2_mode(sd_v2, inode->i_mode ); set_sd_v2_nlink(sd_v2, inode->i_nlink ); - set_sd_v2_uid(sd_v2, inode->i_uid ); set_sd_v2_size(sd_v2, size ); - set_sd_v2_gid(sd_v2, inode->i_gid ); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec ); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec ); set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec ); @@ -2552,6 +2562,14 @@ void sd_attrs_to_i_attrs( __u16 sd_attrs inode -> i_flags |= S_IMMUTABLE; else inode -> i_flags &= ~S_IMMUTABLE; + if( sd_attrs & REISERFS_IUNLINK_FL ) + inode -> i_flags |= S_IUNLINK; + else + inode -> i_flags &= ~S_IUNLINK; + if( sd_attrs & REISERFS_BARRIER_FL ) + inode -> i_flags |= S_BARRIER; + else + inode -> i_flags &= ~S_BARRIER; if( sd_attrs & REISERFS_APPEND_FL ) inode -> i_flags |= S_APPEND; else @@ -2574,6 +2592,14 @@ void i_attrs_to_sd_attrs( struct inode * *sd_attrs |= REISERFS_IMMUTABLE_FL; else *sd_attrs &= ~REISERFS_IMMUTABLE_FL; + if( inode -> i_flags & S_IUNLINK ) + *sd_attrs |= REISERFS_IUNLINK_FL; + else + *sd_attrs &= ~REISERFS_IUNLINK_FL; + if( inode -> i_flags & S_BARRIER ) + *sd_attrs |= REISERFS_BARRIER_FL; + else + *sd_attrs &= ~REISERFS_BARRIER_FL; if( inode -> i_flags & S_SYNC ) *sd_attrs |= REISERFS_SYNC_FL; else @@ -2746,6 +2772,27 @@ static ssize_t 
reiserfs_direct_IO(int rw offset, nr_segs, reiserfs_get_blocks_direct_io, NULL); } +int reiserfs_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = REISERFS_I(inode)->i_flags; + newflags = oldflags & ~(REISERFS_IMMUTABLE_FL | + REISERFS_IUNLINK_FL | REISERFS_BARRIER_FL); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= REISERFS_IMMUTABLE_FL; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= REISERFS_IUNLINK_FL; + if (flags & ATTR_FLAG_BARRIER) + newflags |= REISERFS_BARRIER_FL; + + if (oldflags ^ newflags) { + REISERFS_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + } + return 0; +} + int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode ; int error ; @@ -2789,6 +2836,10 @@ int reiserfs_setattr(struct dentry *dent } error = inode_change_ok(inode, attr) ; + + if (!error && attr->ia_valid & ATTR_ATTR_FLAG) + reiserfs_setattr_flags(inode, attr->ia_attr_flags); + if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { diff -NurpP --minimal linux-2.6.10-rc1/fs/reiserfs/ioctl.c linux-2.6.10-rc1-vs1.9.3/fs/reiserfs/ioctl.c --- linux-2.6.10-rc1/fs/reiserfs/ioctl.c 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/reiserfs/ioctl.c 2004-10-31 00:41:27.000000000 +0200 @@ -20,7 +20,7 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) { - unsigned int flags; + unsigned int flags, oldflags; switch (cmd) { case REISERFS_IOC_UNPACK: @@ -36,6 +36,7 @@ int reiserfs_ioctl (struct inode * inode case REISERFS_IOC_GETFLAGS: flags = REISERFS_I(inode) -> i_attrs; i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags ); + flags &= REISERFS_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case REISERFS_IOC_SETFLAGS: { if (IS_RDONLY(inode)) @@ -47,8 +48,12 @@ int reiserfs_ioctl (struct inode * inode if (get_user(flags, (int 
__user *) arg)) return -EFAULT; - if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) && - !capable( CAP_LINUX_IMMUTABLE ) ) + oldflags = REISERFS_I(inode) -> i_attrs; + if ( ( (oldflags & REISERFS_IMMUTABLE_FL) || + ( (flags ^ oldflags) & + (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | + REISERFS_APPEND_FL) ) ) && + !capable( CAP_LINUX_IMMUTABLE ) ) return -EPERM; if( ( flags & REISERFS_NOTAIL_FL ) && @@ -59,6 +64,9 @@ int reiserfs_ioctl (struct inode * inode if( result ) return result; } + + flags = flags & REISERFS_FL_USER_MODIFYABLE; + flags |= oldflags & ~REISERFS_FL_USER_MODIFYABLE; sd_attrs_to_i_attrs( flags, inode ); REISERFS_I(inode) -> i_attrs = flags; inode->i_ctime = CURRENT_TIME; diff -NurpP --minimal linux-2.6.10-rc1/fs/reiserfs/super.c linux-2.6.10-rc1-vs1.9.3/fs/reiserfs/super.c --- linux-2.6.10-rc1/fs/reiserfs/super.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/reiserfs/super.c 2004-10-31 00:41:27.000000000 +0200 @@ -757,6 +757,7 @@ static int reiserfs_parse_options (struc {"user_xattr", .setmask = 1<nlink = inode->i_nlink; stat->uid = inode->i_uid; stat->gid = inode->i_gid; + stat->xid = inode->i_xid; stat->rdev = inode->i_rdev; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; diff -NurpP --minimal linux-2.6.10-rc1/fs/super.c linux-2.6.10-rc1-vs1.9.3/fs/super.c --- linux-2.6.10-rc1/fs/super.c 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/super.c 2004-10-31 00:41:27.000000000 +0200 @@ -37,6 +37,8 @@ #include /* for the emergency remount stuff */ #include #include +#include +#include #include @@ -824,6 +826,13 @@ do_kern_mount(const char *fstype, int fl sb = type->get_sb(type, flags, name, data); if (IS_ERR(sb)) goto out_free_secdata; + + error = -EPERM; + if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto out_sb; + error = security_sb_kern_mount(sb, secdata); if (error) 
goto out_sb; diff -NurpP --minimal linux-2.6.10-rc1/fs/sysfs/mount.c linux-2.6.10-rc1-vs1.9.3/fs/sysfs/mount.c --- linux-2.6.10-rc1/fs/sysfs/mount.c 2004-10-18 23:55:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/sysfs/mount.c 2004-10-31 00:41:27.000000000 +0200 @@ -11,8 +11,6 @@ #include "sysfs.h" -/* Random magic number */ -#define SYSFS_MAGIC 0x62656572 struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; @@ -29,7 +27,7 @@ static int sysfs_fill_super(struct super sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; + sb->s_magic = SYSFS_SUPER_MAGIC; sb->s_op = &sysfs_ops; sysfs_sb = sb; diff -NurpP --minimal linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_ioctl.c --- linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_ioctl.c 2004-10-18 23:55:37.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_ioctl.c 2004-10-31 00:41:27.000000000 +0200 @@ -1008,6 +1008,8 @@ xfs_ioc_fsgeometry( #define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ #define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ #define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ +#define LINUX_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define LINUX_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */ STATIC unsigned int xfs_merge_ioc_xflags( @@ -1048,6 +1050,10 @@ xfs_di2lxflags( if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= LINUX_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= LINUX_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= LINUX_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= LINUX_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) diff -NurpP --minimal linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_iops.c --- linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_iops.c 2004-10-18 23:53:46.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_iops.c 2004-10-31 
00:41:27.000000000 +0200 @@ -495,6 +495,28 @@ linvfs_getattr( } STATIC int +linvfs_setattr_flags( + vattr_t *vap, + unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = vap->va_xflags; + newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE | + XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= XFS_XFLAG_IMMUTABLE; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= XFS_XFLAG_IUNLINK; + if (flags & ATTR_FLAG_BARRIER) + newflags |= XFS_XFLAG_BARRIER; + + if (oldflags ^ newflags) + vap->va_xflags = newflags; + return 0; +} + +STATIC int linvfs_setattr( struct dentry *dentry, struct iattr *attr) @@ -545,6 +567,11 @@ linvfs_setattr( flags |= ATTR_NONBLOCK; #endif + if (ia_valid & ATTR_ATTR_FLAG) { + vattr.va_mask |= XFS_AT_XFLAGS; + linvfs_setattr_flags(&vattr, attr->ia_attr_flags); + } + VOP_SETATTR(vp, &vattr, flags, NULL, error); if (error) return -error; diff -NurpP --minimal linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_super.c linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_super.c --- linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_super.c 2004-10-18 23:55:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_super.c 2004-10-31 00:41:27.000000000 +0200 @@ -192,6 +192,14 @@ xfs_revalidate_inode( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; else diff -NurpP --minimal linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_vnode.c linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_vnode.c --- linux-2.6.10-rc1/fs/xfs/linux-2.6/xfs_vnode.c 2004-10-18 23:53:46.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/xfs/linux-2.6/xfs_vnode.c 2004-10-31 00:41:27.000000000 +0200 @@ -212,6 +212,14 @@ vn_revalidate_core( inode->i_flags |= 
S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else diff -NurpP --minimal linux-2.6.10-rc1/fs/xfs/xfs_dinode.h linux-2.6.10-rc1-vs1.9.3/fs/xfs/xfs_dinode.h --- linux-2.6.10-rc1/fs/xfs/xfs_dinode.h 2004-10-18 23:54:37.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/xfs/xfs_dinode.h 2004-10-31 00:41:27.000000000 +0200 @@ -459,6 +459,9 @@ xfs_dinode_t *xfs_buf_to_dinode(struct x #define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */ #define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */ #define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ +#define XFS_DIFLAG_BARRIER_BIT 12 /* chroot() barrier */ +#define XFS_DIFLAG_IUNLINK_BIT 13 /* inode has iunlink */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -470,6 +473,9 @@ xfs_dinode_t *xfs_buf_to_dinode(struct x #define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT) #define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT) #define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) +#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT) +#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT) + #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ diff -NurpP --minimal linux-2.6.10-rc1/fs/xfs/xfs_fs.h linux-2.6.10-rc1-vs1.9.3/fs/xfs/xfs_fs.h --- linux-2.6.10-rc1/fs/xfs/xfs_fs.h 2004-10-18 23:53:43.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/xfs/xfs_fs.h 2004-10-31 00:41:27.000000000 +0200 @@ -79,6 +79,8 @@ struct fsxattr { #define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ #define 
XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ #define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ +#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define XFS_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* diff -NurpP --minimal linux-2.6.10-rc1/fs/xfs/xfs_vnodeops.c linux-2.6.10-rc1-vs1.9.3/fs/xfs/xfs_vnodeops.c --- linux-2.6.10-rc1/fs/xfs/xfs_vnodeops.c 2004-10-18 23:55:36.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/fs/xfs/xfs_vnodeops.c 2004-10-31 00:41:27.000000000 +0200 @@ -832,6 +832,10 @@ xfs_setattr( di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + di_flags |= XFS_DIFLAG_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + di_flags |= XFS_DIFLAG_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) di_flags |= XFS_DIFLAG_APPEND; if (vap->va_xflags & XFS_XFLAG_SYNC) diff -NurpP --minimal linux-2.6.10-rc1/include/asm-alpha/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-alpha/unistd.h --- linux-2.6.10-rc1/include/asm-alpha/unistd.h 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-alpha/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -233,6 +233,7 @@ #define __NR_osf_memcntl 260 /* not implemented */ #define __NR_osf_fdatasync 261 /* not implemented */ +#define __NR_vserver 273 /* * Linux-specific system calls begin at 300 diff -NurpP --minimal linux-2.6.10-rc1/include/asm-arm/tlb.h linux-2.6.10-rc1-vs1.9.3/include/asm-arm/tlb.h --- linux-2.6.10-rc1/include/asm-arm/tlb.h 2004-10-18 23:53:07.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-arm/tlb.h 2004-10-31 00:41:27.000000000 +0200 @@ -58,7 +58,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); if (freed) { flush_tlb_mm(mm); diff -NurpP 
--minimal linux-2.6.10-rc1/include/asm-arm26/tlb.h linux-2.6.10-rc1-vs1.9.3/include/asm-arm26/tlb.h --- linux-2.6.10-rc1/include/asm-arm26/tlb.h 2004-10-18 23:55:29.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-arm26/tlb.h 2004-10-31 00:41:27.000000000 +0200 @@ -39,7 +39,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); if (freed) { flush_tlb_mm(mm); diff -NurpP --minimal linux-2.6.10-rc1/include/asm-generic/tlb.h linux-2.6.10-rc1-vs1.9.3/include/asm-generic/tlb.h --- linux-2.6.10-rc1/include/asm-generic/tlb.h 2004-10-18 23:53:05.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-generic/tlb.h 2004-10-31 00:41:27.000000000 +0200 @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -92,7 +93,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); tlb_flush_mmu(tlb, start, end); /* keep the page table cache within bounds */ diff -NurpP --minimal linux-2.6.10-rc1/include/asm-i386/elf.h linux-2.6.10-rc1-vs1.9.3/include/asm-i386/elf.h --- linux-2.6.10-rc1/include/asm-i386/elf.h 2004-10-18 23:54:39.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-i386/elf.h 2004-11-04 19:52:50.771723011 +0100 @@ -70,7 +70,7 @@ typedef struct user_fxsr_struct elf_fpxr the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. 
*/ -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#define ELF_ET_DYN_BASE ((TASK_UNMAPPED_BASE) * 2) /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is now struct_user_regs, they are different) */ diff -NurpP --minimal linux-2.6.10-rc1/include/asm-i386/page.h linux-2.6.10-rc1-vs1.9.3/include/asm-i386/page.h --- linux-2.6.10-rc1/include/asm-i386/page.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-i386/page.h 2004-11-04 19:52:50.784720956 +0100 @@ -119,16 +119,23 @@ extern int sysctl_legacy_va_layout; #endif /* __ASSEMBLY__ */ -#ifdef __ASSEMBLY__ +#if defined(CONFIG_SPLIT_3GB) #define __PAGE_OFFSET (0xC0000000) -#else -#define __PAGE_OFFSET (0xC0000000UL) +#elif defined(CONFIG_SPLIT_25GB) +#define __PAGE_OFFSET (0xA0000000) +#elif defined(CONFIG_SPLIT_2GB) +#define __PAGE_OFFSET (0x80000000) +#elif defined(CONFIG_SPLIT_15GB) +#define __PAGE_OFFSET (0x60000000) +#elif defined(CONFIG_SPLIT_1GB) +#define __PAGE_OFFSET (0x40000000) #endif - #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) -#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) +#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) +#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) + #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) diff -NurpP --minimal linux-2.6.10-rc1/include/asm-i386/processor.h linux-2.6.10-rc1-vs1.9.3/include/asm-i386/processor.h --- linux-2.6.10-rc1/include/asm-i386/processor.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-i386/processor.h 2004-11-04 19:52:50.795719218 +0100 @@ -289,9 +289,10 @@ extern unsigned int BIOS_revision; extern unsigned int mca_pentium_flag; /* - * User space process size: 3GB (default). + * User space process size: (3GB default). 
*/ -#define TASK_SIZE (PAGE_OFFSET) +#define __TASK_SIZE (__PAGE_OFFSET) +#define TASK_SIZE ((unsigned long)__TASK_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. diff -NurpP --minimal linux-2.6.10-rc1/include/asm-ia64/tlb.h linux-2.6.10-rc1-vs1.9.3/include/asm-ia64/tlb.h --- linux-2.6.10-rc1/include/asm-ia64/tlb.h 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-ia64/tlb.h 2004-11-04 19:52:50.803717954 +0100 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -165,7 +166,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. diff -NurpP --minimal linux-2.6.10-rc1/include/asm-m68k/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-m68k/unistd.h --- linux-2.6.10-rc1/include/asm-m68k/unistd.h 2004-10-18 23:54:08.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-m68k/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -239,7 +239,9 @@ #define __NR_fremovexattr 234 #define __NR_futex 235 -#define NR_syscalls 236 +#define __NR_vserver 273 + +#define NR_syscalls 274 /* user-visible error numbers are in the range -1 - -124: see */ diff -NurpP --minimal linux-2.6.10-rc1/include/asm-m68knommu/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-m68knommu/unistd.h --- linux-2.6.10-rc1/include/asm-m68knommu/unistd.h 2004-10-18 23:54:20.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-m68knommu/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -221,7 +221,9 @@ #define __NR_setfsuid32 215 #define __NR_setfsgid32 216 -#define NR_syscalls 256 +#define __NR_vserver 273 + +#define NR_syscalls 274 /* user-visible error numbers are in the range -1 - -122: see */ diff -NurpP --minimal linux-2.6.10-rc1/include/asm-parisc/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-parisc/unistd.h --- 
linux-2.6.10-rc1/include/asm-parisc/unistd.h 2004-10-18 23:53:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-parisc/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -728,7 +728,9 @@ #define __NR_mq_notify (__NR_Linux + 233) #define __NR_mq_getsetattr (__NR_Linux + 234) -#define __NR_Linux_syscalls 235 +#define __NR_vserver (__NR_Linux + 273) + +#define __NR_Linux_syscalls 273 #define HPUX_GATEWAY_ADDR 0xC0000004 #define LINUX_GATEWAY_ADDR 0x100 diff -NurpP --minimal linux-2.6.10-rc1/include/asm-ppc/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-ppc/unistd.h --- linux-2.6.10-rc1/include/asm-ppc/unistd.h 2004-10-18 23:53:42.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-ppc/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -261,7 +261,7 @@ #define __NR_fadvise64_64 254 #define __NR_rtas 255 /* Number 256 is reserved for sys_debug_setcontext */ -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 /* Number 258 is reserved for new sys_remap_file_pages */ /* Number 259 is reserved for new sys_mbind */ /* Number 260 is reserved for new sys_get_mempolicy */ diff -NurpP --minimal linux-2.6.10-rc1/include/asm-ppc64/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-ppc64/unistd.h --- linux-2.6.10-rc1/include/asm-ppc64/unistd.h 2004-10-18 23:53:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-ppc64/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -267,7 +267,7 @@ #define __NR_fadvise64_64 254 #define __NR_rtas 255 /* Number 256 is reserved for sys_debug_setcontext */ -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 /* Number 258 is reserved for new sys_remap_file_pages */ #define __NR_mbind 259 #define __NR_get_mempolicy 260 diff -NurpP --minimal linux-2.6.10-rc1/include/asm-s390/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-s390/unistd.h --- linux-2.6.10-rc1/include/asm-s390/unistd.h 2004-10-18 23:55:43.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-s390/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ 
-255,7 +255,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ +#define __NR_vserver 263 #define __NR_fadvise64_64 264 #define __NR_statfs64 265 #define __NR_fstatfs64 266 diff -NurpP --minimal linux-2.6.10-rc1/include/asm-sparc/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-sparc/unistd.h --- linux-2.6.10-rc1/include/asm-sparc/unistd.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-sparc/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -283,7 +283,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.10-rc1/include/asm-sparc64/tlb.h linux-2.6.10-rc1-vs1.9.3/include/asm-sparc64/tlb.h --- linux-2.6.10-rc1/include/asm-sparc64/tlb.h 2004-10-18 23:54:54.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-sparc64/tlb.h 2004-11-04 19:52:50.803717954 +0100 @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include diff -NurpP --minimal linux-2.6.10-rc1/include/asm-sparc64/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-sparc64/unistd.h --- linux-2.6.10-rc1/include/asm-sparc64/unistd.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-sparc64/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -285,7 +285,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.10-rc1/include/asm-x86_64/unistd.h linux-2.6.10-rc1-vs1.9.3/include/asm-x86_64/unistd.h --- linux-2.6.10-rc1/include/asm-x86_64/unistd.h 
2004-10-18 23:54:39.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/asm-x86_64/unistd.h 2004-10-31 00:41:27.000000000 +0200 @@ -531,9 +531,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 235 __SYSCALL(__NR_utimes, sys_utimes) #define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) -#define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) +__SYSCALL(__NR_vserver, sys_vserver) #define __NR_mbind 237 __SYSCALL(__NR_mbind, sys_mbind) #define __NR_set_mempolicy 238 diff -NurpP --minimal linux-2.6.10-rc1/include/linux/capability.h linux-2.6.10-rc1-vs1.9.3/include/linux/capability.h --- linux-2.6.10-rc1/include/linux/capability.h 2004-10-18 23:53:44.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/capability.h 2004-10-31 00:41:27.000000000 +0200 @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t; /* Allow enabling/disabling tagged queuing on SCSI controllers and sending arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -284,6 +285,11 @@ typedef __u32 kernel_cap_t; #define CAP_LEASE 28 +/* Allow context manipulations */ +/* Allow changing context info on files */ + +#define CAP_CONTEXT 29 + #ifdef __KERNEL__ /* * Bounding set diff -NurpP --minimal linux-2.6.10-rc1/include/linux/devpts_fs.h linux-2.6.10-rc1-vs1.9.3/include/linux/devpts_fs.h --- linux-2.6.10-rc1/include/linux/devpts_fs.h 2004-10-18 23:54:55.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/devpts_fs.h 2004-10-31 00:41:27.000000000 +0200 @@ -30,5 +30,7 @@ static inline void devpts_pty_kill(int n #endif +#define DEVPTS_SUPER_MAGIC 0x1cd1 + #endif /* _LINUX_DEVPTS_FS_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/ext2_fs.h linux-2.6.10-rc1-vs1.9.3/include/linux/ext2_fs.h --- linux-2.6.10-rc1/include/linux/ext2_fs.h 2004-10-18 23:53:21.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/ext2_fs.h 2004-10-31 00:41:27.000000000 +0200 @@ 
-192,10 +192,17 @@ struct ext2_group_desc #define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT2_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT2_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT2_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif /* * ioctl commands @@ -240,7 +247,7 @@ struct ext2_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -272,6 +279,7 @@ struct ext2_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #endif @@ -312,6 +320,7 @@ struct ext2_inode { #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ #define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ +#define EXT2_MOUNT_TAG_XID (1<<16) /* Enable Context Tags */ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt #define set_opt(o, opt) o |= EXT2_MOUNT_##opt diff -NurpP --minimal linux-2.6.10-rc1/include/linux/ext3_fs.h linux-2.6.10-rc1-vs1.9.3/include/linux/ext3_fs.h --- linux-2.6.10-rc1/include/linux/ext3_fs.h 2004-10-18 23:54:32.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/ext3_fs.h 2004-10-31 00:41:27.000000000 +0200 @@ -185,10 
+185,17 @@ struct ext3_group_desc #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif /* * Inode dynamic state flags @@ -208,6 +215,9 @@ struct ext3_group_desc #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_IOC_SETXID FIOC_SETXIDJ +#endif /* * Structure of an inode on the disk @@ -244,7 +254,7 @@ struct ext3_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -276,6 +286,7 @@ struct ext3_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -325,6 +336,7 @@ struct ext3_inode { #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ #define EXT3_MOUNT_BARRIER 0x10000 /* Use block barriers */ +#define EXT3_MOUNT_TAG_XID (1<<24) /* Enable Context Tags */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H diff -NurpP --minimal 
linux-2.6.10-rc1/include/linux/fs.h linux-2.6.10-rc1-vs1.9.3/include/linux/fs.h --- linux-2.6.10-rc1/include/linux/fs.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/fs.h 2004-10-31 00:41:27.000000000 +0200 @@ -120,6 +120,7 @@ extern int leases_enable, dir_notify_ena #define MS_VERBOSE 32768 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_ONE_SECOND (1<<17) /* fs has 1 sec a/m/ctime resolution */ +#define MS_TAGXID (1<<24) /* tag inodes with context information */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -146,6 +147,8 @@ extern int leases_enable, dir_notify_ena #define S_DIRSYNC 64 /* Directory modifications are synchronous */ #define S_NOCMTIME 128 /* Do not update file c/mtime */ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ +#define S_BARRIER 1024 /* Barrier for chroot() */ +#define S_IUNLINK 2048 /* Immutable unlink */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -172,11 +175,14 @@ extern int leases_enable, dir_notify_ena #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IUNLINK(inode) ((inode)->i_flags & S_IUNLINK) +#define IS_IXORUNLINK(inode) ((IS_IUNLINK(inode) ? 
S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) #define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND) +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER)) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) @@ -258,6 +264,7 @@ typedef void (dio_iodone_t)(struct inode #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 +#define ATTR_XID 8192 /* * This is the Inode Attributes structure, used for notify_change(). It @@ -273,6 +280,7 @@ struct iattr { umode_t ia_mode; uid_t ia_uid; gid_t ia_gid; + xid_t ia_xid; loff_t ia_size; struct timespec ia_atime; struct timespec ia_mtime; @@ -289,6 +297,9 @@ struct iattr { #define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ #define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ +#define ATTR_FLAG_IUNLINK 1024 /* Immutable unlink */ + /* * Includes for diskquotas. 
*/ @@ -427,6 +438,7 @@ struct inode { unsigned int i_nlink; uid_t i_uid; gid_t i_gid; + xid_t i_xid; dev_t i_rdev; loff_t i_size; struct timespec i_atime; @@ -577,6 +589,8 @@ struct file { unsigned int f_uid, f_gid; struct file_ra_state f_ra; + xid_t f_xid; + unsigned long f_version; void *f_security; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/init_task.h linux-2.6.10-rc1-vs1.9.3/include/linux/init_task.h --- linux-2.6.10-rc1/include/linux/init_task.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/init_task.h 2004-10-31 00:41:27.000000000 +0200 @@ -112,6 +112,10 @@ extern struct group_info init_groups; .proc_lock = SPIN_LOCK_UNLOCKED, \ .switch_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ + .xid = 0, \ + .vx_info = NULL, \ + .nid = 0, \ + .nx_info = NULL, \ } diff -NurpP --minimal linux-2.6.10-rc1/include/linux/ip.h linux-2.6.10-rc1-vs1.9.3/include/linux/ip.h --- linux-2.6.10-rc1/include/linux/ip.h 2004-10-18 23:53:45.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/ip.h 2004-10-31 00:41:27.000000000 +0200 @@ -111,6 +111,7 @@ struct inet_opt { /* Socket demultiplex comparisons on incoming packets. 
*/ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ + __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */ __u16 dport; /* Destination port */ __u16 num; /* Local port */ __u32 saddr; /* Sending source */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/ipc.h linux-2.6.10-rc1-vs1.9.3/include/linux/ipc.h --- linux-2.6.10-rc1/include/linux/ipc.h 2004-10-18 23:53:05.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/ipc.h 2004-10-31 00:41:27.000000000 +0200 @@ -66,6 +66,7 @@ struct kern_ipc_perm mode_t mode; unsigned long seq; void *security; + xid_t xid; }; #endif /* __KERNEL__ */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/namespace.h linux-2.6.10-rc1-vs1.9.3/include/linux/namespace.h --- linux-2.6.10-rc1/include/linux/namespace.h 2004-10-18 23:54:30.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/namespace.h 2004-10-31 00:41:27.000000000 +0200 @@ -13,6 +13,7 @@ struct namespace { }; extern void umount_tree(struct vfsmount *); +extern void umount_unused(struct vfsmount *, struct fs_struct *); extern int copy_namespace(int, struct task_struct *); extern void __put_namespace(struct namespace *namespace); diff -NurpP --minimal linux-2.6.10-rc1/include/linux/net.h linux-2.6.10-rc1-vs1.9.3/include/linux/net.h --- linux-2.6.10-rc1/include/linux/net.h 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/net.h 2004-10-31 00:41:27.000000000 +0200 @@ -61,6 +61,8 @@ typedef enum { #define SOCK_ASYNC_NOSPACE 0 #define SOCK_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 +#define SOCK_PASS_CRED 16 +#define SOCK_USER_SOCKET 17 #ifndef ARCH_HAS_SOCKET_TYPES /** sock_type - Socket types @@ -111,7 +113,6 @@ struct socket { struct sock *sk; wait_queue_head_t wait; short type; - unsigned char passcred; }; struct vm_area_struct; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/nfs_mount.h linux-2.6.10-rc1-vs1.9.3/include/linux/nfs_mount.h --- linux-2.6.10-rc1/include/linux/nfs_mount.h 
2004-10-18 23:53:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/nfs_mount.h 2004-10-31 00:41:27.000000000 +0200 @@ -60,6 +60,7 @@ struct nfs_mount_data { #define NFS_MOUNT_BROKEN_SUID 0x0400 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_TAGXID 0x8000 /* tagxid */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/proc_fs.h linux-2.6.10-rc1-vs1.9.3/include/linux/proc_fs.h --- linux-2.6.10-rc1/include/linux/proc_fs.h 2004-10-18 23:55:36.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/proc_fs.h 2004-10-31 00:41:27.000000000 +0200 @@ -55,6 +55,7 @@ struct proc_dir_entry { nlink_t nlink; uid_t uid; gid_t gid; + int vx_flags; unsigned long size; struct inode_operations * proc_iops; struct file_operations * proc_fops; @@ -242,9 +243,11 @@ extern struct kcore_list *kclist_del(voi struct proc_inode { struct task_struct *task; int type; + int vx_flags; union { int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); int (*proc_read)(struct task_struct *task, char *page); + int (*proc_vid_read)(int vid, char *page); } op; struct proc_dir_entry *pde; struct inode vfs_inode; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/ptrace.h linux-2.6.10-rc1-vs1.9.3/include/linux/ptrace.h --- linux-2.6.10-rc1/include/linux/ptrace.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/ptrace.h 2004-10-31 00:41:27.000000000 +0200 @@ -75,6 +75,8 @@ #include /* For unlikely. */ #include /* For struct task_struct. 
*/ +#include +#include extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); diff -NurpP --minimal linux-2.6.10-rc1/include/linux/reiserfs_fs.h linux-2.6.10-rc1-vs1.9.3/include/linux/reiserfs_fs.h --- linux-2.6.10-rc1/include/linux/reiserfs_fs.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/reiserfs_fs.h 2004-10-31 00:41:27.000000000 +0200 @@ -888,6 +888,18 @@ struct stat_data_v1 #define REISERFS_COMPR_FL EXT2_COMPR_FL #define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL +/* unfortunately reiserfs sdattr is only 16 bit */ +#define REISERFS_BARRIER_FL (EXT2_BARRIER_FL >> 16) +#define REISERFS_IUNLINK_FL (EXT2_IUNLINK_FL >> 16) + +#ifdef CONFIG_VSERVER_LEGACY +#define REISERFS_FL_USER_VISIBLE (REISERFS_IUNLINK_FL|0x80FF) +#define REISERFS_FL_USER_MODIFYABLE (REISERFS_IUNLINK_FL|0x80FF) +#else +#define REISERFS_FL_USER_VISIBLE 0x80FF +#define REISERFS_FL_USER_MODIFYABLE 0x80FF +#endif + /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ REISERFS_SYNC_FL | \ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/reiserfs_fs_sb.h linux-2.6.10-rc1-vs1.9.3/include/linux/reiserfs_fs_sb.h --- linux-2.6.10-rc1/include/linux/reiserfs_fs_sb.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/reiserfs_fs_sb.h 2004-10-31 00:41:27.000000000 +0200 @@ -457,6 +457,7 @@ enum reiserfs_mount_options { REISERFS_POSIXACL, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, + REISERFS_TAGXID, /* Actions on error */ REISERFS_ERROR_PANIC, diff -NurpP --minimal linux-2.6.10-rc1/include/linux/sched.h linux-2.6.10-rc1-vs1.9.3/include/linux/sched.h --- linux-2.6.10-rc1/include/linux/sched.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/sched.h 2004-10-31 00:41:27.000000000 +0200 @@ -108,6 +108,7 @@ 
extern unsigned long nr_iowait(void); #define TASK_UNINTERRUPTIBLE 2 #define TASK_STOPPED 4 #define TASK_TRACED 8 +#define TASK_ONHOLD 64 #define EXIT_ZOMBIE 16 #define EXIT_DEAD 32 @@ -236,6 +237,7 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; + struct vx_info *mm_vx_info; /* Token based thrashing protection. */ unsigned long swap_token_time; @@ -364,9 +366,10 @@ struct user_struct { /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; + xid_t xid; }; -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(xid_t, uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -629,6 +632,14 @@ struct task_struct { void *security; struct audit_context *audit_context; +/* vserver context data */ + xid_t xid; + struct vx_info *vx_info; + +/* vserver network data */ + nid_t nid; + struct nx_info *nx_info; + /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; @@ -754,13 +765,19 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; -#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) + +#define find_task_by_real_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, nr) +#define find_task_by_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, \ + vx_rmap_pid(nr)) + extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(xid_t, uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); diff -NurpP --minimal linux-2.6.10-rc1/include/linux/shmem_fs.h linux-2.6.10-rc1-vs1.9.3/include/linux/shmem_fs.h --- linux-2.6.10-rc1/include/linux/shmem_fs.h 2004-10-18 23:54:55.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/shmem_fs.h 2004-10-31 00:41:27.000000000 +0200 @@ -8,6 +8,9 @@ #define SHMEM_NR_DIRECT 16 +#define TMPFS_SUPER_MAGIC 0x01021994 + + struct shmem_inode_info { spinlock_t lock; unsigned long flags; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/stat.h linux-2.6.10-rc1-vs1.9.3/include/linux/stat.h --- linux-2.6.10-rc1/include/linux/stat.h 2004-10-18 23:53:46.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/stat.h 2004-10-31 00:41:27.000000000 +0200 @@ -63,6 +63,7 @@ struct kstat { unsigned int nlink; uid_t uid; gid_t gid; + xid_t xid; dev_t rdev; loff_t size; struct timespec atime; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/sunrpc/auth.h linux-2.6.10-rc1-vs1.9.3/include/linux/sunrpc/auth.h --- linux-2.6.10-rc1/include/linux/sunrpc/auth.h 2004-10-18 23:54:30.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/sunrpc/auth.h 2004-10-31 00:41:27.000000000 +0200 @@ -28,6 +28,7 @@ struct auth_cred { uid_t uid; gid_t gid; + xid_t xid; struct group_info *group_info; }; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/sunrpc/clnt.h linux-2.6.10-rc1-vs1.9.3/include/linux/sunrpc/clnt.h --- linux-2.6.10-rc1/include/linux/sunrpc/clnt.h 2004-10-18 23:54:32.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/sunrpc/clnt.h 2004-10-31 00:41:27.000000000 +0200 @@ -53,7 +53,8 @@ struct rpc_clnt { cl_autobind : 1,/* use getport() */ cl_droppriv : 1,/* enable NFS suid hack */ cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_dead : 1,/* abandoned */ + cl_tagxid : 1;/* do xid tagging */ struct 
rpc_rtt * cl_rtt; /* RTO estimator data */ struct rpc_portmap * cl_pmap; /* port mapping */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/sysctl.h linux-2.6.10-rc1-vs1.9.3/include/linux/sysctl.h --- linux-2.6.10-rc1/include/linux/sysctl.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/sysctl.h 2004-10-31 00:41:27.000000000 +0200 @@ -134,6 +134,7 @@ enum KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ KERN_HZ_TIMER=65, /* int: hz timer on or off */ KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */ + KERN_VSHELPER=67, /* string: path to vshelper policy agent */ }; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/sysfs.h linux-2.6.10-rc1-vs1.9.3/include/linux/sysfs.h --- linux-2.6.10-rc1/include/linux/sysfs.h 2004-10-18 23:53:51.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/sysfs.h 2004-10-31 00:41:27.000000000 +0200 @@ -9,6 +9,8 @@ #ifndef _SYSFS_H_ #define _SYSFS_H_ +#define SYSFS_SUPER_MAGIC 0x62656572 + struct kobject; struct module; diff -NurpP --minimal linux-2.6.10-rc1/include/linux/types.h linux-2.6.10-rc1-vs1.9.3/include/linux/types.h --- linux-2.6.10-rc1/include/linux/types.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/types.h 2004-10-31 00:41:27.000000000 +0200 @@ -36,6 +36,8 @@ typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; +typedef unsigned int xid_t; +typedef unsigned int nid_t; #ifdef CONFIG_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vs_base.h linux-2.6.10-rc1-vs1.9.3/include/linux/vs_base.h --- linux-2.6.10-rc1/include/linux/vs_base.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_base.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,91 @@ +#ifndef _VX_VS_BASE_H +#define _VX_VS_BASE_H + +#include "vserver/context.h" + + +#define 
vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define vx_check(c,m) __vx_check(vx_current_xid(),c,m) + +#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1) + + +/* + * check current context for ADMIN/WATCH and + * optionally against supplied argument + */ +static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && + (id == cid)) + return 1; + } + if (mode & VX_ATR_MASK) { + if ((mode & VX_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode & VX_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + return (((mode & VX_ADMIN) && (cid == 0)) || + ((mode & VX_WATCH) && (cid == 1))); +} + + +#define __vx_state(v) ((v) ? ((v)->vx_state) : 0) + +#define vx_info_state(v,m) (__vx_state(v) & (m)) + + +/* generic flag merging */ + +#define vx_check_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m))) + +#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m))) + + +/* context flags */ + +#define __vx_flags(v) ((v) ? (v)->vx_flags : 0) + +#define vx_current_flags() __vx_flags(current->vx_info) + +#define vx_info_flags(v,m,f) \ + vx_check_flags(__vx_flags(v),(m),(f)) + +#define task_vx_flags(t,m,f) \ + ((t) && vx_info_flags((t)->vx_info, (m), (f))) + +#define vx_flags(m,f) vx_info_flags(current->vx_info,(m),(f)) + + +/* context caps */ + +#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0) + +#define vx_current_ccaps() __vx_ccaps(current->vx_info) + +#define vx_info_ccaps(v,c) (__vx_ccaps(v) & (c)) + +#define vx_ccaps(c) vx_info_ccaps(current->vx_info,(c)) + + +#define vx_current_bcaps() \ + (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? 
\ + current->vx_info->vx_bcaps : cap_bset) + + +#define vx_current_initpid(n) \ + (current->vx_info && \ + (current->vx_info->vx_initpid == (n))) + + +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vs_context.h linux-2.6.10-rc1-vs1.9.3/include/linux/vs_context.h --- linux-2.6.10-rc1/include/linux/vs_context.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_context.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,122 @@ +#ifndef _VX_VS_CONTEXT_H +#define _VX_VS_CONTEXT_H + + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/debug.h" + + +extern int proc_pid_vx_info(struct task_struct *, char *); + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__) + +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line) +{ + if (!vxi) + return NULL; + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + atomic_inc(&vxi->vx_usecnt); + return vxi; +} + + +extern void free_vx_info(struct vx_info *); + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__) + +static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line) +{ + if (!vxi) + return; + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&vxi->vx_usecnt)) + free_vx_info(vxi); +} + +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line) +{ + struct vx_info *vxo; + + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_refcnt):0, + _file, _line); + + atomic_inc(&vxi->vx_refcnt); + vxo = xchg(vxp, __get_vx_info(vxi, _file, _line)); + BUG_ON(vxo); +} + 
+#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line) +{ + struct vx_info *vxo; + + vxo = xchg(vxp, NULL); + if (!vxo) + return; + + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d.%d])", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + vxo?atomic_read(&vxo->vx_refcnt):0, + _file, _line); + + if (atomic_dec_and_test(&vxo->vx_refcnt)) + unhash_vx_info(vxo); + __put_vx_info(vxo, _file, _line); +} + + +#define task_get_vx_info(i) __task_get_vx_info(i,__FILE__,__LINE__) + +static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct vx_info *vxi; + + task_lock(p); + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", + p, _file, _line); + vxi = __get_vx_info(p->vx_info, _file, _line); + task_unlock(p); + return vxi; +} + + +#define vx_verify_info(p,i) \ + __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__) + +static __inline__ void __vx_verify_info( + struct vx_info *vxa, struct vx_info *vxb, + const char *_file, int _line) +{ + if (vxa == vxb) + return; + printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n", + vxa, vxb, _file, _line); +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vs_cvirt.h linux-2.6.10-rc1-vs1.9.3/include/linux/vs_cvirt.h --- linux-2.6.10-rc1/include/linux/vs_cvirt.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_cvirt.h 2004-11-04 19:52:50.810716848 +0100 @@ -0,0 +1,97 @@ +#ifndef _VX_VS_CVIRT_H +#define _VX_VS_CVIRT_H + +#include "vserver/cvirt.h" +#include "vserver/debug.h" +#include "vs_base.h" + + +/* utsname virtualization */ + +static inline struct new_utsname *vx_new_utsname(void) +{ + if (current->vx_info) + return &current->vx_info->cvirt.utsname; + return &system_utsname; +} + +#define vx_new_uts(x) ((vx_new_utsname())->x) + + +/* pid faking stuff */ + + +#define 
vx_info_map_pid(v,p) \ + __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p) +#define vx_map_pid(p) vx_info_map_pid(current->vx_info, p) +#define vx_map_tgid(p) vx_map_pid(p) + +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_map_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid && pid == vxi->vx_initpid)?1:pid, + func, file, line); + if (pid == 0) + return 0; + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} + +#define vx_info_rmap_pid(v,p) \ + __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p) +#define vx_rmap_tgid(p) vx_rmap_pid(p) + +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_rmap_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + func, file, line); + if ((pid == 1) && vxi->vx_initpid) + return vxi->vx_initpid; + if (pid == vxi->vx_initpid) + return ~0U; + } + return pid; +} + + +static inline void vx_activate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + // if ((vxi = task_get_vx_info(p))) { + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_inc(&vxi->cvirt.nr_running); + // put_vx_info(vxi); + } +} + +static inline void vx_deactivate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + // if ((vxi = task_get_vx_info(p))) { + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_dec(&vxi->cvirt.nr_running); + // put_vx_info(vxi); + } +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vs_dlimit.h linux-2.6.10-rc1-vs1.9.3/include/linux/vs_dlimit.h --- 
linux-2.6.10-rc1/include/linux/vs_dlimit.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_dlimit.h 2004-11-04 19:52:50.811716690 +0100 @@ -0,0 +1,210 @@ +#ifndef _VX_VS_DLIMIT_H +#define _VX_VS_DLIMIT_H + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/dlimit.h" +#include "vserver/debug.h" + + +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) + +static inline struct dl_info *__get_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return NULL; + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + atomic_inc(&dli->dl_usecnt); + return dli; +} + + +#define free_dl_info(i) \ + call_rcu(&i->dl_rcu, rcu_free_dl_info); + +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) + +static inline void __put_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return; + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&dli->dl_usecnt)) + free_dl_info(dli); +} + + +#define __dlimit_char(d) ((d)?'*':' ') + +static inline int __dl_alloc_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + int ret = 0; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_space_used + nr > dli->dl_space_total); + if (!ret) + dli->dl_space_used += nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "ALLOC (%p,#%d)%c %lld bytes (%d)", + sb, xid, __dlimit_char(dli), (long long)nr, + ret, file, line); + return ret; +} + +static inline void __dl_free_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *_file, int _line) +{ + struct dl_info *dli = NULL; + + if (nr == 0) + goto out; 
+ dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_space_used > nr) + dli->dl_space_used -= nr; + else + dli->dl_space_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "FREE (%p,#%d)%c %lld bytes", + sb, xid, __dlimit_char(dli), (long long)nr, + _file, _line); +} + +static inline int __dl_alloc_inode(struct super_block *sb, + xid_t xid, const char *_file, int _line) +{ + struct dl_info *dli; + int ret = 0; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_inodes_used >= dli->dl_inodes_total); + if (!ret) + dli->dl_inodes_used++; +#if 0 + else + printk("VSW: DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d\n", + sb, xid, + dli->dl_inodes_used, dli->dl_inodes_total, + file, line); +#endif + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "ALLOC (%p,#%d)%c inode (%d)", + sb, xid, __dlimit_char(dli), ret, _file, _line); + return ret; +} + +static inline void __dl_free_inode(struct super_block *sb, + xid_t xid, const char *_file, int _line) +{ + struct dl_info *dli; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_used > 1) + dli->dl_inodes_used--; + else + dli->dl_inodes_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "FREE (%p,#%d)%c inode", + sb, xid, __dlimit_char(dli), _file, _line); +} + +static inline void __dl_adjust_block(struct super_block *sb, xid_t xid, + unsigned int *free_blocks, unsigned int *root_blocks, + const char *_file, int _line) +{ + struct dl_info *dli; + uint64_t broot, bfree; + + dli = locate_dl_info(sb, xid); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + broot = (dli->dl_space_total - + (dli->dl_space_total >> 10) * dli->dl_nrlmult) + >> sb->s_blocksize_bits; + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + 
spin_unlock(&dli->dl_lock); + /* free_blocks/root_blocks are NULL-checked below, so the debug output must not dereference them blindly */ + vxlprintk(VXD_CBIT(dlim, 2), + "ADJUST: %lld,%lld on %d,%d [mult=%d]", + (long long)bfree, (long long)broot, + free_blocks ? *free_blocks : 0U, root_blocks ? *root_blocks : 0U, + dli->dl_nrlmult, _file, _line); + if (free_blocks) { + if (*free_blocks > bfree) + *free_blocks = bfree; + } + if (root_blocks) { + if (*root_blocks > broot) + *root_blocks = broot; + } + put_dl_info(dli); +} + + +#define DLIMIT_ALLOC_BLOCK(sb, xid, nr) \ + __dl_alloc_space(sb, xid, \ + ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_BLOCK(sb, xid, nr) \ + __dl_free_space(sb, xid, \ + ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_ALLOC_INODE(sb, xid) \ + __dl_alloc_inode(sb, xid, __FILE__, __LINE__ ) + +#define DLIMIT_FREE_INODE(sb, xid) \ + __dl_free_inode(sb, xid, __FILE__, __LINE__ ) + + +#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb) \ + __dl_adjust_block(sb, xid, fb, rb, __FILE__, __LINE__ ) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vs_limit.h linux-2.6.10-rc1-vs1.9.3/include/linux/vs_limit.h --- linux-2.6.10-rc1/include/linux/vs_limit.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_limit.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,102 @@ +#ifndef _VX_VS_LIMIT_H +#define _VX_VS_LIMIT_H + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/limit.h" +#include "vserver/debug.h" + + +/* file limits */ + + +static inline void __vx_acc_cres(struct vx_info *vxi, + int res, int dir, void *_data, char *_file, int _line) +{ + if (VXD_RLIMIT(res, RLIMIT_NOFILE) || + VXD_RLIMIT(res, RLIMIT_NPROC) || + VXD_RLIMIT(res, VLIMIT_NSOCK)) + vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5d%s (%p)", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + (dir>0)?"++":"--", _data, _file, _line); + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + 
atomic_dec(&vxi->limit.rcur[res]); + } +} + +#define vx_acc_cres(v,d,p,r) \ + __vx_acc_cres((v), (r), (d), (p), __FILE__, __LINE__) + +#define vx_acc_cres_cond(x,d,p,r) \ + __vx_acc_cres(((x) == current->xid) ? current->vx_info : 0,\ + (r), (d), (p), __FILE__, __LINE__) + +#define vx_nproc_inc(p) \ + vx_acc_cres((p)->vx_info, 1, (p), RLIMIT_NPROC) + +#define vx_nproc_dec(p) \ + vx_acc_cres((p)->vx_info,-1, (p), RLIMIT_NPROC) + +#define vx_files_inc(f) \ + vx_acc_cres_cond((f)->f_xid, 1, (f), RLIMIT_NOFILE) + +#define vx_files_dec(f) \ + vx_acc_cres_cond((f)->f_xid,-1, (f), RLIMIT_NOFILE) + + +#define vx_cres_avail(v,n,r) \ + __vx_cres_avail((v), (r), (n), __FILE__, __LINE__) + +static inline int __vx_cres_avail(struct vx_info *vxi, + int res, int num, char *_file, int _line) +{ + unsigned long value; + + if (VXD_RLIMIT(res, RLIMIT_NOFILE) || + VXD_RLIMIT(res, RLIMIT_NPROC) || + VXD_RLIMIT(res, VLIMIT_NSOCK)) + vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + num, _file, _line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + num <= vxi->limit.rlim[res]) + return 1; + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_nproc_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_NPROC) + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE) + + +/* socket limits */ + +#define vx_sock_inc(s) \ + vx_acc_cres((s)->sk_vx_info, 1, (s), VLIMIT_NSOCK) +#define vx_sock_dec(s) \ + vx_acc_cres((s)->sk_vx_info,-1, (s), VLIMIT_NSOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, (n), VLIMIT_NSOCK) + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vs_memory.h 
linux-2.6.10-rc1-vs1.9.3/include/linux/vs_memory.h --- linux-2.6.10-rc1/include/linux/vs_memory.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_memory.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,127 @@ +#ifndef _VX_VS_MEMORY_H +#define _VX_VS_MEMORY_H + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/limit.h" +#include "vserver/debug.h" + + +#define vx_acc_page(m, d, v, r) \ + __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__) +/* adjust *v (if non-NULL) and vxi->limit.rcur[res] (if vxi) by one: up when dir > 0, down otherwise */ +static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (VXD_RLIMIT(res, RLIMIT_RSS) || + VXD_RLIMIT(res, RLIMIT_AS) || + VXD_RLIMIT(res, RLIMIT_MEMLOCK)) + vxlprintk(1, "vx_acc_page[%5d,%s,%2d]: %5d%s", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + ((dir > 0)?"++":"--"), file, line); + if (v) { + if (dir > 0) + ++(*v); + else + --(*v); + } + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); + } +} + + +#define vx_acc_pages(m, p, v, r) \ + __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__) + +static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi, + int res, int pages, char *_file, int _line) +{ + if (VXD_RLIMIT(res, RLIMIT_RSS) || + VXD_RLIMIT(res, RLIMIT_AS) || + VXD_RLIMIT(res, RLIMIT_MEMLOCK)) + vxlprintk(1, "vx_acc_pages[%5d,%s,%2d]: %5d += %5d", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + pages, _file, _line); + if (pages == 0) + return; + if (v) + *v += pages; + if (vxi) + atomic_add(pages, &vxi->limit.rcur[res]); +} + + + +#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS) + +#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define 
vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS) + +#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1) +#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1) +#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p) +#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p)) + + + +#define vx_pages_avail(m, p, r) \ + __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__) + +static inline int __vx_pages_avail(struct vx_info *vxi, + int res, int pages, char *_file, int _line) +{ + unsigned long value; + + if (VXD_RLIMIT(res, RLIMIT_RSS) || + VXD_RLIMIT(res, RLIMIT_AS) || + VXD_RLIMIT(res, RLIMIT_MEMLOCK)) + vxlprintk(1, "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d", + (vxi?vxi->vx_id:-1), vlimit_name[res], res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + pages, _file, _line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + pages <= vxi->limit.rlim[res]) + return 1; + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS) + +#else +#warning duplicate inclusion +#endif diff -NurpP 
--minimal linux-2.6.10-rc1/include/linux/vs_network.h linux-2.6.10-rc1-vs1.9.3/include/linux/vs_network.h --- linux-2.6.10-rc1/include/linux/vs_network.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_network.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,164 @@ +#ifndef _NX_VS_NETWORK_H +#define _NX_VS_NETWORK_H + +#include +#include +#include + +#include "vserver/network.h" +#include "vserver/debug.h" + + +extern int proc_pid_nx_info(struct task_struct *, char *); + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) +{ + if (!nxi) + return NULL; + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + atomic_inc(&nxi->nx_usecnt); + return nxi; +} + + +#define free_nx_info(i) \ + call_rcu(&i->nx_rcu, rcu_free_nx_info); + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&nxi->nx_usecnt)) + free_nx_info(nxi); +} + +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + struct nx_info *nxo; + + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_refcnt):0, + _file, _line); + + atomic_inc(&nxi->nx_refcnt); + nxo = xchg(nxp, __get_nx_info(nxi, _file, _line)); + BUG_ON(nxo); +} + +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + 
struct nx_info *nxo; + + nxo = xchg(nxp, NULL); + if (!nxo) + return; + + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d.%d])", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + nxo?atomic_read(&nxo->nx_refcnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxo->nx_refcnt)) + unhash_nx_info(nxo); + __put_nx_info(nxo, _file, _line); +} + + +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + + task_lock(p); + nxi = __get_nx_info(p->nx_info, _file, _line); + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", + p, _file, _line); + task_unlock(p); + return nxi; +} + +#define nx_verify_info(p,i) \ + __nx_verify_info((p)->nx_info,i,__FILE__,__LINE__) + +static __inline__ void __nx_verify_info( + struct nx_info *ipa, struct nx_info *ipb, + const char *_file, int _line) +{ + if (ipa == ipb) + return; + printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n", + ipa, ipb, _file, _line); +} + + +#define nx_task_nid(t) ((t)->nid) + +#define nx_current_nid() nx_task_nid(current) + +#define nx_check(c,m) __nx_check(nx_current_nid(),c,m) + +#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) + + +#define __nx_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define __nx_task_flags(t,m,f) \ + (((t) && ((t)->nx_info)) ? \ + __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0) + +#define nx_current_flags() \ + ((current->nx_info) ? current->nx_info->nx_flags : 0) + +#define nx_flags(m,f) __nx_flags(nx_current_flags(),(m),(f)) + + +#define nx_current_ncaps() \ + ((current->nx_info) ? 
current->nx_info->nx_ncaps : 0) + +#define nx_ncaps(c) (nx_current_ncaps() & (c)) + +/* returns 1 if nxi is NULL or addr equals one of nxi->ipv4[0..nbipv4-1], else 0 */ +static inline int addr_in_nx_info(struct nx_info *nxi, uint32_t addr) +{ + int n,i; + + if (!nxi) + return 1; + + n = nxi->nbipv4; + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == addr) + return 1; + } + return 0; +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vs_socket.h linux-2.6.10-rc1-vs1.9.3/include/linux/vs_socket.h --- linux-2.6.10-rc1/include/linux/vs_socket.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vs_socket.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,66 @@ +#ifndef _VX_VS_SOCKET_H +#define _VX_VS_SOCKET_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/network.h" +#include "vserver/debug.h" + + +/* socket accounting */ + +#include + +static inline int vx_sock_type(int family) +{ + int type = 4; + + if (family > 0 && family < 3) + type = family; + else if (family == PF_INET6) + type = 3; + return type; +} + +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + + +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/context.h 
linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/context.h --- linux-2.6.10-rc1/include/linux/vserver/context.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/context.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,199 @@ +#ifndef _VX_CONTEXT_H +#define _VX_CONTEXT_H + +#include + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ + +#define VX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +#ifdef __KERNEL__ + +#include +#include +#include + +#define _VX_INFO_DEF_ +#include "cvirt.h" +#include "limit.h" +#include "sched.h" +#undef _VX_INFO_DEF_ + +struct vx_info { + struct hlist_node vx_hlist; /* linked list of contexts */ + struct rcu_head vx_rcu; /* the rcu head */ + xid_t vx_id; /* context id */ + atomic_t vx_usecnt; /* usage count */ + atomic_t vx_refcnt; /* reference count */ + struct vx_info *vx_parent; /* parent context */ + int vx_state; /* context state */ + + struct namespace *vx_namespace; /* private namespace */ + struct fs_struct *vx_fs; /* private namespace fs */ + uint64_t vx_flags; /* context flags */ + uint64_t vx_bcaps; /* bounding caps (system) */ + uint64_t vx_ccaps; /* context caps (vserver) */ + + pid_t vx_initpid; /* PID of fake init process */ + + spinlock_t vx_lock; + wait_queue_head_t vx_exit; /* context exit waitqueue */ + + struct _vx_limit limit; /* vserver limits */ + struct _vx_sched sched; /* vserver scheduler */ + struct _vx_cvirt cvirt; /* virtual/bias stuff */ + struct _vx_cacct cacct; /* context accounting */ + + char vx_name[65]; /* vserver name */ +}; + +/* status flags */ + +#define VXS_HASHED 0x0001 +#define VXS_PAUSED 0x0010 +#define VXS_ONHOLD 0x0020 +#define VXS_SHUTDOWN 0x0100 +#define VXS_DEFUNCT 0x1000 +#define VXS_RELEASED 0x8000 + +/* check conditions */ + +#define VX_ADMIN 0x0001 +#define VX_WATCH 0x0002 +#define VX_DUMMY 0x0008 + +#define VX_IDENT 0x0010 +#define VX_EQUIV 0x0020 +#define VX_PARENT 0x0040 
+#define VX_CHILD 0x0080 + +#define VX_ARG_MASK 0x00F0 + +#define VX_DYNAMIC 0x0100 +#define VX_STATIC 0x0200 + +#define VX_ATR_MASK 0x0F00 + + +struct rcu_head; + +// extern void rcu_free_vx_info(struct rcu_head *); +extern void unhash_vx_info(struct vx_info *); + +extern struct vx_info *locate_vx_info(int); +extern struct vx_info *locate_or_create_vx_info(int); + +extern int get_xid_list(int, unsigned int *, int); +extern int vx_info_is_hashed(xid_t); + +extern int vx_migrate_task(struct task_struct *, struct vx_info *); + +#endif /* __KERNEL__ */ + +#include "switch.h" + +/* vinfo commands */ + +#define VCMD_task_xid VC_CMD(VINFO, 1, 0) +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +#ifdef __KERNEL__ +extern int vc_task_xid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_vx_info VC_CMD(VINFO, 5, 0) +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_vx_info_v0 { + uint32_t xid; + uint32_t initpid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_vx_info(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_ctx_create VC_CMD(VPROC, 1, 0) +#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 0) + +#ifdef __KERNEL__ +extern int vc_ctx_create(uint32_t, void __user *); +extern int vc_ctx_migrate(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0) +#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0) + +struct vcmd_ctx_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_cflags(uint32_t, void __user *); +extern int vc_set_cflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VXF_INFO_LOCK 0x00000001 +#define VXF_INFO_SCHED 0x00000002 +#define VXF_INFO_NPROC 0x00000004 +#define VXF_INFO_PRIVATE 0x00000008 + +#define VXF_INFO_INIT 0x00000010 +#define VXF_INFO_HIDE 0x00000020 +#define VXF_INFO_ULIMIT 0x00000040 +#define VXF_INFO_NSPACE 0x00000080 + +#define VXF_SCHED_HARD 0x00000100 +#define VXF_SCHED_PRIO 0x00000200 +#define 
VXF_SCHED_PAUSE 0x00000400 + +#define VXF_VIRT_MEM 0x00010000 +#define VXF_VIRT_UPTIME 0x00020000 +#define VXF_VIRT_CPU 0x00040000 +#define VXF_VIRT_LOAD 0x00080000 + +#define VXF_HIDE_MOUNT 0x01000000 +#define VXF_HIDE_NETIF 0x02000000 + +#define VXF_STATE_SETUP (1ULL<<32) +#define VXF_STATE_INIT (1ULL<<33) + +#define VXF_FORK_RSS (1ULL<<48) +#define VXF_PROLIFIC (1ULL<<49) + +#define VXF_IGNEG_NICE (1ULL<<52) + +#define VXF_ONE_TIME (0x0003ULL<<32) + +#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0) +#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0) + +struct vcmd_ctx_caps_v0 { + uint64_t bcaps; + uint64_t ccaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ccaps(uint32_t, void __user *); +extern int vc_set_ccaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VXC_SET_UTSNAME 0x00000001 +#define VXC_SET_RLIMIT 0x00000002 + +#define VXC_RAW_ICMP 0x00000100 + +#define VXC_SECURE_MOUNT 0x00010000 +#define VXC_SECURE_REMOUNT 0x00020000 + + +#endif /* _VX_CONTEXT_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/cvirt.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/cvirt.h --- linux-2.6.10-rc1/include/linux/vserver/cvirt.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/cvirt.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,205 @@ +/* _VX_CVIRT_H defined below */ + +#if defined(__KERNEL__) && defined(_VX_INFO_DEF_) + +#include +#include +#include +#include +#include +#include +#include + +/* context sub struct */ + +struct _vx_cvirt { + int max_threads; /* maximum allowed threads */ + atomic_t nr_threads; /* number of current threads */ + atomic_t nr_running; /* number of running threads */ + + atomic_t nr_onhold; /* processes on hold */ + uint32_t onhold_last; /* jiffies when put on hold */ + + struct timespec bias_idle; + struct timespec bias_uptime; /* context creation point */ + + struct new_utsname utsname; + + spinlock_t load_lock; /* lock for the load averages */ + uint32_t load_last; 
/* last time load was calculated */ + uint32_t load[3]; /* load averages 1,5,15 */ + + struct cpu_usage_stat cpustat[NR_CPUS]; +}; + +struct sock_acc { + atomic_t count; + atomic_t total; +}; + +struct _vx_cacct { + unsigned long total_forks; + + struct sock_acc sock[5][3]; +}; + + +static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].count); +} + + +static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].total); +} + + +extern uint64_t vx_idle_jiffies(void); + +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt) +{ + uint64_t idle_jiffies = vx_idle_jiffies(); + + do_posix_clock_monotonic_gettime(&cvirt->bias_uptime); + jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle); + atomic_set(&cvirt->nr_threads, 0); + atomic_set(&cvirt->nr_running, 0); + atomic_set(&cvirt->nr_onhold, 0); + + down_read(&uts_sem); + cvirt->utsname = system_utsname; + up_read(&uts_sem); + + spin_lock_init(&cvirt->load_lock); + cvirt->load_last = jiffies; + cvirt->load[0] = 0; + cvirt->load[1] = 0; + cvirt->load[2] = 0; +} + +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt) +{ +#ifdef CONFIG_VSERVER_DEBUG + int value; + + if ((value = atomic_read(&cvirt->nr_threads))) + printk("!!! cvirt: %p[nr_threads] = %d on exit.\n", + cvirt, value); + if ((value = atomic_read(&cvirt->nr_running))) + printk("!!! 
cvirt: %p[nr_running] = %d on exit.\n", + cvirt, value); +#endif + return; +} + +static inline void vx_info_init_cacct(struct _vx_cacct *cacct) +{ + int i,j; + + for (i=0; i<5; i++) { + for (j=0; j<3; j++) { + atomic_set(&cacct->sock[i][j].count, 0); + atomic_set(&cacct->sock[i][j].total, 0); + } + } +} + +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct) +{ + return; +} + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + + +static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer) +{ + int length = 0; + int a, b, c; + + length += sprintf(buffer + length, + "BiasUptime:\t%lu.%02lu\n", + (unsigned long)cvirt->bias_uptime.tv_sec, + (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100))); + length += sprintf(buffer + length, + "SysName:\t%.*s\n" + "NodeName:\t%.*s\n" + "Release:\t%.*s\n" + "Version:\t%.*s\n" + "Machine:\t%.*s\n" + "DomainName:\t%.*s\n" + ,__NEW_UTS_LEN, cvirt->utsname.sysname + ,__NEW_UTS_LEN, cvirt->utsname.nodename + ,__NEW_UTS_LEN, cvirt->utsname.release + ,__NEW_UTS_LEN, cvirt->utsname.version + ,__NEW_UTS_LEN, cvirt->utsname.machine + ,__NEW_UTS_LEN, cvirt->utsname.domainname + ); + + a = cvirt->load[0] + (FIXED_1/200); + b = cvirt->load[1] + (FIXED_1/200); + c = cvirt->load[2] + (FIXED_1/200); + length += sprintf(buffer + length, + "nr_threads:\t%d\n" + "nr_running:\t%d\n" + "nr_onhold:\t%d\n" + "loadavg:\t%d.%02d %d.%02d %d.%02d\n" + ,atomic_read(&cvirt->nr_threads) + ,atomic_read(&cvirt->nr_running) + ,atomic_read(&cvirt->nr_onhold) + ,LOAD_INT(a), LOAD_FRAC(a) + ,LOAD_INT(b), LOAD_FRAC(b) + ,LOAD_INT(c), LOAD_FRAC(c) + ); + return length; +} + +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer) +{ + int i,j, length = 0; + static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" }; + + for (i=0; i<5; i++) { + length += sprintf(buffer + length, + "%s:", type[i]); + for (j=0; j<3; j++) { + length += sprintf(buffer + length, + 
"\t%12lu/%-12lu" + ,vx_sock_count(cacct, i, j) + ,vx_sock_total(cacct, i, j) + ); + } + buffer[length++] = '\n'; + } + length += sprintf(buffer + length, + "forks:\t%lu\n", cacct->total_forks); + return length; +} + +#else /* _VX_INFO_DEF_ */ +#ifndef _VX_CVIRT_H +#define _VX_CVIRT_H + +#include "switch.h" + +/* cvirt vserver commands */ + + +#ifdef __KERNEL__ + +struct timespec; + +void vx_vsi_uptime(struct timespec *, struct timespec *); + +struct vx_info; + +void vx_update_load(struct vx_info *); + + +#endif /* __KERNEL__ */ + +#endif /* _VX_CVIRT_H */ +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/debug.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/debug.h --- linux-2.6.10-rc1/include/linux/vserver/debug.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/debug.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,67 @@ +#ifndef _VX_DEBUG_H +#define _VX_DEBUG_H + + +#define VXD_CBIT(n,m) (vx_debug_ ## n & (1 << (m))) +#define VXD_CMIN(n,m) (vx_debug_ ## n > (m)) +#define VXD_MASK(n,m) (vx_debug_ ## n & (m)) + +#define VXD_QPOS(v,p) (((uint32_t)(v) >> ((p)*8)) & 0xFF) +#define VXD_QUAD(v) VXD_QPOS(v,0), VXD_QPOS(v,1), \ + VXD_QPOS(v,2), VXD_QPOS(v,3) + +// #define VXD_HERE __FILE__, __LINE__ + +#define __FUNC__ __func__ + + +#ifdef CONFIG_VSERVER_DEBUG + +extern unsigned int vx_debug_switch; +extern unsigned int vx_debug_xid; +extern unsigned int vx_debug_nid; +extern unsigned int vx_debug_net; +extern unsigned int vx_debug_limit; +extern unsigned int vx_debug_dlim; +extern unsigned int vx_debug_cvirt; + + +#define VX_LOGLEVEL "vxD: " + +#define vxdprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f "\n", x); \ + } while (0) + +#define vxlprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " @%s:%d\n", x); \ + } while (0) + +#define vxfprintk(c,f,x...) 
\ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \ + } while (0) + +#else + +#define vx_debug_switch 0 +#define vx_debug_xid 0 +#define vx_debug_nid 0 +#define vx_debug_net 0 +#define vx_debug_limit 0 +#define vx_debug_dlim 0 +#define vx_debug_cvirt 0 + +#define vxdprintk(x...) do { } while (0) +#define vxlprintk(x...) do { } while (0) +#define vxfprintk(x...) do { } while (0) + +#endif + + + +#endif /* _VX_DEBUG_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/dlimit.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/dlimit.h --- linux-2.6.10-rc1/include/linux/vserver/dlimit.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/dlimit.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,85 @@ +#ifndef _VX_DLIMIT_H +#define _VX_DLIMIT_H + +#include "switch.h" +#include + +/* inode vserver commands */ + +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) + +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) + + +struct vcmd_ctx_dlimit_base_v0 { + const char __user *name; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0 { + const char __user *name; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + +#define CDLIM_UNSET (0ULL) +#define CDLIM_INFINITY (~0ULL) +#define CDLIM_KEEP (~1ULL) + + +#ifdef __KERNEL__ + +struct super_block; + +struct dl_info { + struct hlist_node dl_hlist; /* linked list of contexts */ + struct rcu_head dl_rcu; /* the rcu head */ + xid_t dl_xid; /* context id */ + atomic_t dl_usecnt; /* usage count */ + atomic_t dl_refcnt; /* reference count */ + + struct super_block *dl_sb; /* associated superblock */ + +// struct rw_semaphore dl_sem; /* protect the values */ + spinlock_t dl_lock; /* protect 
the values */ + + uint64_t dl_space_used; /* used space in bytes */ + uint64_t dl_space_total; /* maximum space in bytes */ + uint32_t dl_inodes_used; /* used inodes */ + uint32_t dl_inodes_total; /* maximum inodes */ + + unsigned int dl_nrlmult; /* non root limit mult */ +}; + +struct rcu_head; + +extern void rcu_free_dl_info(struct rcu_head *); +extern void unhash_dl_info(struct dl_info *); + +extern struct dl_info *locate_dl_info(struct super_block *, xid_t); + + +struct kstatfs; + +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); + + +extern int vc_add_dlimit(uint32_t, void __user *); +extern int vc_rem_dlimit(uint32_t, void __user *); + +extern int vc_set_dlimit(uint32_t, void __user *); +extern int vc_get_dlimit(uint32_t, void __user *); + + +typedef uint64_t dlsize_t; + + +#endif /* __KERNEL__ */ + +#endif /* _VX_DLIMIT_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/inode.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/inode.h --- linux-2.6.10-rc1/include/linux/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/inode.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,67 @@ +#ifndef _VX_INODE_H +#define _VX_INODE_H + +#include "switch.h" + +/* inode vserver commands */ + +#define VCMD_get_iattr_v0 VC_CMD(INODE, 1, 0) +#define VCMD_set_iattr_v0 VC_CMD(INODE, 2, 0) + +#define VCMD_get_iattr VC_CMD(INODE, 1, 1) +#define VCMD_set_iattr VC_CMD(INODE, 2, 1) + +struct vcmd_ctx_iattr_v0 { + /* device handle in id */ + uint64_t ino; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +struct vcmd_ctx_iattr_v1 { + const char __user *name; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + + +#define IATTR_XID 0x01000000 + +#define IATTR_ADMIN 0x00000001 +#define IATTR_WATCH 0x00000002 +#define IATTR_HIDE 0x00000004 +#define IATTR_FLAGS 0x00000007 + +#define IATTR_BARRIER 0x00010000 +#define IATTR_IUNLINK 0x00020000 +#define IATTR_IMMUTABLE 0x00040000 + + +#ifdef 
CONFIG_VSERVER_PROC_SECURE +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#else +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#endif + +#ifdef __KERNEL__ + +#define vx_hide_check(c,m) (((m) & IATTR_HIDE) ? vx_check(c,m) : 1) + +extern int vc_get_iattr_v0(uint32_t, void __user *); +extern int vc_set_iattr_v0(uint32_t, void __user *); + +extern int vc_get_iattr(uint32_t, void __user *); +extern int vc_set_iattr(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +/* inode ioctls */ + +#define FIOC_GETXFLG _IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + +#endif /* _VX_INODE_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/legacy.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/legacy.h --- linux-2.6.10-rc1/include/linux/vserver/legacy.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/legacy.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,54 @@ +#ifndef _VX_LEGACY_H +#define _VX_LEGACY_H + +#include "switch.h" +#include "network.h" + +/* compatibiliy vserver commands */ + +#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1) +#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3) + +#define VCMD_create_context VC_CMD(VSETUP, 1, 0) + +/* compatibiliy vserver arguments */ + +struct vcmd_new_s_context_v1 { + uint32_t remove_cap; + uint32_t flags; +}; + +struct vcmd_set_ipv4root_v3 { + /* number of pairs in id */ + uint32_t broadcast; + struct { + uint32_t ip; + uint32_t mask; + } nx_mask_pair[NB_IPV4ROOT]; +}; + + +#define VX_INFO_LOCK 1 /* Can't request a new vx_id */ +#define VX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define VX_INFO_PRIVATE 8 /* Noone can join this security context */ +#define VX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ +#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */ +#define VX_INFO_ULIMIT 
64 /* Use ulimit of the current process */ + /* to become the global limits */ + /* of the context */ +#define VX_INFO_NAMESPACE 128 /* save private namespace */ + + +#define NB_S_CONTEXT 16 + +#define NB_IPV4ROOT 16 + + +#ifdef __KERNEL__ +extern int vc_new_s_context(uint32_t, void __user *); +extern int vc_set_ipv4root(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_LEGACY_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/limit.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/limit.h --- linux-2.6.10-rc1/include/linux/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/limit.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,144 @@ +/* _VX_LIMIT_H defined below */ + +#if defined(__KERNEL__) && defined(_VX_INFO_DEF_) + +#include +#include + +/* context sub struct */ + +#define NUM_LIMITS 20 + +#define VLIMIT_NSOCK 16 + + +extern const char *vlimit_name[NUM_LIMITS]; + +struct _vx_limit { + atomic_t ticks; + + unsigned long rlim[NUM_LIMITS]; /* Context limit */ + unsigned long rmax[NUM_LIMITS]; /* Context maximum */ + atomic_t rcur[NUM_LIMITS]; /* Current value */ + atomic_t lhit[NUM_LIMITS]; /* Limit hits */ +}; + +static inline void vx_info_init_limit(struct _vx_limit *limit) +{ + int lim; + + for (lim=0; lim<NUM_LIMITS; lim++) { /* initialize every limit slot */ + limit->rlim[lim] = RLIM_INFINITY; + limit->rmax[lim] = 0; + atomic_set(&limit->rcur[lim], 0); + atomic_set(&limit->lhit[lim], 0); + } +} + +static inline void vx_info_exit_limit(struct _vx_limit *limit) +{ +#ifdef CONFIG_VSERVER_DEBUG + unsigned long value; + unsigned int lim; + + for (lim=0; lim<NUM_LIMITS; lim++) { + value = atomic_read(&limit->rcur[lim]); + if (value) /* report any counter still non-zero */ + printk("!!!
limit: %p[%s,%d] = %ld on exit.\n", + limit, vlimit_name[lim], lim, value); + } +#endif +} + +static inline void vx_limit_fixup(struct _vx_limit *limit) +{ + unsigned long value; + unsigned int lim; + + for (lim=0; lim<NUM_LIMITS; lim++) { + value = atomic_read(&limit->rcur[lim]); + if (value > limit->rmax[lim]) /* raise rmax to the current value */ + limit->rmax[lim] = value; + if (limit->rmax[lim] > limit->rlim[lim]) /* clamp rmax to rlim */ + limit->rmax[lim] = limit->rlim[lim]; + } +} + +#define VX_LIMIT_FMT ":\t%10d\t%10ld\t%10ld\t%6d\n" + +#define VX_LIMIT_ARG(r) \ + ,atomic_read(&limit->rcur[r]) \ + ,limit->rmax[r] \ + ,limit->rlim[r] \ + ,atomic_read(&limit->lhit[r]) + +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer) +{ + vx_limit_fixup(limit); + return sprintf(buffer, + "PROC" VX_LIMIT_FMT + "VM" VX_LIMIT_FMT + "VML" VX_LIMIT_FMT + "RSS" VX_LIMIT_FMT + "FILES" VX_LIMIT_FMT + "SOCK" VX_LIMIT_FMT + VX_LIMIT_ARG(RLIMIT_NPROC) + VX_LIMIT_ARG(RLIMIT_AS) + VX_LIMIT_ARG(RLIMIT_MEMLOCK) + VX_LIMIT_ARG(RLIMIT_RSS) + VX_LIMIT_ARG(RLIMIT_NOFILE) + VX_LIMIT_ARG(VLIMIT_NSOCK) + ); +} + +#else /* _VX_INFO_DEF_ */ +#ifndef _VX_LIMIT_H +#define _VX_LIMIT_H + +#include "switch.h" + +#define VXD_RLIMIT(r,l) (VXD_CBIT(limit, (l)) && ((r) == (l))) + +/* rlimit vserver commands */ + +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) + +struct vcmd_ctx_rlimit_v0 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +}; + +struct vcmd_ctx_rlimit_mask_v0 { + uint32_t minimum; + uint32_t softlimit; + uint32_t maximum; +}; + +#define CRLIM_UNSET (0ULL) +#define CRLIM_INFINITY (~0ULL) +#define CRLIM_KEEP (~1ULL) + +#ifdef __KERNEL__ + +#include + +extern int vc_get_rlimit(uint32_t, void __user *); +extern int vc_set_rlimit(uint32_t, void __user *); +extern int vc_get_rlimit_mask(uint32_t, void __user *); + +struct sysinfo; + +void vx_vsi_meminfo(struct sysinfo *); +void vx_vsi_swapinfo(struct sysinfo *); + + +#endif /* __KERNEL__ */ + +#endif /*
_VX_LIMIT_H */ +#endif + + diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/namespace.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/namespace.h --- linux-2.6.10-rc1/include/linux/vserver/namespace.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/namespace.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,58 @@ +#ifndef _VX_NAMESPACE_H +#define _VX_NAMESPACE_H + +#include + + +/* virtual host info names */ + +#define VCMD_vx_set_vhi_name VC_CMD(VHOST, 1, 0) +#define VCMD_vx_get_vhi_name VC_CMD(VHOST, 2, 0) + +struct vcmd_vx_vhi_name_v0 { + uint32_t field; + char name[65]; +}; + + +enum vx_vhi_name_field { + VHIN_CONTEXT=0, + VHIN_SYSNAME, + VHIN_NODENAME, + VHIN_RELEASE, + VHIN_VERSION, + VHIN_MACHINE, + VHIN_DOMAINNAME, +}; + + +#ifdef __KERNEL__ + +#include + +extern int vc_set_vhi_name(uint32_t, void __user *); +extern int vc_get_vhi_name(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_enter_namespace VC_CMD(PROCALT, 1, 0) +#define VCMD_cleanup_namespace VC_CMD(PROCALT, 2, 0) +#define VCMD_set_namespace VC_CMD(PROCALT, 3, 0) + +#ifdef __KERNEL__ + +struct vx_info; +struct namespace; +struct fs_struct; +struct vfsmount; + +extern int vx_check_vfsmount(struct vx_info *, struct vfsmount *); + +extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *); + +extern int vc_enter_namespace(uint32_t, void __user *); +extern int vc_cleanup_namespace(uint32_t, void __user *); +extern int vc_set_namespace(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_NAMESPACE_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/network.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/network.h --- linux-2.6.10-rc1/include/linux/vserver/network.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/network.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,152 @@ +#ifndef _VX_NETWORK_H +#define _VX_NETWORK_H + +#define 
MAX_N_CONTEXT 65535 /* Arbitrary limit */ + +#define NX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +#define NB_IPV4ROOT 16 + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include + + +struct nx_info { + struct hlist_node nx_hlist; /* linked list of nxinfos */ + struct rcu_head nx_rcu; /* the rcu head */ + nid_t nx_id; /* vnet id */ + atomic_t nx_usecnt; /* usage count */ + atomic_t nx_refcnt; /* reference count */ + + uint64_t nx_flags; /* network flag word */ + uint64_t nx_ncaps; /* network capabilities */ + + int nbipv4; + __u32 ipv4[NB_IPV4ROOT]; /* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and for bind any service */ + /* The other must be used explicity */ + __u32 mask[NB_IPV4ROOT]; /* Netmask for each ipv4 */ + /* Used to select the proper source */ + /* address for sockets */ + __u32 v4_bcast; /* Broadcast address to receive UDP */ + + char nx_name[65]; /* network context name */ +}; + + +struct rcu_head; + +extern void rcu_free_nx_info(struct rcu_head *); +extern void unhash_nx_info(struct nx_info *); + +extern struct nx_info *locate_nx_info(int); +extern struct nx_info *locate_or_create_nx_info(int); + +extern int get_nid_list(int, unsigned int *, int); +extern int nx_info_is_hashed(nid_t); + +extern int nx_migrate_task(struct task_struct *, struct nx_info *); + +struct in_ifaddr; +struct net_device; + +int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *); +int dev_in_nx_info(struct net_device *, struct nx_info *); + +struct sock; + +int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *); + + +#endif /* __KERNEL__ */ + +#include "switch.h" + +/* vinfo commands */ + +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +#ifdef __KERNEL__ +extern int vc_task_nid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_nx_info_v0 { + uint32_t nid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int 
vc_nx_info(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_net_create VC_CMD(VNET, 1, 0) +#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0) + +#define VCMD_net_add VC_CMD(NETALT, 1, 0) +#define VCMD_net_remove VC_CMD(NETALT, 2, 0) + +struct vcmd_net_nx_v0 { + uint16_t type; + uint16_t count; + uint32_t ip[4]; + uint32_t mask[4]; + /* more to come */ +}; + +// IPN_TYPE_IPV4 + + +#ifdef __KERNEL__ +extern int vc_net_create(uint32_t, void __user *); +extern int vc_net_migrate(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0) +#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0) + +struct vcmd_net_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_nflags(uint32_t, void __user *); +extern int vc_set_nflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define IPF_STATE_SETUP (1ULL<<32) + + +#define IPF_ONE_TIME (0x0001ULL<<32) + +#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0) +#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0) + +struct vcmd_net_caps_v0 { + uint64_t ncaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ncaps(uint32_t, void __user *); +extern int vc_set_ncaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define IPC_WOSSNAME 0x00000001 + + +#endif /* _VX_NETWORK_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/sched.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/sched.h --- linux-2.6.10-rc1/include/linux/vserver/sched.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/sched.h 2004-11-04 19:52:50.825714477 +0100 @@ -0,0 +1,221 @@ +/* _VX_SCHED_H defined below */ + +#if defined(__KERNEL__) && defined(_VX_INFO_DEF_) + +#include +#include +#include +#include +#include + +struct _vx_ticks { + uint64_t user_ticks; /* token tick events */ + uint64_t sys_ticks; /* token tick events */ + uint64_t hold_ticks; /* token ticks paused */ + uint64_t unused[5]; /* cacheline ? 
*/ +}; + +/* context sub struct */ + +struct _vx_sched { + atomic_t tokens; /* number of CPU tokens */ + spinlock_t tokens_lock; /* lock for token bucket */ + + int fill_rate; /* Fill rate: add X tokens... */ + int interval; /* Divisor: per Y jiffies */ + int tokens_min; /* Limit: minimum for unhold */ + int tokens_max; /* Limit: no more than N tokens */ + uint32_t jiffies; /* last time accounted */ + + int priority_bias; /* bias offset for priority */ + cpumask_t cpus_allowed; /* cpu mask for context */ + + struct _vx_ticks cpu[NR_CPUS]; +}; + +static inline void vx_info_init_sched(struct _vx_sched *sched) +{ + int i; + + /* scheduling; hard code starting values as constants */ + sched->fill_rate = 1; + sched->interval = 4; + sched->tokens_min = HZ >> 4; + sched->tokens_max = HZ >> 1; + sched->jiffies = jiffies; + sched->tokens_lock = SPIN_LOCK_UNLOCKED; + + atomic_set(&sched->tokens, HZ >> 2); + sched->cpus_allowed = CPU_MASK_ALL; + sched->priority_bias = 0; + + for_each_cpu(i) { + sched->cpu[i].user_ticks = 0; + sched->cpu[i].sys_ticks = 0; + sched->cpu[i].hold_ticks = 0; + } +} + +static inline void vx_info_exit_sched(struct _vx_sched *sched) +{ + return; +} + +static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) +{ + int length = 0; + int i; + + length += sprintf(buffer, + "Token:\t\t%8d\n" + "FillRate:\t%8d\n" + "Interval:\t%8d\n" + "TokensMin:\t%8d\n" + "TokensMax:\t%8d\n" + "PrioBias:\t%8d\n" + ,atomic_read(&sched->tokens) + ,sched->fill_rate + ,sched->interval + ,sched->tokens_min + ,sched->tokens_max + ,sched->priority_bias + ); + + for_each_online_cpu(i) { + length += sprintf(buffer + length, + "cpu %d: %lld %lld %lld\n" + ,i + ,(long long)sched->cpu[i].user_ticks + ,(long long)sched->cpu[i].sys_ticks + ,(long long)sched->cpu[i].hold_ticks + ); + } + + return length; +} + + +#else /* _VX_INFO_DEF_ */ +#ifndef _VX_SCHED_H +#define _VX_SCHED_H + +#include "switch.h" + +/* sched vserver commands */ + +#define VCMD_set_sched_v2 
VC_CMD(SCHED, 1, 2) +#define VCMD_set_sched VC_CMD(SCHED, 1, 3) + +struct vcmd_set_sched_v2 { + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + uint64_t cpu_mask; +}; + +struct vcmd_set_sched_v3 { + uint32_t set_mask; + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + int32_t priority_bias; +}; + + +#define VXSM_FILL_RATE 0x0001 +#define VXSM_INTERVAL 0x0002 +#define VXSM_TOKENS 0x0010 +#define VXSM_TOKENS_MIN 0x0020 +#define VXSM_TOKENS_MAX 0x0040 +#define VXSM_PRIO_BIAS 0x0100 + +#define SCHED_KEEP (-2) + +#ifdef __KERNEL__ + +extern int vc_set_sched_v1(uint32_t, void __user *); +extern int vc_set_sched_v2(uint32_t, void __user *); +extern int vc_set_sched(uint32_t, void __user *); + + +#define VAVAVOOM_RATIO 50 + +#define MAX_PRIO_BIAS 20 +#define MIN_PRIO_BIAS -20 + +#include "context.h" + + +/* scheduling stuff */ + +int effective_vavavoom(struct task_struct *, int); + +int vx_tokens_recalc(struct vx_info *); + +/* new stuff ;) */ + +static inline int vx_tokens_avail(struct vx_info *vxi) +{ + return atomic_read(&vxi->sched.tokens); +} + +static inline void vx_consume_token(struct vx_info *vxi) +{ + atomic_dec(&vxi->sched.tokens); +} + +static inline int vx_need_resched(struct task_struct *p) +{ +#ifdef CONFIG_VSERVER_HARDCPU + struct vx_info *vxi = p->vx_info; +#endif + int slice = --p->time_slice; + +#ifdef CONFIG_VSERVER_HARDCPU + if (vxi) { + int tokens; + + if ((tokens = vx_tokens_avail(vxi)) > 0) + vx_consume_token(vxi); + /* for tokens > 0, one token was consumed */ + if (tokens < 2) + return 1; + } +#endif + return (slice == 0); +} + + +static inline void vx_onhold_inc(struct vx_info *vxi) +{ + int onhold = atomic_read(&vxi->cvirt.nr_onhold); + + atomic_inc(&vxi->cvirt.nr_onhold); + if (!onhold) + vxi->cvirt.onhold_last = jiffies; +} + +static inline void __vx_onhold_update(struct vx_info *vxi) +{ + int cpu = smp_processor_id(); + uint32_t now = 
jiffies; + uint32_t delta = now - vxi->cvirt.onhold_last; + + vxi->cvirt.onhold_last = now; + vxi->sched.cpu[cpu].hold_ticks += delta; +} + +static inline void vx_onhold_dec(struct vx_info *vxi) +{ + if (atomic_dec_and_test(&vxi->cvirt.nr_onhold)) + __vx_onhold_update(vxi); +} + +#endif /* __KERNEL__ */ + +#endif /* _VX_SCHED_H */ +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/signal.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/signal.h --- linux-2.6.10-rc1/include/linux/vserver/signal.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/signal.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,27 @@ +#ifndef _VX_SIGNAL_H +#define _VX_SIGNAL_H + +#include "switch.h" + +/* context signalling */ + +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) +#define VCMD_wait_exit VC_CMD(EVENT, 99, 0) + +struct vcmd_ctx_kill_v0 { + int32_t pid; + int32_t sig; +}; + +struct vcmd_wait_exit_v0 { + int32_t a; + int32_t b; +}; + +#ifdef __KERNEL__ + +extern int vc_ctx_kill(uint32_t, void __user *); +extern int vc_wait_exit(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SIGNAL_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/switch.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/switch.h --- linux-2.6.10-rc1/include/linux/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/switch.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,96 @@ +#ifndef _VX_SWITCH_H +#define _VX_SWITCH_H + +#include + +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) +#define VC_VERSION(c) ((c) & 0xFFF) + +#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) + +/* + + Syscall Matrix V2.8 + + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | + |INFO |SETUP | |MOVE | | | | | | + 
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICES| | + HOST | 00| 01| 02| 03| 04| 05| | 06| 07| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | | + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + MEMORY | | | | | | | |SWAP | | + | 16| 17| 18| 19| 20| 21| | 22| 23| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | | + | 24| 25| 26| 27| 28| 29| | 30| 31| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + DISK | | | | |DLIMIT | | |INODE | | + VFS | 32| 33| 34| 35| 36| 37| | 38| 39| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + OTHER | | | | | | | |VINFO | | + | 40| 41| 42| 43| 44| 45| | 46| 47| + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ + SPECIAL|EVENT | | | |FLAGS | | | | | + | 48| 49| 50| 51| 52| 53| | 54| 55| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SPECIAL| | | | |RLIMIT |SYSCALL| | |COMPAT | + | 56| 57| 58| 59| 60|TEST 61| | 62| 63| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + +*/ + +#define VC_CAT_VERSION 0 + +#define VC_CAT_VSETUP 1 +#define VC_CAT_VHOST 2 + +#define VC_CAT_VPROC 9 +#define VC_CAT_PROCALT 10 +#define VC_CAT_PROCMIG 11 +#define VC_CAT_PROCTRL 12 + +#define VC_CAT_SCHED 14 + +#define VC_CAT_VNET 25 +#define VC_CAT_NETALT 26 +#define VC_CAT_NETMIG 27 +#define VC_CAT_NETCTRL 28 + +#define VC_CAT_DLIMIT 36 +#define VC_CAT_INODE 38 + +#define VC_CAT_VINFO 46 +#define VC_CAT_EVENT 48 + +#define VC_CAT_FLAGS 52 +#define VC_CAT_RLIMIT 60 + +#define VC_CAT_SYSTEST 61 +#define VC_CAT_COMPAT 63 + +/* interface version */ + +#define VCI_VERSION 0x00010022 + + +/* query version */ + 
+#define VCMD_get_version VC_CMD(VERSION, 0, 0) + + +#ifdef __KERNEL__ + +#include + + +#else /* __KERNEL__ */ +#define __user +#endif /* __KERNEL__ */ + +#endif /* _VX_SWITCH_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver/xid.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/xid.h --- linux-2.6.10-rc1/include/linux/vserver/xid.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver/xid.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,123 @@ +#ifndef _VX_XID_H +#define _VX_XID_H + + +#define XID_TAG(in) (!(in) || \ + (((struct inode *)(in))->i_sb && \ + (((struct inode *)(in))->i_sb->s_flags & MS_TAGXID))) + + +#ifdef CONFIG_INOXID_NONE + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) (0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_GID16 + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0x0000FFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (((gid) >> 16) & 0xFFFF) : 0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) \ + ((tag) ? (((gid) & 0xFFFF) | ((xid) << 16)) : (gid)) + +#endif + + +#ifdef CONFIG_INOXID_UGID24 + +#define MAX_UID 0x00FFFFFF +#define MAX_GID 0x00FFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0) + +#define XIDINO_UID(tag, uid, xid) \ + ((tag) ? (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16)) : (uid)) +#define XIDINO_GID(tag, gid, xid) \ + ((tag) ? (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24)) : (gid)) + +#endif + + +#ifdef CONFIG_INOXID_UID16 + +#define MAX_UID 0x0000FFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (((uid) >> 16) & 0xFFFF) : 0) /* xid is read back from the upper 16 bits of uid, where XIDINO_UID packs it */ + +#define XIDINO_UID(tag, uid, xid) \ + ((tag) ?
(((uid) & 0xFFFF) | ((xid) << 16)) : (uid)) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_INTERN + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (xid) : 0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_RUNTIME + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) (0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#define INOXID_UID(tag, uid, gid) \ + ((tag) ? ((uid) & MAX_UID) : (uid)) +#define INOXID_GID(tag, uid, gid) \ + ((tag) ? ((gid) & MAX_GID) : (gid)) + + +static inline uid_t vx_map_uid(uid_t uid) +{ + if ((uid > MAX_UID) && (uid != -1)) + uid = -2; + return (uid & MAX_UID); +} + +static inline gid_t vx_map_gid(gid_t gid) +{ + if ((gid > MAX_GID) && (gid != -1)) + gid = -2; + return (gid & MAX_GID); +} + + +#ifdef CONFIG_VSERVER_LEGACY +#define FIOC_GETXID _IOR('x', 1, long) +#define FIOC_SETXID _IOW('x', 2, long) +#define FIOC_SETXIDJ _IOW('x', 3, long) +#endif + +#endif /* _VX_XID_H */ diff -NurpP --minimal linux-2.6.10-rc1/include/linux/vserver.h linux-2.6.10-rc1-vs1.9.3/include/linux/vserver.h --- linux-2.6.10-rc1/include/linux/vserver.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/include/linux/vserver.h 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,9 @@ +#ifndef _LINUX_VSERVER_H +#define _LINUX_VSERVER_H + +#include +#include + +extern long vs_reboot(unsigned int, void *); + +#endif diff -NurpP --minimal linux-2.6.10-rc1/include/net/af_unix.h linux-2.6.10-rc1-vs1.9.3/include/net/af_unix.h --- linux-2.6.10-rc1/include/net/af_unix.h 2004-10-18 23:54:40.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/net/af_unix.h 2004-10-31 00:41:27.000000000 +0200 @@ -1,5 +1,8 @@ #ifndef __LINUX_NET_AFUNIX_H #define __LINUX_NET_AFUNIX_H + +#include + extern void 
unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); @@ -11,9 +14,9 @@ extern rwlock_t unix_table_lock; extern atomic_t unix_tot_inflight; -static inline struct sock *first_unix_socket(int *i) +static inline struct sock *next_unix_socket_table(int *i) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { + for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { if (!hlist_empty(&unix_socket_table[*i])) return __sk_head(&unix_socket_table[*i]); } @@ -22,16 +25,19 @@ static inline struct sock *first_unix_so static inline struct sock *next_unix_socket(int *i, struct sock *s) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; + do { + if (s) + s = sk_next(s); + if (!s) + s = next_unix_socket_table(i); + } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH)); + return s; +} + +static inline struct sock *first_unix_socket(int *i) +{ + *i = -1; /* next_unix_socket_table() pre-increments, so start one before bucket 0 */ + return next_unix_socket(i, NULL); } #define forall_unix_sockets(i, s) \ diff -NurpP --minimal linux-2.6.10-rc1/include/net/route.h linux-2.6.10-rc1-vs1.9.3/include/net/route.h --- linux-2.6.10-rc1/include/net/route.h 2004-10-18 23:53:06.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/net/route.h 2004-10-31 00:41:27.000000000 +0200 @@ -33,6 +33,9 @@ #include #include #include +#include +#include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel.
@@ -143,6 +146,60 @@ static inline char rt_tos2priority(u8 to return ip_tos2prio[IPTOS_TOS(tos)>>1]; } +#define IPI_LOOPBACK 0x0100007f + +/* Select the IPv4 source address for fl from nxi's addresses, or verify that a preset fl4_src is one of them (-EPERM otherwise). */ +static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl) +{ + int err; + int i, n = nxi->nbipv4; + u32 ipv4root = nxi->ipv4[0]; + + if (ipv4root == 0) + return 0; + + if (fl->fl4_src == 0) { + if (n > 1) { + u32 foundsrc; + + err = __ip_route_output_key(rp, fl); + if (err) { + fl->fl4_src = ipv4root; + err = __ip_route_output_key(rp, fl); + } + if (err) + return err; + + foundsrc = (*rp)->rt_src; + ip_rt_put(*rp); + + for (i=0; i<n; i++){ + u32 mask = nxi->mask[i]; + u32 ipv4 = nxi->ipv4[i]; + u32 net4 = ipv4 & mask; + + if (foundsrc == ipv4) { + fl->fl4_src = ipv4; + break; + } + if (!fl->fl4_src && (foundsrc & mask) == net4) + fl->fl4_src = ipv4; + } + } + if (fl->fl4_src == 0) + fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK) + ? IPI_LOOPBACK : ipv4root; + } else { + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == fl->fl4_src) + break; + } + if (i == n) + return -EPERM; + } + return 0; +} + static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif, u8 protocol, u16 sport, u16 dport, struct sock *sk) @@ -157,7 +214,24 @@ static inline int ip_route_connect(struc .dport = dport } } }; int err; - if (!dst || !src) { + struct nx_info *nx_info = current->nx_info; + + if (sk) + nx_info = sk->sk_nx_info; + /* sk is NULL-checked above, so guard the debug arguments as well */ + vxdprintk(VXD_CBIT(net, 4), + "ip_route_connect(%p) %p,%p;%lx", + sk, nx_info, sk ? sk->sk_socket : NULL, + (sk && sk->sk_socket) ? sk->sk_socket->flags : 0); + + if (nx_info) { + err = ip_find_src(nx_info, rp, &fl); + if (err) + return err; + if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + fl.fl4_dst = nx_info->ipv4[0]; + } + if (!fl.fl4_dst || !fl.fl4_src) { err = __ip_route_output_key(rp, &fl); if (err) return err; diff -NurpP --minimal linux-2.6.10-rc1/include/net/scm.h linux-2.6.10-rc1-vs1.9.3/include/net/scm.h --- linux-2.6.10-rc1/include/net/scm.h 2004-10-18 23:54:08.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/net/scm.h
2004-10-31 00:41:27.000000000 +0200 @@ -51,13 +51,13 @@ static __inline__ void scm_recv(struct s { if (!msg->msg_control) { - if (sock->passcred || scm->fp) + if (test_bit(SOCK_PASS_CRED, &sock->flags) || scm->fp) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); return; } - if (sock->passcred) + if (test_bit(SOCK_PASS_CRED, &sock->flags)) put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds); if (!scm->fp) diff -NurpP --minimal linux-2.6.10-rc1/include/net/sock.h linux-2.6.10-rc1-vs1.9.3/include/net/sock.h --- linux-2.6.10-rc1/include/net/sock.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/net/sock.h 2004-10-31 00:41:27.000000000 +0200 @@ -110,6 +110,10 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + xid_t skc_xid; + struct vx_info *skc_vx_info; + nid_t skc_nid; + struct nx_info *skc_nx_info; }; /** @@ -191,6 +195,10 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_xid __sk_common.skc_xid +#define sk_vx_info __sk_common.skc_vx_info +#define sk_nid __sk_common.skc_nid +#define sk_nx_info __sk_common.skc_nx_info volatile unsigned char sk_zapped; unsigned char sk_shutdown; unsigned char sk_use_write_queue; diff -NurpP --minimal linux-2.6.10-rc1/include/net/tcp.h linux-2.6.10-rc1-vs1.9.3/include/net/tcp.h --- linux-2.6.10-rc1/include/net/tcp.h 2004-10-31 00:41:02.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/include/net/tcp.h 2004-10-31 00:41:27.000000000 +0200 @@ -193,6 +193,10 @@ struct tcp_tw_bucket { #define tw_node __tw_common.skc_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt +#define tw_xid __tw_common.skc_xid +#define tw_vx_info __tw_common.skc_vx_info +#define tw_nid __tw_common.skc_nid +#define tw_nx_info __tw_common.skc_nx_info volatile unsigned char tw_substate; unsigned char tw_rcv_wscale; __u16 
tw_sport; diff -NurpP --minimal linux-2.6.10-rc1/ipc/msg.c linux-2.6.10-rc1-vs1.9.3/ipc/msg.c --- linux-2.6.10-rc1/ipc/msg.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/ipc/msg.c 2004-10-31 00:41:27.000000000 +0200 @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include "util.h" @@ -97,6 +99,7 @@ static int newque (key_t key, int msgflg msq->q_perm.mode = (msgflg & S_IRWXUGO); msq->q_perm.key = key; + msq->q_perm.xid = current->xid; msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); @@ -817,7 +820,11 @@ static int sysvipc_msg_read_proc(char *b for(i = 0; i <= msg_ids.max_id; i++) { struct msg_queue * msq; msq = msg_lock(i); - if(msq != NULL) { + if (msq) { + if (!vx_check(msq->q_perm.xid, VX_IDENT)) { + msg_unlock(msq); + continue; + } len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", msq->q_perm.key, msg_buildid(i,msq->q_perm.seq), diff -NurpP --minimal linux-2.6.10-rc1/ipc/sem.c linux-2.6.10-rc1-vs1.9.3/ipc/sem.c --- linux-2.6.10-rc1/ipc/sem.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/ipc/sem.c 2004-10-31 00:41:27.000000000 +0200 @@ -72,6 +72,8 @@ #include #include #include +#include + #include #include "util.h" @@ -176,6 +178,7 @@ static int newary (key_t key, int nsems, sma->sem_perm.mode = (semflg & S_IRWXUGO); sma->sem_perm.key = key; + sma->sem_perm.xid = current->xid; sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); @@ -1329,7 +1332,11 @@ static int sysvipc_sem_read_proc(char *b for(i = 0; i <= sem_ids.max_id; i++) { struct sem_array *sma; sma = sem_lock(i); - if(sma) { + if (sma) { + if (!vx_check(sma->sem_perm.xid, VX_IDENT)) { + sem_unlock(sma); + continue; + } len += sprintf(buffer + len, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", sma->sem_perm.key, sem_buildid(i,sma->sem_perm.seq), diff -NurpP --minimal linux-2.6.10-rc1/ipc/shm.c linux-2.6.10-rc1-vs1.9.3/ipc/shm.c --- 
linux-2.6.10-rc1/ipc/shm.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/ipc/shm.c 2004-10-31 00:41:27.000000000 +0200 @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include "util.h" @@ -193,6 +195,7 @@ static int newseg (key_t key, int shmflg return -ENOMEM; shp->shm_perm.key = key; + shp->shm_perm.xid = current->xid; shp->shm_flags = (shmflg & S_IRWXUGO); shp->mlock_user = NULL; @@ -860,11 +863,15 @@ static int sysvipc_shm_read_proc(char *b struct shmid_kernel* shp; shp = shm_lock(i); - if(shp!=NULL) { + if (shp) { #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" char *format; + if (!vx_check(shp->shm_perm.xid, VX_IDENT)) { + shm_unlock(shp); + continue; + } if (sizeof(size_t) <= sizeof(int)) format = SMALL_STRING; else diff -NurpP --minimal linux-2.6.10-rc1/ipc/util.c linux-2.6.10-rc1-vs1.9.3/ipc/util.c --- linux-2.6.10-rc1/ipc/util.c 2004-10-18 23:55:36.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/ipc/util.c 2004-10-31 00:41:27.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -105,7 +106,9 @@ int ipc_findkey(struct ipc_ids* ids, key */ for (id = 0; id <= max_id; id++) { p = ids->entries[id].p; - if(p==NULL) + if (p==NULL) + continue; + if (!vx_check(p->xid, VX_IDENT)) continue; if (key == p->key) return id; @@ -418,6 +421,8 @@ int ipcperms (struct kern_ipc_perm *ipcp { /* flag will most probably be 0 or S_...UGO from */ int requested_mode, granted_mode; + if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? 
*/ + return -1; requested_mode = (flag >> 6) | (flag >> 3) | flag; granted_mode = ipcp->mode; if (current->euid == ipcp->cuid || current->euid == ipcp->uid) diff -NurpP --minimal linux-2.6.10-rc1/kernel/Makefile linux-2.6.10-rc1-vs1.9.3/kernel/Makefile --- linux-2.6.10-rc1/kernel/Makefile 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/Makefile 2004-10-31 00:41:27.000000000 +0200 @@ -9,6 +9,11 @@ obj-y = sched.o fork.o exec_domain.o rcupdate.o intermodule.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o +# mod-subdirs := vserver + +subdir-y += vserver +obj-y += vserver/vserver.o + obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o spinlock.o diff -NurpP --minimal linux-2.6.10-rc1/kernel/capability.c linux-2.6.10-rc1-vs1.9.3/kernel/capability.c --- linux-2.6.10-rc1/kernel/capability.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/capability.c 2004-10-31 00:41:27.000000000 +0200 @@ -11,6 +11,7 @@ #include #include #include +#include #include unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ diff -NurpP --minimal linux-2.6.10-rc1/kernel/exit.c linux-2.6.10-rc1-vs1.9.3/kernel/exit.c --- linux-2.6.10-rc1/kernel/exit.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/exit.c 2004-10-31 00:41:27.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,11 @@ int getrusage(struct task_struct *, int, static void __unhash_process(struct task_struct *p) { nr_threads--; + /* tasklist_lock is held, is this sufficient? 
*/ + if (p->vx_info) { + atomic_dec(&p->vx_info->cvirt.nr_threads); + vx_nproc_dec(p); + } detach_pid(p, PIDTYPE_PID); detach_pid(p, PIDTYPE_TGID); if (thread_group_leader(p)) { @@ -226,6 +232,7 @@ void reparent_to_init(void) ptrace_unlink(current); /* Reparent to init */ REMOVE_LINKS(current); + /* FIXME handle vchild_reaper/initpid */ current->parent = child_reaper; current->real_parent = child_reaper; SET_LINKS(current); @@ -371,6 +378,7 @@ static inline void close_files(struct fi struct file * file = xchg(&files->fd[i], NULL); if (file) filp_close(file, files); + // vx_openfd_dec(i); } i++; set >>= 1; @@ -595,6 +603,7 @@ static inline void forget_original_paren struct task_struct *p, *reaper = father; struct list_head *_p, *_n; + /* FIXME handle vchild_reaper/initpid */ do { reaper = next_thread(reaper); if (reaper == father) { diff -NurpP --minimal linux-2.6.10-rc1/kernel/fork.c linux-2.6.10-rc1-vs1.9.3/kernel/fork.c --- linux-2.6.10-rc1/kernel/fork.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/fork.c 2004-10-31 00:41:27.000000000 +0200 @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include #include @@ -81,6 +84,8 @@ static kmem_cache_t *task_struct_cachep; void free_task(struct task_struct *tsk) { free_thread_info(tsk->thread_info); + clr_vx_info(&tsk->vx_info); + clr_nx_info(&tsk->nx_info); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -300,6 +305,7 @@ static struct mm_struct * mm_init(struct if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + set_vx_info(&mm->mm_vx_info, current->vx_info); return mm; } free_mm(mm); @@ -331,6 +337,7 @@ void fastcall __mmdrop(struct mm_struct BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + clr_vx_info(&mm->mm_vx_info); free_mm(mm); } @@ -457,6 +464,7 @@ static int copy_mm(unsigned long clone_f /* Copy the current MM stuff.. 
*/ memcpy(mm, oldmm, sizeof(*mm)); + mm->mm_vx_info = NULL; if (!mm_init(mm)) goto fail_nomem; @@ -786,6 +794,7 @@ static task_t *copy_process(unsigned lon { int retval; struct task_struct *p = NULL; + struct vx_info *vxi; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -814,12 +823,32 @@ static task_t *copy_process(unsigned lon if (!p) goto fork_out; + p->vx_info = NULL; + set_vx_info(&p->vx_info, current->vx_info); + p->nx_info = NULL; + set_nx_info(&p->nx_info, current->nx_info); + + /* check vserver memory */ + if (p->mm && !(clone_flags & CLONE_VM)) { + if (vx_vmpages_avail(p->mm, p->mm->total_vm)) + vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm); + else + goto bad_fork_free; + } + if (p->mm && vx_flags(VXF_FORK_RSS, 0)) { + if (!vx_rsspages_avail(p->mm, p->mm->rss)) + goto bad_fork_cleanup_vm; + } + retval = -EAGAIN; + if (!vx_nproc_avail(1)) + goto bad_fork_cleanup_vm; + if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) - goto bad_fork_free; + goto bad_fork_cleanup_vm; } atomic_inc(&p->user->__count); @@ -1015,6 +1044,12 @@ static task_t *copy_process(unsigned lon } nr_threads++; + /* p is copy of current */ + vxi = p->vx_info; + if (vxi) { + atomic_inc(&vxi->cvirt.nr_threads); + vx_nproc_inc(p); + } write_unlock_irq(&tasklist_lock); retval = 0; @@ -1057,6 +1092,9 @@ bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); +bad_fork_cleanup_vm: + if (p->mm && !(clone_flags & CLONE_VM)) + vx_pages_sub(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm); bad_fork_free: free_task(p); goto fork_out; diff -NurpP --minimal linux-2.6.10-rc1/kernel/kthread.c linux-2.6.10-rc1-vs1.9.3/kernel/kthread.c --- linux-2.6.10-rc1/kernel/kthread.c 2004-10-18 23:55:24.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/kthread.c 2004-10-31 00:41:27.000000000 
+0200 @@ -108,7 +108,7 @@ static void keventd_create_kthread(void create->result = ERR_PTR(pid); } else { wait_for_completion(&create->started); - create->result = find_task_by_pid(pid); + create->result = find_task_by_real_pid(pid); } complete(&create->done); } diff -NurpP --minimal linux-2.6.10-rc1/kernel/posix-timers.c linux-2.6.10-rc1-vs1.9.3/kernel/posix-timers.c --- linux-2.6.10-rc1/kernel/posix-timers.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/posix-timers.c 2004-10-31 00:41:27.000000000 +0200 @@ -50,6 +50,7 @@ #include #include #include +#include #ifndef div_long_long_rem #include @@ -512,7 +513,7 @@ static inline struct task_struct * good_ struct task_struct *rtn = current->group_leader; if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || + (!(rtn = find_task_by_real_pid(event->sigev_notify_thread_id)) || rtn->tgid != current->tgid || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; diff -NurpP --minimal linux-2.6.10-rc1/kernel/printk.c linux-2.6.10-rc1-vs1.9.3/kernel/printk.c --- linux-2.6.10-rc1/kernel/printk.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/printk.c 2004-10-31 00:41:27.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -251,7 +252,10 @@ int do_syslog(int type, char __user * bu unsigned long i, j, limit, count; int do_clear = 0; char c; - int error = 0; + int error = -EPERM; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return error; error = security_syslog(type); if (error) @@ -521,6 +525,8 @@ asmlinkage int printk(const char *fmt, . 
return r; } +static volatile int printk_cpu = -1; + asmlinkage int vprintk(const char *fmt, va_list args) { unsigned long flags; @@ -529,11 +535,12 @@ asmlinkage int vprintk(const char *fmt, static char printk_buf[1024]; static int log_level_unknown = 1; - if (unlikely(oops_in_progress)) + if (unlikely(oops_in_progress && printk_cpu == smp_processor_id())) zap_locks(); /* This stops the holder of console_sem just where we want him */ spin_lock_irqsave(&logbuf_lock, flags); + printk_cpu = smp_processor_id(); /* Emit the output into the temporary buffer */ printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args); diff -NurpP --minimal linux-2.6.10-rc1/kernel/sched.c linux-2.6.10-rc1-vs1.9.3/kernel/sched.c --- linux-2.6.10-rc1/kernel/sched.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/sched.c 2004-10-31 00:41:27.000000000 +0200 @@ -44,6 +44,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -235,6 +239,10 @@ struct runqueue { task_t *migration_thread; struct list_head migration_queue; #endif +#ifdef CONFIG_VSERVER_HARDCPU + struct list_head hold_queue; + int idle_tokens; +#endif #ifdef CONFIG_SCHEDSTATS /* latency stats */ @@ -604,6 +612,9 @@ static int effective_prio(task_t *p) bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; prio = p->static_prio - bonus; + if (task_vx_flags(p, VXF_SCHED_PRIO, 0)) + prio += effective_vavavoom(p, MAX_USER_PRIO); + if (prio < MAX_RT_PRIO) prio = MAX_RT_PRIO; if (prio > MAX_PRIO-1) @@ -749,21 +760,29 @@ static void activate_task(task_t *p, run } p->timestamp = now; + vx_activate_task(p); __activate_task(p, rq); } /* * deactivate_task - remove a task from the runqueue. 
*/ -static void deactivate_task(struct task_struct *p, runqueue_t *rq) +static void __deactivate_task(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; dequeue_task(p, p->array); + p->array = NULL; } +static void deactivate_task(struct task_struct *p, runqueue_t *rq) +{ + __deactivate_task(p, rq); + vx_deactivate_task(p); +} + /* * resched_task - mark a task 'to be rescheduled now'. * @@ -1222,6 +1241,7 @@ void fastcall wake_up_new_task(task_t * p->prio = effective_prio(p); + vx_activate_task(p); if (likely(cpu == this_cpu)) { if (!(clone_flags & CLONE_VM)) { /* @@ -2236,12 +2256,18 @@ void scheduler_tick(int user_ticks, int struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; runqueue_t *rq = this_rq(); task_t *p = current; + struct vx_info *vxi = p->vx_info; rq->timestamp_last_tick = sched_clock(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_ticks); + if (vxi) { + vxi->sched.cpu[cpu].user_ticks += user_ticks; + vxi->sched.cpu[cpu].sys_ticks += sys_ticks; + } + /* note: this timer irq context must be accounted for as well */ if (hardirq_count() - HARDIRQ_OFFSET) { cpustat->irq += sys_ticks; @@ -2254,10 +2280,18 @@ void scheduler_tick(int user_ticks, int if (p == rq->idle) { if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait += sys_ticks; + // vx_cpustat_acc(vxi, iowait, cpu, cpustat, sys_ticks); else cpustat->idle += sys_ticks; + // vx_cpustat_acc(vxi, idle, cpu, cpustat, sys_ticks); + if (wake_priority_sleeper(rq)) goto out; + +#ifdef CONFIG_VSERVER_HARDCPU_IDLE + if (!--rq->idle_tokens && !list_empty(&rq->hold_queue)) + set_need_resched(); +#endif rebalance_tick(cpu, rq, SCHED_IDLE); return; } @@ -2296,7 +2330,7 @@ void scheduler_tick(int user_ticks, int } goto out_unlock; } - if (!--p->time_slice) { + if (vx_need_resched(p)) { dequeue_task(p, rq->active); set_tsk_need_resched(p); p->prio = effective_prio(p); @@ -2482,6 +2516,10 @@ asmlinkage void __sched schedule(void) 
struct list_head *queue; unsigned long long now; unsigned long run_time; +#ifdef CONFIG_VSERVER_HARDCPU + struct vx_info *vxi; + int maxidle = -HZ; +#endif int cpu, idx; /* @@ -2547,6 +2585,45 @@ need_resched: deactivate_task(prev, rq); } +#ifdef CONFIG_VSERVER_HARDCPU + if (!list_empty(&rq->hold_queue)) { + struct list_head *l, *n; + int ret; + + vxi = NULL; + list_for_each_safe(l, n, &rq->hold_queue) { + next = list_entry(l, task_t, run_list); + if (vxi == next->vx_info) + continue; + + vxi = next->vx_info; + ret = vx_tokens_recalc(vxi); + // tokens = vx_tokens_avail(next); + + if (ret > 0) { + list_del(&next->run_list); + next->state &= ~TASK_ONHOLD; + // one less waiting + vx_onhold_dec(vxi); + array = rq->expired; + next->prio = MAX_PRIO-1; + enqueue_task(next, array); + rq->nr_running++; + if (next->static_prio < rq->best_expired_prio) + rq->best_expired_prio = next->static_prio; + + // printk("··· %8lu unhold %p [%d]\n", jiffies, next, next->prio); + break; + } + if ((ret < 0) && (maxidle < ret)) + maxidle = ret; + } + } + rq->idle_tokens = -maxidle; + +pick_next: +#endif + cpu = smp_processor_id(); if (unlikely(!rq->nr_running)) { go_idle: @@ -2596,6 +2673,26 @@ go_idle: queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); +#ifdef CONFIG_VSERVER_HARDCPU + vxi = next->vx_info; + if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) { + int ret = vx_tokens_recalc(vxi); + + if (unlikely(ret <= 0)) { + if (ret && (rq->idle_tokens > -ret)) + rq->idle_tokens = -ret; + __deactivate_task(next, rq); + recalc_task_prio(next, now); + // a new one on hold + vx_onhold_inc(vxi); + next->state |= TASK_ONHOLD; + list_add_tail(&next->run_list, &rq->hold_queue); + //printk("··· %8lu hold %p [%d]\n", jiffies, next, next->prio); + goto pick_next; + } + } +#endif + if (!rt_task(next) && next->activated > 0) { unsigned long long delta = now - next->timestamp; @@ -2951,6 +3048,8 @@ asmlinkage long sys_nice(int increment) * and we have a single 
winner. */ if (increment < 0) { + if (vx_flags(VXF_IGNEG_NICE, 0)) + return 0; if (!capable(CAP_SYS_NICE)) return -EPERM; if (increment < -40) @@ -3106,6 +3205,7 @@ static int setscheduler(pid_t pid, int p oldprio = p->prio; __setscheduler(p, policy, lp.sched_priority); if (array) { + vx_activate_task(p); __activate_task(p, task_rq(p)); /* * Reschedule if we are currently running on this runqueue and @@ -4533,6 +4633,9 @@ void __init sched_init(void) rq->migration_thread = NULL; INIT_LIST_HEAD(&rq->migration_queue); #endif +#ifdef CONFIG_VSERVER_HARDCPU + INIT_LIST_HEAD(&rq->hold_queue); +#endif atomic_set(&rq->nr_iowait, 0); for (j = 0; j < 2; j++) { diff -NurpP --minimal linux-2.6.10-rc1/kernel/signal.c linux-2.6.10-rc1-vs1.9.3/kernel/signal.c --- linux-2.6.10-rc1/kernel/signal.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/signal.c 2004-10-31 00:41:27.000000000 +0200 @@ -621,17 +621,26 @@ static int check_kill_permission(int sig struct task_struct *t) { int error = -EINVAL; + int user; + if (sig < 0 || sig > _NSIG) return error; + + user = (!info || ((unsigned long)info != 1 && + (unsigned long)info != 2 && SI_FROMUSER(info))); + error = -EPERM; - if ((!info || ((unsigned long)info != 1 && - (unsigned long)info != 2 && SI_FROMUSER(info))) - && ((sig != SIGCONT) || + if (user && ((sig != SIGCONT) || (current->signal->session != t->signal->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) && !capable(CAP_KILL)) return error; + + error = -ESRCH; + if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT)) + return error; + return security_task_kill(t, info, sig); } diff -NurpP --minimal linux-2.6.10-rc1/kernel/sys.c linux-2.6.10-rc1-vs1.9.3/kernel/sys.c --- linux-2.6.10-rc1/kernel/sys.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/sys.c 2004-11-04 19:52:50.840712107 +0100 @@ -12,6 +12,7 @@ #include #include #include +#include #include 
#include #include @@ -24,6 +25,8 @@ #include #include #include +#include +#include /* Don't include this - it breaks ia64's cond_syscall() implementation */ #if 0 @@ -303,7 +306,10 @@ static int set_one_prio(struct task_stru goto out; } if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) { - error = -EACCES; + if (vx_flags(VXF_IGNEG_NICE, 0)) + error = 0; + else + error = -EACCES; goto out; } no_nice = security_task_setnice(p, niceval); @@ -354,7 +360,7 @@ asmlinkage long sys_setpriority(int whic if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) goto out_unlock; @@ -413,7 +419,7 @@ asmlinkage long sys_getpriority(int whic if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) goto out_unlock; @@ -435,6 +441,7 @@ out_unlock: return retval; } +long vs_reboot(unsigned int, void *); /* * Reboot system call: for obvious reasons only root may call it, @@ -460,6 +467,9 @@ asmlinkage long sys_reboot(int magic1, i magic2 != LINUX_REBOOT_MAGIC2C)) return -EINVAL; + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return vs_reboot(cmd, arg); + lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: @@ -657,7 +667,7 @@ static int set_user(uid_t new_ruid, int { struct user_struct *new_user; - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(vx_current_xid(), new_ruid); if (!new_user) return -EAGAIN; @@ -1026,14 +1036,17 @@ asmlinkage long sys_setpgid(pid_t pid, p { struct task_struct *p; int err = -EINVAL; + pid_t rpgid; if (!pid) - pid = current->pid; + pid = vx_map_pid(current->pid); if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; + rpgid = vx_rmap_pid(pgid); + /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. 
-DaveM */ @@ -1068,22 +1081,22 @@ asmlinkage long sys_setpgid(pid_t pid, p if (pgid != pid) { struct task_struct *p; - do_each_task_pid(pgid, PIDTYPE_PGID, p) { + do_each_task_pid(rpgid, PIDTYPE_PGID, p) { if (p->signal->session == current->signal->session) goto ok_pgid; - } while_each_task_pid(pgid, PIDTYPE_PGID, p); + } while_each_task_pid(rpgid, PIDTYPE_PGID, p); goto out; } ok_pgid: - err = security_task_setpgid(p, pgid); + err = security_task_setpgid(p, rpgid); if (err) goto out; - if (process_group(p) != pgid) { + if (process_group(p) != rpgid) { detach_pid(p, PIDTYPE_PGID); - p->signal->pgrp = pgid; - attach_pid(p, PIDTYPE_PGID, pgid); + p->signal->pgrp = rpgid; + attach_pid(p, PIDTYPE_PGID, rpgid); } err = 0; @@ -1096,7 +1109,7 @@ out: asmlinkage long sys_getpgid(pid_t pid) { if (!pid) { - return process_group(current); + return vx_rmap_pid(process_group(current)); } else { int retval; struct task_struct *p; @@ -1108,7 +1121,7 @@ asmlinkage long sys_getpgid(pid_t pid) if (p) { retval = security_task_getpgid(p); if (!retval) - retval = process_group(p); + retval = vx_rmap_pid(process_group(p)); } read_unlock(&tasklist_lock); return retval; @@ -1444,7 +1457,7 @@ asmlinkage long sys_newuname(struct new_ int errno = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (copy_to_user(name, vx_new_utsname(), sizeof *name)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1455,15 +1468,17 @@ asmlinkage long sys_sethostname(char __u int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.nodename, tmp, len); - system_utsname.nodename[len] = 0; + char *ptr = vx_new_uts(nodename); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1475,15 +1490,17 @@ asmlinkage long 
sys_sethostname(char __u asmlinkage long sys_gethostname(char __user *name, int len) { int i, errno; + char *ptr; if (len < 0) return -EINVAL; down_read(&uts_sem); - i = 1 + strlen(system_utsname.nodename); + ptr = vx_new_uts(nodename); + i = 1 + strlen(ptr); if (i > len) i = len; errno = 0; - if (copy_to_user(name, system_utsname.nodename, i)) + if (copy_to_user(name, ptr, i)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1500,7 +1517,7 @@ asmlinkage long sys_setdomainname(char _ int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1508,8 +1525,10 @@ asmlinkage long sys_setdomainname(char _ down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.domainname, tmp, len); - system_utsname.domainname[len] = 0; + char *ptr = vx_new_uts(domainname); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1566,7 +1585,7 @@ asmlinkage long sys_setrlimit(unsigned i return -EINVAL; old_rlim = current->signal->rlim + resource; if ((new_rlim.rlim_max > old_rlim->rlim_max) && - !capable(CAP_SYS_RESOURCE)) + !capable(CAP_SYS_RESOURCE) && !vx_ccaps(VXC_SET_RLIMIT)) return -EPERM; if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) return -EPERM; diff -NurpP --minimal linux-2.6.10-rc1/kernel/sysctl.c linux-2.6.10-rc1-vs1.9.3/kernel/sysctl.c --- linux-2.6.10-rc1/kernel/sysctl.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/sysctl.c 2004-10-31 00:41:27.000000000 +0200 @@ -86,6 +86,7 @@ extern char modprobe_path[]; #ifdef CONFIG_HOTPLUG extern char hotplug_path[]; #endif +extern char vshelper_path[]; #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif @@ -400,6 +401,15 @@ static ctl_table kern_table[] = { .strategy = &sysctl_string, }, #endif + { + .ctl_name = KERN_VSHELPER, + .procname = "vshelper", + .data = 
&vshelper_path, + .maxlen = 256, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, #ifdef CONFIG_CHR_DEV_SG { .ctl_name = KERN_SG_BIG_BUFF, diff -NurpP --minimal linux-2.6.10-rc1/kernel/timer.c linux-2.6.10-rc1-vs1.9.3/kernel/timer.c --- linux-2.6.10-rc1/kernel/timer.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/timer.c 2004-10-31 00:41:27.000000000 +0200 @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include @@ -1005,7 +1007,7 @@ asmlinkage unsigned long sys_alarm(unsig */ asmlinkage long sys_getpid(void) { - return current->tgid; + return vx_map_tgid(current->tgid); } /* @@ -1049,7 +1051,7 @@ asmlinkage long sys_getppid(void) #endif break; } - return pid; + return vx_map_pid(pid); } asmlinkage long sys_getuid(void) @@ -1257,6 +1259,8 @@ asmlinkage long sys_sysinfo(struct sysin tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; tp.tv_sec++; } + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&tp, NULL); val.uptime = tp.tv_sec + (tp.tv_nsec ? 
1 : 0); val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); @@ -1266,6 +1270,9 @@ asmlinkage long sys_sysinfo(struct sysin val.procs = nr_threads; } while (read_seqretry(&xtime_lock, seq)); +/* if (vx_flags(VXF_VIRT_CPU, 0)) + vx_vsi_cpu(val); +*/ si_meminfo(&val); si_swapinfo(&val); diff -NurpP --minimal linux-2.6.10-rc1/kernel/user.c linux-2.6.10-rc1-vs1.9.3/kernel/user.c --- linux-2.6.10-rc1/kernel/user.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/kernel/user.c 2004-10-31 00:41:27.000000000 +0200 @@ -21,8 +21,8 @@ #define UIDHASH_BITS 8 #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) +#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) static kmem_cache_t *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; @@ -54,7 +54,7 @@ static inline void uid_hash_remove(struc list_del(&up->uidhash_list); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) { struct list_head *up; @@ -63,7 +63,7 @@ static inline struct user_struct *uid_ha user = list_entry(up, struct user_struct, uidhash_list); - if(user->uid == uid) { + if(user->uid == uid && user->xid == xid) { atomic_inc(&user->__count); return user; } @@ -78,12 +78,12 @@ static inline struct user_struct *uid_ha * * If the user_struct could not be found, return NULL. 
*/ -struct user_struct *find_user(uid_t uid) +struct user_struct *find_user(xid_t xid, uid_t uid) { struct user_struct *ret; spin_lock(&uidhash_lock); - ret = uid_hash_find(uid, uidhashentry(uid)); + ret = uid_hash_find(xid, uid, uidhashentry(xid, uid)); spin_unlock(&uidhash_lock); return ret; } @@ -99,13 +99,13 @@ void free_uid(struct user_struct *up) } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(xid_t xid, uid_t uid) { - struct list_head *hashent = uidhashentry(uid); + struct list_head *hashent = uidhashentry(xid, uid); struct user_struct *up; spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); spin_unlock(&uidhash_lock); if (!up) { @@ -115,6 +115,7 @@ struct user_struct * alloc_uid(uid_t uid if (!new) return NULL; new->uid = uid; + new->xid = xid; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); @@ -133,7 +134,7 @@ struct user_struct * alloc_uid(uid_t uid * on adding the same user already.. 
*/ spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); if (up) { key_put(new->uid_keyring); key_put(new->session_keyring); @@ -179,7 +180,7 @@ static int __init uid_cache_init(void) /* Insert the root user immediately (init already runs as root) */ spin_lock(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0)); + uid_hash_insert(&root_user, uidhashentry(0,0)); spin_unlock(&uidhash_lock); return 0; diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/Kconfig linux-2.6.10-rc1-vs1.9.3/kernel/vserver/Kconfig --- linux-2.6.10-rc1/kernel/vserver/Kconfig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/Kconfig 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,96 @@ +# +# Linux VServer configuration +# + +menu "Linux VServer" + +config VSERVER_LEGACY + bool "Enable Legacy Kernel API" + default y + help + This enables the legacy API used in vs1.xx, which allows + older tools to be used (for migration purposes). + +config VSERVER_PROC_SECURE + bool "Enable Proc Security" + depends on PROC_FS + default y + help + Hide proc entries by default for xid>1 + +config VSERVER_HARDCPU + bool "Enable Hard CPU Limits" + depends on EXPERIMENTAL + default n + help + Activate the Hard CPU Limits + +config VSERVER_HARDCPU_IDLE + bool "Limit the IDLE task" + depends on VSERVER_HARDCPU + default n + help + Limit the idle slices, so that the next context + will be scheduled as soon as possible. + This might improve interactivity/latency but + increases scheduling overhead. + +choice + prompt "Persistent Inode Context Tagging" + default INOXID_UGID24 + help + This adds persistent context information to filesystems + mounted with the tagxid option. Tagging is a requirement + for per context disk limits and per context quota.
+ + +config INOXID_NONE + bool "Disabled" + help + no context information is stored for inodes + +config INOXID_UID16 + bool "UID16/GID32" + help + reduces UID to 16 bit, but leaves GID at 32 bit. + +config INOXID_GID16 + bool "UID32/GID16" + help + reduces GID to 16 bit, but leaves UID at 32 bit. + +config INOXID_UGID24 + bool "UID24/GID24" + help + uses the upper 8bit from UID and GID for XID tagging + which leaves 24bit for UID/GID each, which should be + more than sufficient for normal use. + +config INOXID_INTERN + bool "UID32/GID32" + help + this uses otherwise reserved inode fields in the on + disk representation, which limits the use to a few + filesystems (currently ext2 and ext3) + +config INOXID_RUNTIME + bool "Runtime" + depends on EXPERIMENTAL + help + inodes are tagged when first accessed, this doesn't + require any persistent information, but might give + funny results for mixed access. + +endchoice + +config VSERVER_DEBUG + bool "Compile Debugging Code" + default n + help + Set this to yes if you want to be able to activate + debugging output at runtime. It adds a probably small + overhead (~ ??%) to all vserver related functions and + increases the kernel size by about 20k. + +endmenu + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/Makefile linux-2.6.10-rc1-vs1.9.3/kernel/vserver/Makefile --- linux-2.6.10-rc1/kernel/vserver/Makefile 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/Makefile 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,13 @@ +# +# Makefile for the Linux vserver routines.
+# + + +obj-y += vserver.o + +vserver-y := switch.o context.o namespace.o sched.o network.o inode.o \ + limit.o cvirt.o signal.o proc.o helper.o init.o dlimit.o + +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o +vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/context.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/context.c --- linux-2.6.10-rc1/kernel/vserver/context.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/context.c 2004-11-04 19:52:50.847711001 +0100 @@ -0,0 +1,761 @@ +/* + * linux/kernel/vserver/context.c + * + * Virtual Server: Context Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 context helper + * V0.02 vx_ctx_kill syscall command + * V0.03 replaced context_info calls + * V0.04 redesign of struct (de)alloc + * V0.05 rlimit basic implementation + * V0.06 task_xid and info commands + * V0.07 context flags and caps + * V0.08 switch to RCU based hash + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +/* __alloc_vx_info() + + * allocate an initialized vx_info struct + * doesn't make it visible (hash) */ + +static struct vx_info *__alloc_vx_info(xid_t xid) +{ + struct vx_info *new = NULL; + + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); + + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct vx_info)); + new->vx_id = xid; + INIT_RCU_HEAD(&new->vx_rcu); + INIT_HLIST_NODE(&new->vx_hlist); + atomic_set(&new->vx_refcnt, 0); + atomic_set(&new->vx_usecnt, 0); + new->vx_parent = NULL; + new->vx_state = 0; + new->vx_lock = SPIN_LOCK_UNLOCKED; + init_waitqueue_head(&new->vx_exit); + + /* rest of init goes here */ + vx_info_init_limit(&new->limit); + vx_info_init_sched(&new->sched); + vx_info_init_cvirt(&new->cvirt); + vx_info_init_cacct(&new->cacct); + + + new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT; + new->vx_bcaps = CAP_INIT_EFF_SET; + new->vx_ccaps = 0; + + vxdprintk(VXD_CBIT(xid, 0), + "alloc_vx_info(%d) = %p", xid, new); + return new; +} + +/* __dealloc_vx_info() + + * final disposal of vx_info */ + +static void __dealloc_vx_info(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 0), + "dealloc_vx_info(%p)", vxi); + + vxi->vx_hlist.next = LIST_POISON1; + vxi->vx_id = -1; + + vx_info_exit_limit(&vxi->limit); + vx_info_exit_sched(&vxi->sched); + vx_info_exit_cvirt(&vxi->cvirt); + vx_info_exit_cacct(&vxi->cacct); + + + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_refcnt)); + + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + // BUG_ON(!vx_state(vxi, VXS_DEFUNCT)); + + vxi->vx_state |= VXS_RELEASED; + kfree(vxi); +} + +static inline int __free_vx_info(struct vx_info *vxi) +{ + int usecnt, refcnt; + + BUG_ON(!vxi); + + usecnt = atomic_read(&vxi->vx_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&vxi->vx_refcnt); + BUG_ON(refcnt < 0); + + if (!usecnt) + __dealloc_vx_info(vxi); + return usecnt; +} + +#if 0 + +static void __rcu_free_vx_info(struct rcu_head *head) +{ + struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu); + + BUG_ON(!head); + vxdprintk(VXD_CBIT(xid, 3), + "rcu_free_vx_info(%p): uc=%d", vxi, + atomic_read(&vxi->vx_usecnt)); + + __free_vx_info(vxi); +} + +#endif + +void free_vx_info(struct vx_info 
*vxi) +{ + struct namespace *namespace; + struct fs_struct *fs; + + /* context shutdown is mandatory */ + // BUG_ON(vxi->vx_state != VXS_SHUTDOWN); + + namespace = xchg(&vxi->vx_namespace, NULL); + fs = xchg(&vxi->vx_fs, NULL); + + if (namespace) + put_namespace(namespace); + if (fs) + put_fs_struct(fs); + + BUG_ON(__free_vx_info(vxi)); + // call_rcu(&i->vx_rcu, __rcu_free_vx_info); +} + + +/* hash table for vx_info hash */ + +#define VX_HASH_SIZE 13 + +struct hlist_head vx_info_hash[VX_HASH_SIZE]; + +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(xid_t xid) +{ + return (xid % VX_HASH_SIZE); +} + + + +/* __hash_vx_info() + + * add the vxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_vx_info(struct vx_info *vxi) +{ + struct hlist_head *head; + + vxdprintk(VXD_CBIT(xid, 4), + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); + get_vx_info(vxi); + vxi->vx_state |= VXS_HASHED; + head = &vx_info_hash[__hashval(vxi->vx_id)]; + hlist_add_head_rcu(&vxi->vx_hlist, head); +} + +/* __unhash_vx_info() + + * remove the vxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_vx_info(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 4), + "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxi->vx_state &= ~VXS_HASHED; + hlist_del_rcu(&vxi->vx_hlist); + put_vx_info(vxi); +} + + +/* __lookup_vx_info() + + * requires the rcu_read_lock() + * doesn't increment the vx_refcnt */ + +static inline struct vx_info *__lookup_vx_info(xid_t xid) +{ + struct hlist_head *head = &vx_info_hash[__hashval(xid)]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct vx_info *vxi = + hlist_entry(pos, struct vx_info, vx_hlist); + + if ((vxi->vx_id == xid) && + vx_info_state(vxi, VXS_HASHED)) + return vxi; + } + return NULL; +} + + +/* __vx_dynamic_id() + + * find unused dynamic xid + * requires the hash_lock to be held */ + +static inline xid_t 
__vx_dynamic_id(void) +{ + static xid_t seq = MAX_S_CONTEXT; + xid_t barrier = seq; + + do { + if (++seq > MAX_S_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_vx_info(seq)) { + vxdprintk(VXD_CBIT(xid, 4), + "__vx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +/* __loc_vx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct vx_info * __loc_vx_info(int id, int *err) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) { + *err = -ENOMEM; + return NULL; + } + + spin_lock(&vx_info_hash_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + new->vx_id = id; + } + /* existing context requested */ + else if ((vxi = __lookup_vx_info(id))) { + /* context in setup is not available */ + if (vxi->vx_flags & VXF_STATE_SETUP) { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (not available)", id, vxi); + vxi = NULL; + *err = -EBUSY; + } else { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (found)", id, vxi); + get_vx_info(vxi); + *err = 0; + } + goto out_unlock; + } + + /* new context requested */ + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + *err = 1; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + if (new) + __dealloc_vx_info(new); + return vxi; +} + + + +/* exported stuff */ + + +void unhash_vx_info(struct vx_info *vxi) +{ + spin_lock(&vx_info_hash_lock); + __unhash_vx_info(vxi); + spin_unlock(&vx_info_hash_lock); +} + +/* locate_vx_info() + + * search for a vx_info and get() it + * negative id means current */ + +struct vx_info *locate_vx_info(int id) +{ + struct vx_info *vxi; + + if (id < 0) { + vxi = get_vx_info(current->vx_info); + } else { + rcu_read_lock(); + 
vxi = get_vx_info(__lookup_vx_info(id)); + rcu_read_unlock(); + } + return vxi; +} + +/* vx_info_is_hashed() + + * verify that xid is still hashed */ + +int vx_info_is_hashed(xid_t xid) +{ + int hashed; + + rcu_read_lock(); + hashed = (__lookup_vx_info(xid) != NULL); + rcu_read_unlock(); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY + +#if 0 +struct vx_info *alloc_vx_info(xid_t xid) +{ + return __alloc_vx_info(xid); +} +#endif + +struct vx_info *locate_or_create_vx_info(int id) +{ + int err; + + return __loc_vx_info(id, &err); +} + +#endif + +#ifdef CONFIG_PROC_FS + +int get_xid_list(int index, unsigned int *xids, int size) +{ + int hindex, nr_xids = 0; + + rcu_read_lock(); + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { + struct hlist_head *head = &vx_info_hash[hindex]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct vx_info *vxi; + + if (--index > 0) + continue; + + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + xids[nr_xids] = vxi->vx_id; + if (++nr_xids >= size) + goto out; + } + } +out: + rcu_read_unlock(); + return nr_xids; +} +#endif + +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) +{ + struct user_struct *new_user, *old_user; + + if (!p || !vxi) + BUG(); + new_user = alloc_uid(vxi->vx_id, p->uid); + if (!new_user) + return -ENOMEM; + + old_user = p->user; + if (new_user != old_user) { + atomic_inc(&new_user->processes); + atomic_dec(&old_user->processes); + p->user = new_user; + } + free_uid(old_user); + return 0; +} + +void vx_mask_bcaps(struct task_struct *p) +{ + struct vx_info *vxi = p->vx_info; + + p->cap_effective &= vxi->vx_bcaps; + p->cap_inheritable &= vxi->vx_bcaps; + p->cap_permitted &= vxi->vx_bcaps; +} + + +#include + +static inline int vx_nofiles_task(struct task_struct *tsk) +{ + struct files_struct *files = tsk->files; + unsigned long *obptr; + int count, total; + + spin_lock(&files->file_lock); + obptr = files->open_fds->fds_bits; + count = files->max_fds / (sizeof(unsigned long) * 
8); + for (total = 0; count > 0; count--) { + if (*obptr) + total += hweight_long(*obptr); + obptr++; + } + spin_unlock(&files->file_lock); + return total; +} + +#if 0 + +static inline int vx_openfd_task(struct task_struct *tsk) +{ + struct files_struct *files = tsk->files; + const unsigned long *bptr; + int count, total; + + spin_lock(&files->file_lock); + bptr = files->open_fds->fds_bits; + count = files->max_fds / (sizeof(unsigned long) * 8); + for (total = 0; count > 0; count--) { + if (*bptr) + total += hweight_long(*bptr); + bptr++; + } + spin_unlock(&files->file_lock); + return total; +} + +#endif + +/* + * migrate task to new context + * gets vxi, puts old_vxi on change + */ + +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) +{ + struct vx_info *old_vxi; + int ret = 0; + + if (!p || !vxi) + BUG(); + + old_vxi = task_get_vx_info(p); + if (old_vxi == vxi) + goto out; + + vxdprintk(VXD_CBIT(xid, 5), + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_usecnt)); + + if (!(ret = vx_migrate_user(p, vxi))) { + int nofiles; + + task_lock(p); + // openfd = vx_openfd_task(p); + nofiles = vx_nofiles_task(p); + + if (old_vxi) { + atomic_dec(&old_vxi->cvirt.nr_threads); + atomic_dec(&old_vxi->cvirt.nr_running); + atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]); + /* FIXME: what about the struct files here? */ + // atomic_sub(nofiles, &old_vxi->limit.rcur[RLIMIT_NOFILE]); + // atomic_sub(openfd, &old_vxi->limit.rcur[RLIMIT_OPENFD]); + } + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.nr_running); + atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]); + /* FIXME: what about the struct files here? */ + // atomic_add(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]); + // atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]); + + vxdprintk(VXD_CBIT(xid, 5), + "moved task %p into vxi:%p[#%d]", + p, vxi, vxi->vx_id); + + /* should be handled in set_vx_info !! 
*/ + if (old_vxi) + clr_vx_info(&p->vx_info); + set_vx_info(&p->vx_info, vxi); + p->xid = vxi->vx_id; + vx_mask_bcaps(p); + task_unlock(p); + + /* obsoleted by clr/set */ + // put_vx_info(old_vxi); + } +out: + put_vx_info(old_vxi); + return ret; +} + +int vx_set_init(struct vx_info *vxi, struct task_struct *p) +{ + if (!vxi) + return -EINVAL; + if (vxi->vx_initpid) + return -EPERM; + + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_init(%p[#%d],%p[#%d,%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); + + vxi->vx_initpid = p->tgid; + return 0; +} + + +/* vserver syscall commands below here */ + +/* taks xid and vx_info functions */ + +#include + + +int vc_task_xid(uint32_t id, void __user *data) +{ + xid_t xid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + xid = (tsk) ? tsk->xid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + xid = current->xid; + return xid; +} + + +int vc_vx_info(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.xid = vxi->vx_id; + vc_data.initpid = vxi->vx_initpid; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* context functions */ + +int vc_ctx_create(uint32_t xid, void __user *data) +{ + struct vx_info *new_vxi; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID)) + return -EINVAL; + + if (xid < 1) + return -EINVAL; + + new_vxi = __loc_vx_info(xid, &ret); + if (!new_vxi) + return ret; + if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) { + ret = -EEXIST; + goto out_put; + } + + ret = new_vxi->vx_id; + vx_migrate_task(current, new_vxi); + /* if this fails, 
we might end up with a hashed vx_info */ +out_put: + put_vx_info(new_vxi); + return ret; +} + + +int vc_ctx_migrate(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* dirty hack until Spectator becomes a cap */ + if (id == 1) { + current->xid = 1; + return 0; + } + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + vx_migrate_task(current, vxi); + put_vx_info(vxi); + return 0; +} + + +int vc_get_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.flagword = vxi->vx_flags; + + /* special STATE flag handling */ + vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME); + + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + uint64_t mask, trigger; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + /* special STATE flag handling */ + mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); + trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); + + if (trigger & VXF_STATE_SETUP) + vx_mask_bcaps(current); + if (trigger & VXF_STATE_INIT) + if (vxi == current->vx_info) + vx_set_init(vxi, current); + + vxi->vx_flags = vx_mask_flags(vxi->vx_flags, + vc_data.flagword, mask); + put_vx_info(vxi); + return 0; +} + +int vc_get_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.bcaps = vxi->vx_bcaps; + vc_data.ccaps = vxi->vx_ccaps; + vc_data.cmask = 
~0UL; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + vxi->vx_bcaps &= vc_data.bcaps; + vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps, + vc_data.ccaps, vc_data.cmask); + put_vx_info(vxi); + return 0; +} + +#include + +// EXPORT_SYMBOL_GPL(rcu_free_vx_info); +EXPORT_SYMBOL_GPL(free_vx_info); +EXPORT_SYMBOL_GPL(vx_info_hash_lock); +EXPORT_SYMBOL_GPL(unhash_vx_info); + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/cvirt.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/cvirt.c --- linux-2.6.10-rc1/kernel/vserver/cvirt.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/cvirt.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,84 @@ +/* + * linux/kernel/vserver/cvirt.c + * + * Virtual Server: Context Virtualization + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 broken out from limit.c + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) +{ + struct vx_info *vxi = current->vx_info; + + set_normalized_timespec(uptime, + uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec, + uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec); + if (!idle) + return; + set_normalized_timespec(idle, + idle->tv_sec - vxi->cvirt.bias_idle.tv_sec, + idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec); + return; +} + +uint64_t vx_idle_jiffies() +{ + return init_task.utime + init_task.stime; +} + + + +static inline uint32_t __update_loadavg(uint32_t load, + int wsize, int delta, int n) +{ + unsigned long long calc; + + /* just set it to n */ + if (unlikely(delta >= wsize)) + return (n << FSHIFT); + + 
calc = (delta * n) << FSHIFT; + calc += (wsize - delta) * load; + do_div(calc, wsize); + return calc; +} + + +void vx_update_load(struct vx_info *vxi) +{ + uint32_t now, last, delta; + + spin_lock(&vxi->cvirt.load_lock); + + now = jiffies; + last = vxi->cvirt.load_last; + delta = now - last; + + vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0], + 60*HZ, delta, atomic_read(&vxi->cvirt.nr_running)); + vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1], + 5*60*HZ, delta, atomic_read(&vxi->cvirt.nr_running)); + vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2], + 15*60*HZ, delta, atomic_read(&vxi->cvirt.nr_running)); + + vxi->cvirt.load_last = now; + spin_unlock(&vxi->cvirt.load_lock); +} + + + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/dlimit.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/dlimit.c --- linux-2.6.10-rc1/kernel/vserver/dlimit.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/dlimit.c 2004-11-04 19:52:50.848710843 +0100 @@ -0,0 +1,451 @@ +/* + * linux/kernel/vserver/dlimit.c + * + * Virtual Server: Context Disk Limits + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 initial version + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* __alloc_dl_info() + + * allocate an initialized dl_info struct + * doesn't make it visible (hash) */ + +static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *new = NULL; + + vxdprintk(VXD_CBIT(dlim, 5), + "alloc_dl_info(%p,%d)*", sb, xid); + + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct dl_info)); + new->dl_xid = xid; + new->dl_sb = sb; + INIT_RCU_HEAD(&new->dl_rcu); + INIT_HLIST_NODE(&new->dl_hlist); + spin_lock_init(&new->dl_lock); + atomic_set(&new->dl_refcnt, 0); + atomic_set(&new->dl_usecnt, 0); + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(dlim, 4), + "alloc_dl_info(%p,%d) = %p", sb, xid, new); + return new; +} + +/* __dealloc_dl_info() + + * final disposal of dl_info */ + +static void __dealloc_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 4), + "dealloc_dl_info(%p)", dli); + + dli->dl_hlist.next = LIST_POISON1; + dli->dl_xid = -1; + dli->dl_sb = 0; + + BUG_ON(atomic_read(&dli->dl_usecnt)); + BUG_ON(atomic_read(&dli->dl_refcnt)); + + kfree(dli); +} + + +/* hash table for dl_info hash */ + +#define DL_HASH_SIZE 13 + +struct hlist_head dl_info_hash[DL_HASH_SIZE]; + +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(struct super_block *sb, xid_t xid) +{ + return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE); +} + + + +/* __hash_dl_info() + + * add the dli to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_dl_info(struct dl_info *dli) +{ + struct hlist_head *head; + + vxdprintk(VXD_CBIT(dlim, 6), + "__hash_dl_info: %p[#%d]", dli, dli->dl_xid); + get_dl_info(dli); + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)]; + hlist_add_head_rcu(&dli->dl_hlist, head); +} + +/* __unhash_dl_info() + + * remove the dli from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 6), + "__unhash_dl_info: %p[#%d]", dli, dli->dl_xid); + hlist_del_rcu(&dli->dl_hlist); + put_dl_info(dli); +} + + +/* __lookup_dl_info() + + * requires the rcu_read_lock() + * doesn't increment the dl_refcnt */ + +static inline struct dl_info 
*__lookup_dl_info(struct super_block *sb, xid_t xid) +{ + struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct dl_info *dli = + hlist_entry(pos, struct dl_info, dl_hlist); + + if (dli->dl_xid == xid && dli->dl_sb == sb) { + return dli; + } + } + return NULL; +} + + +struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *dli; + + rcu_read_lock(); + dli = get_dl_info(__lookup_dl_info(sb, xid)); + vxdprintk(VXD_CBIT(dlim, 7), + "locate_dl_info(%p,#%d) = %p", sb, xid, dli); + rcu_read_unlock(); + return dli; +} + +void rcu_free_dl_info(struct rcu_head *head) +{ + struct dl_info *dli = container_of(head, struct dl_info, dl_rcu); + int usecnt, refcnt; + + BUG_ON(!dli || !head); + + usecnt = atomic_read(&dli->dl_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&dli->dl_refcnt); + BUG_ON(refcnt < 0); + + vxdprintk(VXD_CBIT(dlim, 3), + "rcu_free_dl_info(%p)", dli); + if (!usecnt) + __dealloc_dl_info(dli); + else + printk("!!! 
rcu didn't free\n"); +} + + + + +int vc_add_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_base_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + dli = __alloc_dl_info(sb, id); + spin_lock(&dl_info_hash_lock); + + ret = -EEXIST; + if (__lookup_dl_info(sb, id)) + goto out_unlock; + __hash_dl_info(dli); + dli = NULL; + ret = 0; + + out_unlock: + spin_unlock(&dl_info_hash_lock); + if (dli) + __dealloc_dl_info(dli); + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_rem_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_base_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + spin_lock(&dl_info_hash_lock); + dli = __lookup_dl_info(sb, id); + + ret = -ESRCH; + if (!dli) + goto out_unlock; + + __unhash_dl_info(dli); + ret = 0; + + out_unlock: + spin_unlock(&dl_info_hash_lock); + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_set_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = 
-EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if ((vc_data.reserved != (uint32_t)CDLIM_KEEP && + vc_data.reserved > 100) || + (vc_data.inodes_used != (uint32_t)CDLIM_KEEP && + vc_data.inodes_used > vc_data.inodes_total) || + (vc_data.space_used != (uint32_t)CDLIM_KEEP && + vc_data.space_used > vc_data.space_total)) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + + if (vc_data.inodes_used != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_used = vc_data.inodes_used; + if (vc_data.inodes_total != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_total = vc_data.inodes_total; + if (vc_data.space_used != (uint32_t)CDLIM_KEEP) { + dli->dl_space_used = vc_data.space_used; + dli->dl_space_used <<= 10; + } + if (vc_data.space_total == (uint32_t)CDLIM_INFINITY) + dli->dl_space_total = (uint64_t)CDLIM_INFINITY; + else if (vc_data.space_total != (uint32_t)CDLIM_KEEP) { + dli->dl_space_total = vc_data.space_total; + dli->dl_space_total <<= 10; + } + if (vc_data.reserved != (uint32_t)CDLIM_KEEP) + dli->dl_nrlmult = (1 << 10) * (100 - vc_data.reserved) / 100; + + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = 0; + + out_release: + path_release(&nd); + } + return ret; +} + +int vc_get_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if (vc_data.reserved > 100 || + vc_data.inodes_used > vc_data.inodes_total || + vc_data.space_used > vc_data.space_total) + goto out_release; + + ret = -ESRCH; + dli = 
locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + vc_data.inodes_used = dli->dl_inodes_used; + vc_data.inodes_total = dli->dl_inodes_total; + vc_data.space_used = dli->dl_space_used >> 10; + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + vc_data.space_total = (uint32_t)CDLIM_INFINITY; + else + vc_data.space_total = dli->dl_space_total >> 10; + + vc_data.reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = -EFAULT; + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + goto out_release; + + ret = 0; + out_release: + path_release(&nd); + } + return ret; +} + + +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct dl_info *dli; + __u64 blimit, bfree, bavail; + __u32 ifree; + + dli = locate_dl_info(sb, current->xid); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY) + goto no_ilim; + + /* reduce max inodes available to limit */ + if (buf->f_files > dli->dl_inodes_total) + buf->f_files = dli->dl_inodes_total; + + ifree = dli->dl_inodes_total - dli->dl_inodes_used; + /* reduce free inodes to min */ + if (ifree < buf->f_ffree) + buf->f_ffree = ifree; + +no_ilim: + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + goto no_blim; + + blimit = dli->dl_space_total >> sb->s_blocksize_bits; + + if (dli->dl_space_total < dli->dl_space_used) + bfree = 0; + else + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); + if (bavail < dli->dl_space_used) + bavail = 0; + else + bavail = (bavail - dli->dl_space_used) + >> sb->s_blocksize_bits; + + /* reduce max space available to limit */ + if (buf->f_blocks > blimit) + buf->f_blocks = blimit; + + /* reduce free space to min */ + if (bfree < buf->f_bfree) + buf->f_bfree = bfree; + + /* reduce avail space to min */ + if (bavail < buf->f_bavail) + buf->f_bavail = 
bavail; + +no_blim: + spin_unlock(&dli->dl_lock); + put_dl_info(dli); + + return; +} + +#include + +EXPORT_SYMBOL_GPL(locate_dl_info); +EXPORT_SYMBOL_GPL(rcu_free_dl_info); + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/helper.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/helper.c --- linux-2.6.10-rc1/kernel/vserver/helper.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/helper.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,92 @@ +/* + * linux/kernel/vserver/helper.c + * + * Virtual Context Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 basic helper + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +char vshelper_path[255] = "/sbin/vshelper"; + + +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = action: "restart", "halt", "poweroff", ... + * argv [2] = context identifier + * argv [3] = additional argument (restart2) + * + * envp [*] = type-specific parameters + */ + +long vs_reboot(unsigned int cmd, void * arg) +{ + char id_buf[8], cmd_buf[32]; + char uid_buf[32], pid_buf[32]; + char buffer[256]; + + char *argv[] = {vshelper_path, NULL, id_buf, NULL, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + uid_buf, pid_buf, cmd_buf, 0}; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid()); + + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid); + snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid); + + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + argv[1] = "restart"; + break; + + case LINUX_REBOOT_CMD_HALT: + argv[1] = "halt"; + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + argv[1] = "poweroff"; + break; + + case LINUX_REBOOT_CMD_SW_SUSPEND: + argv[1] = "swsusp"; + break; + + case LINUX_REBOOT_CMD_RESTART2: + if 
(strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) + return -EFAULT; + argv[3] = buffer; + default: + argv[1] = "restart2"; + break; + } + + /* maybe we should wait ? */ + if (call_usermodehelper(*argv, argv, envp, 0)) { + printk( KERN_WARNING + "vs_reboot(): failed to exec (%s %s %s %s)\n", + vshelper_path, argv[1], argv[2], argv[3]); + return -EPERM; + } + return 0; +} + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/init.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/init.c --- linux-2.6.10-rc1/kernel/vserver/init.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/init.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,45 @@ +/* + * linux/kernel/init.c + * + * Virtual Server Init + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +#include +#include + +int vserver_register_sysctl(void); +void vserver_unregister_sysctl(void); + + +static int __init init_vserver(void) +{ + int ret = 0; + +#ifdef CONFIG_VSERVER_DEBUG + vserver_register_sysctl(); +#endif + return ret; +} + + +static void __exit exit_vserver(void) +{ + +#ifdef CONFIG_VSERVER_DEBUG + vserver_unregister_sysctl(); +#endif + return; +} + + +module_init(init_vserver); +module_exit(exit_vserver); + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/inode.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/inode.c --- linux-2.6.10-rc1/kernel/vserver/inode.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/inode.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,223 @@ +/* + * linux/kernel/vserver/inode.c + * + * Virtual Server: File System Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 separated from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + if (!in || !in->i_sb) + return -ESRCH; + + 
*flags = IATTR_XID + | (IS_BARRIER(in) ? IATTR_BARRIER : 0) + | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0) + | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0); + *mask = IATTR_IUNLINK | IATTR_IMMUTABLE; + + if (S_ISDIR(in->i_mode)) + *mask |= IATTR_BARRIER; + + if (in->i_sb->s_flags & MS_TAGXID) { + *xid = in->i_xid; + *mask |= IATTR_XID; + } + + if (in->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *entry = PROC_I(in)->pde; + + // check for specific inodes ? + if (entry) + *mask |= IATTR_FLAGS; + if (entry) + *flags |= (entry->vx_flags & IATTR_FLAGS); + else + *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS); + } + return 0; +} + +int vc_get_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + struct inode *in = de->d_inode; + int error = 0, is_proc = 0; + + if (!in || !in->i_sb) + return -ESRCH; + + is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC); + if ((*mask & IATTR_FLAGS) && !is_proc) + return -EINVAL; + if ((*mask & IATTR_XID) && !(in->i_sb->s_flags & MS_TAGXID)) + return -EINVAL; + + down(&in->i_sem); + if (*mask & IATTR_XID) + in->i_xid = *xid; + + if (*mask & IATTR_FLAGS) { + struct proc_dir_entry *entry = PROC_I(in)->pde; + unsigned int iflags = PROC_I(in)->vx_flags; + + iflags = (iflags & ~(*mask & IATTR_FLAGS)) + | (*flags & IATTR_FLAGS); + PROC_I(in)->vx_flags = iflags; + if (entry) + entry->vx_flags = iflags; + } + + if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) { + struct iattr attr; + + 
attr.ia_valid = ATTR_ATTR_FLAG; + attr.ia_attr_flags = + (IS_IMMUTABLE(in) ? ATTR_FLAG_IMMUTABLE : 0) | + (IS_IUNLINK(in) ? ATTR_FLAG_IUNLINK : 0) | + (IS_BARRIER(in) ? ATTR_FLAG_BARRIER : 0); + + if (*mask & IATTR_IMMUTABLE) { + if (*flags & IATTR_IMMUTABLE) + attr.ia_attr_flags |= ATTR_FLAG_IMMUTABLE; + else + attr.ia_attr_flags &= ~ATTR_FLAG_IMMUTABLE; + } + if (*mask & IATTR_IUNLINK) { + if (*flags & IATTR_IUNLINK) + attr.ia_attr_flags |= ATTR_FLAG_IUNLINK; + else + attr.ia_attr_flags &= ~ATTR_FLAG_IUNLINK; + } + if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) { + if (*flags & IATTR_BARRIER) + attr.ia_attr_flags |= ATTR_FLAG_BARRIER; + else + attr.ia_attr_flags &= ~ATTR_FLAG_BARRIER; + } + if (in->i_op && in->i_op->setattr) + error = in->i_op->setattr(de, &attr); + else { + error = inode_change_ok(in, &attr); + if (!error) + error = inode_setattr(in, &attr); + } + } + + mark_inode_dirty(in); + up(&in->i_sem); + return 0; +} + +int vc_set_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data; + int ret; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + + +#ifdef CONFIG_VSERVER_LEGACY +#include + +#define PROC_DYNAMIC_FIRST 0xF0000000UL + +int vx_proc_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + struct proc_dir_entry *entry; + int error = 0; + int flags; + + if (inode->i_ino < PROC_DYNAMIC_FIRST) + return -ENOTTY; + + entry = PROC_I(inode)->pde; + if (!entry) + return -ENOTTY; + + switch(cmd) { + case FIOC_GETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + flags = entry->vx_flags; + 
if (capable(CAP_CONTEXT)) + error = put_user(flags, (int *) arg); + break; + } + case FIOC_SETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -EFAULT; + if (get_user(flags, (int *) arg)) + break; + error = 0; + entry->vx_flags = flags; + break; + } + default: + return -ENOTTY; + } + return error; +} +#endif + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/legacy.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/legacy.c --- linux-2.6.10-rc1/kernel/vserver/legacy.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/legacy.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,170 @@ +/* + * linux/kernel/vserver/legacy.c + * + * Virtual Server: Legacy Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext.c V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + + +static int vx_set_initpid(struct vx_info *vxi, int pid) +{ + if (vxi->vx_initpid) + return -EPERM; + + vxi->vx_initpid = pid; + return 0; +} + +int vc_new_s_context(uint32_t ctx, void __user *data) +{ + int ret = -ENOMEM; + struct vcmd_new_s_context_v1 vc_data; + struct vx_info *new_vxi; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* legacy hack, will be removed soon */ + if (ctx == -2) { + /* assign flags and initpid */ + if (!current->vx_info) + return -EINVAL; + ret = 0; + if (vc_data.flags & VX_INFO_INIT) + ret = vx_set_initpid(current->vx_info, current->tgid); + if (ret == 0) { + /* We keep the same vx_id, but lower the capabilities */ + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + // current->cap_bset &= (~vc_data.remove_cap); + ret = vx_current_xid(); + current->vx_info->vx_flags |= vc_data.flags; + } + return ret; + } + + if (!vx_check(0, VX_ADMIN) || + 
!capable(CAP_SYS_ADMIN) || vx_flags(VX_INFO_PRIVATE, 0)) + return -EPERM; + + /* ugly hack for Spectator */ + if (ctx == 1) { + current->xid = 1; + return 0; + } + + if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) || + (ctx == 0)) + return -EINVAL; + + if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT)) + new_vxi = locate_or_create_vx_info(ctx); + else + new_vxi = locate_vx_info(ctx); + + if (!new_vxi) + return -EINVAL; + new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT); + + ret = vx_migrate_task(current, new_vxi); + if (ret == 0) { + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + // current->cap_bset &= (~vc_data.remove_cap); + new_vxi->vx_flags |= vc_data.flags; + if (vc_data.flags & VX_INFO_INIT) + vx_set_initpid(new_vxi, current->tgid); + if (vc_data.flags & VX_INFO_NAMESPACE) + vx_set_namespace(new_vxi, + current->namespace, current->fs); + if (vc_data.flags & VX_INFO_NPROC) + new_vxi->limit.rlim[RLIMIT_NPROC] = + current->signal->rlim[RLIMIT_NPROC].rlim_max; + ret = new_vxi->vx_id; + } + put_vx_info(new_vxi); + return ret; +} + + +extern struct nx_info *create_nx_info(void); + +/* set ipv4 root (syscall) */ + +int vc_set_ipv4root(uint32_t nbip, void __user *data) +{ + int i, err = -EPERM; + struct vcmd_set_ipv4root_v3 vc_data; + struct nx_info *new_nxi, *nxi = current->nx_info; + + if (nbip < 0 || nbip > NB_IPV4ROOT) + return -EINVAL; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN)) + // We are allowed to change everything + err = 0; + else if (nxi) { + int found = 0; + + // We are allowed to select a subset of the currently + // installed IP numbers. 
/*
 * Tell whether a resource id is one of the limits that can be
 * queried/set per context: NPROC, AS, RSS, MEMLOCK and NOFILE.
 * Returns 1 for those, 0 for everything else.
 */
static int is_valid_rlimit(int id)
{
	return (id == RLIMIT_NPROC) ||
		(id == RLIMIT_AS) ||
		(id == RLIMIT_RSS) ||
		(id == RLIMIT_MEMLOCK) ||
		(id == RLIMIT_NOFILE);
}
unsigned long limit; + + limit = vxi->limit.rlim[id]; + if (limit == RLIM_INFINITY) + return CRLIM_INFINITY; + return limit; +} + +int vc_get_rlimit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.maximum = vc_get_rlim(vxi, vc_data.id); + vc_data.minimum = CRLIM_UNSET; + vc_data.softlimit = CRLIM_UNSET; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_rlimit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + if (vc_data.maximum != CRLIM_KEEP) + vxi->limit.rlim[vc_data.id] = vc_data.maximum; + put_vx_info(vxi); + + return 0; +} + +int vc_get_rlimit_mask(uint32_t id, void __user *data) +{ + static struct vcmd_ctx_rlimit_mask_v0 mask = { + /* minimum */ + 0 + , /* softlimit */ + 0 + , /* maximum */ + (1 << RLIMIT_NPROC) | + (1 << RLIMIT_NOFILE) | + (1 << RLIMIT_MEMLOCK) | + (1 << RLIMIT_AS) | + (1 << RLIMIT_RSS) + }; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_to_user(data, &mask, sizeof(mask))) + return -EFAULT; + return 0; +} + + +void vx_vsi_meminfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long v; + + v = vxi->limit.rlim[RLIMIT_RSS]; + if (v != RLIM_INFINITY) + val->totalram = min(val->totalram, v); + v = atomic_read(&vxi->limit.rcur[RLIMIT_RSS]); + val->freeram = (v < val->totalram) ? 
val->totalram - v : 0; + val->bufferram = 0; + val->totalhigh = 0; + val->freehigh = 0; + return; +} + +void vx_vsi_swapinfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long v, w; + + v = vxi->limit.rlim[RLIMIT_RSS]; + w = vxi->limit.rlim[RLIMIT_AS]; + if (w != RLIM_INFINITY) + val->totalswap = min(val->totalswap, w - + ((v != RLIM_INFINITY) ? v : 0)); + w = atomic_read(&vxi->limit.rcur[RLIMIT_AS]); + val->freeswap = (w < val->totalswap) ? val->totalswap - w : 0; + return; +} + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/namespace.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/namespace.c --- linux-2.6.10-rc1/kernel/vserver/namespace.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/namespace.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,240 @@ +/* + * linux/kernel/vserver/namespace.c + * + * Virtual Server: Context Namespace Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from context.c 0.07 + * V0.02 added task locking for namespace + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +int vx_check_vfsmount(struct vx_info *vxi, struct vfsmount *mnt) +{ + struct vfsmount *root_mnt, *altroot_mnt; + struct dentry *root, *altroot, *point; + int r1, r2, s1, s2, ret = 0; + + if (!vxi || !mnt) + return 1; + + spin_lock(&dcache_lock); + altroot_mnt = current->fs->rootmnt; + altroot = current->fs->root; + point = altroot; + + if (vxi->vx_fs) { + root_mnt = vxi->vx_fs->rootmnt; + root = vxi->vx_fs->root; + } else { + root_mnt = altroot_mnt; + root = altroot; + } + /* printk("··· %p:%p/%p:%p ", + root_mnt, root, altroot_mnt, altroot); */ + + while ((mnt != mnt->mnt_parent) && + (mnt != root_mnt) && (mnt != altroot_mnt)) { + point = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + + r1 = (mnt == root_mnt); + s1 = is_subdir(point, root); + r2 = (mnt == altroot_mnt); + s2 = is_subdir(point, altroot); + + ret 
= (((mnt == root_mnt) && is_subdir(point, root)) || + ((mnt == altroot_mnt) && is_subdir(point, altroot))); + /* printk("··· for %p:%p -> %d:%d/%d:%d = %d\n", + mnt, point, r1, s1, r2, s2, ret); */ + spin_unlock(&dcache_lock); + + return (r2 && s2); +} + + +/* virtual host info names */ + +static char * vx_vhi_name(struct vx_info *vxi, int id) +{ + switch (id) { + case VHIN_CONTEXT: + return vxi->vx_name; + case VHIN_SYSNAME: + return vxi->cvirt.utsname.sysname; + case VHIN_NODENAME: + return vxi->cvirt.utsname.nodename; + case VHIN_RELEASE: + return vxi->cvirt.utsname.release; + case VHIN_VERSION: + return vxi->cvirt.utsname.version; + case VHIN_MACHINE: + return vxi->cvirt.utsname.machine; + case VHIN_DOMAINNAME: + return vxi->cvirt.utsname.domainname; + default: + return NULL; + } + return NULL; +} + +int vc_set_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_vhi_name_v0 vc_data; + char *name; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (name) + memcpy(name, vc_data.name, 65); + put_vx_info(vxi); + return (name ? 0 : -EFAULT); +} + +int vc_get_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_vhi_name_v0 vc_data; + char *name; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (!name) + goto out_put; + + memcpy(vc_data.name, name, 65); + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; +out_put: + put_vx_info(vxi); + return (name ? 
0 : -EFAULT); +} + +/* namespace functions */ + +#include + +int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs) +{ + struct fs_struct *fs_copy; + + if (vxi->vx_namespace) + return -EPERM; + if (!ns || !fs) + return -EINVAL; + + fs_copy = copy_fs_struct(fs); + if (!fs_copy) + return -ENOMEM; + + get_namespace(ns); + vxi->vx_namespace = ns; + vxi->vx_fs = fs_copy; + return 0; +} + +int vc_enter_namespace(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct fs_struct *old_fs, *fs; + struct namespace *old_ns; + int ret = 0; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = -EINVAL; + if (!vxi->vx_namespace) + goto out_put; + + ret = -ENOMEM; + fs = copy_fs_struct(vxi->vx_fs); + if (!fs) + goto out_put; + + ret = 0; + task_lock(current); + old_ns = current->namespace; + old_fs = current->fs; + get_namespace(vxi->vx_namespace); + current->namespace = vxi->vx_namespace; + current->fs = fs; + task_unlock(current); + + put_namespace(old_ns); + put_fs_struct(old_fs); +out_put: + put_vx_info(vxi); + return ret; +} + +int vc_cleanup_namespace(uint32_t id, void *data) +{ + down_write(¤t->namespace->sem); + spin_lock(&vfsmount_lock); + umount_unused(current->namespace->root, current->fs); + spin_unlock(&vfsmount_lock); + up_write(¤t->namespace->sem); + return 0; +} + +int vc_set_namespace(uint32_t id, void __user *data) +{ + struct fs_struct *fs; + struct namespace *ns; + struct vx_info *vxi; + int ret; + + if (vx_check(0, VX_ADMIN|VX_WATCH)) + return -ENOSYS; + + task_lock(current); + vxi = get_vx_info(current->vx_info); + fs = current->fs; + atomic_inc(&fs->count); + ns = current->namespace; + get_namespace(current->namespace); + task_unlock(current); + + ret = vx_set_namespace(vxi, ns, fs); + + put_namespace(ns); + put_fs_struct(fs); + put_vx_info(vxi); + return ret; +} + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/network.c 
linux-2.6.10-rc1-vs1.9.3/kernel/vserver/network.c --- linux-2.6.10-rc1/kernel/vserver/network.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/network.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,702 @@ +/* + * linux/kernel/vserver/network.c + * + * Virtual Server: Network Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.02 cleaned up implementation + * V0.03 added equiv nx commands + * V0.04 switch to RCU based hash + * + */ + +#include +#include +#include +#include +#include +#include + +#include + + +/* __alloc_nx_info() + + * allocate an initialized nx_info struct + * doesn't make it visible (hash) */ + +static struct nx_info *__alloc_nx_info(nid_t nid) +{ + struct nx_info *new = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); + + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct nx_info)); + new->nx_id = nid; + INIT_RCU_HEAD(&new->nx_rcu); + INIT_HLIST_NODE(&new->nx_hlist); + atomic_set(&new->nx_refcnt, 0); + atomic_set(&new->nx_usecnt, 0); + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(nid, 0), + "alloc_nx_info() = %p", new); + return new; +} + +/* __dealloc_nx_info() + + * final disposal of nx_info */ + +static void __dealloc_nx_info(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 0), + "dealloc_nx_info(%p)", nxi); + + nxi->nx_hlist.next = LIST_POISON1; + nxi->nx_id = -1; + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_refcnt)); + + kfree(nxi); +} + + +/* hash table for nx_info hash */ + +#define NX_HASH_SIZE 13 + +struct hlist_head nx_info_hash[NX_HASH_SIZE]; + +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(nid_t nid) +{ + return (nid % NX_HASH_SIZE); +} + + + +/* __hash_nx_info() + + * add the nxi to the global hash table + * requires the hash_lock to be 
held */ + +static inline void __hash_nx_info(struct nx_info *nxi) +{ + struct hlist_head *head; + + vxdprintk(VXD_CBIT(nid, 4), + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); + get_nx_info(nxi); + head = &nx_info_hash[__hashval(nxi->nx_id)]; + hlist_add_head_rcu(&nxi->nx_hlist, head); +} + +/* __unhash_nx_info() + + * remove the nxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_nx_info(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 4), + "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id); + hlist_del_rcu(&nxi->nx_hlist); + put_nx_info(nxi); +} + + +/* __lookup_nx_info() + + * requires the rcu_read_lock() + * doesn't increment the nx_refcnt */ + +static inline struct nx_info *__lookup_nx_info(nid_t nid) +{ + struct hlist_head *head = &nx_info_hash[__hashval(nid)]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct nx_info *nxi = + hlist_entry(pos, struct nx_info, nx_hlist); + + if (nxi->nx_id == nid) { + return nxi; + } + } + return NULL; +} + + +/* __nx_dynamic_id() + + * find unused dynamic nid + * requires the hash_lock to be held */ + +static inline nid_t __nx_dynamic_id(void) +{ + static nid_t seq = MAX_N_CONTEXT; + nid_t barrier = seq; + + do { + if (++seq > MAX_N_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_nx_info(seq)) { + vxdprintk(VXD_CBIT(nid, 4), + "__nx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +/* __loc_nx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct nx_info * __loc_nx_info(int id, int *err) +{ + struct nx_info *new, *nxi = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id); + + if (!(new = __alloc_nx_info(id))) { + *err = -ENOMEM; + return NULL; + } + + spin_lock(&nx_info_hash_lock); + + /* dynamic context requested */ + if (id == NX_DYNAMIC_ID) { + id = __nx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + 
new->nx_id = id; + } + /* existing context requested */ + else if ((nxi = __lookup_nx_info(id))) { + /* context in setup is not available */ + if (nxi->nx_flags & VXF_STATE_SETUP) { + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (not available)", id, nxi); + nxi = NULL; + *err = -EBUSY; + } else { + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (found)", id, nxi); + get_nx_info(nxi); + *err = 0; + } + goto out_unlock; + } + + /* new context requested */ + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (new)", id, new); + __hash_nx_info(get_nx_info(new)); + nxi = new, new = NULL; + *err = 1; + +out_unlock: + spin_unlock(&nx_info_hash_lock); + if (new) + __dealloc_nx_info(new); + return nxi; +} + + + +/* exported stuff */ + + + + +void rcu_free_nx_info(struct rcu_head *head) +{ + struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu); + int usecnt, refcnt; + + BUG_ON(!nxi || !head); + + usecnt = atomic_read(&nxi->nx_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&nxi->nx_refcnt); + BUG_ON(refcnt < 0); + + vxdprintk(VXD_CBIT(nid, 3), + "rcu_free_nx_info(%p): uc=%d", nxi, usecnt); + if (!usecnt) + __dealloc_nx_info(nxi); + else + printk("!!! 
rcu didn't free\n"); +} + +void unhash_nx_info(struct nx_info *nxi) +{ + spin_lock(&nx_info_hash_lock); + __unhash_nx_info(nxi); + spin_unlock(&nx_info_hash_lock); +} + +/* locate_nx_info() + + * search for a nx_info and get() it + * negative id means current */ + +struct nx_info *locate_nx_info(int id) +{ + struct nx_info *nxi; + + if (id < 0) { + nxi = get_nx_info(current->nx_info); + } else { + rcu_read_lock(); + nxi = get_nx_info(__lookup_nx_info(id)); + rcu_read_unlock(); + } + return nxi; +} + +/* nx_info_is_hashed() + + * verify that nid is still hashed */ + +int nx_info_is_hashed(nid_t nid) +{ + int hashed; + + rcu_read_lock(); + hashed = (__lookup_nx_info(nid) != NULL); + rcu_read_unlock(); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY + +struct nx_info *locate_or_create_nx_info(int id) +{ + int err; + + return __loc_nx_info(id, &err); +} + +struct nx_info *create_nx_info(void) +{ + struct nx_info *new; + int err; + + vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void"); + if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err))) + return NULL; + return new; +} + + +#endif + +#ifdef CONFIG_PROC_FS + +int get_nid_list(int index, unsigned int *nids, int size) +{ + int hindex, nr_nids = 0; + + rcu_read_lock(); + for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { + struct hlist_head *head = &nx_info_hash[hindex]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct nx_info *nxi; + + if (--index > 0) + continue; + + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + nids[nr_nids] = nxi->nx_id; + if (++nr_nids >= size) + goto out; + } + } +out: + rcu_read_unlock(); + return nr_nids; +} +#endif + + +/* + * migrate task to new network + */ + +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) +{ + struct nx_info *old_nxi; + int ret = 0; + + if (!p || !nxi) + BUG(); + + vxdprintk(VXD_CBIT(nid, 5), + "nx_migrate_task(%p,%p[#%d.%d.%d])", + p, nxi, nxi->nx_id, + atomic_read(&nxi->nx_usecnt), + atomic_read(&nxi->nx_refcnt)); + + 
old_nxi = task_get_nx_info(p); + if (old_nxi == nxi) + goto out; + + task_lock(p); + /* should be handled in set_nx_info !! */ + if (old_nxi) + clr_nx_info(&p->nx_info); + set_nx_info(&p->nx_info, nxi); + p->nid = nxi->nx_id; + task_unlock(p); + + /* obsoleted by clr/set */ + // put_nx_info(old_nxi); +out: + put_nx_info(old_nxi); + return ret; +} + + +#include +#include + + +int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) +{ + if (!nxi) + return 1; + if (!ifa) + return 0; + return addr_in_nx_info(nxi, ifa->ifa_address); +} + +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) +{ + struct in_device *in_dev = __in_dev_get(dev); + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; + + if (!nxi) + return 1; + if (!in_dev) + return 0; + + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { + if (addr_in_nx_info(nxi, ifa->ifa_address)) + return 1; + } + return 0; +} + +/* + * check if address is covered by socket + * + * sk: the socket to check against + * addr: the address in question (must be != 0) + */ +static inline int __addr_in_socket(struct sock *sk, uint32_t addr) +{ + struct nx_info *nxi = sk->sk_nx_info; + uint32_t saddr = tcp_v4_rcv_saddr(sk); + + vxdprintk(VXD_CBIT(net, 5), + "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx", + sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + if (saddr) { + /* direct address match */ + return (saddr == addr); + } else if (nxi) { + /* match against nx_info */ + return addr_in_nx_info(nxi, addr); + } else { + /* unrestricted any socket */ + return 1; + } +} + + +int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk) +{ + vxdprintk(VXD_CBIT(net, 2), + "nx_addr_conflict(%p,%p) %d.%d,%d.%d", + nxi, sk, VXD_QUAD(addr)); + + if (addr) { + /* check real address */ + return __addr_in_socket(sk, addr); + } else if (nxi) { + /* check against nx_info */ + int i, n = nxi->nbipv4; + + for 
(i=0; iipv4[i])) + return 1; + return 0; + } else { + /* check against any */ + return 1; + } +} + + +/* vserver syscall commands below here */ + +/* taks nid and nx_info functions */ + +#include + + +int vc_task_nid(uint32_t id, void __user *data) +{ + nid_t nid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + nid = (tsk) ? tsk->nid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + nid = current->nid; + return nid; +} + + +int vc_nx_info(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_nx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.nid = nxi->nx_id; + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* network functions */ + +int vc_net_create(uint32_t nid, void __user *data) +{ + // int ret = -ENOMEM; + struct nx_info *new_nxi; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID)) + return -EINVAL; + + if (nid < 1) + return -EINVAL; + + new_nxi = __loc_nx_info(nid, &ret); + if (!new_nxi) + return ret; + if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) { + ret = -EEXIST; + goto out_put; + } + + ret = new_nxi->nx_id; + nx_migrate_task(current, new_nxi); +out_put: + put_nx_info(new_nxi); + return ret; +} + + +int vc_net_migrate(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + nx_migrate_task(current, nxi); + put_nx_info(nxi); + return 0; +} + +int vc_net_add(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_nx_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if 
(copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + + // add ip to net context here + put_nx_info(nxi); + return 0; +} + +int vc_net_remove(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_nx_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + + // rem ip from net context here + put_nx_info(nxi); + return 0; +} + + + +int vc_get_nflags(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_flags_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.flagword = nxi->nx_flags; + + /* special STATE flag handling */ + vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME); + + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_nflags(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_flags_v0 vc_data; + uint64_t mask, trigger; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + + /* special STATE flag handling */ + mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME); + trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword); + // if (trigger & IPF_STATE_SETUP) + + nxi->nx_flags = vx_mask_flags(nxi->nx_flags, + vc_data.flagword, mask); + put_nx_info(nxi); + return 0; +} + +int vc_get_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.ncaps = nxi->nx_ncaps; + vc_data.cmask = ~0UL; + put_nx_info(nxi); + + if 
(copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = locate_nx_info(id); + if (!nxi) + return -ESRCH; + + nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps, + vc_data.ncaps, vc_data.cmask); + put_nx_info(nxi); + return 0; +} + + +#include + +EXPORT_SYMBOL_GPL(rcu_free_nx_info); +EXPORT_SYMBOL_GPL(nx_info_hash_lock); +EXPORT_SYMBOL_GPL(unhash_nx_info); + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/proc.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/proc.c --- linux-2.6.10-rc1/kernel/vserver/proc.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/proc.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,899 @@ +/* + * linux/kernel/vserver/proc.c + * + * Virtual Context Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 adaptation vs1.3.0 + * V0.03 proc permissions + * V0.04 locking/generic + * V0.05 next generation procfs + * V0.06 inode validation + * V0.07 generic rewrite vid + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +static struct proc_dir_entry *proc_virtual; + +static struct proc_dir_entry *proc_vnet; + + +enum vid_directory_inos { + PROC_XID_INO = 32, + PROC_XID_INFO, + PROC_XID_STATUS, + PROC_XID_LIMIT, + PROC_XID_SCHED, + PROC_XID_CVIRT, + PROC_XID_CACCT, + + PROC_NID_INO = 64, + PROC_NID_INFO, + PROC_NID_STATUS, +}; + +#define PROC_VID_MASK 0x60 + + +/* first the actual feeds */ + + +static int proc_virtual_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ); +} + + +int proc_xid_info (int vid, char *buffer) +{ + struct vx_info 
*vxi; + int length; + + vxi = locate_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + "Init:\t%d\n" + ,vxi->vx_id + ,vxi + ,vxi->vx_initpid + ); + put_vx_info(vxi); + return length; +} + +int proc_xid_status (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = locate_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "UseCnt:\t%d\n" + "RefCnt:\t%d\n" + "Flags:\t%016llx\n" + "BCaps:\t%016llx\n" + "CCaps:\t%016llx\n" + "Ticks:\t%d\n" + ,atomic_read(&vxi->vx_usecnt) + ,atomic_read(&vxi->vx_refcnt) + ,(unsigned long long)vxi->vx_flags + ,(unsigned long long)vxi->vx_bcaps + ,(unsigned long long)vxi->vx_ccaps + ,atomic_read(&vxi->limit.ticks) + ); + put_vx_info(vxi); + return length; +} + +int proc_xid_limit (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = locate_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_limit(&vxi->limit, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_sched (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = locate_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_sched(&vxi->sched, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cvirt (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = locate_vx_info(vid); + if (!vxi) + return 0; + vx_update_load(vxi); + length = vx_info_proc_cvirt(&vxi->cvirt, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cacct (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = locate_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_cacct(&vxi->cacct, buffer); + put_vx_info(vxi); + return length; +} + + +static int proc_vnet_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ); +} + +#define atoquad(a) \ + (((a)>>0) & 0xff), (((a)>>8) & 0xff), \ + 
(((a)>>16) & 0xff), (((a)>>24) & 0xff) + +int proc_nid_info (int vid, char *buffer) +{ + struct nx_info *nxi; + int length, i; + + nxi = locate_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + ,nxi->nx_id + ,nxi + ); + for (i=0; inbipv4; i++) { + length += sprintf(buffer + length, + "%d:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i, + atoquad(nxi->ipv4[i]), + atoquad(nxi->mask[i])); + } + put_nx_info(nxi); + return length; +} + +int proc_nid_status (int vid, char *buffer) +{ + struct nx_info *nxi; + int length; + + nxi = locate_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "UseCnt:\t%d\n" + "RefCnt:\t%d\n" + ,atomic_read(&nxi->nx_usecnt) + ,atomic_read(&nxi->nx_refcnt) + ); + put_nx_info(nxi); + return length; +} + +/* here the inode helpers */ + + +#define fake_ino(id,nr) (((nr) & 0xFFFF) | \ + (((id) & 0xFFFF) << 16)) + +#define inode_vid(i) (((i)->i_ino >> 16) & 0xFFFF) +#define inode_type(i) ((i)->i_ino & 0xFFFF) + +#define MAX_MULBY10 ((~0U-9)/10) + + +static struct inode *proc_vid_make_inode(struct super_block * sb, + int vid, int ino) +{ + struct inode *inode = new_inode(sb); + + if (!inode) + goto out; + + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(vid, ino); + + inode->i_uid = 0; + inode->i_gid = 0; + // inode->i_xid = xid; +out: + return inode; +} + +static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode * inode = dentry->d_inode; + int vid, hashed=0; + + vid = inode_vid(inode); + switch (inode_type(inode) & PROC_VID_MASK) { + case PROC_XID_INO: + hashed = vx_info_is_hashed(vid); + break; + case PROC_NID_INO: + hashed = nx_info_is_hashed(vid); + break; + } + if (hashed) + return 1; + d_drop(dentry); + return 0; +} + +/* +static int proc_vid_delete_dentry(struct dentry * dentry) +{ + return 1; +} +*/ + + +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) + +static ssize_t proc_vid_info_read(struct file * file, char * 
buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length; + ssize_t end; + int vid; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + vid = inode_vid(inode); + length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page); + + if (length < 0) { + free_page(page); + return length; + } + /* Static 4kB (or whatever) block capacity */ + if (*ppos >= length) { + free_page(page); + return 0; + } + if (count + *ppos > length) + count = length - *ppos; + end = count + *ppos; + copy_to_user(buf, (char *) page + *ppos, count); + *ppos = end; + free_page(page); + return count; +} + + + + + +/* here comes the lower level (vid) */ + +static struct file_operations proc_vid_info_file_operations = { + read: proc_vid_info_read, +}; + +static struct dentry_operations proc_vid_dentry_operations = { + d_revalidate: proc_vid_revalidate, +// d_delete: proc_vid_delete_dentry, +}; + + +struct vid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct vid_entry vx_base_stuff[] = { + E(PROC_XID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_XID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_XID_LIMIT, "limit", S_IFREG|S_IRUGO), + E(PROC_XID_SCHED, "sched", S_IFREG|S_IRUGO), + E(PROC_XID_CVIRT, "cvirt", S_IFREG|S_IRUGO), + E(PROC_XID_CACCT, "cacct", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + +static struct vid_entry vn_base_stuff[] = { + E(PROC_NID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_NID_STATUS, "status", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + + + +static struct dentry *proc_vid_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode; + struct vid_entry *p; + int error; + + error = -ENOENT; + inode = NULL; + + switch (inode_type(dir)) { + case PROC_XID_INO: + p = vx_base_stuff; + break; + case PROC_NID_INO: + p = 
vn_base_stuff; + break; + default: + goto out; + } + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = -EINVAL; + inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type); + if (!inode) + goto out; + + switch(p->type) { + case PROC_XID_INFO: + PROC_I(inode)->op.proc_vid_read = proc_xid_info; + break; + case PROC_XID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_xid_status; + break; + case PROC_XID_LIMIT: + PROC_I(inode)->op.proc_vid_read = proc_xid_limit; + break; + case PROC_XID_SCHED: + PROC_I(inode)->op.proc_vid_read = proc_xid_sched; + break; + case PROC_XID_CVIRT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt; + break; + case PROC_XID_CACCT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cacct; + break; + + case PROC_NID_INFO: + PROC_I(inode)->op.proc_vid_read = proc_nid_info; + break; + case PROC_NID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_nid_status; + break; + + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); + return ERR_PTR(-EINVAL); + } + inode->i_mode = p->mode; +// inode->i_op = &proc_vid_info_inode_operations; + inode->i_fop = &proc_vid_info_file_operations; + inode->i_nlink = 1; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + error = 0; +out: + return ERR_PTR(error); +} + + +static int proc_vid_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int i, size; + struct inode *inode = filp->f_dentry->d_inode; + struct vid_entry *p; + + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, + inode->i_ino, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, i, + PROC_ROOT_INO, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + switch (inode_type(inode)) { + case PROC_XID_INO: + size = 
sizeof(vx_base_stuff); + p = vx_base_stuff + i; + break; + case PROC_NID_INO: + size = sizeof(vn_base_stuff); + p = vn_base_stuff + i; + break; + default: + return 1; + } + if (i >= size/sizeof(struct vid_entry)) + return 1; + while (p->name) { + if (filldir(dirent, p->name, p->len, + filp->f_pos, fake_ino(inode_vid(inode), + p->type), p->mode >> 12) < 0) + return 0; + filp->f_pos++; + p++; + } + } + return 1; +} + + + + +/* now the upper level (virtual) */ + +static struct file_operations proc_vid_file_operations = { + read: generic_read_dir, + readdir: proc_vid_readdir, +}; + +static struct inode_operations proc_vid_inode_operations = { + lookup: proc_vid_lookup, +}; + + + +static __inline__ int atovid(const char *str, int len) +{ + int vid, c; + + vid = 0; + while (len-- > 0) { + c = *str - '0'; + str++; + if (c > 9) + return -1; + if (vid >= MAX_MULBY10) + return -1; + vid *= 10; + vid += c; + if (!vid) + return -1; + } + return vid; +} + + +struct dentry *proc_virtual_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int xid, len, ret; + struct vx_info *vxi; + const char *name; + struct inode *inode; + + name = dentry->d_name.name; + len = dentry->d_name.len; + ret = -ENOMEM; + + if (len == 7 && !memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_XID_INO); + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_virtual_info; + inode->i_mode = S_IFREG|S_IRUGO; +// inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, 
inode); + return NULL; + } + + ret = -ENOENT; + xid = atovid(name, len); + if (xid < 0) + goto out; + vxi = locate_vx_info(xid); + if (!vxi) + goto out; + + inode = NULL; + if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + inode = proc_vid_make_inode(dir->i_sb, + vxi->vx_id, PROC_XID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_vx_info(vxi); +out: + return ERR_PTR(ret); +} + + +struct dentry *proc_vnet_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int nid, len, ret; + struct nx_info *nxi; + const char *name; + struct inode *inode; + + name = dentry->d_name.name; + len = dentry->d_name.len; + ret = -ENOMEM; + if (len == 7 && !memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_NID_INO); + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_vnet_info; + inode->i_mode = S_IFREG|S_IRUGO; +// inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + + ret = -ENOENT; + nid = atovid(name, len); + if (nid < 0) + goto out; + nxi = locate_nx_info(nid); + if (!nxi) + goto out; + + inode = NULL; + if (1) + inode = proc_vid_make_inode(dir->i_sb, + nxi->nx_id, PROC_NID_INO); + if (!inode) + goto out_release; + + inode->i_mode = 
S_IFDIR|S_IRUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_nx_info(nxi); +out: + return ERR_PTR(ret); +} + + + + +#define PROC_NUMBUF 10 +#define PROC_MAXVIDS 32 + +int proc_virtual_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int xid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-3; + unsigned int nr_xids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_XID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_XID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 3: + if (current->xid > 1) { + ino = fake_ino(1, PROC_XID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + } + + nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS); + for (i = 0; i < nr_xids; i++) { + int xid = xid_array[i]; + ino_t ino = fake_ino(xid, PROC_XID_INO); + unsigned int j = PROC_NUMBUF; + + do buf[--j] = '0' + (xid % 10); while (xid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_virtual_dir_operations = { + read: generic_read_dir, + readdir: proc_virtual_readdir, +}; + +static struct inode_operations proc_virtual_dir_inode_operations = { + lookup: proc_virtual_lookup, +}; + + +int proc_vnet_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned 
int nid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-3; + unsigned int nr_nids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_NID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_NID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 3: + if (current->xid > 1) { + ino = fake_ino(1, PROC_NID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + } + + nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS); + for (i = 0; i < nr_nids; i++) { + int nid = nid_array[i]; + ino_t ino = fake_ino(nid, PROC_NID_INO); + unsigned long j = PROC_NUMBUF; + + do buf[--j] = '0' + (nid % 10); while (nid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_vnet_dir_operations = { + read: generic_read_dir, + readdir: proc_vnet_readdir, +}; + +static struct inode_operations proc_vnet_dir_inode_operations = { + lookup: proc_vnet_lookup, +}; + + + +void proc_vx_init(void) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir("virtual", 0); + if (ent) { + ent->proc_fops = &proc_virtual_dir_operations; + ent->proc_iops = &proc_virtual_dir_inode_operations; + } + proc_virtual = ent; + + ent = proc_mkdir("vnet", 0); + if (ent) { + ent->proc_fops = &proc_vnet_dir_operations; + ent->proc_iops = &proc_vnet_dir_inode_operations; + } + proc_vnet = ent; +} + + + + +/* per pid info */ + + +char *task_vx_info(struct task_struct *p, char *buffer) +{ + struct vx_info *vxi; + + buffer += sprintf (buffer,"XID:\t%d\n", 
vx_task_xid(p)); + vxi = task_get_vx_info(p); + if (vxi && !vx_flags(VXF_INFO_HIDE, 0)) { + buffer += sprintf (buffer,"BCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_bcaps); + buffer += sprintf (buffer,"CCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_ccaps); + buffer += sprintf (buffer,"CFlags:\t%016llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"CIPid:\t%d\n" + ,vxi->vx_initpid); + } + put_vx_info(vxi); + return buffer; +} + +int proc_pid_vx_info(struct task_struct *p, char *buffer) +{ + char * orig = buffer; + + buffer = task_vx_info(p, buffer); + return buffer - orig; +} + +char *task_nx_info(struct task_struct *p, char *buffer) +{ + struct nx_info *nxi; + + buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p)); + nxi = task_get_nx_info(p); + if (nxi && !vx_flags(VXF_INFO_HIDE, 0)) { + int i; + + for (i=0; inbipv4; i++){ + buffer += sprintf (buffer, + "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i + ,NIPQUAD(nxi->ipv4[i]) + ,NIPQUAD(nxi->mask[i])); + } + buffer += sprintf (buffer, + "V4Root[bcast]:\t%d.%d.%d.%d\n" + ,NIPQUAD(nxi->v4_bcast)); + } + put_nx_info(nxi); + return buffer; +} + +int proc_pid_nx_info(struct task_struct *p, char *buffer) +{ + char * orig = buffer; + + buffer = task_nx_info(p, buffer); + return buffer - orig; +} + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/sched.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/sched.c --- linux-2.6.10-rc1/kernel/vserver/sched.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/sched.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,225 @@ +/* + * linux/kernel/vserver/sched.c + * + * Virtual Server: Scheduler Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 adapted Sam Vilains version to 2.6.3 + * V0.02 removed legacy interface + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* + * recalculate the context's scheduling tokens + * + * ret > 0 : number of tokens available + * ret = 0 
: context is paused + * ret < 0 : number of jiffies until new tokens arrive + * + */ +int vx_tokens_recalc(struct vx_info *vxi) +{ + long delta, tokens = 0; + + if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0)) + /* we are paused */ + return 0; + + delta = jiffies - vxi->sched.jiffies; + + if (delta >= vxi->sched.interval) { + /* lockdown scheduler info */ + spin_lock(&vxi->sched.tokens_lock); + + /* calc integral token part */ + delta = jiffies - vxi->sched.jiffies; + tokens = delta / vxi->sched.interval; + delta = tokens * vxi->sched.interval; + tokens *= vxi->sched.fill_rate; + + atomic_add(tokens, &vxi->sched.tokens); + vxi->sched.jiffies += delta; + tokens = atomic_read(&vxi->sched.tokens); + + if (tokens > vxi->sched.tokens_max) { + tokens = vxi->sched.tokens_max; + atomic_set(&vxi->sched.tokens, tokens); + } + spin_unlock(&vxi->sched.tokens_lock); + } else { + /* no new tokens */ + tokens = vx_tokens_avail(vxi); + if (tokens <= 0) + vxi->vx_state |= VXS_ONHOLD; + if (tokens < vxi->sched.tokens_min) { + /* enough tokens will be available in */ + if (vxi->sched.tokens_min == 0) + return delta - vxi->sched.interval; + return delta - vxi->sched.interval * + vxi->sched.tokens_min / vxi->sched.fill_rate; + } + } + + /* we have some tokens left */ + if (vx_info_state(vxi, VXS_ONHOLD) && + (tokens >= vxi->sched.tokens_min)) + vxi->vx_state &= ~VXS_ONHOLD; + if (vx_info_state(vxi, VXS_ONHOLD)) + tokens -= vxi->sched.tokens_min; + + return tokens; +} + +/* + * effective_prio - return the priority that is based on the static + * priority but is modified by bonuses/penalties. + * + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] + * into a -4 ... 0 ... +4 bonus/penalty range. + * + * Additionally, we scale another amount based on the number of + * CPU tokens currently held by the context, if the process is + * part of a context (and the appropriate SCHED flag is set). + * This ranges from -5 ... 0 ... +15, quadratically. + * + * So, the total bonus is -9 .. 0 .. 
+19 + * We use ~50% of the full 0...39 priority range so that: + * + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * unless that context is far exceeding its CPU allocation. + * + * Both properties are important to certain workloads. + */ +int effective_vavavoom(task_t *p, int max_prio) +{ + struct vx_info *vxi = p->vx_info; + int vavavoom, max; + + /* lots of tokens = lots of vavavoom + * no tokens = no vavavoom */ + if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) { + max = vxi->sched.tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = max_prio * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + /* alternative, geometric mapping + vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom + / vxi->sched.tokens_max - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */ + } else + vavavoom = 0; + /* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */ + + return vavavoom; +} + + +int vc_set_sched_v2(uint32_t xid, void __user *data) +{ + struct vcmd_set_sched_v2 vc_data; + struct vx_info *vxi; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = locate_vx_info(xid); + if (!vxi) + return -EINVAL; + + spin_lock(&vxi->sched.tokens_lock); + + if (vc_data.interval != SCHED_KEEP) + vxi->sched.interval = vc_data.interval; + if (vc_data.fill_rate != SCHED_KEEP) + vxi->sched.fill_rate = vc_data.fill_rate; + if (vc_data.tokens_min != SCHED_KEEP) + vxi->sched.tokens_min = vc_data.tokens_min; + if (vc_data.tokens_max != SCHED_KEEP) + vxi->sched.tokens_max = vc_data.tokens_max; + if (vc_data.tokens != SCHED_KEEP) + atomic_set(&vxi->sched.tokens, vc_data.tokens); + + /* Sanity check the resultant values */ + if (vxi->sched.fill_rate <= 0) + vxi->sched.fill_rate = 1; + if (vxi->sched.interval <= 0) + vxi->sched.interval = HZ; + if (vxi->sched.tokens_max == 0) + vxi->sched.tokens_max = 
1; + if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max) + atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max); + if (vxi->sched.tokens_min > vxi->sched.tokens_max) + vxi->sched.tokens_min = vxi->sched.tokens_max; + + spin_unlock(&vxi->sched.tokens_lock); + put_vx_info(vxi); + return 0; +} + + +int vc_set_sched(uint32_t xid, void __user *data) +{ + struct vcmd_set_sched_v3 vc_data; + struct vx_info *vxi; + unsigned int set_mask; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = locate_vx_info(xid); + if (!vxi) + return -EINVAL; + + set_mask = vc_data.set_mask; + + spin_lock(&vxi->sched.tokens_lock); + + if (set_mask & VXSM_FILL_RATE) + vxi->sched.fill_rate = vc_data.fill_rate; + if (set_mask & VXSM_INTERVAL) + vxi->sched.interval = vc_data.interval; + if (set_mask & VXSM_TOKENS) + atomic_set(&vxi->sched.tokens, vc_data.tokens); + if (set_mask & VXSM_TOKENS_MIN) + vxi->sched.tokens_min = vc_data.tokens_min; + if (set_mask & VXSM_TOKENS_MAX) + vxi->sched.tokens_max = vc_data.tokens_max; + if (set_mask & VXSM_PRIO_BIAS) + vxi->sched.priority_bias = vc_data.priority_bias; + + /* Sanity check the resultant values */ + if (vxi->sched.fill_rate <= 0) + vxi->sched.fill_rate = 1; + if (vxi->sched.interval <= 0) + vxi->sched.interval = HZ; + if (vxi->sched.tokens_max == 0) + vxi->sched.tokens_max = 1; + if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max) + atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max); + if (vxi->sched.tokens_min > vxi->sched.tokens_max) + vxi->sched.tokens_min = vxi->sched.tokens_max; + if (vxi->sched.priority_bias > MAX_PRIO_BIAS) + vxi->sched.priority_bias = MAX_PRIO_BIAS; + if (vxi->sched.priority_bias < MIN_PRIO_BIAS) + vxi->sched.priority_bias = MIN_PRIO_BIAS; + + spin_unlock(&vxi->sched.tokens_lock); + put_vx_info(vxi); + return 0; +} + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/signal.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/signal.c --- 
linux-2.6.10-rc1/kernel/vserver/signal.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/signal.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,127 @@ +/* + * linux/kernel/vserver/signal.c + * + * Virtual Server: Signal Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +#include + +#include +#include + +#include +#include +#include + + +int vc_ctx_kill(uint32_t id, void __user *data) +{ + int retval, count=0; + struct vcmd_ctx_kill_v0 vc_data; + struct siginfo info; + struct task_struct *p; + struct vx_info *vxi; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + info.si_signo = vc_data.sig; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->pid; + info.si_uid = current->uid; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + retval = -ESRCH; + read_lock(&tasklist_lock); + switch (vc_data.pid) { + case -1: + case 0: + for_each_process(p) { + int err = 0; + + if (vx_task_xid(p) != id || p->pid <= 1 || + (vc_data.pid && vxi->vx_initpid == p->pid) || + !thread_group_leader(p)) + continue; + + err = send_sig_info(vc_data.sig, &info, p); + ++count; + if (err != -EPERM) + retval = err; + } + break; + + default: + p = find_task_by_real_pid(vc_data.pid); + if (p) { + if (!thread_group_leader(p)) { + struct task_struct *tg; + + tg = find_task_by_real_pid(p->tgid); + if (tg) + p = tg; + } + if ((id == -1) || (vx_task_xid(p) == id)) + retval = send_sig_info(vc_data.sig, &info, p); + } + break; + } + read_unlock(&tasklist_lock); + put_vx_info(vxi); + return retval; +} + + +static int __wait_exit(struct vx_info *vxi) +{ + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + add_wait_queue(&vxi->vx_exit, &wait); + set_current_state(TASK_INTERRUPTIBLE); + +wait: + if (vx_info_state(vxi, VXS_DEFUNCT)) + goto out; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + 
goto out; + } + schedule(); + goto wait; + +out: + set_current_state(TASK_RUNNING); + remove_wait_queue(&vxi->vx_exit, &wait); + return ret; +} + + + +int vc_wait_exit(uint32_t id, void __user *data) +{ +// struct vcmd_wait_exit_v0 vc_data; + struct vx_info *vxi; + int ret; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = __wait_exit(vxi); + put_vx_info(vxi); + return ret; +} + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/switch.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/switch.c --- linux-2.6.10-rc1/kernel/vserver/switch.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/switch.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,186 @@ +/* + * linux/kernel/vserver/switch.c + * + * Virtual Server: Syscall Switch + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 syscall switch + * V0.02 added signal to context + * V0.03 added rlimit functions + * V0.04 added iattr, task/xid functions + * + */ + +#include +#include +#include + +#include +#include +#include + + +static inline int +vc_get_version(uint32_t id) +{ + return VCI_VERSION; +} + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern asmlinkage long +sys_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + if (!capable(CAP_CONTEXT)) + return -EPERM; + + vxdprintk(VXD_CBIT(switch, 0), + "vc: VCMD_%02d_%d[%d], %d", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), id); + + switch (cmd) { + case VCMD_get_version: + return vc_get_version(id); + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_new_s_context: + return vc_new_s_context(id, data); + case VCMD_set_ipv4root: + return vc_set_ipv4root(id, data); +#endif + + case VCMD_task_xid: + return vc_task_xid(id, data); + case VCMD_vx_info: + return vc_vx_info(id, data); + + case VCMD_task_nid: + return vc_task_nid(id, data); + case VCMD_nx_info: + return vc_nx_info(id, data); + + case VCMD_set_namespace: + return vc_set_namespace(id, data); + case 
VCMD_cleanup_namespace: + return vc_cleanup_namespace(id, data); + } + + /* those are allowed while in setup too */ + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + !vx_flags(VXF_STATE_SETUP,0)) + return -EPERM; + +#ifdef CONFIG_VSERVER_LEGACY + switch (cmd) { + case VCMD_set_cflags: + case VCMD_set_ccaps: + if (vx_check(0, VX_WATCH)) + return 0; + } +#endif + + switch (cmd) { + case VCMD_get_rlimit: + return vc_get_rlimit(id, data); + case VCMD_set_rlimit: + return vc_set_rlimit(id, data); + case VCMD_get_rlimit_mask: + return vc_get_rlimit_mask(id, data); + + case VCMD_vx_get_vhi_name: + return vc_get_vhi_name(id, data); + case VCMD_vx_set_vhi_name: + return vc_set_vhi_name(id, data); + + case VCMD_set_cflags: + return vc_set_cflags(id, data); + case VCMD_get_cflags: + return vc_get_cflags(id, data); + + case VCMD_set_ccaps: + return vc_set_ccaps(id, data); + case VCMD_get_ccaps: + return vc_get_ccaps(id, data); + + case VCMD_set_nflags: + return vc_set_nflags(id, data); + case VCMD_get_nflags: + return vc_get_nflags(id, data); + + case VCMD_set_ncaps: + return vc_set_ncaps(id, data); + case VCMD_get_ncaps: + return vc_get_ncaps(id, data); + + case VCMD_set_sched_v2: + return vc_set_sched_v2(id, data); + /* this is version 3 */ + case VCMD_set_sched: + return vc_set_sched(id, data); + + case VCMD_add_dlimit: + return vc_add_dlimit(id, data); + case VCMD_rem_dlimit: + return vc_rem_dlimit(id, data); + case VCMD_set_dlimit: + return vc_set_dlimit(id, data); + case VCMD_get_dlimit: + return vc_get_dlimit(id, data); + } + + /* below here only with VX_ADMIN */ + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + switch (cmd) { + case VCMD_ctx_kill: + return vc_ctx_kill(id, data); + + case VCMD_wait_exit: + return vc_wait_exit(id, data); + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_create_context: + return vc_ctx_create(id, data); +#endif + + case VCMD_get_iattr: + return vc_get_iattr(id, data); + case VCMD_set_iattr: + return vc_set_iattr(id, data); + + case 
VCMD_enter_namespace: + return vc_enter_namespace(id, data); + + case VCMD_ctx_create: +#ifdef CONFIG_VSERVER_LEGACY + if (id == 1) { + current->xid = 1; + return 1; + } +#endif + return vc_ctx_create(id, data); + case VCMD_ctx_migrate: + return vc_ctx_migrate(id, data); + + case VCMD_net_create: + return vc_net_create(id, data); + case VCMD_net_migrate: + return vc_net_migrate(id, data); + + } + return -ENOSYS; +} + diff -NurpP --minimal linux-2.6.10-rc1/kernel/vserver/sysctl.c linux-2.6.10-rc1-vs1.9.3/kernel/vserver/sysctl.c --- linux-2.6.10-rc1/kernel/vserver/sysctl.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.10-rc1-vs1.9.3/kernel/vserver/sysctl.c 2004-10-31 00:41:27.000000000 +0200 @@ -0,0 +1,209 @@ +/* + * linux/kernel/sysctl.c + * + * Virtual Context Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#define CTL_VSERVER 4242 /* unused? 
*/ + enum { + CTL_DEBUG_SWITCH = 1, + CTL_DEBUG_XID, + CTL_DEBUG_NID, + CTL_DEBUG_NET, + CTL_DEBUG_LIMIT, + CTL_DEBUG_DLIM, + CTL_DEBUG_CVIRT, +}; + + +unsigned int vx_debug_switch = 0; +unsigned int vx_debug_xid = 0; +unsigned int vx_debug_nid = 0; +unsigned int vx_debug_net = 0; +unsigned int vx_debug_limit = 0; +unsigned int vx_debug_dlim = 0; +unsigned int vx_debug_cvirt = 0; + + +static struct ctl_table_header *vserver_table_header; +static ctl_table vserver_table[]; + + +void vserver_register_sysctl(void) +{ + if (!vserver_table_header) { + vserver_table_header = register_sysctl_table(vserver_table, 1); +#ifdef CONFIG_PROC_FS +// if (vserver_table[0].de) +// vserver_table[0].de->owner = THIS_MODULE; +#endif + } + +} + +void vserver_unregister_sysctl(void) +{ + if (vserver_table_header) { + unregister_sysctl_table(vserver_table_header); + vserver_table_header = NULL; + } +} + +/* sysctl handler for one debug mask: a write parses an unsigned decimal into table->data, a read prints it back */ +static int proc_dodebug(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char tmpbuf[20], *p, c; + unsigned int value; + size_t left, len; + + if ((*ppos && !write) || !*lenp) { + *lenp = 0; + return 0; + } + + left = *lenp; + + if (write) { + if (!access_ok(VERIFY_READ, buffer, left)) + return -EFAULT; + p = (char *) buffer; + while (left && __get_user(c, p) >= 0 && isspace(c)) + left--, p++; + if (!left) + goto done; + + if (left > sizeof(tmpbuf) - 1) + return -EINVAL; + if (copy_from_user(tmpbuf, p, left)) + return -EFAULT; + tmpbuf[left] = '\0'; + + for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) + value = 10 * value + (*p - '0'); + if (*p && !isspace(*p)) + return -EINVAL; + while (left && isspace(*p)) + left--, p++; + *(unsigned int *) table->data = value; + } else { + if (!access_ok(VERIFY_WRITE, buffer, left)) + return -EFAULT; + len = sprintf(tmpbuf, "%u", *(unsigned int *) table->data); /* %u: the mask is unsigned, and the write path accepts digits only */ + if (len > left) + len = left; + if (__copy_to_user(buffer, tmpbuf, len)) + return -EFAULT; + if ((left -= len) > 0) {
+ if (put_user('\n', (char *)buffer + len)) + return -EFAULT; + left--; + } + } + +done: + *lenp -= left; + *ppos += *lenp; + return 0; +} + + + +static ctl_table debug_table[] = { + { + .ctl_name = CTL_DEBUG_SWITCH, + .procname = "debug_switch", + .data = &vx_debug_switch, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_XID, + .procname = "debug_xid", + .data = &vx_debug_xid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NID, + .procname = "debug_nid", + .data = &vx_debug_nid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NET, + .procname = "debug_net", + .data = &vx_debug_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_LIMIT, + .procname = "debug_limit", + .data = &vx_debug_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_DLIM, + .procname = "debug_dlim", + .data = &vx_debug_dlim, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_CVIRT, + .procname = "debug_cvirt", + .data = &vx_debug_cvirt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { .ctl_name = 0 } +}; + +static ctl_table vserver_table[] = { + { + .ctl_name = CTL_VSERVER, + .procname = "vserver", + .mode = 0555, + .child = debug_table + }, + { .ctl_name = 0 } +}; + + +EXPORT_SYMBOL_GPL(vx_debug_switch); +EXPORT_SYMBOL_GPL(vx_debug_xid); +EXPORT_SYMBOL_GPL(vx_debug_nid); +EXPORT_SYMBOL_GPL(vx_debug_net); +EXPORT_SYMBOL_GPL(vx_debug_limit); +EXPORT_SYMBOL_GPL(vx_debug_dlim); +EXPORT_SYMBOL_GPL(vx_debug_cvirt); + diff -NurpP --minimal linux-2.6.10-rc1/mm/fremap.c linux-2.6.10-rc1-vs1.9.3/mm/fremap.c --- linux-2.6.10-rc1/mm/fremap.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/fremap.c 2004-10-31 
00:41:27.000000000 +0200 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -39,7 +40,8 @@ static inline void zap_pte(struct mm_str set_page_dirty(page); page_remove_rmap(page); page_cache_release(page); - mm->rss--; + // mm->rss--; + vx_rsspages_dec(mm); } } } else { @@ -67,6 +69,9 @@ int install_page(struct mm_struct *mm, s pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); + if (!vx_rsspages_avail(mm, 1)) + goto err_unlock; + pmd = pmd_alloc(mm, pgd, addr); if (!pmd) goto err_unlock; @@ -87,7 +92,8 @@ int install_page(struct mm_struct *mm, s zap_pte(mm, vma, addr, pte); - mm->rss++; + // mm->rss++; + vx_rsspages_inc(mm); flush_icache_page(vma, page); set_pte(pte, mk_pte(page, prot)); page_add_file_rmap(page); diff -NurpP --minimal linux-2.6.10-rc1/mm/memory.c linux-2.6.10-rc1-vs1.9.3/mm/memory.c --- linux-2.6.10-rc1/mm/memory.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/memory.c 2004-10-31 00:41:27.000000000 +0200 @@ -283,6 +283,10 @@ skip_copy_pte_range: struct page *page; unsigned long pfn; + if (!vx_rsspages_avail(dst, 1)) { + spin_unlock(&src->page_table_lock); + goto nomem; + } /* copy_one_pte */ if (pte_none(pte)) @@ -333,7 +337,8 @@ skip_copy_pte_range: pte = pte_mkclean(pte); pte = pte_mkold(pte); get_page(page); - dst->rss++; + // dst->rss++; + vx_rsspages_inc(dst); set_pte(dst_pte, pte); page_dup_rmap(page); cont_copy_pte_range_noset: @@ -1110,7 +1115,8 @@ static int do_wp_page(struct mm_struct * page_table = pte_offset_map(pmd, address); if (likely(pte_same(*page_table, pte))) { if (PageReserved(old_page)) - ++mm->rss; + // ++mm->rss; + vx_rsspages_inc(mm); else page_remove_rmap(old_page); break_cow(vma, new_page, address, page_table); @@ -1368,6 +1374,10 @@ static int do_swap_page(struct mm_struct grab_swap_token(); } + if (!vx_rsspages_avail(mm, 1)) { + ret = VM_FAULT_OOM; + goto out; + } mark_page_accessed(page); lock_page(page); @@ -1392,7 +1402,8 @@ static int do_swap_page(struct 
mm_struct if (vm_swap_full()) remove_exclusive_swap_page(page); - mm->rss++; + // mm->rss++; + vx_rsspages_inc(mm); pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); @@ -1443,6 +1454,9 @@ do_anonymous_page(struct mm_struct *mm, if (unlikely(anon_vma_prepare(vma))) goto no_mem; + if (!vx_rsspages_avail(mm, 1)) + goto no_mem; + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); if (!page) goto no_mem; @@ -1457,7 +1471,8 @@ do_anonymous_page(struct mm_struct *mm, spin_unlock(&mm->page_table_lock); goto out; } - mm->rss++; + // mm->rss++; + vx_rsspages_inc(mm); entry = maybe_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)), vma); @@ -1520,6 +1535,8 @@ retry: return VM_FAULT_SIGBUS; if (new_page == NOPAGE_OOM) return VM_FAULT_OOM; + if (!vx_rsspages_avail(mm, 1)) + return VM_FAULT_OOM; /* * Should we do an early C-O-W break? @@ -1566,7 +1583,8 @@ retry: /* Only go through if we didn't race with anybody else... 
*/ if (pte_none(*page_table)) { if (!PageReserved(new_page)) - ++mm->rss; + // ++mm->rss; + vx_rsspages_inc(mm); flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) diff -NurpP --minimal linux-2.6.10-rc1/mm/mlock.c linux-2.6.10-rc1-vs1.9.3/mm/mlock.c --- linux-2.6.10-rc1/mm/mlock.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/mlock.c 2004-10-31 00:41:27.000000000 +0200 @@ -8,6 +8,7 @@ #include #include #include +#include static int mlock_fixup(struct vm_area_struct * vma, @@ -50,7 +51,8 @@ static int mlock_fixup(struct vm_area_st ret = make_pages_present(start, end); } - vma->vm_mm->locked_vm -= pages; + // vma->vm_mm->locked_vm -= pages; + vx_vmlocked_sub(vma->vm_mm, pages); out: return ret; } @@ -102,7 +104,7 @@ static int do_mlock(unsigned long start, asmlinkage long sys_mlock(unsigned long start, size_t len) { - unsigned long locked; + unsigned long locked, grow; unsigned long lock_limit; int error = -ENOMEM; @@ -113,8 +115,10 @@ asmlinkage long sys_mlock(unsigned long len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; - locked = len >> PAGE_SHIFT; - locked += current->mm->locked_vm; + grow = len >> PAGE_SHIFT; + if (!vx_vmlocked_avail(current->mm, grow)) + goto out; + locked = current->mm->locked_vm + grow; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; @@ -122,6 +126,7 @@ asmlinkage long sys_mlock(unsigned long /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); +out: up_write(&current->mm->mmap_sem); return error; } @@ -181,6 +186,8 @@ asmlinkage long sys_mlockall(int flags) lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; + if (!vx_vmlocked_avail(current->mm, current->mm->total_vm)) + goto out; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); diff -NurpP --minimal linux-2.6.10-rc1/mm/mmap.c
linux-2.6.10-rc1-vs1.9.3/mm/mmap.c --- linux-2.6.10-rc1/mm/mmap.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/mmap.c 2004-10-31 00:41:27.000000000 +0200 @@ -908,6 +908,10 @@ munmap_back: > current->signal->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; + /* check context space, maybe only Private writable mapping? */ + if (!vx_vmpages_avail(mm, len >> PAGE_SHIFT)) + return -ENOMEM; + if (accountable && (!(flags & MAP_NORESERVE) || sysctl_overcommit_memory == OVERCOMMIT_NEVER)) { if (vm_flags & VM_SHARED) { @@ -1004,9 +1008,11 @@ munmap_back: kmem_cache_free(vm_area_cachep, vma); } out: - mm->total_vm += len >> PAGE_SHIFT; + // mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + // mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -1344,8 +1350,9 @@ int expand_stack(struct vm_area_struct * address &= PAGE_MASK; grow = (address - vma->vm_end) >> PAGE_SHIFT; - /* Overcommit.. */ - if (security_vm_enough_memory(grow)) { + /* Overcommit.. vx check first to avoid vm_unacct_memory() */ + if (!vx_vmpages_avail(vma->vm_mm, grow) || + security_vm_enough_memory(grow)) { anon_vma_unlock(vma); return -ENOMEM; } @@ -1358,9 +1365,11 @@ int expand_stack(struct vm_area_struct * return -ENOMEM; } vma->vm_end = address; - vma->vm_mm->total_vm += grow; + // vma->vm_mm->total_vm += grow; + vx_vmpages_add(vma->vm_mm, grow); if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; + // vma->vm_mm->locked_vm += grow; + vx_vmlocked_add(vma->vm_mm, grow); __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow); anon_vma_unlock(vma); return 0; @@ -1406,8 +1415,9 @@ int expand_stack(struct vm_area_struct * address &= PAGE_MASK; grow = (vma->vm_start - address) >> PAGE_SHIFT; - /* Overcommit.. */ - if (security_vm_enough_memory(grow)) { + /* Overcommit.. 
vx check first to avoid vm_unacct_memory() */ + if (!vx_vmpages_avail(vma->vm_mm, grow) || + security_vm_enough_memory(grow)) { anon_vma_unlock(vma); return -ENOMEM; } @@ -1421,9 +1431,11 @@ int expand_stack(struct vm_area_struct * } vma->vm_start = address; vma->vm_pgoff -= grow; - vma->vm_mm->total_vm += grow; + // vma->vm_mm->total_vm += grow; + vx_vmpages_add(vma->vm_mm, grow); if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; + // vma->vm_mm->locked_vm += grow; + vx_vmlocked_add(vma->vm_mm, grow); __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow); anon_vma_unlock(vma); return 0; @@ -1527,9 +1539,12 @@ static void unmap_vma(struct mm_struct * { size_t len = area->vm_end - area->vm_start; - area->vm_mm->total_vm -= len >> PAGE_SHIFT; + // area->vm_mm->total_vm -= len >> PAGE_SHIFT; + vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT); + if (area->vm_flags & VM_LOCKED) - area->vm_mm->locked_vm -= len >> PAGE_SHIFT; + // area->vm_mm->locked_vm -= len >> PAGE_SHIFT; + vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT); vm_stat_unaccount(area); area->vm_mm->unmap_area(area); remove_vm_struct(area); @@ -1764,6 +1779,8 @@ unsigned long do_brk(unsigned long addr, locked += len; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; + if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT)) + return -ENOMEM; } /* @@ -1785,7 +1802,8 @@ unsigned long do_brk(unsigned long addr, if (mm->map_count > sysctl_max_map_count) return -ENOMEM; - if (security_vm_enough_memory(len >> PAGE_SHIFT)) + if (!vx_vmpages_avail(mm, len >> PAGE_SHIFT) || /* vx check first to avoid vm_unacct_memory() */ + security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; @@ -1813,9 +1831,11 @@ unsigned long do_brk(unsigned long addr, vma->vm_page_prot = protection_map[flags & 0x0f]; vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + // mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); if (flags &
VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + // mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } return addr; @@ -1847,9 +1867,12 @@ void exit_mmap(struct mm_struct *mm) vma = mm->mmap; mm->mmap = mm->mmap_cache = NULL; mm->mm_rb = RB_ROOT; - mm->rss = 0; - mm->total_vm = 0; - mm->locked_vm = 0; + // mm->rss = 0; + vx_rsspages_sub(mm, mm->rss); + // mm->total_vm = 0; + vx_vmpages_sub(mm, mm->total_vm); + // mm->locked_vm = 0; + vx_vmlocked_sub(mm, mm->locked_vm); spin_unlock(&mm->page_table_lock); diff -NurpP --minimal linux-2.6.10-rc1/mm/mremap.c linux-2.6.10-rc1-vs1.9.3/mm/mremap.c --- linux-2.6.10-rc1/mm/mremap.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/mremap.c 2004-10-31 00:41:27.000000000 +0200 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -224,10 +225,12 @@ static unsigned long move_vma(struct vm_ vma->vm_next->vm_flags |= VM_ACCOUNT; } - mm->total_vm += new_len >> PAGE_SHIFT; + // mm->total_vm += new_len >> PAGE_SHIFT; + vx_vmpages_add(mm, new_len >> PAGE_SHIFT); __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += new_len >> PAGE_SHIFT; + // mm->locked_vm += new_len >> PAGE_SHIFT; + vx_vmlocked_add(mm, new_len >> PAGE_SHIFT); if (new_len > old_len) make_pages_present(new_addr + old_len, new_addr + new_len); @@ -333,11 +336,18 @@ unsigned long do_mremap(unsigned long ad ret = -EAGAIN; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) goto out; + ret = -ENOMEM; + if (!vx_vmlocked_avail(current->mm, + (new_len - old_len) >> PAGE_SHIFT)) + goto out; } ret = -ENOMEM; if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len) > current->signal->rlim[RLIMIT_AS].rlim_cur) goto out; + /* check context space, maybe only Private writable mapping? 
*/ + if (!vx_vmpages_avail(current->mm, (new_len - old_len) >> PAGE_SHIFT)) + goto out; if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; @@ -361,11 +371,13 @@ unsigned long do_mremap(unsigned long ad vma_adjust(vma, vma->vm_start, addr + new_len, vma->vm_pgoff, NULL); - current->mm->total_vm += pages; + // current->mm->total_vm += pages; + vx_vmpages_add(current->mm, pages); __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, pages); if (vma->vm_flags & VM_LOCKED) { - current->mm->locked_vm += pages; + // current->mm->locked_vm += pages; + vx_vmlocked_add(vma->vm_mm, pages); make_pages_present(addr + old_len, addr + new_len); } diff -NurpP --minimal linux-2.6.10-rc1/mm/oom_kill.c linux-2.6.10-rc1-vs1.9.3/mm/oom_kill.c --- linux-2.6.10-rc1/mm/oom_kill.c 2004-10-18 23:54:30.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/oom_kill.c 2004-10-31 00:41:27.000000000 +0200 @@ -55,6 +55,7 @@ static unsigned long badness(struct task * The memory size of the process is the basis for the badness. 
*/ points = p->mm->total_vm; + /* add vserver badness ;) */ /* * CPU time is in tens of seconds and run time is in thousands diff -NurpP --minimal linux-2.6.10-rc1/mm/page_alloc.c linux-2.6.10-rc1-vs1.9.3/mm/page_alloc.c --- linux-2.6.10-rc1/mm/page_alloc.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/page_alloc.c 2004-10-31 00:41:27.000000000 +0200 @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include @@ -1011,6 +1013,8 @@ void si_meminfo(struct sysinfo *val) val->freehigh = 0; #endif val->mem_unit = PAGE_SIZE; + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_meminfo(val); } EXPORT_SYMBOL(si_meminfo); diff -NurpP --minimal linux-2.6.10-rc1/mm/rmap.c linux-2.6.10-rc1-vs1.9.3/mm/rmap.c --- linux-2.6.10-rc1/mm/rmap.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/rmap.c 2004-10-31 00:41:27.000000000 +0200 @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -586,7 +587,8 @@ static int try_to_unmap_one(struct page BUG_ON(pte_file(*pte)); } - mm->rss--; + // mm->rss--; + vx_rsspages_dec(mm); page_remove_rmap(page); page_cache_release(page); @@ -686,7 +688,8 @@ static void try_to_unmap_cluster(unsigne page_remove_rmap(page); page_cache_release(page); - mm->rss--; + // mm->rss--; + vx_rsspages_dec(mm); (*mapcount)--; } diff -NurpP --minimal linux-2.6.10-rc1/mm/shmem.c linux-2.6.10-rc1-vs1.9.3/mm/shmem.c --- linux-2.6.10-rc1/mm/shmem.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/shmem.c 2004-10-31 00:41:27.000000000 +0200 @@ -51,7 +51,6 @@ #include /* This magic number is used in glibc for posix shared memory */ -#define TMPFS_MAGIC 0x01021994 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) @@ -1539,7 +1538,7 @@ static int shmem_statfs(struct super_blo { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - buf->f_type = TMPFS_MAGIC; + buf->f_type = TMPFS_SUPER_MAGIC; buf->f_bsize = PAGE_CACHE_SIZE; 
buf->f_namelen = NAME_MAX; if (sbinfo) { @@ -1935,7 +1934,7 @@ static int shmem_fill_super(struct super sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = TMPFS_MAGIC; + sb->s_magic = TMPFS_SUPER_MAGIC; sb->s_op = &shmem_ops; inode = shmem_get_inode(sb, S_IFDIR | mode, 0); if (!inode) diff -NurpP --minimal linux-2.6.10-rc1/mm/swapfile.c linux-2.6.10-rc1-vs1.9.3/mm/swapfile.c --- linux-2.6.10-rc1/mm/swapfile.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/swapfile.c 2004-10-31 00:41:27.000000000 +0200 @@ -30,6 +30,8 @@ #include #include #include +#include +#include spinlock_t swaplock = SPIN_LOCK_UNLOCKED; unsigned int nr_swapfiles; @@ -431,7 +433,8 @@ static void unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page *page) { - vma->vm_mm->rss++; + // vma->vm_mm->rss++; + vx_rsspages_inc(vma->vm_mm); get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); page_add_anon_rmap(page, vma, address); @@ -1626,6 +1629,8 @@ void si_swapinfo(struct sysinfo *val) val->freeswap = nr_swap_pages + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; swap_list_unlock(); + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_swapinfo(val); } /* diff -NurpP --minimal linux-2.6.10-rc1/mm/vmscan.c linux-2.6.10-rc1-vs1.9.3/mm/vmscan.c --- linux-2.6.10-rc1/mm/vmscan.c 2004-10-18 23:53:21.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/mm/vmscan.c 2004-10-31 00:41:27.000000000 +0200 @@ -1238,7 +1238,7 @@ static int __init kswapd_init(void) swap_setup(); for_each_pgdat(pgdat) pgdat->kswapd - = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); + = find_task_by_real_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); total_memory = nr_free_pagecache_pages(); hotcpu_notifier(cpu_callback, 0); return 0; diff -NurpP --minimal linux-2.6.10-rc1/net/core/dev.c linux-2.6.10-rc1-vs1.9.3/net/core/dev.c --- 
linux-2.6.10-rc1/net/core/dev.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/core/dev.c 2004-10-31 00:41:27.000000000 +0200 @@ -113,6 +113,8 @@ #include #endif /* CONFIG_NET_RADIO */ #include +#include +#include /* This define, if set, will randomly drop a packet when congestion * is more than moderate. It helps fairness in the multi-interface @@ -2015,6 +2017,9 @@ static int dev_ifconf(char __user *arg) total = 0; for (dev = dev_base; dev; dev = dev->next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + continue; for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2075,6 +2080,10 @@ void dev_seq_stop(struct seq_file *seq, static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { + struct nx_info *nxi = current->nx_info; + + if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi)) + return; if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); diff -NurpP --minimal linux-2.6.10-rc1/net/core/rtnetlink.c linux-2.6.10-rc1-vs1.9.3/net/core/rtnetlink.c --- linux-2.6.10-rc1/net/core/rtnetlink.c 2004-10-18 23:55:36.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/core/rtnetlink.c 2004-10-31 00:41:27.000000000 +0200 @@ -251,6 +251,9 @@ int rtnetlink_dump_ifinfo(struct sk_buff for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; + if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, skb->sk->sk_nx_info)) + continue; if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0) break; } @@ -416,6 +419,9 @@ void rtmsg_ifinfo(int type, struct net_d sizeof(struct rtnl_link_ifmap) + sizeof(struct rtnl_link_stats) + 128); + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + return; skb = alloc_skb(size, GFP_KERNEL); if (!skb) return; diff -NurpP --minimal linux-2.6.10-rc1/net/core/sock.c linux-2.6.10-rc1-vs1.9.3/net/core/sock.c 
--- linux-2.6.10-rc1/net/core/sock.c 2004-10-18 23:54:40.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/core/sock.c 2004-10-31 00:41:27.000000000 +0200 @@ -122,6 +122,7 @@ #include #include +#include #ifdef CONFIG_INET #include @@ -324,7 +325,10 @@ int sock_setsockopt(struct socket *sock, break; case SO_PASSCRED: - sock->passcred = valbool; + if (valbool) + set_bit(SOCK_PASS_CRED, &sock->flags); + else + clear_bit(SOCK_PASS_CRED, &sock->flags); break; case SO_TIMESTAMP: @@ -548,7 +552,7 @@ int sock_getsockopt(struct socket *sock, break; case SO_PASSCRED: - v.val = sock->passcred; + v.val = test_bit(SOCK_PASS_CRED, &sock->flags)?1:0; break; case SO_PEERCRED: @@ -623,6 +627,8 @@ struct sock *sk_alloc(int family, int pr sock_lock_init(sk); } sk->sk_slab = slab; + sock_vx_init(sk); + sock_nx_init(sk); if (security_sk_alloc(sk, family, priority)) { kmem_cache_free(slab, sk); @@ -653,6 +659,8 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + BUG_ON(sk->sk_vx_info); + BUG_ON(sk->sk_nx_info); kmem_cache_free(sk->sk_slab, sk); module_put(owner); } @@ -1195,6 +1203,11 @@ void sock_init_data(struct socket *sock, sk->sk_stamp.tv_sec = -1L; sk->sk_stamp.tv_usec = -1L; + sk->sk_vx_info = NULL; + sk->sk_xid = 0; + sk->sk_nx_info = NULL; + sk->sk_nid = 0; + atomic_set(&sk->sk_refcnt, 1); } diff -NurpP --minimal linux-2.6.10-rc1/net/ipv4/af_inet.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/af_inet.c --- linux-2.6.10-rc1/net/ipv4/af_inet.c 2004-10-18 23:53:21.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/af_inet.c 2004-10-31 00:41:27.000000000 +0200 @@ -112,6 +112,7 @@ #ifdef CONFIG_IP_MROUTE #include #endif +#include DEFINE_SNMP_STAT(struct linux_mib, net_statistics); @@ -153,6 +154,13 @@ void inet_sock_destruct(struct sock *sk) if (inet->opt) kfree(inet->opt); + + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; + clr_nx_info(&sk->sk_nx_info); + sk->sk_nid = -1; + dst_release(sk->sk_dst_cache); #ifdef 
INET_REFCNT_DEBUG atomic_dec(&inet_sock_nr); @@ -266,8 +274,11 @@ static int inet_create(struct socket *so if (!answer) goto out_rcu_unlock; err = -EPERM; + if ((protocol == IPPROTO_ICMP) && vx_ccaps(VXC_RAW_ICMP)) + goto override; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; +override: err = -EPROTONOSUPPORT; if (!protocol) goto out_rcu_unlock; @@ -316,6 +327,12 @@ static int inet_create(struct socket *so sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_nid = nx_current_nid(); + inet->uc_ttl = -1; inet->mc_loop = 1; inet->mc_ttl = 1; @@ -377,6 +394,11 @@ int inet_release(struct socket *sock) !(current->flags & PF_EXITING)) timeout = sk->sk_lingertime; sock->sk = NULL; + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; + clr_nx_info(&sk->sk_nx_info); + sk->sk_nid = -1; sk->sk_prot->close(sk, timeout); } return 0; @@ -393,6 +415,10 @@ int inet_bind(struct socket *sock, struc unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* Address used for validation */ + __u32 s_addr1; /* Address used for socket */ + __u32 s_addr2; /* Broadcast address for the socket */ + struct nx_info *nxi = sk->sk_nx_info; /* If the socket has its own bind function then use it. 
(RAW) */ if (sk->sk_prot->bind) { @@ -403,7 +429,40 @@ int inet_bind(struct socket *sock, struc if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = addr->sin_addr.s_addr; + s_addr1 = s_addr; + s_addr2 = 0xffffffffl; + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d", + sk, sk->sk_nx_info, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0), + VXD_QUAD(s_addr)); + if (nxi) { + __u32 v4_bcast = nxi->v4_bcast; + __u32 ipv4root = nxi->ipv4[0]; + int nbipv4 = nxi->nbipv4; + + if (s_addr == 0) { + /* bind to any for 1-n */ + s_addr = ipv4root; + s_addr1 = (nbipv4 > 1) ? 0 : s_addr; + s_addr2 = v4_bcast; + } else if (s_addr == 0x0100007f) { + /* rewrite localhost to ipv4root */ + s_addr = ipv4root; + s_addr1 = ipv4root; + } else if (s_addr != v4_bcast) { + /* normal address bind */ + if (!addr_in_nx_info(nxi, s_addr)) + return -EADDRNOTAVAIL; + } + } + chk_addr_ret = inet_addr_type(s_addr); + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d", + sk, VXD_QUAD(s_addr), VXD_QUAD(s_addr1), VXD_QUAD(s_addr2)); /* Not specified by any standard per-se, however it breaks too * many applications when removed. 
It is unfortunate since @@ -415,7 +474,7 @@ int inet_bind(struct socket *sock, struc err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -440,7 +499,8 @@ int inet_bind(struct socket *sock, struc if (sk->sk_state != TCP_CLOSE || inet->num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = s_addr1; + inet->rcv_saddr2 = s_addr2; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ diff -NurpP --minimal linux-2.6.10-rc1/net/ipv4/devinet.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/devinet.c --- linux-2.6.10-rc1/net/ipv4/devinet.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/devinet.c 2004-10-31 00:41:27.000000000 +0200 @@ -490,6 +490,33 @@ static __inline__ int inet_abc_len(u32 a return rc; } +/* + Check that a device is not member of the ipv4root assigned to the process + Return true if this is the case + + If the process is not bound to specific IP, then it returns 0 (all + interface are fine). 
+*/ +static inline int devinet_notiproot (struct in_ifaddr *ifa) +{ + int ret = 0; + struct nx_info *nxi; + + if ((nxi = current->nx_info)) { + int i; + int nbip = nxi->nbipv4; + __u32 addr = ifa->ifa_local; + ret = 1; + for (i=0; i<nbip; i++) { + if (nxi->ipv4[i] == addr) { + ret = 0; + break; + } + } + } + return ret; +} + int devinet_ioctl(unsigned int cmd, void __user *arg) { @@ -597,6 +624,9 @@ int devinet_ioctl(unsigned int cmd, void ret = -EADDRNOTAVAIL; if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) goto done; + if (vx_flags(VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, current->nx_info)) + goto done; switch(cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -740,6 +770,9 @@ static int inet_gifconf(struct net_devic goto out; for (; ifa; ifa = ifa->ifa_next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, current->nx_info)) + continue; if (!buf) { done += sizeof(ifr); continue; @@ -1046,6 +1079,7 @@ static int inet_dump_ifaddr(struct sk_bu struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; + struct sock *sk = skb->sk; int s_ip_idx, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; @@ -1063,6 +1097,9 @@ static int inet_dump_ifaddr(struct sk_bu for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, sk->sk_nx_info)) + continue; if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, diff -NurpP --minimal linux-2.6.10-rc1/net/ipv4/fib_hash.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/fib_hash.c --- linux-2.6.10-rc1/net/ipv4/fib_hash.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/fib_hash.c 2004-10-31 00:41:27.000000000 +0200 @@ -1022,6 +1022,8 @@ static unsigned fib_flag_trans(int type, return flags; } +extern int dev_in_nx_info(struct net_device *, struct nx_info *); + /* * This outputs /proc/net/route.
* @@ -1052,7 +1054,8 @@ static int fib_seq_show(struct seq_file prefix = f->fn_key; mask = FZ_MASK(iter->zone); flags = fib_flag_trans(fa->fa_type, mask, fi); - if (fi) + if (fi && (!vx_flags(VXF_HIDE_NETIF, 0) || + dev_in_nx_info(fi->fib_dev, current->nx_info))) snprintf(bf, sizeof(bf), "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, diff -NurpP --minimal linux-2.6.10-rc1/net/ipv4/netfilter/ipt_owner.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/netfilter/ipt_owner.c --- linux-2.6.10-rc1/net/ipv4/netfilter/ipt_owner.c 2004-10-18 23:55:24.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/netfilter/ipt_owner.c 2004-10-31 00:41:27.000000000 +0200 @@ -61,7 +61,7 @@ match_pid(const struct sk_buff *skb, pid int i; read_lock(&tasklist_lock); - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (!p) goto out; task_lock(p); diff -NurpP --minimal linux-2.6.10-rc1/net/ipv4/raw.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/raw.c --- linux-2.6.10-rc1/net/ipv4/raw.c 2004-10-18 23:54:31.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/raw.c 2004-10-31 00:41:27.000000000 +0200 @@ -102,6 +102,27 @@ static void raw_v4_unhash(struct sock *s write_unlock_bh(&raw_v4_lock); } + +/* + * Check if a given address matches for a socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr/baddr: socket addresses + */ +static inline int raw_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr, + uint32_t baddr) +{ + if (addr && (saddr == addr || baddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif) @@ -113,7 +134,8 @@ struct sock *__raw_v4_lookup(struct sock if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + raw_addr_match(sk->sk_nx_info, laddr, + inet->rcv_saddr, 
inet->rcv_saddr2) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -305,6 +327,10 @@ static int raw_send_hdrinc(struct sock * iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } + err = -EPERM; + if (!vx_check(0, VX_ADMIN) && !capable(CAP_NET_RAW) + && (!addr_in_nx_info(sk->sk_nx_info, iph->saddr))) + goto error; err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); @@ -477,6 +503,12 @@ static int raw_sendmsg(struct kiocb *ioc if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + if (sk->sk_nx_info) { + err = ip_find_src(sk->sk_nx_info, &rt, &fl); + + if (err) + goto done; + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) @@ -738,7 +770,8 @@ static struct sock *raw_get_first(struct struct hlist_node *node; sk_for_each(sk, node, &raw_v4_htable[state->bucket]) - if (sk->sk_family == PF_INET) + if (sk->sk_family == PF_INET && + vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)) goto found; } sk = NULL; @@ -754,7 +787,8 @@ static struct sock *raw_get_next(struct sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != PF_INET); + } while (sk && (sk->sk_family != PF_INET || + !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); diff -NurpP --minimal linux-2.6.10-rc1/net/ipv4/tcp_ipv4.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/tcp_ipv4.c --- linux-2.6.10-rc1/net/ipv4/tcp_ipv4.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/tcp_ipv4.c 2004-10-31 00:41:27.000000000 +0200 @@ -74,6 +74,7 @@ #include #include #include +#include extern int sysctl_ip_dynaddr; int sysctl_tcp_tw_reuse; @@ -181,7 +182,6 @@ void tcp_bind_hash(struct sock *sk, stru static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) { - const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -194,9 +194,8 @@ static 
inline int tcp_bind_conflict(stru sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) + if (nx_addr_conflict(sk->sk_nx_info, + tcp_v4_rcv_saddr(sk), sk2)) break; } } @@ -405,6 +404,26 @@ void tcp_unhash(struct sock *sk) wake_up(&tcp_lhash_wait); } + +/* + * Check if a given address matches for a tcp socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr: socket addresses + */ +static inline int tcp_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr) +{ + if (addr && (saddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + /* Don't inline this cruft. Here are some nice properties to * exploit here. The BSD API does not allow a listening TCP * to specify the remote port nor the remote address for the @@ -426,11 +445,10 @@ static struct sock *__tcp_v4_lookup_list __u32 rcv_saddr = inet->rcv_saddr; score = (sk->sk_family == PF_INET ? 
1 : 0); - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (tcp_addr_match(sk->sk_nx_info, daddr, rcv_saddr)) score+=2; - } + else + continue; if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) continue; @@ -460,8 +478,8 @@ inline struct sock *tcp_v4_lookup_listen struct inet_opt *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + tcp_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) && !sk->sk_bound_dev_if) goto sherry_cache; sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); @@ -2161,6 +2179,12 @@ static void *listening_get_next(struct s req = req->dl_next; while (1) { while (req) { + vxdprintk(VXD_CBIT(net, 6), + "sk,req: %p [#%d] (from %d)", req->sk, + (req->sk)?req->sk->sk_xid:0, current->xid); + if (req->sk && + !vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (req->class->family == st->family) { cur = req; goto out; @@ -2185,6 +2209,10 @@ get_req: } get_sk: sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", + sk, sk->sk_xid, current->xid); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) { cur = sk; goto out; @@ -2233,18 +2261,26 @@ static void *established_get_first(struc read_lock(&tcp_ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { - if (sk->sk_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egf: %p [#%d] (from %d)", + sk, sk->sk_xid, current->xid); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; + if (sk->sk_family != st->family) continue; - } rc = sk; goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; tw_for_each(tw, node, &tcp_ehash[st->bucket + tcp_ehash_size].chain) { - if (tw->tw_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "tw: %p [#%d] (from %d)", + tw, tw->tw_xid, current->xid); + if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) + 
continue; + if (tw->tw_family != st->family) continue; - } rc = tw; goto out; } @@ -2268,7 +2304,8 @@ static void *established_get_next(struct tw = cur; tw = tw_next(tw); get_tw: - while (tw && tw->tw_family != st->family) { + while (tw && (tw->tw_family != st->family || + !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) { tw = tw_next(tw); } if (tw) { @@ -2288,6 +2325,11 @@ get_tw: sk = sk_next(sk); sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egn: %p [#%d] (from %d)", + sk, sk->sk_xid, current->xid); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) goto found; } diff -NurpP --minimal linux-2.6.10-rc1/net/ipv4/tcp_minisocks.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/tcp_minisocks.c --- linux-2.6.10-rc1/net/ipv4/tcp_minisocks.c 2004-10-18 23:54:54.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/tcp_minisocks.c 2004-10-31 00:41:27.000000000 +0200 @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -362,6 +364,11 @@ void tcp_time_wait(struct sock *sk, int tw->tw_ts_recent_stamp = tp->ts_recent_stamp; tw_dead_node_init(tw); + tw->tw_xid = sk->sk_xid; + tw->tw_vx_info = NULL; + tw->tw_nid = sk->sk_nid; + tw->tw_nx_info = NULL; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -697,6 +704,8 @@ struct sock *tcp_create_openreq_child(st newsk->sk_state = TCP_SYN_RECV; /* SANITY */ + sock_vx_init(newsk); + sock_nx_init(newsk); sk_node_init(&newsk->sk_node); tcp_sk(newsk)->bind_hash = NULL; @@ -795,6 +804,12 @@ struct sock *tcp_create_openreq_child(st newsk->sk_err = 0; newsk->sk_priority = 0; atomic_set(&newsk->sk_refcnt, 2); + + set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info); + newsk->sk_xid = sk->sk_xid; + vx_sock_inc(newsk); + set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info); + newsk->sk_nid = sk->sk_nid; #ifdef INET_REFCNT_DEBUG atomic_inc(&inet_sock_nr); #endif diff -NurpP --minimal 
linux-2.6.10-rc1/net/ipv4/udp.c linux-2.6.10-rc1-vs1.9.3/net/ipv4/udp.c --- linux-2.6.10-rc1/net/ipv4/udp.c 2004-10-18 23:53:22.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv4/udp.c 2004-10-31 00:41:27.000000000 +0200 @@ -174,14 +174,12 @@ gotit: struct inet_opt *inet2 = inet_sk(sk2); if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && + sk2 != sk && !ipv6_only_sock(sk2) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && + nx_addr_conflict(sk->sk_nx_info, + tcp_v4_rcv_saddr(sk), sk2) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } @@ -216,6 +214,17 @@ static void udp_v4_unhash(struct sock *s write_unlock_bh(&udp_hash_lock); } +static inline int udp_in_list(struct nx_info *nx_info, u32 addr) +{ + int n = nx_info->nbipv4; + int i; + + for (i=0; i<n; i++) + if (nx_info->ipv4[i] == addr) + return 1; + return 0; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this.
-DaveM */ @@ -235,6 +244,11 @@ struct sock *udp_v4_lookup_longway(u32 s if (inet->rcv_saddr != daddr) continue; score+=2; + } else if (sk->sk_nx_info) { + if (udp_in_list(sk->sk_nx_info, daddr)) + score+=2; + else + continue; } if (inet->daddr) { if (inet->daddr != saddr) @@ -290,7 +304,8 @@ static inline struct sock *udp_v4_mcast_ if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr && + inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -599,6 +614,15 @@ int udp_sendmsg(struct kiocb *iocb, stru .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct nx_info *nxi = sk->sk_nx_info; + + if (nxi) { + err = ip_find_src(nxi, &rt, &fl); + if (err) + goto out; + if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + daddr = fl.fl4_dst = nxi->ipv4[0]; + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -1333,8 +1357,10 @@ static struct sock *udp_get_first(struct for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; + sk_for_each(sk, node, &udp_hash[state->bucket]) { - if (sk->sk_family == state->family) + if (sk->sk_family == state->family && + vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)) goto found; } } @@ -1351,7 +1377,8 @@ static struct sock *udp_get_next(struct sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != state->family); + } while (sk && (sk->sk_family != state->family || + !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(&udp_hash[state->bucket]); diff -NurpP --minimal linux-2.6.10-rc1/net/ipv6/addrconf.c linux-2.6.10-rc1-vs1.9.3/net/ipv6/addrconf.c --- linux-2.6.10-rc1/net/ipv6/addrconf.c 2004-10-18 23:55:24.000000000 +0200 +++ 
linux-2.6.10-rc1-vs1.9.3/net/ipv6/addrconf.c 2004-10-31 00:41:27.000000000 +0200 @@ -2662,6 +2662,10 @@ static int inet6_dump_addr(struct sk_buf struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); @@ -2881,6 +2885,10 @@ static int inet6_dump_ifinfo(struct sk_b struct net_device *dev; struct inet6_dev *idev; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + read_lock(&dev_base_lock); for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) diff -NurpP --minimal linux-2.6.10-rc1/net/ipv6/netfilter/ip6t_owner.c linux-2.6.10-rc1-vs1.9.3/net/ipv6/netfilter/ip6t_owner.c --- linux-2.6.10-rc1/net/ipv6/netfilter/ip6t_owner.c 2004-10-18 23:55:43.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/ipv6/netfilter/ip6t_owner.c 2004-10-31 00:41:27.000000000 +0200 @@ -28,7 +28,7 @@ match_pid(const struct sk_buff *skb, pid int i; read_lock(&tasklist_lock); - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (!p) goto out; task_lock(p); diff -NurpP --minimal linux-2.6.10-rc1/net/netlink/af_netlink.c linux-2.6.10-rc1-vs1.9.3/net/netlink/af_netlink.c --- linux-2.6.10-rc1/net/netlink/af_netlink.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/netlink/af_netlink.c 2004-10-31 00:41:27.000000000 +0200 @@ -44,6 +44,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -242,6 +246,12 @@ static int netlink_create(struct socket sk->sk_destruct = netlink_sock_destruct; atomic_inc(&netlink_sock_nr); + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_nid = nx_current_nid(); + sk->sk_protocol = protocol; return 0; } @@ -283,6 +293,12 @@ static int netlink_release(struct 
socket notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; + clr_nx_info(&sk->sk_nx_info); + sk->sk_nid = -1; + sock_put(sk); return 0; } diff -NurpP --minimal linux-2.6.10-rc1/net/socket.c linux-2.6.10-rc1-vs1.9.3/net/socket.c --- linux-2.6.10-rc1/net/socket.c 2004-10-18 23:53:50.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/socket.c 2004-10-31 00:41:27.000000000 +0200 @@ -93,6 +93,8 @@ #include #include +#include +#include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, @@ -287,7 +289,7 @@ static struct inode *sock_alloc_inode(st ei->socket.ops = NULL; ei->socket.sk = NULL; ei->socket.file = NULL; - ei->socket.passcred = 0; + ei->socket.flags = 0; return &ei->vfs_inode; } @@ -531,7 +533,7 @@ static inline int __sock_sendmsg(struct struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; + int err, len; si->sock = sock; si->scm = NULL; @@ -542,7 +544,21 @@ static inline int __sock_sendmsg(struct if (err) return err; - return sock->ops->sendmsg(iocb, sock, msg, size); + len = sock->ops->sendmsg(iocb, sock, msg, size); + if (sock->sk) { + if (len == size) + vx_sock_send(sock->sk, size); + else + vx_sock_fail(sock->sk, size); + } + vxdprintk(VXD_CBIT(net, 7), + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (unsigned int)size, len); + return len; } int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -580,7 +596,7 @@ int kernel_sendmsg(struct socket *sock, static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - int err; + int err, len; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -593,7 +609,17 @@ static inline int __sock_recvmsg(struct 
if (err) return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + len = sock->ops->recvmsg(iocb, sock, msg, size, flags); + if ((len >= 0) && sock->sk) + vx_sock_recv(sock->sk, len); + vxdprintk(VXD_CBIT(net, 7), + "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (unsigned int)size, len); + return len; } int sock_recvmsg(struct socket *sock, struct msghdr *msg, @@ -1085,6 +1111,10 @@ static int __sock_create(int family, int if (type < 0 || type >= SOCK_MAX) return -EINVAL; + /* disable IPv6 inside vservers for now */ + if (family == PF_INET6 && !vx_check(0, VX_ADMIN)) + return -EAFNOSUPPORT; + /* Compatibility. This uglymoron is moved from INET layer to here to avoid @@ -1193,6 +1223,7 @@ asmlinkage long sys_socket(int family, i if (retval < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock->flags); retval = sock_map_fd(sock); if (retval < 0) goto out_release; @@ -1223,10 +1254,12 @@ asmlinkage long sys_socketpair(int famil err = sock_create(family, type, protocol, &sock1); if (err < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock1->flags); err = sock_create(family, type, protocol, &sock2); if (err < 0) goto out_release_1; + set_bit(SOCK_USER_SOCKET, &sock2->flags); err = sock1->ops->socketpair(sock1, sock2); if (err < 0) diff -NurpP --minimal linux-2.6.10-rc1/net/sunrpc/auth.c linux-2.6.10-rc1-vs1.9.3/net/sunrpc/auth.c --- linux-2.6.10-rc1/net/sunrpc/auth.c 2004-10-18 23:54:29.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/sunrpc/auth.c 2004-10-31 00:41:27.000000000 +0200 @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -261,6 +262,7 @@ rpcauth_lookupcred(struct rpc_auth *auth get_group_info(current->group_info); acred.uid = current->fsuid; acred.gid = current->fsgid; + acred.xid = current->xid; acred.group_info = current->group_info; dprintk("RPC: looking up %s cred\n", 
@@ -280,6 +282,7 @@ rpcauth_bindcred(struct rpc_task *task) get_group_info(current->group_info); acred.uid = current->fsuid; acred.gid = current->fsgid; + acred.xid = current->xid; acred.group_info = current->group_info; dprintk("RPC: %4d looking up %s cred\n", diff -NurpP --minimal linux-2.6.10-rc1/net/sunrpc/auth_unix.c linux-2.6.10-rc1-vs1.9.3/net/sunrpc/auth_unix.c --- linux-2.6.10-rc1/net/sunrpc/auth_unix.c 2004-10-18 23:54:32.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/sunrpc/auth_unix.c 2004-10-31 00:41:27.000000000 +0200 @@ -13,14 +13,17 @@ #include #include #include +#include #define NFS_NGROUPS 16 struct unx_cred { struct rpc_cred uc_base; gid_t uc_gid; + xid_t uc_xid; uid_t uc_puid; /* process uid */ gid_t uc_pgid; /* process gid */ + xid_t uc_pxid; /* process xid */ gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -80,6 +83,7 @@ unx_create_cred(struct rpc_auth *auth, s if (flags & RPC_TASK_ROOTCREDS) { cred->uc_uid = cred->uc_puid = 0; cred->uc_gid = cred->uc_pgid = 0; + cred->uc_xid = cred->uc_pxid = current->xid; cred->uc_gids[0] = NOGROUP; } else { int groups = acred->group_info->ngroups; @@ -88,8 +92,10 @@ unx_create_cred(struct rpc_auth *auth, s cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; + cred->uc_xid = acred->xid; cred->uc_puid = current->uid; cred->uc_pgid = current->gid; + cred->uc_pxid = current->xid; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) @@ -122,8 +128,10 @@ unx_match(struct auth_cred *acred, struc if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid + || cred->uc_xid != acred->xid || cred->uc_puid != current->uid - || cred->uc_pgid != current->gid) + || cred->uc_pgid != current->gid + || cred->uc_pxid != current->xid) return 0; groups = acred->group_info->ngroups; @@ -149,7 +157,7 @@ unx_marshal(struct rpc_task *task, u32 * struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; u32 *base, 
*hold; - int i; + int i, tagxid; *p++ = htonl(RPC_AUTH_UNIX); base = p++; @@ -159,14 +167,19 @@ unx_marshal(struct rpc_task *task, u32 * * Copy the UTS nodename captured when the client was created. */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); + tagxid = task->tk_client->cl_tagxid; /* Note: we don't use real uid if it involves raising privilege */ if (ruid && cred->uc_puid != 0 && cred->uc_pgid != 0) { - *p++ = htonl((u32) cred->uc_puid); - *p++ = htonl((u32) cred->uc_pgid); + *p++ = htonl((u32) XIDINO_UID(tagxid, + cred->uc_puid, cred->uc_pxid)); + *p++ = htonl((u32) XIDINO_GID(tagxid, + cred->uc_pgid, cred->uc_pxid)); } else { - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); + *p++ = htonl((u32) XIDINO_UID(tagxid, + cred->uc_uid, cred->uc_xid)); + *p++ = htonl((u32) XIDINO_GID(tagxid, + cred->uc_gid, cred->uc_xid)); } hold = p++; for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) diff -NurpP --minimal linux-2.6.10-rc1/net/unix/af_unix.c linux-2.6.10-rc1-vs1.9.3/net/unix/af_unix.c --- linux-2.6.10-rc1/net/unix/af_unix.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/net/unix/af_unix.c 2004-10-31 00:41:27.000000000 +0200 @@ -118,6 +118,9 @@ #include #include #include +#include +#include +#include int sysctl_unix_max_dgram_qlen = 10; @@ -394,6 +397,9 @@ static int unix_release_sock (struct soc mntput(mnt); } + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + clr_nx_info(&sk->sk_nx_info); sock_put(sk); /* ---- Socket is dead now and most probably destroyed ---- */ @@ -548,6 +554,11 @@ static struct sock * unix_create1(struct sock_init_data(sock,sk); sk_set_owner(sk, THIS_MODULE); + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; @@ -859,7 +870,7 @@ static int 
unix_dgram_connect(struct soc goto out; alen = err; - if (sock->passcred && !unix_sk(sk)->addr && + if (test_bit(SOCK_PASS_CRED, &sock->flags) && !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) goto out; @@ -950,7 +961,8 @@ static int unix_stream_connect(struct so goto out; addr_len = err; - if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASS_CRED, &sock->flags) + && !u->addr && (err = unix_autobind(sock)) != 0) goto out; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); @@ -1284,7 +1296,8 @@ static int unix_dgram_sendmsg(struct kio goto out; } - if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASS_CRED, &sock->flags) + && !u->addr && (err = unix_autobind(sock)) != 0) goto out; err = -EMSGSIZE; diff -NurpP --minimal linux-2.6.10-rc1/security/commoncap.c linux-2.6.10-rc1-vs1.9.3/security/commoncap.c --- linux-2.6.10-rc1/security/commoncap.c 2004-10-31 00:41:03.000000000 +0200 +++ linux-2.6.10-rc1-vs1.9.3/security/commoncap.c 2004-10-31 00:41:27.000000000 +0200 @@ -125,7 +125,7 @@ void cap_bprm_apply_creds (struct linux_ /* Derived from fs/exec.c:compute_creds. */ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); + new_permitted = cap_intersect (bprm->cap_permitted, vx_current_bcaps()); working = cap_intersect (bprm->cap_inheritable, current->cap_inheritable); new_permitted = cap_combine (new_permitted, working);