diff options
Diffstat (limited to 'tools/testing/selftests')
38 files changed, 5541 insertions, 0 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile new file mode 100644 index 000000000..2cee2b79b --- /dev/null +++ b/tools/testing/selftests/Makefile @@ -0,0 +1,25 @@ +TARGETS = breakpoints +TARGETS += cpu-hotplug +TARGETS += efivarfs +TARGETS += kcmp +TARGETS += memory-hotplug +TARGETS += mqueue +TARGETS += mount +TARGETS += net +TARGETS += ptrace +TARGETS += vm + +all: + for TARGET in $(TARGETS); do \ + make -C $$TARGET; \ + done; + +run_tests: all + for TARGET in $(TARGETS); do \ + make -C $$TARGET run_tests; \ + done; + +clean: + for TARGET in $(TARGETS); do \ + make -C $$TARGET clean; \ + done; diff --git a/tools/testing/selftests/README.txt b/tools/testing/selftests/README.txt new file mode 100644 index 000000000..5e2faf9c5 --- /dev/null +++ b/tools/testing/selftests/README.txt @@ -0,0 +1,42 @@ +Linux Kernel Selftests + +The kernel contains a set of "self tests" under the tools/testing/selftests/ +directory. These are intended to be small unit tests to exercise individual +code paths in the kernel. + +Running the selftests +===================== + +To build the tests: + + $ make -C tools/testing/selftests + + +To run the tests: + + $ make -C tools/testing/selftests run_tests + +- note that some tests will require root privileges. + + +To run only tests targetted for a single subsystem: + + $ make -C tools/testing/selftests TARGETS=cpu-hotplug run_tests + +See the top-level tools/testing/selftests/Makefile for the list of all possible +targets. + + +Contributing new tests +====================== + +In general, the rules for for selftests are + + * Do as much as you can if you're not root; + + * Don't take too long; + + * Don't break the build on any architecture, and + + * Don't cause the top-level "make run_tests" to fail if your feature is + unconfigured. diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile new file mode 100644 index 000000000..e18b42b25 --- /dev/null +++ b/tools/testing/selftests/breakpoints/Makefile @@ -0,0 +1,23 @@ +# Taken from perf makefile +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := x86 +endif +ifeq ($(ARCH),x86_64) + ARCH := x86 +endif + + +all: +ifeq ($(ARCH),x86) + gcc breakpoint_test.c -o breakpoint_test +else + echo "Not an x86 target, can't build breakpoints selftests" +endif + +run_tests: + @./breakpoint_test || echo "breakpoints selftests: [FAIL]" + +clean: + rm -fr breakpoint_test diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c new file mode 100644 index 000000000..a0743f3b2 --- /dev/null +++ b/tools/testing/selftests/breakpoints/breakpoint_test.c @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> + * + * Licensed under the terms of the GNU GPL License version 2 + * + * Selftests for breakpoints (and more generally the do_debug() path) in x86. + */ + + +#include <sys/ptrace.h> +#include <unistd.h> +#include <stddef.h> +#include <sys/user.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> + + +/* Breakpoint access modes */ +enum { + BP_X = 1, + BP_RW = 2, + BP_W = 4, +}; + +static pid_t child_pid; + +/* + * Ensures the child and parent are always "talking" about + * the same test sequence. (ie: that we haven't forgotten + * to call check_trapped() somewhere). + */ +static int nr_tests; + +static void set_breakpoint_addr(void *addr, int n) +{ + int ret; + + ret = ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user, u_debugreg[n]), addr); + if (ret) { + perror("Can't set breakpoint addr\n"); + exit(-1); + } +} + +static void toggle_breakpoint(int n, int type, int len, + int local, int global, int set) +{ + int ret; + + int xtype, xlen; + unsigned long vdr7, dr7; + + switch (type) { + case BP_X: + xtype = 0; + break; + case BP_W: + xtype = 1; + break; + case BP_RW: + xtype = 3; + break; + } + + switch (len) { + case 1: + xlen = 0; + break; + case 2: + xlen = 4; + break; + case 4: + xlen = 0xc; + break; + case 8: + xlen = 8; + break; + } + + dr7 = ptrace(PTRACE_PEEKUSER, child_pid, + offsetof(struct user, u_debugreg[7]), 0); + + vdr7 = (xlen | xtype) << 16; + vdr7 <<= 4 * n; + + if (local) { + vdr7 |= 1 << (2 * n); + vdr7 |= 1 << 8; + } + if (global) { + vdr7 |= 2 << (2 * n); + vdr7 |= 1 << 9; + } + + if (set) + dr7 |= vdr7; + else + dr7 &= ~vdr7; + + ret = ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user, u_debugreg[7]), dr7); + if (ret) { + perror("Can't set dr7"); + exit(-1); + } +} + +/* Dummy variables to test read/write accesses */ +static unsigned long long dummy_var[4]; + +/* Dummy functions to test execution accesses */ +static void dummy_func(void) { } +static void dummy_func1(void) { } +static void dummy_func2(void) { } +static void dummy_func3(void) { } + +static void (*dummy_funcs[])(void) = { + dummy_func, + dummy_func1, + dummy_func2, + dummy_func3, +}; + +static int trapped; + +static void check_trapped(void) +{ + /* + * If we haven't trapped, wake up the parent + * so that it notices the failure. + */ + if (!trapped) + kill(getpid(), SIGUSR1); + trapped = 0; + + nr_tests++; +} + +static void write_var(int len) +{ + char *pcval; short *psval; int *pival; long long *plval; + int i; + + for (i = 0; i < 4; i++) { + switch (len) { + case 1: + pcval = (char *)&dummy_var[i]; + *pcval = 0xff; + break; + case 2: + psval = (short *)&dummy_var[i]; + *psval = 0xffff; + break; + case 4: + pival = (int *)&dummy_var[i]; + *pival = 0xffffffff; + break; + case 8: + plval = (long long *)&dummy_var[i]; + *plval = 0xffffffffffffffffLL; + break; + } + check_trapped(); + } +} + +static void read_var(int len) +{ + char cval; short sval; int ival; long long lval; + int i; + + for (i = 0; i < 4; i++) { + switch (len) { + case 1: + cval = *(char *)&dummy_var[i]; + break; + case 2: + sval = *(short *)&dummy_var[i]; + break; + case 4: + ival = *(int *)&dummy_var[i]; + break; + case 8: + lval = *(long long *)&dummy_var[i]; + break; + } + check_trapped(); + } +} + +/* + * Do the r/w/x accesses to trigger the breakpoints. And run + * the usual traps. + */ +static void trigger_tests(void) +{ + int len, local, global, i; + char val; + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("Can't be traced?\n"); + return; + } + + /* Wake up father so that it sets up the first test */ + kill(getpid(), SIGUSR1); + + /* Test instruction breakpoints */ + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + + for (i = 0; i < 4; i++) { + dummy_funcs[i](); + check_trapped(); + } + } + } + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + write_var(len); + } + } + } + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + read_var(len); + } + } + } + + /* Icebp trap */ + asm(".byte 0xf1\n"); + check_trapped(); + + /* Int 3 trap */ + asm("int $3\n"); + check_trapped(); + + kill(getpid(), SIGUSR1); +} + +static void check_success(const char *msg) +{ + const char *msg2; + int child_nr_tests; + int status; + + /* Wait for the child to SIGTRAP */ + wait(&status); + + msg2 = "Failed"; + + if (WSTOPSIG(status) == SIGTRAP) { + child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid, + &nr_tests, 0); + if (child_nr_tests == nr_tests) + msg2 = "Ok"; + if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) { + perror("Can't poke\n"); + exit(-1); + } + } + + nr_tests++; + + printf("%s [%s]\n", msg, msg2); +} + +static void launch_instruction_breakpoints(char *buf, int local, int global) +{ + int i; + + for (i = 0; i < 4; i++) { + set_breakpoint_addr(dummy_funcs[i], i); + toggle_breakpoint(i, BP_X, 1, local, global, 1); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test breakpoint %d with local: %d global: %d", + i, local, global); + check_success(buf); + toggle_breakpoint(i, BP_X, 1, local, global, 0); + } +} + +static void launch_watchpoints(char *buf, int mode, int len, + int local, int global) +{ + const char *mode_str; + int i; + + if (mode == BP_W) + mode_str = "write"; + else + mode_str = "read"; + + for (i = 0; i < 4; i++) { + set_breakpoint_addr(&dummy_var[i], i); + toggle_breakpoint(i, mode, len, local, global, 1); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint %d with len: %d local: " + "%d global: %d", mode_str, i, len, local, global); + check_success(buf); + toggle_breakpoint(i, mode, len, local, global, 0); + } +} + +/* Set the breakpoints and check the child successfully trigger them */ +static void launch_tests(void) +{ + char buf[1024]; + int len, local, global, i; + + /* Instruction breakpoints */ + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_instruction_breakpoints(buf, local, global); + } + } + + /* Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_watchpoints(buf, BP_W, len, + local, global); + } + } + } + + /* Read-Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_watchpoints(buf, BP_RW, len, + local, global); + } + } + } + + /* Icebp traps */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success("Test icebp"); + + /* Int 3 traps */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success("Test int 3 trap"); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); +} + +int main(int argc, char **argv) +{ + pid_t pid; + int ret; + + pid = fork(); + if (!pid) { + trigger_tests(); + return 0; + } + + child_pid = pid; + + wait(NULL); + + launch_tests(); + + wait(NULL); + + return 0; +} diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile new file mode 100644 index 000000000..12657a5e4 --- /dev/null +++ b/tools/testing/selftests/cpu-hotplug/Makefile @@ -0,0 +1,6 @@ +all: + +run_tests: + @./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" + +clean: diff --git a/tools/testing/selftests/cpu-hotplug/on-off-test.sh b/tools/testing/selftests/cpu-hotplug/on-off-test.sh new file mode 100755 index 000000000..bdde7cf42 --- /dev/null +++ b/tools/testing/selftests/cpu-hotplug/on-off-test.sh @@ -0,0 +1,221 @@ +#!/bin/bash + +SYSFS= + +prerequisite() +{ + msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 0 + fi + + if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then + echo $msg cpu hotplug is not supported >&2 + exit 0 + fi +} + +# +# list all hot-pluggable CPUs +# +hotpluggable_cpus() +{ + local state=${1:-.\*} + + for cpu in $SYSFS/devices/system/cpu/cpu*; do + if [ -f $cpu/online ] && grep -q $state $cpu/online; then + echo ${cpu##/*/cpu} + fi + done +} + +hotplaggable_offline_cpus() +{ + hotpluggable_cpus 0 +} + +hotpluggable_online_cpus() +{ + hotpluggable_cpus 1 +} + +cpu_is_online() +{ + grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online +} + +cpu_is_offline() +{ + grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online +} + +online_cpu() +{ + echo 1 > $SYSFS/devices/system/cpu/cpu$1/online +} + +offline_cpu() +{ + echo 0 > $SYSFS/devices/system/cpu/cpu$1/online +} + +online_cpu_expect_success() +{ + local cpu=$1 + + if ! online_cpu $cpu; then + echo $FUNCNAME $cpu: unexpected fail >&2 + elif ! cpu_is_online $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +online_cpu_expect_fail() +{ + local cpu=$1 + + if online_cpu $cpu 2> /dev/null; then + echo $FUNCNAME $cpu: unexpected success >&2 + elif ! cpu_is_offline $cpu; then + echo $FUNCNAME $cpu: unexpected online >&2 + fi +} + +offline_cpu_expect_success() +{ + local cpu=$1 + + if ! offline_cpu $cpu; then + echo $FUNCNAME $cpu: unexpected fail >&2 + elif ! cpu_is_offline $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +offline_cpu_expect_fail() +{ + local cpu=$1 + + if offline_cpu $cpu 2> /dev/null; then + echo $FUNCNAME $cpu: unexpected success >&2 + elif ! cpu_is_online $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +error=-12 +priority=0 + +while getopts e:hp: opt; do + case $opt in + e) + error=$OPTARG + ;; + h) + echo "Usage $0 [ -e errno ] [ -p notifier-priority ]" + exit + ;; + p) + priority=$OPTARG + ;; + esac +done + +if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then + echo "error code must be -4095 <= errno < 0" >&2 + exit 1 +fi + +prerequisite + +# +# Online all hot-pluggable CPUs +# +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Offline all hot-pluggable CPUs +# +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_success $cpu +done + +# +# Online all hot-pluggable CPUs again +# +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Test with cpu notifier error injection +# + +DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` +NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu + +prerequisite_extra() +{ + msg="skip extra tests:" + + /sbin/modprobe -q -r cpu-notifier-error-inject + /sbin/modprobe -q cpu-notifier-error-inject priority=$priority + + if [ ! -d "$DEBUGFS" ]; then + echo $msg debugfs is not mounted >&2 + exit 0 + fi + + if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then + echo $msg cpu-notifier-error-inject module is not available >&2 + exit 0 + fi +} + +prerequisite_extra + +# +# Offline all hot-pluggable CPUs +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_success $cpu +done + +# +# Test CPU hot-add error handling (offline => online) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_fail $cpu +done + +# +# Online all hot-pluggable CPUs +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Test CPU hot-remove error handling (online => offline) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_fail $cpu +done + +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +/sbin/modprobe -q -r cpu-notifier-error-inject diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile new file mode 100644 index 000000000..29e8c6bc8 --- /dev/null +++ b/tools/testing/selftests/efivarfs/Makefile @@ -0,0 +1,12 @@ +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall + +test_objs = open-unlink create-read + +all: $(test_objs) + +run_tests: all + @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]" + +clean: + rm -f $(test_objs) diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c new file mode 100644 index 000000000..7feef1880 --- /dev/null +++ b/tools/testing/selftests/efivarfs/create-read.c @@ -0,0 +1,38 @@ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> + +int main(int argc, char **argv) +{ + const char *path; + char buf[4]; + int fd, rc; + + if (argc < 2) { + fprintf(stderr, "usage: %s <path>\n", argv[0]); + return EXIT_FAILURE; + } + + path = argv[1]; + + /* create a test variable */ + fd = open(path, O_RDWR | O_CREAT, 0600); + if (fd < 0) { + perror("open(O_WRONLY)"); + return EXIT_FAILURE; + } + + rc = read(fd, buf, sizeof(buf)); + if (rc != 0) { + fprintf(stderr, "Reading a new var should return EOF\n"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh new file mode 100755 index 000000000..77edcdcc0 --- /dev/null +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -0,0 +1,198 @@ +#!/bin/bash + +efivarfs_mount=/sys/firmware/efi/efivars +test_guid=210be57c-9849-4fc7-a635-e6382d1aec27 + +check_prereqs() +{ + local msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then + echo $msg efivarfs is not mounted on $efivarfs_mount >&2 + exit 0 + fi +} + +run_test() +{ + local test="$1" + + echo "--------------------" + echo "running $test" + echo "--------------------" + + if [ "$(type -t $test)" = 'function' ]; then + ( $test ) + else + ( ./$test ) + fi + + if [ $? -ne 0 ]; then + echo " [FAIL]" + rc=1 + else + echo " [PASS]" + fi +} + +test_create() +{ + local attrs='\x07\x00\x00\x00' + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file couldn't be created" >&2 + exit 1 + fi + + if [ $(stat -c %s $file) -ne 5 ]; then + echo "$file has invalid size" >&2 + exit 1 + fi +} + +test_create_empty() +{ + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + : > $file + + if [ ! -e $file ]; then + echo "$file can not be created without writing" >&2 + exit 1 + fi +} + +test_create_read() +{ + local file=$efivarfs_mount/$FUNCNAME-$test_guid + ./create-read $file +} + +test_delete() +{ + local attrs='\x07\x00\x00\x00' + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file couldn't be created" >&2 + exit 1 + fi + + rm $file + + if [ -e $file ]; then + echo "$file couldn't be deleted" >&2 + exit 1 + fi + +} + +# test that we can remove a variable by issuing a write with only +# attributes specified +test_zero_size_delete() +{ + local attrs='\x07\x00\x00\x00' + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file does not exist" >&2 + exit 1 + fi + + printf "$attrs" > $file + + if [ -e $file ]; then + echo "$file should have been deleted" >&2 + exit 1 + fi +} + +test_open_unlink() +{ + local file=$efivarfs_mount/$FUNCNAME-$test_guid + ./open-unlink $file +} + +# test that we can create a range of filenames +test_valid_filenames() +{ + local attrs='\x07\x00\x00\x00' + local ret=0 + + local file_list="abc dump-type0-11-1-1362436005 1234 -" + for f in $file_list; do + local file=$efivarfs_mount/$f-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file could not be created" >&2 + ret=1 + else + rm $file + fi + done + + exit $ret +} + +test_invalid_filenames() +{ + local attrs='\x07\x00\x00\x00' + local ret=0 + + local file_list=" + -1234-1234-1234-123456789abc + foo + foo-bar + -foo- + foo-barbazba-foob-foob-foob-foobarbazfoo + foo------------------------------------- + -12345678-1234-1234-1234-123456789abc + a-12345678=1234-1234-1234-123456789abc + a-12345678-1234=1234-1234-123456789abc + a-12345678-1234-1234=1234-123456789abc + a-12345678-1234-1234-1234=123456789abc + 1112345678-1234-1234-1234-123456789abc" + + for f in $file_list; do + local file=$efivarfs_mount/$f + + printf "$attrs\x00" 2>/dev/null > $file + + if [ -e $file ]; then + echo "Creating $file should have failed" >&2 + rm $file + ret=1 + fi + done + + exit $ret +} + +check_prereqs + +rc=0 + +run_test test_create +run_test test_create_empty +run_test test_create_read +run_test test_delete +run_test test_zero_size_delete +run_test test_open_unlink +run_test test_valid_filenames +run_test test_invalid_filenames + +exit $rc diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c new file mode 100644 index 000000000..8c0764407 --- /dev/null +++ b/tools/testing/selftests/efivarfs/open-unlink.c @@ -0,0 +1,63 @@ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +int main(int argc, char **argv) +{ + const char *path; + char buf[5]; + int fd, rc; + + if (argc < 2) { + fprintf(stderr, "usage: %s <path>\n", argv[0]); + return EXIT_FAILURE; + } + + path = argv[1]; + + /* attributes: EFI_VARIABLE_NON_VOLATILE | + * EFI_VARIABLE_BOOTSERVICE_ACCESS | + * EFI_VARIABLE_RUNTIME_ACCESS + */ + *(uint32_t *)buf = 0x7; + buf[4] = 0; + + /* create a test variable */ + fd = open(path, O_WRONLY | O_CREAT); + if (fd < 0) { + perror("open(O_WRONLY)"); + return EXIT_FAILURE; + } + + rc = write(fd, buf, sizeof(buf)); + if (rc != sizeof(buf)) { + perror("write"); + return EXIT_FAILURE; + } + + close(fd); + + fd = open(path, O_RDONLY); + if (fd < 0) { + perror("open"); + return EXIT_FAILURE; + } + + if (unlink(path) < 0) { + perror("unlink"); + return EXIT_FAILURE; + } + + rc = read(fd, buf, sizeof(buf)); + if (rc > 0) { + fprintf(stderr, "reading from an unlinked variable " + "shouldn't be possible\n"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile new file mode 100644 index 000000000..5386fd7c4 --- /dev/null +++ b/tools/testing/selftests/ipc/Makefile @@ -0,0 +1,25 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := X86 + CFLAGS := -DCONFIG_X86_32 -D__i386__ +endif +ifeq ($(ARCH),x86_64) + ARCH := X86 + CFLAGS := -DCONFIG_X86_64 -D__x86_64__ +endif + +CFLAGS += -I../../../../usr/include/ + +all: +ifeq ($(ARCH),X86) + gcc $(CFLAGS) msgque.c -o msgque_test +else + echo "Not an x86 target, can't build msgque selftest" +endif + +run_tests: all + ./msgque_test + +clean: + rm -fr ./msgque_test diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c new file mode 100644 index 000000000..d66418237 --- /dev/null +++ b/tools/testing/selftests/ipc/msgque.c @@ -0,0 +1,246 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <linux/msg.h> +#include <fcntl.h> + +#define MAX_MSG_SIZE 32 + +struct msg1 { + int msize; + long mtype; + char mtext[MAX_MSG_SIZE]; +}; + +#define TEST_STRING "Test sysv5 msg" +#define MSG_TYPE 1 + +#define ANOTHER_TEST_STRING "Yet another test sysv5 msg" +#define ANOTHER_MSG_TYPE 26538 + +struct msgque_data { + key_t key; + int msq_id; + int qbytes; + int qnum; + int mode; + struct msg1 *messages; +}; + +int restore_queue(struct msgque_data *msgque) +{ + int fd, ret, id, i; + char buf[32]; + + fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY); + if (fd == -1) { + printf("Failed to open /proc/sys/kernel/msg_next_id\n"); + return -errno; + } + sprintf(buf, "%d", msgque->msq_id); + + ret = write(fd, buf, strlen(buf)); + if (ret != strlen(buf)) { + printf("Failed to write to /proc/sys/kernel/msg_next_id\n"); + return -errno; + } + + id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL); + if (id == -1) { + printf("Failed to create queue\n"); + return -errno; + } + + if (id != msgque->msq_id) { + printf("Restored queue has wrong id (%d instead of %d)\n", + id, msgque->msq_id); + ret = -EFAULT; + goto destroy; + } + + for (i = 0; i < msgque->qnum; i++) { + if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype, + msgque->messages[i].msize, IPC_NOWAIT) != 0) { + printf("msgsnd failed (%m)\n"); + ret = -errno; + goto destroy; + }; + } + return 0; + +destroy: + if (msgctl(id, IPC_RMID, 0)) + printf("Failed to destroy queue: %d\n", -errno); + return ret; +} + +int check_and_destroy_queue(struct msgque_data *msgque) +{ + struct msg1 message; + int cnt = 0, ret; + + while (1) { + ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE, + 0, IPC_NOWAIT); + if (ret < 0) { + if (errno == ENOMSG) + break; + printf("Failed to read IPC message: %m\n"); + ret = -errno; + goto err; + } + if (ret != msgque->messages[cnt].msize) { + printf("Wrong message size: %d (expected %d)\n", ret, + msgque->messages[cnt].msize); + ret = -EINVAL; + goto err; + } + if (message.mtype != msgque->messages[cnt].mtype) { + printf("Wrong message type\n"); + ret = -EINVAL; + goto err; + } + if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) { + printf("Wrong message content\n"); + ret = -EINVAL; + goto err; + } + cnt++; + } + + if (cnt != msgque->qnum) { + printf("Wrong message number\n"); + ret = -EINVAL; + goto err; + } + + ret = 0; +err: + if (msgctl(msgque->msq_id, IPC_RMID, 0)) { + printf("Failed to destroy queue: %d\n", -errno); + return -errno; + } + return ret; +} + +int dump_queue(struct msgque_data *msgque) +{ + struct msqid64_ds ds; + int kern_id; + int i, ret; + + for (kern_id = 0; kern_id < 256; kern_id++) { + ret = msgctl(kern_id, MSG_STAT, &ds); + if (ret < 0) { + if (errno == -EINVAL) + continue; + printf("Failed to get stats for IPC queue with id %d\n", + kern_id); + return -errno; + } + + if (ret == msgque->msq_id) + break; + } + + msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum); + if (msgque->messages == NULL) { + printf("Failed to get stats for IPC queue\n"); + return -ENOMEM; + } + + msgque->qnum = ds.msg_qnum; + msgque->mode = ds.msg_perm.mode; + msgque->qbytes = ds.msg_qbytes; + + for (i = 0; i < msgque->qnum; i++) { + ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype, + MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY); + if (ret < 0) { + printf("Failed to copy IPC message: %m (%d)\n", errno); + return -errno; + } + msgque->messages[i].msize = ret; + } + return 0; +} + +int fill_msgque(struct msgque_data *msgque) +{ + struct msg1 msgbuf; + + msgbuf.mtype = MSG_TYPE; + memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING)); + if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING), + IPC_NOWAIT) != 0) { + printf("First message send failed (%m)\n"); + return -errno; + }; + + msgbuf.mtype = ANOTHER_MSG_TYPE; + memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); + if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING), + IPC_NOWAIT) != 0) { + printf("Second message send failed (%m)\n"); + return -errno; + }; + return 0; +} + +int main(int argc, char **argv) +{ + int msg, pid, err; + struct msgque_data msgque; + + msgque.key = ftok(argv[0], 822155650); + if (msgque.key == -1) { + printf("Can't make key\n"); + return -errno; + } + + msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); + if (msgque.msq_id == -1) { + printf("Can't create queue\n"); + goto err_out; + } + + err = fill_msgque(&msgque); + if (err) { + printf("Failed to fill queue\n"); + goto err_destroy; + } + + err = dump_queue(&msgque); + if (err) { + printf("Failed to dump queue\n"); + goto err_destroy; + } + + err = check_and_destroy_queue(&msgque); + if (err) { + printf("Failed to check and destroy queue\n"); + goto err_out; + } + + err = restore_queue(&msgque); + if (err) { + printf("Failed to restore queue\n"); + goto err_destroy; + } + + err = check_and_destroy_queue(&msgque); + if (err) { + printf("Failed to test queue\n"); + goto err_out; + } + return 0; + +err_destroy: + if (msgctl(msgque.msq_id, IPC_RMID, 0)) { + printf("Failed to destroy queue: %d\n", -errno); + return -errno; + } +err_out: + return err; +} diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile new file mode 100644 index 000000000..56eb5523d --- /dev/null +++ b/tools/testing/selftests/kcmp/Makefile @@ -0,0 +1,29 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := X86 + CFLAGS := -DCONFIG_X86_32 -D__i386__ +endif +ifeq ($(ARCH),x86_64) + ARCH := X86 + CFLAGS := -DCONFIG_X86_64 -D__x86_64__ +endif + +CFLAGS += -I../../../../arch/x86/include/generated/ +CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ +CFLAGS += -I../../../../arch/x86/include/ + +all: +ifeq ($(ARCH),X86) + gcc $(CFLAGS) kcmp_test.c -o kcmp_test +else + echo "Not an x86 target, can't build kcmp selftest" +endif + +run_tests: all + @./kcmp_test || echo "kcmp_test: [FAIL]" + +clean: + rm -fr ./run_test + rm -fr ./test-file diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c new file mode 100644 index 000000000..fa4f1b37e --- /dev/null +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -0,0 +1,96 @@ +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <limits.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> + +#include <linux/unistd.h> +#include <linux/kcmp.h> + +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> + +static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2) +{ + return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2); +} + +int main(int argc, char **argv) +{ + const char kpath[] = "kcmp-test-file"; + int pid1, pid2; + int fd1, fd2; + int status; + + fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644); + pid1 = getpid(); + + if (fd1 < 0) { + perror("Can't create file"); + exit(1); + } + + pid2 = fork(); + if (pid2 < 0) { + perror("fork failed"); + exit(1); + } + + if (!pid2) { + int pid2 = getpid(); + int ret; + + fd2 = open(kpath, O_RDWR, 0644); + if (fd2 < 0) { + perror("Can't open file"); + exit(1); + } + + /* An example of output and arguments */ + printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld " + "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld " + "INV: %2ld\n", + pid1, pid2, + sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd2), + sys_kcmp(pid1, pid2, KCMP_FILES, 0, 0), + sys_kcmp(pid1, pid2, KCMP_VM, 0, 0), + sys_kcmp(pid1, pid2, KCMP_FS, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SIGHAND, 0, 0), + sys_kcmp(pid1, pid2, KCMP_IO, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SYSVSEM, 0, 0), + + /* This one should fail */ + sys_kcmp(pid1, pid2, KCMP_TYPES + 1, 0, 0)); + + /* This one should return same fd */ + ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1); + if (ret) { + printf("FAIL: 0 expected but %d returned (%s)\n", + ret, strerror(errno)); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + /* Compare with self */ + ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0); + if (ret) { + printf("FAIL: 0 expected but %li returned (%s)\n", + ret, strerror(errno)); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + exit(ret); + } + + waitpid(pid2, &status, P_ALL); + + return 0; +} diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile new file mode 100644 index 000000000..0f49c3f5f --- /dev/null +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -0,0 +1,6 @@ +all: + +run_tests: + @./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" + +clean: diff --git a/tools/testing/selftests/memory-hotplug/on-off-test.sh b/tools/testing/selftests/memory-hotplug/on-off-test.sh new file mode 100755 index 000000000..a2816f631 --- /dev/null +++ b/tools/testing/selftests/memory-hotplug/on-off-test.sh @@ -0,0 +1,230 @@ +#!/bin/bash + +SYSFS= + +prerequisite() +{ + msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 0 + fi + + if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then + echo $msg memory hotplug is not supported >&2 + exit 0 + fi +} + +# +# list all hot-pluggable memory +# +hotpluggable_memory() +{ + local state=${1:-.\*} + + for memory in $SYSFS/devices/system/memory/memory*; do + if grep -q 1 $memory/removable && + grep -q $state $memory/state; then + echo ${memory##/*/memory} + fi + done +} + +hotplaggable_offline_memory() +{ + hotpluggable_memory offline +} + +hotpluggable_online_memory() +{ + hotpluggable_memory online +} + +memory_is_online() +{ + grep -q online $SYSFS/devices/system/memory/memory$1/state +} + +memory_is_offline() +{ + grep -q offline $SYSFS/devices/system/memory/memory$1/state +} + +online_memory() +{ + echo online > $SYSFS/devices/system/memory/memory$1/state +} + +offline_memory() +{ + echo offline > $SYSFS/devices/system/memory/memory$1/state +} + +online_memory_expect_success() +{ + local memory=$1 + + if ! online_memory $memory; then + echo $FUNCNAME $memory: unexpected fail >&2 + elif ! memory_is_online $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +online_memory_expect_fail() +{ + local memory=$1 + + if online_memory $memory 2> /dev/null; then + echo $FUNCNAME $memory: unexpected success >&2 + elif ! memory_is_offline $memory; then + echo $FUNCNAME $memory: unexpected online >&2 + fi +} + +offline_memory_expect_success() +{ + local memory=$1 + + if ! offline_memory $memory; then + echo $FUNCNAME $memory: unexpected fail >&2 + elif ! memory_is_offline $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +offline_memory_expect_fail() +{ + local memory=$1 + + if offline_memory $memory 2> /dev/null; then + echo $FUNCNAME $memory: unexpected success >&2 + elif ! memory_is_online $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +error=-12 +priority=0 +ratio=10 + +while getopts e:hp:r: opt; do + case $opt in + e) + error=$OPTARG + ;; + h) + echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]" + exit + ;; + p) + priority=$OPTARG + ;; + r) + ratio=$OPTARG + ;; + esac +done + +if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then + echo "error code must be -4095 <= errno < 0" >&2 + exit 1 +fi + +prerequisite + +# +# Online all hot-pluggable memory +# +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_success $memory +done + +# +# Offline $ratio percent of hot-pluggable memory +# +for memory in `hotpluggable_online_memory`; do + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_success $memory + fi +done + +# +# Online all hot-pluggable memory again +# +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_success $memory +done + +# +# Test with memory notifier error injection +# + +DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` +NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory + +prerequisite_extra() +{ + msg="skip extra tests:" + + /sbin/modprobe -q -r memory-notifier-error-inject + /sbin/modprobe -q memory-notifier-error-inject priority=$priority + + if [ ! -d "$DEBUGFS" ]; then + echo $msg debugfs is not mounted >&2 + exit 0 + fi + + if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then + echo $msg memory-notifier-error-inject module is not available >&2 + exit 0 + fi +} + +prerequisite_extra + +# +# Offline $ratio percent of hot-pluggable memory +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +for memory in `hotpluggable_online_memory`; do + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_success $memory + fi +done + +# +# Test memory hot-add error handling (offline => online) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_fail $memory +done + +# +# Online all hot-pluggable memory +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_success $memory +done + +# +# Test memory hot-remove error handling (online => offline) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +for memory in `hotpluggable_online_memory`; do + offline_memory_expect_fail $memory +done + +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +/sbin/modprobe -q -r memory-notifier-error-inject diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile new file mode 100644 index 000000000..337d853c2 --- /dev/null +++ b/tools/testing/selftests/mount/Makefile @@ -0,0 +1,17 @@ +# Makefile for mount selftests. + +all: unprivileged-remount-test + +unprivileged-remount-test: unprivileged-remount-test.c + gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test + +# Allow specific tests to be selected. +test_unprivileged_remount: unprivileged-remount-test + @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi + +run_tests: all test_unprivileged_remount + +clean: + rm -f unprivileged-remount-test + +.PHONY: all test_unprivileged_remount diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c new file mode 100644 index 000000000..517785052 --- /dev/null +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -0,0 +1,370 @@ +#define _GNU_SOURCE +#include <sched.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/wait.h> +#include <sys/vfs.h> +#include <sys/statvfs.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <grp.h> +#include <stdbool.h> +#include <stdarg.h> + +#ifndef CLONE_NEWNS +# define CLONE_NEWNS 0x00020000 +#endif +#ifndef CLONE_NEWUTS +# define CLONE_NEWUTS 0x04000000 +#endif +#ifndef CLONE_NEWIPC +# define CLONE_NEWIPC 0x08000000 +#endif +#ifndef CLONE_NEWNET +# define CLONE_NEWNET 0x40000000 +#endif +#ifndef CLONE_NEWUSER +# define CLONE_NEWUSER 0x10000000 +#endif +#ifndef CLONE_NEWPID +# define CLONE_NEWPID 0x20000000 +#endif + +#ifndef MS_REC +# define MS_REC 16384 +#endif +#ifndef MS_RELATIME +# define MS_RELATIME (1 << 21) +#endif +#ifndef MS_STRICTATIME +# define MS_STRICTATIME (1 << 24) +#endif + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) +{ + char buf[4096]; + int fd; + ssize_t written; + int buf_len; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) { + die("vsnprintf failed: %s\n", + strerror(errno)); + } + if (buf_len >= sizeof(buf)) { + die("vsnprintf output truncated\n"); + } + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + die("open of %s failed: %s\n", + filename, strerror(errno)); + } + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + die("short write to %s\n", filename); + } else { + die("write to %s failed: %s\n", + filename, strerror(errno)); + } + } + if (close(fd) != 0) { + die("close of %s failed: %s\n", + filename, strerror(errno)); + } +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); + +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); + +} + +static int read_mnt_flags(const char *path) +{ + int ret; + struct statvfs stat; + int mnt_flags; + + ret = statvfs(path, &stat); + if (ret != 0) { + die("statvfs of %s failed: %s\n", + path, strerror(errno)); + } + if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \ + ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \ + ST_SYNCHRONOUS | ST_MANDLOCK)) { + die("Unrecognized mount flags\n"); + } + mnt_flags = 0; + if (stat.f_flag & ST_RDONLY) + mnt_flags |= MS_RDONLY; + if (stat.f_flag & ST_NOSUID) + mnt_flags |= MS_NOSUID; + if (stat.f_flag & ST_NODEV) + mnt_flags |= MS_NODEV; + if (stat.f_flag & ST_NOEXEC) + mnt_flags |= MS_NOEXEC; + if (stat.f_flag & ST_NOATIME) + mnt_flags |= MS_NOATIME; + if (stat.f_flag & ST_NODIRATIME) + mnt_flags |= MS_NODIRATIME; + if (stat.f_flag & ST_RELATIME) + mnt_flags |= MS_RELATIME; + if (stat.f_flag & ST_SYNCHRONOUS) + mnt_flags |= MS_SYNCHRONOUS; + if (stat.f_flag & ST_MANDLOCK) + mnt_flags |= ST_MANDLOCK; + + return mnt_flags; +} + +static void create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER) !=0) { + die("unshare(CLONE_NEWUSER) failed: %s\n", + strerror(errno)); + } + + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "0 %d 1", uid); + write_file("/proc/self/gid_map", "0 %d 1", gid); + + if (setgid(0) != 0) { + die ("setgid(0) failed %s\n", + strerror(errno)); + } + if (setuid(0) != 0) { + die("setuid(0) failed %s\n", + strerror(errno)); + } +} + +static +bool test_unpriv_remount(const char *fstype, const char *mount_options, + int mount_flags, int remount_flags, int invalid_flags) +{ + pid_t child; + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + pid = waitpid(child, &status, 0); + if (pid == -1) { + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + create_and_enter_userns(); + if (unshare(CLONE_NEWNS) != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) { + die("mount of %s with options '%s' on /tmp failed: %s\n", + fstype, + mount_options? mount_options : "", + strerror(errno)); + } + + create_and_enter_userns(); + + if (unshare(CLONE_NEWNS) != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + if (mount("/tmp", "/tmp", "none", + MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp failed: %s\n", + strerror(errno)); + } + + if (mount("/tmp", "/tmp", "none", + MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp with invalid flags " + "succeeded unexpectedly\n"); + } + exit(EXIT_SUCCESS); +} + +static bool test_unpriv_remount_simple(int mount_flags) +{ + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0); +} + +static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) +{ + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, + invalid_flags); +} + +static bool test_priv_mount_unpriv_remount(void) +{ + pid_t child; + int ret; + const char *orig_path = "/dev"; + const char *dest_path = "/tmp"; + int orig_mnt_flags, remount_mnt_flags; + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + pid = waitpid(child, &status, 0); + if (pid == -1) { + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + orig_mnt_flags = read_mnt_flags(orig_path); + + create_and_enter_userns(); + ret = unshare(CLONE_NEWNS); + if (ret != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL); + if (ret != 0) { + die("recursive bind mount of %s onto %s failed: %s\n", + orig_path, dest_path, strerror(errno)); + } + + ret = mount(dest_path, dest_path, "none", + MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL); + if (ret != 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp failed: %s\n", + strerror(errno)); + } + + remount_mnt_flags = read_mnt_flags(dest_path); + if (orig_mnt_flags != remount_mnt_flags) { + die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n", + dest_path, orig_path); + } + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + if (!test_unpriv_remount_simple(MS_RDONLY)) { + die("MS_RDONLY malfunctions\n"); + } + if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) { + die("MS_NODEV malfunctions\n"); + } + if (!test_unpriv_remount_simple(MS_NOSUID)) { + die("MS_NOSUID malfunctions\n"); + } + if (!test_unpriv_remount_simple(MS_NOEXEC)) { + die("MS_NOEXEC malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_RELATIME, + MS_NOATIME)) + { + die("MS_RELATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_STRICTATIME, + MS_NOATIME)) + { + die("MS_STRICTATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_NOATIME, + MS_STRICTATIME)) + { + die("MS_NOATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME, + MS_NOATIME)) + { + die("MS_RELATIME|MS_NODIRATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME, + MS_NOATIME)) + { + die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME, + MS_STRICTATIME)) + { + die("MS_NOATIME|MS_DIRATIME malfunctions\n"); + } + if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME)) + { + die("Default atime malfunctions\n"); + } + if (!test_priv_mount_unpriv_remount()) { + die("Mount flags unexpectedly changed after remount\n"); + } + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore new file mode 100644 index 000000000..d8d423772 --- /dev/null +++ b/tools/testing/selftests/mqueue/.gitignore @@ -0,0 +1,2 @@ +mq_open_tests +mq_perf_tests diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile new file mode 100644 index 000000000..218a122c7 --- /dev/null +++ b/tools/testing/selftests/mqueue/Makefile @@ -0,0 +1,10 @@ +all: + gcc -O2 -lrt mq_open_tests.c -o mq_open_tests + gcc -O2 -lrt -lpthread -lpopt -o mq_perf_tests mq_perf_tests.c + +run_tests: + @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]" + @./mq_perf_tests || echo "mq_perf_tests: [FAIL]" + +clean: + rm -f mq_open_tests mq_perf_tests diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c new file mode 100644 index 000000000..711cc2923 --- /dev/null +++ b/tools/testing/selftests/mqueue/mq_open_tests.c @@ -0,0 +1,492 @@ +/* + * This application is Copyright 2012 Red Hat, Inc. + * Doug Ledford <dledford@redhat.com> + * + * mq_open_tests is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * mq_open_tests is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For the full text of the license, see <http://www.gnu.org/licenses/>. + * + * mq_open_tests.c + * Tests the various situations that should either succeed or fail to + * open a posix message queue and then reports whether or not they + * did as they were supposed to. + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <limits.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <mqueue.h> + +static char *usage = +"Usage:\n" +" %s path\n" +"\n" +" path Path name of the message queue to create\n" +"\n" +" Note: this program must be run as root in order to enable all tests\n" +"\n"; + +char *DEF_MSGS = "/proc/sys/fs/mqueue/msg_default"; +char *DEF_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_default"; +char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max"; +char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max"; + +int default_settings; +struct rlimit saved_limits, cur_limits; +int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize; +int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize; +FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize; +char *queue_path; +mqd_t queue = -1; + +static inline void __set(FILE *stream, int value, char *err_msg); +void shutdown(int exit_val, char *err_cause, int line_no); +static inline int get(FILE *stream); +static inline void set(FILE *stream, int value); +static inline void getr(int type, struct rlimit *rlim); +static inline void setr(int type, struct rlimit *rlim); +void validate_current_settings(); +static inline void test_queue(struct mq_attr *attr, struct mq_attr *result); +static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result); + +static inline void __set(FILE *stream, int value, char *err_msg) +{ + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + perror(err_msg); +} + + +void shutdown(int exit_val, char *err_cause, int line_no) +{ + static int in_shutdown = 0; + + /* In case we get called recursively by a set() call below */ + if (in_shutdown++) + return; + + seteuid(0); + + if (queue != -1) + if (mq_close(queue)) + perror("mq_close() during shutdown"); + if (queue_path) + /* + * Be silent if this fails, if we cleaned up already it's + * expected to fail + */ + mq_unlink(queue_path); + if (default_settings) { + if (saved_def_msgs) + __set(def_msgs, saved_def_msgs, + "failed to restore saved_def_msgs"); + if (saved_def_msgsize) + __set(def_msgsize, saved_def_msgsize, + "failed to restore saved_def_msgsize"); + } + if (saved_max_msgs) + __set(max_msgs, saved_max_msgs, + "failed to restore saved_max_msgs"); + if (saved_max_msgsize) + __set(max_msgsize, saved_max_msgsize, + "failed to restore saved_max_msgsize"); + if (exit_val) + error(exit_val, errno, "%s at %d", err_cause, line_no); + exit(0); +} + +static inline int get(FILE *stream) +{ + int value; + rewind(stream); + if (fscanf(stream, "%d", &value) != 1) + shutdown(4, "Error reading /proc entry", __LINE__ - 1); + return value; +} + +static inline void set(FILE *stream, int value) +{ + int new_value; + + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + return shutdown(5, "Failed writing to /proc file", + __LINE__ - 1); + new_value = get(stream); + if (new_value != value) + return shutdown(5, "We didn't get what we wrote to /proc back", + __LINE__ - 1); +} + +static inline void getr(int type, struct rlimit *rlim) +{ + if (getrlimit(type, rlim)) + shutdown(6, "getrlimit()", __LINE__ - 1); +} + +static inline void setr(int type, struct rlimit *rlim) +{ + if (setrlimit(type, rlim)) + shutdown(7, "setrlimit()", __LINE__ - 1); +} + +void validate_current_settings() +{ + int rlim_needed; + + if (cur_limits.rlim_cur < 4096) { + printf("Current rlimit value for POSIX message queue bytes is " + "unreasonably low,\nincreasing.\n\n"); + cur_limits.rlim_cur = 8192; + cur_limits.rlim_max = 16384; + setr(RLIMIT_MSGQUEUE, &cur_limits); + } + + if (default_settings) { + rlim_needed = (cur_def_msgs + 1) * (cur_def_msgsize + 1 + + 2 * sizeof(void *)); + if (rlim_needed > cur_limits.rlim_cur) { + printf("Temporarily lowering default queue parameters " + "to something that will work\n" + "with the current rlimit values.\n\n"); + set(def_msgs, 10); + cur_def_msgs = 10; + set(def_msgsize, 128); + cur_def_msgsize = 128; + } + } else { + rlim_needed = (cur_max_msgs + 1) * (cur_max_msgsize + 1 + + 2 * sizeof(void *)); + if (rlim_needed > cur_limits.rlim_cur) { + printf("Temporarily lowering maximum queue parameters " + "to something that will work\n" + "with the current rlimit values in case this is " + "a kernel that ties the default\n" + "queue parameters to the maximum queue " + "parameters.\n\n"); + set(max_msgs, 10); + cur_max_msgs = 10; + set(max_msgsize, 128); + cur_max_msgsize = 128; + } + } +} + +/* + * test_queue - Test opening a queue, shutdown if we fail. This should + * only be called in situations that should never fail. We clean up + * after ourselves and return the queue attributes in *result. + */ +static inline void test_queue(struct mq_attr *attr, struct mq_attr *result) +{ + int flags = O_RDWR | O_EXCL | O_CREAT; + int perms = DEFFILEMODE; + + if ((queue = mq_open(queue_path, flags, perms, attr)) == -1) + shutdown(1, "mq_open()", __LINE__); + if (mq_getattr(queue, result)) + shutdown(1, "mq_getattr()", __LINE__); + if (mq_close(queue)) + shutdown(1, "mq_close()", __LINE__); + queue = -1; + if (mq_unlink(queue_path)) + shutdown(1, "mq_unlink()", __LINE__); +} + +/* + * Same as test_queue above, but failure is not fatal. + * Returns: + * 0 - Failed to create a queue + * 1 - Created a queue, attributes in *result + */ +static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result) +{ + int flags = O_RDWR | O_EXCL | O_CREAT; + int perms = DEFFILEMODE; + + if ((queue = mq_open(queue_path, flags, perms, attr)) == -1) + return 0; + if (mq_getattr(queue, result)) + shutdown(1, "mq_getattr()", __LINE__); + if (mq_close(queue)) + shutdown(1, "mq_close()", __LINE__); + queue = -1; + if (mq_unlink(queue_path)) + shutdown(1, "mq_unlink()", __LINE__); + return 1; +} + +int main(int argc, char *argv[]) +{ + struct mq_attr attr, result; + + if (argc != 2) { + fprintf(stderr, "Must pass a valid queue name\n\n"); + fprintf(stderr, usage, argv[0]); + exit(1); + } + + /* + * Although we can create a msg queue with a non-absolute path name, + * unlink will fail. So, if the name doesn't start with a /, add one + * when we save it. + */ + if (*argv[1] == '/') + queue_path = strdup(argv[1]); + else { + queue_path = malloc(strlen(argv[1]) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, argv[1]); + } + + if (getuid() != 0) { + fprintf(stderr, "Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + exit(1); + } + + /* Find out what files there are for us to make tweaks in */ + def_msgs = fopen(DEF_MSGS, "r+"); + def_msgsize = fopen(DEF_MSGSIZE, "r+"); + max_msgs = fopen(MAX_MSGS, "r+"); + max_msgsize = fopen(MAX_MSGSIZE, "r+"); + + if (!max_msgs) + shutdown(2, "Failed to open msg_max", __LINE__); + if (!max_msgsize) + shutdown(2, "Failed to open msgsize_max", __LINE__); + if (def_msgs || def_msgsize) + default_settings = 1; + + /* Load up the current system values for everything we can */ + getr(RLIMIT_MSGQUEUE, &saved_limits); + cur_limits = saved_limits; + if (default_settings) { + saved_def_msgs = cur_def_msgs = get(def_msgs); + saved_def_msgsize = cur_def_msgsize = get(def_msgsize); + } + saved_max_msgs = cur_max_msgs = get(max_msgs); + saved_max_msgsize = cur_max_msgsize = get(max_msgsize); + + /* Tell the user our initial state */ + printf("\nInitial system state:\n"); + printf("\tUsing queue path:\t\t%s\n", queue_path); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t%d\n", saved_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t%d\n", saved_limits.rlim_max); + printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize); + printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs); + if (default_settings) { + printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize); + printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs); + } else { + printf("\tDefault Message Size:\t\tNot Supported\n"); + printf("\tDefault Queue Size:\t\tNot Supported\n"); + } + printf("\n"); + + validate_current_settings(); + + printf("Adjusted system state for testing:\n"); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t%d\n", cur_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t%d\n", cur_limits.rlim_max); + printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize); + printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs); + if (default_settings) { + printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize); + printf("\tDefault Queue Size:\t\t%d\n", cur_def_msgs); + } + + printf("\n\nTest series 1, behavior when no attr struct " + "passed to mq_open:\n"); + if (!default_settings) { + test_queue(NULL, &result); + printf("Given sane system settings, mq_open without an attr " + "struct succeeds:\tPASS\n"); + if (result.mq_maxmsg != cur_max_msgs || + result.mq_msgsize != cur_max_msgsize) { + printf("Kernel does not support setting the default " + "mq attributes,\nbut also doesn't tie the " + "defaults to the maximums:\t\t\tPASS\n"); + } else { + set(max_msgs, ++cur_max_msgs); + set(max_msgsize, ++cur_max_msgsize); + test_queue(NULL, &result); + if (result.mq_maxmsg == cur_max_msgs && + result.mq_msgsize == cur_max_msgsize) + printf("Kernel does not support setting the " + "default mq attributes and\n" + "also ties system wide defaults to " + "the system wide maximums:\t\t" + "FAIL\n"); + else + printf("Kernel does not support setting the " + "default mq attributes,\n" + "but also doesn't tie the defaults to " + "the maximums:\t\t\tPASS\n"); + } + } else { + printf("Kernel supports setting defaults separately from " + "maximums:\t\tPASS\n"); + /* + * While we are here, go ahead and test that the kernel + * properly follows the default settings + */ + test_queue(NULL, &result); + printf("Given sane values, mq_open without an attr struct " + "succeeds:\t\tPASS\n"); + if (result.mq_maxmsg != cur_def_msgs || + result.mq_msgsize != cur_def_msgsize) + printf("Kernel supports setting defaults, but does " + "not actually honor them:\tFAIL\n\n"); + else { + set(def_msgs, ++cur_def_msgs); + set(def_msgsize, ++cur_def_msgsize); + /* In case max was the same as the default */ + set(max_msgs, ++cur_max_msgs); + set(max_msgsize, ++cur_max_msgsize); + test_queue(NULL, &result); + if (result.mq_maxmsg != cur_def_msgs || + result.mq_msgsize != cur_def_msgsize) + printf("Kernel supports setting defaults, but " + "does not actually honor them:\t" + "FAIL\n"); + else + printf("Kernel properly honors default setting " + "knobs:\t\t\t\tPASS\n"); + } + set(def_msgs, cur_max_msgs + 1); + cur_def_msgs = cur_max_msgs + 1; + set(def_msgsize, cur_max_msgsize + 1); + cur_def_msgsize = cur_max_msgsize + 1; + if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >= + cur_limits.rlim_cur) { + cur_limits.rlim_cur = (cur_def_msgs + 2) * + (cur_def_msgsize + 2 * sizeof(void *)); + cur_limits.rlim_max = 2 * cur_limits.rlim_cur; + setr(RLIMIT_MSGQUEUE, &cur_limits); + } + if (test_queue_fail(NULL, &result)) { + if (result.mq_maxmsg == cur_max_msgs && + result.mq_msgsize == cur_max_msgsize) + printf("Kernel properly limits default values " + "to lesser of default/max:\t\tPASS\n"); + else + printf("Kernel does not properly set default " + "queue parameters when\ndefaults > " + "max:\t\t\t\t\t\t\t\tFAIL\n"); + } else + printf("Kernel fails to open mq because defaults are " + "greater than maximums:\tFAIL\n"); + set(def_msgs, --cur_def_msgs); + set(def_msgsize, --cur_def_msgsize); + cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs * + cur_def_msgsize; + setr(RLIMIT_MSGQUEUE, &cur_limits); + if (test_queue_fail(NULL, &result)) + printf("Kernel creates queue even though defaults " + "would exceed\nrlimit setting:" + "\t\t\t\t\t\t\t\tFAIL\n"); + else + printf("Kernel properly fails to create queue when " + "defaults would\nexceed rlimit:" + "\t\t\t\t\t\t\t\tPASS\n"); + } + + /* + * Test #2 - open with an attr struct that exceeds rlimit + */ + printf("\n\nTest series 2, behavior when attr struct is " + "passed to mq_open:\n"); + cur_max_msgs = 32; + cur_max_msgsize = cur_limits.rlim_max >> 4; + set(max_msgs, cur_max_msgs); + set(max_msgsize, cur_max_msgsize); + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = cur_max_msgsize; + if (test_queue_fail(&attr, &result)) + printf("Queue open in excess of rlimit max when euid = 0 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open in excess of rlimit max when euid = 0 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = cur_max_msgs + 1; + attr.mq_msgsize = 10; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_maxmsg > limit when euid = 0 " + "succeeded:\t\tPASS\n"); + else + printf("Queue open with mq_maxmsg > limit when euid = 0 " + "failed:\t\tFAIL\n"); + attr.mq_maxmsg = 1; + attr.mq_msgsize = cur_max_msgsize + 1; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_msgsize > limit when euid = 0 " + "succeeded:\t\tPASS\n"); + else + printf("Queue open with mq_msgsize > limit when euid = 0 " + "failed:\t\tFAIL\n"); + attr.mq_maxmsg = 65536; + attr.mq_msgsize = 65536; + if (test_queue_fail(&attr, &result)) + printf("Queue open with total size > 2GB when euid = 0 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with total size > 2GB when euid = 0 " + "failed:\t\t\tPASS\n"); + seteuid(99); + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = cur_max_msgsize; + if (test_queue_fail(&attr, &result)) + printf("Queue open in excess of rlimit max when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open in excess of rlimit max when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = cur_max_msgs + 1; + attr.mq_msgsize = 10; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_maxmsg > limit when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with mq_maxmsg > limit when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = 1; + attr.mq_msgsize = cur_max_msgsize + 1; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_msgsize > limit when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with mq_msgsize > limit when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = 65536; + attr.mq_msgsize = 65536; + if (test_queue_fail(&attr, &result)) + printf("Queue open with total size > 2GB when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with total size > 2GB when euid = 99 " + "failed:\t\t\tPASS\n"); + + shutdown(0,"",0); +} diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c new file mode 100644 index 000000000..2fadd4b97 --- /dev/null +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -0,0 +1,741 @@ +/* + * This application is Copyright 2012 Red Hat, Inc. + * Doug Ledford <dledford@redhat.com> + * + * mq_perf_tests is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * mq_perf_tests is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For the full text of the license, see <http://www.gnu.org/licenses/>. + * + * mq_perf_tests.c + * Tests various types of message queue workloads, concentrating on those + * situations that invole large message sizes, large message queue depths, + * or both, and reports back useful metrics about kernel message queue + * performance. + * + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <limits.h> +#include <errno.h> +#include <signal.h> +#include <pthread.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <mqueue.h> +#include <popt.h> + +static char *usage = +"Usage:\n" +" %s [-c #[,#..] -f] path\n" +"\n" +" -c # Skip most tests and go straight to a high queue depth test\n" +" and then run that test continuously (useful for running at\n" +" the same time as some other workload to see how much the\n" +" cache thrashing caused by adding messages to a very deep\n" +" queue impacts the performance of other programs). The number\n" +" indicates which CPU core we should bind the process to during\n" +" the run. If you have more than one physical CPU, then you\n" +" will need one copy per physical CPU package, and you should\n" +" specify the CPU cores to pin ourself to via a comma separated\n" +" list of CPU values.\n" +" -f Only usable with continuous mode. Pin ourself to the CPUs\n" +" as requested, then instead of looping doing a high mq\n" +" workload, just busy loop. This will allow us to lock up a\n" +" single CPU just like we normally would, but without actually\n" +" thrashing the CPU cache. This is to make it easier to get\n" +" comparable numbers from some other workload running on the\n" +" other CPUs. One set of numbers with # CPUs locked up running\n" +" an mq workload, and another set of numbers with those same\n" +" CPUs locked away from the test workload, but not doing\n" +" anything to trash the cache like the mq workload might.\n" +" path Path name of the message queue to create\n" +"\n" +" Note: this program must be run as root in order to enable all tests\n" +"\n"; + +char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max"; +char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max"; + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define MAX_CPUS 64 +char *cpu_option_string; +int cpus_to_pin[MAX_CPUS]; +int num_cpus_to_pin; +pthread_t cpu_threads[MAX_CPUS]; +pthread_t main_thread; +cpu_set_t *cpu_set; +int cpu_set_size; +int cpus_online; + +#define MSG_SIZE 16 +#define TEST1_LOOPS 10000000 +#define TEST2_LOOPS 100000 +int continuous_mode; +int continuous_mode_fake; + +struct rlimit saved_limits, cur_limits; +int saved_max_msgs, saved_max_msgsize; +int cur_max_msgs, cur_max_msgsize; +FILE *max_msgs, *max_msgsize; +int cur_nice; +char *queue_path = "/mq_perf_tests"; +mqd_t queue = -1; +struct mq_attr result; +int mq_prio_max; + +const struct poptOption options[] = { + { + .longName = "continuous", + .shortName = 'c', + .argInfo = POPT_ARG_STRING, + .arg = &cpu_option_string, + .val = 'c', + .descrip = "Run continuous tests at a high queue depth in " + "order to test the effects of cache thrashing on " + "other tasks on the system. This test is intended " + "to be run on one core of each physical CPU while " + "some other CPU intensive task is run on all the other " + "cores of that same physical CPU and the other task " + "is timed. It is assumed that the process of adding " + "messages to the message queue in a tight loop will " + "impact that other task to some degree. Once the " + "tests are performed in this way, you should then " + "re-run the tests using fake mode in order to check " + "the difference in time required to perform the CPU " + "intensive task", + .argDescrip = "cpu[,cpu]", + }, + { + .longName = "fake", + .shortName = 'f', + .argInfo = POPT_ARG_NONE, + .arg = &continuous_mode_fake, + .val = 0, + .descrip = "Tie up the CPUs that we would normally tie up in" + "continuous mode, but don't actually do any mq stuff, " + "just keep the CPU busy so it can't be used to process " + "system level tasks as this would free up resources on " + "the other CPU cores and skew the comparison between " + "the no-mqueue work and mqueue work tests", + .argDescrip = NULL, + }, + { + .longName = "path", + .shortName = 'p', + .argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT, + .arg = &queue_path, + .val = 'p', + .descrip = "The name of the path to use in the mqueue " + "filesystem for our tests", + .argDescrip = "pathname", + }, + POPT_AUTOHELP + POPT_TABLEEND +}; + +static inline void __set(FILE *stream, int value, char *err_msg); +void shutdown(int exit_val, char *err_cause, int line_no); +void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context); +void sig_action(int signum, siginfo_t *info, void *context); +static inline int get(FILE *stream); +static inline void set(FILE *stream, int value); +static inline int try_set(FILE *stream, int value); +static inline void getr(int type, struct rlimit *rlim); +static inline void setr(int type, struct rlimit *rlim); +static inline void open_queue(struct mq_attr *attr); +void increase_limits(void); + +static inline void __set(FILE *stream, int value, char *err_msg) +{ + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + perror(err_msg); +} + + +void shutdown(int exit_val, char *err_cause, int line_no) +{ + static int in_shutdown = 0; + int errno_at_shutdown = errno; + int i; + + /* In case we get called by multiple threads or from an sighandler */ + if (in_shutdown++) + return; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i]) { + pthread_kill(cpu_threads[i], SIGUSR1); + pthread_join(cpu_threads[i], NULL); + } + + if (queue != -1) + if (mq_close(queue)) + perror("mq_close() during shutdown"); + if (queue_path) + /* + * Be silent if this fails, if we cleaned up already it's + * expected to fail + */ + mq_unlink(queue_path); + if (saved_max_msgs) + __set(max_msgs, saved_max_msgs, + "failed to restore saved_max_msgs"); + if (saved_max_msgsize) + __set(max_msgsize, saved_max_msgsize, + "failed to restore saved_max_msgsize"); + if (exit_val) + error(exit_val, errno_at_shutdown, "%s at %d", + err_cause, line_no); + exit(0); +} + +void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context) +{ + if (pthread_self() != main_thread) + pthread_exit(0); + else { + fprintf(stderr, "Caught signal %d in SIGUSR1 handler, " + "exiting\n", signum); + shutdown(0, "", 0); + fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n"); + exit(0); + } +} + +void sig_action(int signum, siginfo_t *info, void *context) +{ + if (pthread_self() != main_thread) + pthread_kill(main_thread, signum); + else { + fprintf(stderr, "Caught signal %d, exiting\n", signum); + shutdown(0, "", 0); + fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n"); + exit(0); + } +} + +static inline int get(FILE *stream) +{ + int value; + rewind(stream); + if (fscanf(stream, "%d", &value) != 1) + shutdown(4, "Error reading /proc entry", __LINE__); + return value; +} + +static inline void set(FILE *stream, int value) +{ + int new_value; + + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + return shutdown(5, "Failed writing to /proc file", __LINE__); + new_value = get(stream); + if (new_value != value) + return shutdown(5, "We didn't get what we wrote to /proc back", + __LINE__); +} + +static inline int try_set(FILE *stream, int value) +{ + int new_value; + + rewind(stream); + fprintf(stream, "%d", value); + new_value = get(stream); + return new_value == value; +} + +static inline void getr(int type, struct rlimit *rlim) +{ + if (getrlimit(type, rlim)) + shutdown(6, "getrlimit()", __LINE__); +} + +static inline void setr(int type, struct rlimit *rlim) +{ + if (setrlimit(type, rlim)) + shutdown(7, "setrlimit()", __LINE__); +} + +/** + * open_queue - open the global queue for testing + * @attr - An attr struct specifying the desired queue traits + * @result - An attr struct that lists the actual traits the queue has + * + * This open is not allowed to fail, failure will result in an orderly + * shutdown of the program. The global queue_path is used to set what + * queue to open, the queue descriptor is saved in the global queue + * variable. + */ +static inline void open_queue(struct mq_attr *attr) +{ + int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK; + int perms = DEFFILEMODE; + + queue = mq_open(queue_path, flags, perms, attr); + if (queue == -1) + shutdown(1, "mq_open()", __LINE__); + if (mq_getattr(queue, &result)) + shutdown(1, "mq_getattr()", __LINE__); + printf("\n\tQueue %s created:\n", queue_path); + printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ? + "O_NONBLOCK" : "(null)"); + printf("\t\tmq_maxmsg:\t\t\t%d\n", result.mq_maxmsg); + printf("\t\tmq_msgsize:\t\t\t%d\n", result.mq_msgsize); + printf("\t\tmq_curmsgs:\t\t\t%d\n", result.mq_curmsgs); +} + +void *fake_cont_thread(void *arg) +{ + int i; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i] == pthread_self()) + break; + printf("\tStarted fake continuous mode thread %d on CPU %d\n", i, + cpus_to_pin[i]); + while (1) + ; +} + +void *cont_thread(void *arg) +{ + char buff[MSG_SIZE]; + int i, priority; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i] == pthread_self()) + break; + printf("\tStarted continuous mode thread %d on CPU %d\n", i, + cpus_to_pin[i]); + while (1) { + while (mq_send(queue, buff, sizeof(buff), 0) == 0) + ; + mq_receive(queue, buff, sizeof(buff), &priority); + } +} + +#define drain_queue() \ + while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE) + +#define do_untimed_send() \ + do { \ + if (mq_send(queue, buff, MSG_SIZE, prio_out)) \ + shutdown(3, "Test send failure", __LINE__); \ + } while (0) + +#define do_send_recv() \ + do { \ + clock_gettime(clock, &start); \ + if (mq_send(queue, buff, MSG_SIZE, prio_out)) \ + shutdown(3, "Test send failure", __LINE__); \ + clock_gettime(clock, &middle); \ + if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \ + shutdown(3, "Test receive failure", __LINE__); \ + clock_gettime(clock, &end); \ + nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \ + (middle.tv_nsec - start.tv_nsec); \ + send_total.tv_nsec += nsec; \ + if (send_total.tv_nsec >= 1000000000) { \ + send_total.tv_sec++; \ + send_total.tv_nsec -= 1000000000; \ + } \ + nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \ + (end.tv_nsec - middle.tv_nsec); \ + recv_total.tv_nsec += nsec; \ + if (recv_total.tv_nsec >= 1000000000) { \ + recv_total.tv_sec++; \ + recv_total.tv_nsec -= 1000000000; \ + } \ + } while (0) + +struct test { + char *desc; + void (*func)(int *); +}; + +void const_prio(int *prio) +{ + return; +} + +void inc_prio(int *prio) +{ + if (++*prio == mq_prio_max) + *prio = 0; +} + +void dec_prio(int *prio) +{ + if (--*prio < 0) + *prio = mq_prio_max - 1; +} + +void random_prio(int *prio) +{ + *prio = random() % mq_prio_max; +} + +struct test test2[] = { + {"\n\tTest #2a: Time send/recv message, queue full, constant prio\n", + const_prio}, + {"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n", + inc_prio}, + {"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n", + dec_prio}, + {"\n\tTest #2d: Time send/recv message, queue full, random prio\n", + random_prio}, + {NULL, NULL} +}; + +/** + * Tests to perform (all done with MSG_SIZE messages): + * + * 1) Time to add/remove message with 0 messages on queue + * 1a) with constant prio + * 2) Time to add/remove message when queue close to capacity: + * 2a) with constant prio + * 2b) with increasing prio + * 2c) with decreasing prio + * 2d) with random prio + * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX) + */ +void *perf_test_thread(void *arg) +{ + char buff[MSG_SIZE]; + int prio_out, prio_in; + int i; + clockid_t clock; + pthread_t *t; + struct timespec res, start, middle, end, send_total, recv_total; + unsigned long long nsec; + struct test *cur_test; + + t = &cpu_threads[0]; + printf("\n\tStarted mqueue performance test thread on CPU %d\n", + cpus_to_pin[0]); + mq_prio_max = sysconf(_SC_MQ_PRIO_MAX); + if (mq_prio_max == -1) + shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__); + if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0) + shutdown(2, "pthread_getcpuclockid", __LINE__); + + if (clock_getres(clock, &res)) + shutdown(2, "clock_getres()", __LINE__); + + printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max); + printf("\t\tClock resolution:\t\t%d nsec%s\n", res.tv_nsec, + res.tv_nsec > 1 ? "s" : ""); + + + + printf("\n\tTest #1: Time send/recv message, queue empty\n"); + printf("\t\t(%d iterations)\n", TEST1_LOOPS); + prio_out = 0; + send_total.tv_sec = 0; + send_total.tv_nsec = 0; + recv_total.tv_sec = 0; + recv_total.tv_nsec = 0; + for (i = 0; i < TEST1_LOOPS; i++) + do_send_recv(); + printf("\t\tSend msg:\t\t\t%d.%ds total time\n", + send_total.tv_sec, send_total.tv_nsec); + nsec = ((unsigned long long)send_total.tv_sec * 1000000000 + + send_total.tv_nsec) / TEST1_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tRecv msg:\t\t\t%d.%ds total time\n", + recv_total.tv_sec, recv_total.tv_nsec); + nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 + + recv_total.tv_nsec) / TEST1_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + + + for (cur_test = test2; cur_test->desc != NULL; cur_test++) { + printf(cur_test->desc); + printf("\t\t(%d iterations)\n", TEST2_LOOPS); + prio_out = 0; + send_total.tv_sec = 0; + send_total.tv_nsec = 0; + recv_total.tv_sec = 0; + recv_total.tv_nsec = 0; + printf("\t\tFilling queue..."); + fflush(stdout); + clock_gettime(clock, &start); + for (i = 0; i < result.mq_maxmsg - 1; i++) { + do_untimed_send(); + cur_test->func(&prio_out); + } + clock_gettime(clock, &end); + nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) * + 1000000000) + (end.tv_nsec - start.tv_nsec); + printf("done.\t\t%lld.%llds\n", nsec / 1000000000, + nsec % 1000000000); + printf("\t\tTesting..."); + fflush(stdout); + for (i = 0; i < TEST2_LOOPS; i++) { + do_send_recv(); + cur_test->func(&prio_out); + } + printf("done.\n"); + printf("\t\tSend msg:\t\t\t%d.%ds total time\n", + send_total.tv_sec, send_total.tv_nsec); + nsec = ((unsigned long long)send_total.tv_sec * 1000000000 + + send_total.tv_nsec) / TEST2_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tRecv msg:\t\t\t%d.%ds total time\n", + recv_total.tv_sec, recv_total.tv_nsec); + nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 + + recv_total.tv_nsec) / TEST2_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tDraining queue..."); + fflush(stdout); + clock_gettime(clock, &start); + drain_queue(); + clock_gettime(clock, &end); + nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) * + 1000000000) + (end.tv_nsec - start.tv_nsec); + printf("done.\t\t%lld.%llds\n", nsec / 1000000000, + nsec % 1000000000); + } + return 0; +} + +void increase_limits(void) +{ + cur_limits.rlim_cur = RLIM_INFINITY; + cur_limits.rlim_max = RLIM_INFINITY; + setr(RLIMIT_MSGQUEUE, &cur_limits); + while (try_set(max_msgs, cur_max_msgs += 10)) + ; + cur_max_msgs = get(max_msgs); + while (try_set(max_msgsize, cur_max_msgsize += 1024)) + ; + cur_max_msgsize = get(max_msgsize); + if (setpriority(PRIO_PROCESS, 0, -20) != 0) + shutdown(2, "setpriority()", __LINE__); + cur_nice = -20; +} + +int main(int argc, char *argv[]) +{ + struct mq_attr attr; + char *option, *next_option; + int i, cpu; + struct sigaction sa; + poptContext popt_context; + char rc; + void *retval; + + main_thread = pthread_self(); + num_cpus_to_pin = 0; + + if (sysconf(_SC_NPROCESSORS_ONLN) == -1) { + perror("sysconf(_SC_NPROCESSORS_ONLN)"); + exit(1); + } + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); + cpu_set = CPU_ALLOC(cpus_online); + if (cpu_set == NULL) { + perror("CPU_ALLOC()"); + exit(1); + } + cpu_set_size = CPU_ALLOC_SIZE(cpus_online); + CPU_ZERO_S(cpu_set_size, cpu_set); + + popt_context = poptGetContext(NULL, argc, (const char **)argv, + options, 0); + + while ((rc = poptGetNextOpt(popt_context)) > 0) { + switch (rc) { + case 'c': + continuous_mode = 1; + option = cpu_option_string; + do { + next_option = strchr(option, ','); + if (next_option) + *next_option = '\0'; + cpu = atoi(option); + if (cpu >= cpus_online) + fprintf(stderr, "CPU %d exceeds " + "cpus online, ignoring.\n", + cpu); + else + cpus_to_pin[num_cpus_to_pin++] = cpu; + if (next_option) + option = ++next_option; + } while (next_option && num_cpus_to_pin < MAX_CPUS); + /* Double check that they didn't give us the same CPU + * more than once */ + for (cpu = 0; cpu < num_cpus_to_pin; cpu++) { + if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size, + cpu_set)) { + fprintf(stderr, "Any given CPU may " + "only be given once.\n"); + exit(1); + } else + CPU_SET_S(cpus_to_pin[cpu], + cpu_set_size, cpu_set); + } + break; + case 'p': + /* + * Although we can create a msg queue with a + * non-absolute path name, unlink will fail. So, + * if the name doesn't start with a /, add one + * when we save it. + */ + option = queue_path; + if (*option != '/') { + queue_path = malloc(strlen(option) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, option); + free(option); + } + break; + } + } + + if (continuous_mode && num_cpus_to_pin == 0) { + fprintf(stderr, "Must pass at least one CPU to continuous " + "mode.\n"); + poptPrintUsage(popt_context, stderr, 0); + exit(1); + } else if (!continuous_mode) { + num_cpus_to_pin = 1; + cpus_to_pin[0] = cpus_online - 1; + } + + if (getuid() != 0) { + fprintf(stderr, "Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + exit(1); + } + + max_msgs = fopen(MAX_MSGS, "r+"); + max_msgsize = fopen(MAX_MSGSIZE, "r+"); + if (!max_msgs) + shutdown(2, "Failed to open msg_max", __LINE__); + if (!max_msgsize) + shutdown(2, "Failed to open msgsize_max", __LINE__); + + /* Load up the current system values for everything we can */ + getr(RLIMIT_MSGQUEUE, &saved_limits); + cur_limits = saved_limits; + saved_max_msgs = cur_max_msgs = get(max_msgs); + saved_max_msgsize = cur_max_msgsize = get(max_msgsize); + errno = 0; + cur_nice = getpriority(PRIO_PROCESS, 0); + if (errno) + shutdown(2, "getpriority()", __LINE__); + + /* Tell the user our initial state */ + printf("\nInitial system state:\n"); + printf("\tUsing queue path:\t\t\t%s\n", queue_path); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%d\n", saved_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%d\n", saved_limits.rlim_max); + printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize); + printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs); + printf("\tNice value:\t\t\t\t%d\n", cur_nice); + printf("\n"); + + increase_limits(); + + printf("Adjusted system state for testing:\n"); + if (cur_limits.rlim_cur == RLIM_INFINITY) { + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n"); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n"); + } else { + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%d\n", + cur_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%d\n", + cur_limits.rlim_max); + } + printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize); + printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs); + printf("\tNice value:\t\t\t\t%d\n", cur_nice); + printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ? + (continuous_mode_fake ? "fake mode" : "enabled") : + "disabled"); + printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]); + for (cpu = 1; cpu < num_cpus_to_pin; cpu++) + printf(",%d", cpus_to_pin[cpu]); + printf("\n"); + + sa.sa_sigaction = sig_action_SIGUSR1; + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGHUP); + sigaddset(&sa.sa_mask, SIGINT); + sigaddset(&sa.sa_mask, SIGQUIT); + sigaddset(&sa.sa_mask, SIGTERM); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGUSR1)", __LINE__); + sa.sa_sigaction = sig_action; + if (sigaction(SIGHUP, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGHUP)", __LINE__); + if (sigaction(SIGINT, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGINT)", __LINE__); + if (sigaction(SIGQUIT, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGQUIT)", __LINE__); + if (sigaction(SIGTERM, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGTERM)", __LINE__); + + if (!continuous_mode_fake) { + attr.mq_flags = O_NONBLOCK; + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = MSG_SIZE; + open_queue(&attr); + } + for (i = 0; i < num_cpus_to_pin; i++) { + pthread_attr_t thread_attr; + void *thread_func; + + if (continuous_mode_fake) + thread_func = &fake_cont_thread; + else if (continuous_mode) + thread_func = &cont_thread; + else + thread_func = &perf_test_thread; + + CPU_ZERO_S(cpu_set_size, cpu_set); + CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set); + pthread_attr_init(&thread_attr); + pthread_attr_setaffinity_np(&thread_attr, cpu_set_size, + cpu_set); + if (pthread_create(&cpu_threads[i], &thread_attr, thread_func, + NULL)) + shutdown(1, "pthread_create()", __LINE__); + pthread_attr_destroy(&thread_attr); + } + + if (!continuous_mode) { + pthread_join(cpu_threads[0], &retval); + shutdown((long)retval, "perf_test_thread()", __LINE__); + } else { + while (1) + sleep(1); + } + shutdown(0, "", 0); +} diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore new file mode 100644 index 000000000..00326629d --- /dev/null +++ b/tools/testing/selftests/net/.gitignore @@ -0,0 +1,3 @@ +socket +psock_fanout +psock_tpacket diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile new file mode 100644 index 000000000..750512ba2 --- /dev/null +++ b/tools/testing/selftests/net/Makefile @@ -0,0 +1,19 @@ +# Makefile for net selftests + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall -O2 -g + +CFLAGS += -I../../../../usr/include/ + +NET_PROGS = socket psock_fanout psock_tpacket + +all: $(NET_PROGS) +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +run_tests: all + @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]" + @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" + +clean: + $(RM) $(NET_PROGS) diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c new file mode 100644 index 000000000..57b9c2b7c --- /dev/null +++ b/tools/testing/selftests/net/psock_fanout.c @@ -0,0 +1,312 @@ +/* + * Copyright 2013 Google Inc. + * Author: Willem de Bruijn (willemb@google.com) + * + * A basic test of packet socket fanout behavior. + * + * Control: + * - create fanout fails as expected with illegal flag combinations + * - join fanout fails as expected with diverging types or flags + * + * Datapath: + * Open a pair of packet sockets and a pair of INET sockets, send a known + * number of packets across the two INET sockets and count the number of + * packets enqueued onto the two packet sockets. + * + * The test currently runs for + * - PACKET_FANOUT_HASH + * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER + * - PACKET_FANOUT_LB + * - PACKET_FANOUT_CPU + * - PACKET_FANOUT_ROLLOVER + * + * Todo: + * - functionality: PACKET_FANOUT_FLAG_DEFRAG + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define _GNU_SOURCE /* for sched_setaffinity */ + +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "psock_lib.h" + +#define RING_NUM_FRAMES 20 + +/* Open a socket in a given fanout mode. + * @return -1 if mode is bad, a valid socket otherwise */ +static int sock_fanout_open(uint16_t typeflags, int num_packets) +{ + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); + if (fd < 0) { + perror("socket packet"); + exit(1); + } + + /* fanout group ID is always 0: tests whether old groups are deleted */ + val = ((int) typeflags) << 16; + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (close(fd)) { + perror("close packet"); + exit(1); + } + return -1; + } + + pair_udp_setfilter(fd); + return fd; +} + +static char *sock_fanout_open_ring(int fd) +{ + struct tpacket_req req = { + .tp_block_size = getpagesize(), + .tp_frame_size = getpagesize(), + .tp_block_nr = RING_NUM_FRAMES, + .tp_frame_nr = RING_NUM_FRAMES, + }; + char *ring; + int val = TPACKET_V2; + + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val, + sizeof(val))) { + perror("packetsock ring setsockopt version"); + exit(1); + } + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, + sizeof(req))) { + perror("packetsock ring setsockopt"); + exit(1); + } + + ring = mmap(0, req.tp_block_size * req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (!ring) { + fprintf(stderr, "packetsock ring mmap\n"); + exit(1); + } + + return ring; +} + +static int sock_fanout_read_ring(int fd, void *ring) +{ + struct tpacket2_hdr *header = ring; + int count = 0; + + while (header->tp_status & TP_STATUS_USER && count < RING_NUM_FRAMES) { + count++; + header = ring + (count * getpagesize()); + } + + return count; +} + +static int sock_fanout_read(int fds[], char *rings[], const int expect[]) +{ + int ret[2]; + + ret[0] = sock_fanout_read_ring(fds[0], rings[0]); + ret[1] = sock_fanout_read_ring(fds[1], rings[1]); + + fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n", + ret[0], ret[1], expect[0], expect[1]); + + if ((!(ret[0] == expect[0] && ret[1] == expect[1])) && + (!(ret[0] == expect[1] && ret[1] == expect[0]))) { + fprintf(stderr, "ERROR: incorrect queue lengths\n"); + return 1; + } + + return 0; +} + +/* Test illegal mode + flag combination */ +static void test_control_single(void) +{ + fprintf(stderr, "test: control single socket\n"); + + if (sock_fanout_open(PACKET_FANOUT_ROLLOVER | + PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) { + fprintf(stderr, "ERROR: opened socket with dual rollover\n"); + exit(1); + } +} + +/* Test illegal group with different modes or flags */ +static void test_control_group(void) +{ + int fds[2]; + + fprintf(stderr, "test: control multiple sockets\n"); + + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed to open HASH socket\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong flag ro\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong mode\n"); + exit(1); + } + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + if (fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + if (close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + +static int test_datapath(uint16_t typeflags, int port_off, + const int expect1[], const int expect2[]) +{ + const int expect0[] = { 0, 0 }; + char *rings[2]; + int fds[2], fds_udp[2][2], ret; + + fprintf(stderr, "test: datapath 0x%hx\n", typeflags); + + fds[0] = sock_fanout_open(typeflags, 20); + fds[1] = sock_fanout_open(typeflags, 20); + if (fds[0] == -1 || fds[1] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + rings[0] = sock_fanout_open_ring(fds[0]); + rings[1] = sock_fanout_open_ring(fds[1]); + pair_udp_open(fds_udp[0], PORT_BASE); + pair_udp_open(fds_udp[1], PORT_BASE + port_off); + sock_fanout_read(fds, rings, expect0); + + /* Send data, but not enough to overflow a queue */ + pair_udp_send(fds_udp[0], 15); + pair_udp_send(fds_udp[1], 5); + ret = sock_fanout_read(fds, rings, expect1); + + /* Send more data, overflow the queue */ + pair_udp_send(fds_udp[0], 15); + /* TODO: ensure consistent order between expect1 and expect2 */ + ret |= sock_fanout_read(fds, rings, expect2); + + if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) || + munmap(rings[0], RING_NUM_FRAMES * getpagesize())) { + fprintf(stderr, "close rings\n"); + exit(1); + } + if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || + close(fds_udp[0][1]) || close(fds_udp[0][0]) || + close(fds[1]) || close(fds[0])) { + fprintf(stderr, "close datapath\n"); + exit(1); + } + + return ret; +} + +static int set_cpuaffinity(int cpuid) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpuid, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) { + if (errno != EINVAL) { + fprintf(stderr, "setaffinity %d\n", cpuid); + exit(1); + } + return 1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; + const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; + const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; + const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; + const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; + const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; + int port_off = 2, tries = 5, ret; + + test_control_single(); + test_control_group(); + + /* find a set of ports that do not collide onto the same socket */ + ret = test_datapath(PACKET_FANOUT_HASH, port_off, + expect_hash[0], expect_hash[1]); + while (ret && tries--) { + fprintf(stderr, "info: trying alternate ports (%d)\n", tries); + ret = test_datapath(PACKET_FANOUT_HASH, ++port_off, + expect_hash[0], expect_hash[1]); + } + + ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, + port_off, expect_hash_rb[0], expect_hash_rb[1]); + ret |= test_datapath(PACKET_FANOUT_LB, + port_off, expect_lb[0], expect_lb[1]); + ret |= test_datapath(PACKET_FANOUT_ROLLOVER, + port_off, expect_rb[0], expect_rb[1]); + + set_cpuaffinity(0); + ret |= test_datapath(PACKET_FANOUT_CPU, port_off, + expect_cpu0[0], expect_cpu0[1]); + if (!set_cpuaffinity(1)) + /* TODO: test that choice alternates with previous */ + ret |= test_datapath(PACKET_FANOUT_CPU, port_off, + expect_cpu1[0], expect_cpu1[1]); + + if (ret) + return 1; + + printf("OK. All tests passed\n"); + return 0; +} diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h new file mode 100644 index 000000000..37da54ac8 --- /dev/null +++ b/tools/testing/selftests/net/psock_lib.h @@ -0,0 +1,127 @@ +/* + * Copyright 2013 Google Inc. + * Author: Willem de Bruijn <willemb@google.com> + * Daniel Borkmann <dborkman@redhat.com> + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef PSOCK_LIB_H +#define PSOCK_LIB_H + +#include <sys/types.h> +#include <sys/socket.h> +#include <string.h> +#include <arpa/inet.h> +#include <unistd.h> + +#define DATA_LEN 100 +#define DATA_CHAR 'a' + +#define PORT_BASE 8000 + +#ifndef __maybe_unused +# define __maybe_unused __attribute__ ((__unused__)) +#endif + +static __maybe_unused void pair_udp_setfilter(int fd) +{ + struct sock_filter bpf_filter[] = { + { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */ + { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ + { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ + { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */ + { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x06, 0, 0, 0x00000060 }, /* RET match */ + { 0x06, 0, 0, 0x00000000 }, /* RET no match */ + }; + struct sock_fprog bpf_prog; + + bpf_prog.filter = bpf_filter; + bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, + sizeof(bpf_prog))) { + perror("setsockopt SO_ATTACH_FILTER"); + exit(1); + } +} + +static __maybe_unused void pair_udp_open(int fds[], uint16_t port) +{ + struct sockaddr_in saddr, daddr; + + fds[0] = socket(PF_INET, SOCK_DGRAM, 0); + fds[1] = socket(PF_INET, SOCK_DGRAM, 0); + if (fds[0] == -1 || fds[1] == -1) { + fprintf(stderr, "ERROR: socket dgram\n"); + exit(1); + } + + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_port = htons(port); + saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sin_family = AF_INET; + daddr.sin_port = htons(port + 1); + daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + /* must bind both to get consistent hash result */ + if (bind(fds[1], (void *) &daddr, sizeof(daddr))) { + perror("bind"); + exit(1); + } + if (bind(fds[0], (void *) &saddr, sizeof(saddr))) { + perror("bind"); + exit(1); + } + if (connect(fds[0], (void *) &daddr, sizeof(daddr))) { + perror("connect"); + exit(1); + } +} + +static __maybe_unused void pair_udp_send(int fds[], int num) +{ + char buf[DATA_LEN], rbuf[DATA_LEN]; + + memset(buf, DATA_CHAR, sizeof(buf)); + while (num--) { + /* Should really handle EINTR and EAGAIN */ + if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) { + fprintf(stderr, "ERROR: send failed left=%d\n", num); + exit(1); + } + if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) { + fprintf(stderr, "ERROR: recv failed left=%d\n", num); + exit(1); + } + if (memcmp(buf, rbuf, sizeof(buf))) { + fprintf(stderr, "ERROR: data failed left=%d\n", num); + exit(1); + } + } +} + +static __maybe_unused void pair_udp_close(int fds[]) +{ + close(fds[0]); + close(fds[1]); +} + +#endif /* PSOCK_LIB_H */ diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c new file mode 100644 index 000000000..c41b58640 --- /dev/null +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -0,0 +1,824 @@ +/* + * Copyright 2013 Red Hat, Inc. + * Author: Daniel Borkmann <dborkman@redhat.com> + * + * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior. + * + * Control: + * Test the setup of the TPACKET socket with different patterns that are + * known to fail (TODO) resp. succeed (OK). + * + * Datapath: + * Open a pair of packet sockets and send resp. receive an a priori known + * packet pattern accross the sockets and check if it was received resp. + * sent correctly. Fanout in combination with RX_RING is currently not + * tested here. + * + * The test currently runs for + * - TPACKET_V1: RX_RING, TX_RING + * - TPACKET_V2: RX_RING, TX_RING + * - TPACKET_V3: RX_RING + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <linux/if_packet.h> +#include <linux/filter.h> +#include <ctype.h> +#include <fcntl.h> +#include <unistd.h> +#include <bits/wordsize.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <arpa/inet.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> +#include <net/if.h> +#include <inttypes.h> +#include <poll.h> + +#include "psock_lib.h" + +#ifndef bug_on +# define bug_on(cond) assert(!(cond)) +#endif + +#ifndef __aligned_tpacket +# define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT))) +#endif + +#ifndef __align_tpacket +# define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x)))) +#endif + +#define BLOCK_STATUS(x) ((x)->h1.block_status) +#define BLOCK_NUM_PKTS(x) ((x)->h1.num_pkts) +#define BLOCK_O2FP(x) ((x)->h1.offset_to_first_pkt) +#define BLOCK_LEN(x) ((x)->h1.blk_len) +#define BLOCK_SNUM(x) ((x)->h1.seq_num) +#define BLOCK_O2PRIV(x) ((x)->offset_to_priv) +#define BLOCK_PRIV(x) ((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x))) +#define BLOCK_HDR_LEN (ALIGN_8(sizeof(struct block_desc))) +#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1)) +#define BLOCK_PLUS_PRIV(sz_pri) (BLOCK_HDR_LEN + ALIGN_8((sz_pri))) + +#define NUM_PACKETS 100 + +struct ring { + struct iovec *rd; + uint8_t *mm_space; + size_t mm_len, rd_len; + struct sockaddr_ll ll; + void (*walk)(int sock, struct ring *ring); + int type, rd_num, flen, version; + union { + struct tpacket_req req; + struct tpacket_req3 req3; + }; +}; + +struct block_desc { + uint32_t version; + uint32_t offset_to_priv; + struct tpacket_hdr_v1 h1; +}; + +union frame_map { + struct { + struct tpacket_hdr tp_h __aligned_tpacket; + struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr)); + } *v1; + struct { + struct tpacket2_hdr tp_h __aligned_tpacket; + struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr)); + } *v2; + void *raw; +}; + +static unsigned int total_packets, total_bytes; + +static int pfsocket(int ver) +{ + int ret, sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + if (sock == -1) { + perror("socket"); + exit(1); + } + + ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver)); + if (ret == -1) { + perror("setsockopt"); + exit(1); + } + + return sock; +} + +static void status_bar_update(void) +{ + if (total_packets % 10 == 0) { + fprintf(stderr, "."); + fflush(stderr); + } +} + +static void test_payload(void *pay, size_t len) +{ + struct ethhdr *eth = pay; + + if (len < sizeof(struct ethhdr)) { + fprintf(stderr, "test_payload: packet too " + "small: %zu bytes!\n", len); + exit(1); + } + + if (eth->h_proto != htons(ETH_P_IP)) { + fprintf(stderr, "test_payload: wrong ethernet " + "type: 0x%x!\n", ntohs(eth->h_proto)); + exit(1); + } +} + +static void create_payload(void *pay, size_t *len) +{ + int i; + struct ethhdr *eth = pay; + struct iphdr *ip = pay + sizeof(*eth); + + /* Lets create some broken crap, that still passes + * our BPF filter. + */ + + *len = DATA_LEN + 42; + + memset(pay, 0xff, ETH_ALEN * 2); + eth->h_proto = htons(ETH_P_IP); + + for (i = 0; i < sizeof(*ip); ++i) + ((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand(); + + ip->ihl = 5; + ip->version = 4; + ip->protocol = 0x11; + ip->frag_off = 0; + ip->ttl = 64; + ip->tot_len = htons((uint16_t) *len - sizeof(*eth)); + + ip->saddr = htonl(INADDR_LOOPBACK); + ip->daddr = htonl(INADDR_LOOPBACK); + + memset(pay + sizeof(*eth) + sizeof(*ip), + DATA_CHAR, DATA_LEN); +} + +static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr) +{ + return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER); +} + +static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr) +{ + return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER); +} + +static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static inline int __v1_v2_rx_kernel_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + return __v1_rx_kernel_ready(base); + case TPACKET_V2: + return __v2_rx_kernel_ready(base); + default: + bug_on(1); + return 0; + } +} + +static inline void __v1_v2_rx_user_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + __v1_rx_user_ready(base); + break; + case TPACKET_V2: + __v2_rx_user_ready(base); + break; + } +} + +static void walk_v1_v2_rx(int sock, struct ring *ring) +{ + struct pollfd pfd; + int udp_sock[2]; + union frame_map ppd; + unsigned int frame_num = 0; + + bug_on(ring->type != PACKET_RX_RING); + + pair_udp_open(udp_sock, PORT_BASE); + pair_udp_setfilter(sock); + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = sock; + pfd.events = POLLIN | POLLERR; + pfd.revents = 0; + + pair_udp_send(udp_sock, NUM_PACKETS); + + while (total_packets < NUM_PACKETS * 2) { + while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base, + ring->version)) { + ppd.raw = ring->rd[frame_num].iov_base; + + switch (ring->version) { + case TPACKET_V1: + test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac, + ppd.v1->tp_h.tp_snaplen); + total_bytes += ppd.v1->tp_h.tp_snaplen; + break; + + case TPACKET_V2: + test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac, + ppd.v2->tp_h.tp_snaplen); + total_bytes += ppd.v2->tp_h.tp_snaplen; + break; + } + + status_bar_update(); + total_packets++; + + __v1_v2_rx_user_ready(ppd.raw, ring->version); + + frame_num = (frame_num + 1) % ring->rd_num; + } + + poll(&pfd, 1, 1); + } + + pair_udp_close(udp_sock); + + if (total_packets != 2 * NUM_PACKETS) { + fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n", + ring->version, total_packets, NUM_PACKETS); + exit(1); + } + + fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1); +} + +static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr) +{ + return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)); +} + +static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_SEND_REQUEST; + __sync_synchronize(); +} + +static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr) +{ + return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)); +} + +static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_SEND_REQUEST; + __sync_synchronize(); +} + +static inline int __v1_v2_tx_kernel_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + return __v1_tx_kernel_ready(base); + case TPACKET_V2: + return __v2_tx_kernel_ready(base); + default: + bug_on(1); + return 0; + } +} + +static inline void __v1_v2_tx_user_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + __v1_tx_user_ready(base); + break; + case TPACKET_V2: + __v2_tx_user_ready(base); + break; + } +} + +static void __v1_v2_set_packet_loss_discard(int sock) +{ + int ret, discard = 1; + + ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard, + sizeof(discard)); + if (ret == -1) { + perror("setsockopt"); + exit(1); + } +} + +static void walk_v1_v2_tx(int sock, struct ring *ring) +{ + struct pollfd pfd; + int rcv_sock, ret; + size_t packet_len; + union frame_map ppd; + char packet[1024]; + unsigned int frame_num = 0, got = 0; + struct sockaddr_ll ll = { + .sll_family = PF_PACKET, + .sll_halen = ETH_ALEN, + }; + + bug_on(ring->type != PACKET_TX_RING); + bug_on(ring->rd_num < NUM_PACKETS); + + rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + if (rcv_sock == -1) { + perror("socket"); + exit(1); + } + + pair_udp_setfilter(rcv_sock); + + ll.sll_ifindex = if_nametoindex("lo"); + ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll)); + if (ret == -1) { + perror("bind"); + exit(1); + } + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = sock; + pfd.events = POLLOUT | POLLERR; + pfd.revents = 0; + + total_packets = NUM_PACKETS; + create_payload(packet, &packet_len); + + while (total_packets > 0) { + while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base, + ring->version) && + total_packets > 0) { + ppd.raw = ring->rd[frame_num].iov_base; + + switch (ring->version) { + case TPACKET_V1: + ppd.v1->tp_h.tp_snaplen = packet_len; + ppd.v1->tp_h.tp_len = packet_len; + + memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN - + sizeof(struct sockaddr_ll), packet, + packet_len); + total_bytes += ppd.v1->tp_h.tp_snaplen; + break; + + case TPACKET_V2: + ppd.v2->tp_h.tp_snaplen = packet_len; + ppd.v2->tp_h.tp_len = packet_len; + + memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN - + sizeof(struct sockaddr_ll), packet, + packet_len); + total_bytes += ppd.v2->tp_h.tp_snaplen; + break; + } + + status_bar_update(); + total_packets--; + + __v1_v2_tx_user_ready(ppd.raw, ring->version); + + frame_num = (frame_num + 1) % ring->rd_num; + } + + poll(&pfd, 1, 1); + } + + bug_on(total_packets != 0); + + ret = sendto(sock, NULL, 0, 0, NULL, 0); + if (ret == -1) { + perror("sendto"); + exit(1); + } + + while ((ret = recvfrom(rcv_sock, packet, sizeof(packet), + 0, NULL, NULL)) > 0 && + total_packets < NUM_PACKETS) { + got += ret; + test_payload(packet, ret); + + status_bar_update(); + total_packets++; + } + + close(rcv_sock); + + if (total_packets != NUM_PACKETS) { + fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n", + ring->version, total_packets, NUM_PACKETS); + exit(1); + } + + fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got); +} + +static void walk_v1_v2(int sock, struct ring *ring) +{ + if (ring->type == PACKET_RX_RING) + walk_v1_v2_rx(sock, ring); + else + walk_v1_v2_tx(sock, ring); +} + +static uint64_t __v3_prev_block_seq_num = 0; + +void __v3_test_block_seq_num(struct block_desc *pbd) +{ + if (__v3_prev_block_seq_num + 1 != BLOCK_SNUM(pbd)) { + fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected " + "seq:%"PRIu64" != actual seq:%"PRIu64"\n", + __v3_prev_block_seq_num, __v3_prev_block_seq_num + 1, + (uint64_t) BLOCK_SNUM(pbd)); + exit(1); + } + + __v3_prev_block_seq_num = BLOCK_SNUM(pbd); +} + +static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num) +{ + if (BLOCK_NUM_PKTS(pbd)) { + if (bytes != BLOCK_LEN(pbd)) { + fprintf(stderr, "\nblock:%u with %upackets, expected " + "len:%u != actual len:%u\n", block_num, + BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd)); + exit(1); + } + } else { + if (BLOCK_LEN(pbd) != BLOCK_PLUS_PRIV(13)) { + fprintf(stderr, "\nblock:%u, expected len:%lu != " + "actual len:%u\n", block_num, BLOCK_HDR_LEN, + BLOCK_LEN(pbd)); + exit(1); + } + } +} + +static void __v3_test_block_header(struct block_desc *pbd, const int block_num) +{ + uint32_t block_status = BLOCK_STATUS(pbd); + + if ((block_status & TP_STATUS_USER) == 0) { + fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num); + exit(1); + } + + __v3_test_block_seq_num(pbd); +} + +static void __v3_walk_block(struct block_desc *pbd, const int block_num) +{ + int num_pkts = BLOCK_NUM_PKTS(pbd), i; + unsigned long bytes = 0; + unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(13); + struct tpacket3_hdr *ppd; + + __v3_test_block_header(pbd, block_num); + + ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd)); + for (i = 0; i < num_pkts; ++i) { + bytes += ppd->tp_snaplen; + + if (ppd->tp_next_offset) + bytes_with_padding += ppd->tp_next_offset; + else + bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac); + + test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen); + + status_bar_update(); + total_packets++; + + ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset); + __sync_synchronize(); + } + + __v3_test_block_len(pbd, bytes_with_padding, block_num); + total_bytes += bytes; +} + +void __v3_flush_block(struct block_desc *pbd) +{ + BLOCK_STATUS(pbd) = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static void walk_v3_rx(int sock, struct ring *ring) +{ + unsigned int block_num = 0; + struct pollfd pfd; + struct block_desc *pbd; + int udp_sock[2]; + + bug_on(ring->type != PACKET_RX_RING); + + pair_udp_open(udp_sock, PORT_BASE); + pair_udp_setfilter(sock); + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = sock; + pfd.events = POLLIN | POLLERR; + pfd.revents = 0; + + pair_udp_send(udp_sock, NUM_PACKETS); + + while (total_packets < NUM_PACKETS * 2) { + pbd = (struct block_desc *) ring->rd[block_num].iov_base; + + while ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0) + poll(&pfd, 1, 1); + + __v3_walk_block(pbd, block_num); + __v3_flush_block(pbd); + + block_num = (block_num + 1) % ring->rd_num; + } + + pair_udp_close(udp_sock); + + if (total_packets != 2 * NUM_PACKETS) { + fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n", + total_packets, NUM_PACKETS); + exit(1); + } + + fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1); +} + +static void walk_v3(int sock, struct ring *ring) +{ + if (ring->type == PACKET_RX_RING) + walk_v3_rx(sock, ring); + else + bug_on(1); +} + +static void __v1_v2_fill(struct ring *ring, unsigned int blocks) +{ + ring->req.tp_block_size = getpagesize() << 2; + ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7; + ring->req.tp_block_nr = blocks; + + ring->req.tp_frame_nr = ring->req.tp_block_size / + ring->req.tp_frame_size * + ring->req.tp_block_nr; + + ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr; + ring->walk = walk_v1_v2; + ring->rd_num = ring->req.tp_frame_nr; + ring->flen = ring->req.tp_frame_size; +} + +static void __v3_fill(struct ring *ring, unsigned int blocks) +{ + ring->req3.tp_retire_blk_tov = 64; + ring->req3.tp_sizeof_priv = 13; + ring->req3.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH; + + ring->req3.tp_block_size = getpagesize() << 2; + ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7; + ring->req3.tp_block_nr = blocks; + + ring->req3.tp_frame_nr = ring->req3.tp_block_size / + ring->req3.tp_frame_size * + ring->req3.tp_block_nr; + + ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr; + ring->walk = walk_v3; + ring->rd_num = ring->req3.tp_block_nr; + ring->flen = ring->req3.tp_block_size; +} + +static void setup_ring(int sock, struct ring *ring, int version, int type) +{ + int ret = 0; + unsigned int blocks = 256; + + ring->type = type; + ring->version = version; + + switch (version) { + case TPACKET_V1: + case TPACKET_V2: + if (type == PACKET_TX_RING) + __v1_v2_set_packet_loss_discard(sock); + __v1_v2_fill(ring, blocks); + ret = setsockopt(sock, SOL_PACKET, type, &ring->req, + sizeof(ring->req)); + break; + + case TPACKET_V3: + __v3_fill(ring, blocks); + ret = setsockopt(sock, SOL_PACKET, type, &ring->req3, + sizeof(ring->req3)); + break; + } + + if (ret == -1) { + perror("setsockopt"); + exit(1); + } + + ring->rd_len = ring->rd_num * sizeof(*ring->rd); + ring->rd = malloc(ring->rd_len); + if (ring->rd == NULL) { + perror("malloc"); + exit(1); + } + + total_packets = 0; + total_bytes = 0; +} + +static void mmap_ring(int sock, struct ring *ring) +{ + int i; + + ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0); + if (ring->mm_space == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + memset(ring->rd, 0, ring->rd_len); + for (i = 0; i < ring->rd_num; ++i) { + ring->rd[i].iov_base = ring->mm_space + (i * ring->flen); + ring->rd[i].iov_len = ring->flen; + } +} + +static void bind_ring(int sock, struct ring *ring) +{ + int ret; + + ring->ll.sll_family = PF_PACKET; + ring->ll.sll_protocol = htons(ETH_P_ALL); + ring->ll.sll_ifindex = if_nametoindex("lo"); + ring->ll.sll_hatype = 0; + ring->ll.sll_pkttype = 0; + ring->ll.sll_halen = 0; + + ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll)); + if (ret == -1) { + perror("bind"); + exit(1); + } +} + +static void walk_ring(int sock, struct ring *ring) +{ + ring->walk(sock, ring); +} + +static void unmap_ring(int sock, struct ring *ring) +{ + munmap(ring->mm_space, ring->mm_len); + free(ring->rd); +} + +static int test_kernel_bit_width(void) +{ + char in[512], *ptr; + int num = 0, fd; + ssize_t ret; + + fd = open("/proc/kallsyms", O_RDONLY); + if (fd == -1) { + perror("open"); + exit(1); + } + + ret = read(fd, in, sizeof(in)); + if (ret <= 0) { + perror("read"); + exit(1); + } + + close(fd); + + ptr = in; + while(!isspace(*ptr)) { + num++; + ptr++; + } + + return num * 4; +} + +static int test_user_bit_width(void) +{ + return __WORDSIZE; +} + +static const char *tpacket_str[] = { + [TPACKET_V1] = "TPACKET_V1", + [TPACKET_V2] = "TPACKET_V2", + [TPACKET_V3] = "TPACKET_V3", +}; + +static const char *type_str[] = { + [PACKET_RX_RING] = "PACKET_RX_RING", + [PACKET_TX_RING] = "PACKET_TX_RING", +}; + +static int test_tpacket(int version, int type) +{ + int sock; + struct ring ring; + + fprintf(stderr, "test: %s with %s ", tpacket_str[version], + type_str[type]); + fflush(stderr); + + if (version == TPACKET_V1 && + test_kernel_bit_width() != test_user_bit_width()) { + fprintf(stderr, "test: skip %s %s since user and kernel " + "space have different bit width\n", + tpacket_str[version], type_str[type]); + return 0; + } + + sock = pfsocket(version); + memset(&ring, 0, sizeof(ring)); + setup_ring(sock, &ring, version, type); + mmap_ring(sock, &ring); + bind_ring(sock, &ring); + walk_ring(sock, &ring); + unmap_ring(sock, &ring); + close(sock); + + fprintf(stderr, "\n"); + return 0; +} + +int main(void) +{ + int ret = 0; + + ret |= test_tpacket(TPACKET_V1, PACKET_RX_RING); + ret |= test_tpacket(TPACKET_V1, PACKET_TX_RING); + + ret |= test_tpacket(TPACKET_V2, PACKET_RX_RING); + ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING); + + ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING); + + if (ret) + return 1; + + printf("OK. All tests passed\n"); + return 0; +} diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests new file mode 100644 index 000000000..5246e782d --- /dev/null +++ b/tools/testing/selftests/net/run_afpackettests @@ -0,0 +1,26 @@ +#!/bin/sh + +if [ $(id -u) != 0 ]; then + echo $msg must be run as root >&2 + exit 0 +fi + +echo "--------------------" +echo "running psock_fanout test" +echo "--------------------" +./psock_fanout +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running psock_tpacket test" +echo "--------------------" +./psock_tpacket +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests new file mode 100644 index 000000000..c09a682df --- /dev/null +++ b/tools/testing/selftests/net/run_netsocktests @@ -0,0 +1,12 @@ +#!/bin/bash + +echo "--------------------" +echo "running socket test" +echo "--------------------" +./socket +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c new file mode 100644 index 000000000..0f227f2f9 --- /dev/null +++ b/tools/testing/selftests/net/socket.c @@ -0,0 +1,92 @@ +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> + +struct socket_testcase { + int domain; + int type; + int protocol; + + /* 0 = valid file descriptor + * -foo = error foo + */ + int expect; + + /* If non-zero, accept EAFNOSUPPORT to handle the case + * of the protocol not being configured into the kernel. + */ + int nosupport_ok; +}; + +static struct socket_testcase tests[] = { + { AF_MAX, 0, 0, -EAFNOSUPPORT, 0 }, + { AF_INET, SOCK_STREAM, IPPROTO_TCP, 0, 1 }, + { AF_INET, SOCK_DGRAM, IPPROTO_TCP, -EPROTONOSUPPORT, 1 }, + { AF_INET, SOCK_DGRAM, IPPROTO_UDP, 0, 1 }, + { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 }, +}; + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define ERR_STRING_SZ 64 + +static int run_tests(void) +{ + char err_string1[ERR_STRING_SZ]; + char err_string2[ERR_STRING_SZ]; + int i, err; + + err = 0; + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct socket_testcase *s = &tests[i]; + int fd; + + fd = socket(s->domain, s->type, s->protocol); + if (fd < 0) { + if (s->nosupport_ok && + errno == EAFNOSUPPORT) + continue; + + if (s->expect < 0 && + errno == -s->expect) + continue; + + strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + strerror_r(errno, err_string2, ERR_STRING_SZ); + + fprintf(stderr, "socket(%d, %d, %d) expected " + "err (%s) got (%s)\n", + s->domain, s->type, s->protocol, + err_string1, err_string2); + + err = -1; + break; + } else { + close(fd); + + if (s->expect < 0) { + strerror_r(errno, err_string1, ERR_STRING_SZ); + + fprintf(stderr, "socket(%d, %d, %d) expected " + "success got err (%s)\n", + s->domain, s->type, s->protocol, + err_string1); + + err = -1; + break; + } + } + } + + return err; +} + +int main(void) +{ + int err = run_tests(); + + return err; +} diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile new file mode 100644 index 000000000..47ae2d385 --- /dev/null +++ b/tools/testing/selftests/ptrace/Makefile @@ -0,0 +1,10 @@ +CFLAGS += -iquote../../../../include/uapi -Wall +peeksiginfo: peeksiginfo.c + +all: peeksiginfo + +clean: + rm -f peeksiginfo + +run_tests: all + @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c new file mode 100644 index 000000000..d46558b1f --- /dev/null +++ b/tools/testing/selftests/ptrace/peeksiginfo.c @@ -0,0 +1,214 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <linux/types.h> +#include <sys/wait.h> +#include <sys/syscall.h> +#include <sys/user.h> +#include <sys/mman.h> + +#include "linux/ptrace.h" + +static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo) +{ + return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo); +} + +static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid, + int sig, siginfo_t *uinfo) +{ + return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo); +} + +static int sys_ptrace(int request, pid_t pid, void *addr, void *data) +{ + return syscall(SYS_ptrace, request, pid, addr, data); +} + +#define SIGNR 10 +#define TEST_SICODE_PRIV -1 +#define TEST_SICODE_SHARE -2 + +#define err(fmt, ...) \ + fprintf(stderr, \ + "Error (%s:%d): " fmt, \ + __FILE__, __LINE__, ##__VA_ARGS__) + +static int check_error_paths(pid_t child) +{ + struct ptrace_peeksiginfo_args arg; + int ret, exit_code = -1; + void *addr_rw, *addr_ro; + + /* + * Allocate two contiguous pages. The first one is for read-write, + * another is for read-only. + */ + addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr_rw == MAP_FAILED) { + err("mmap() failed: %m\n"); + return 1; + } + + addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (addr_ro == MAP_FAILED) { + err("mmap() failed: %m\n"); + goto out; + } + + arg.nr = SIGNR; + arg.off = 0; + + /* Unsupported flags */ + arg.flags = ~0; + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw); + if (ret != -1 || errno != EINVAL) { + err("sys_ptrace() returns %d (expected -1)," + " errno %d (expected %d): %m\n", + ret, errno, EINVAL); + goto out; + } + arg.flags = 0; + + /* A part of the buffer is read-only */ + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, + addr_ro - sizeof(siginfo_t) * 2); + if (ret != 2) { + err("sys_ptrace() returns %d (expected 2): %m\n", ret); + goto out; + } + + /* Read-only buffer */ + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro); + if (ret != -1 && errno != EFAULT) { + err("sys_ptrace() returns %d (expected -1)," + " errno %d (expected %d): %m\n", + ret, errno, EFAULT); + goto out; + } + + exit_code = 0; +out: + munmap(addr_rw, 2 * PAGE_SIZE); + return exit_code; +} + +int check_direct_path(pid_t child, int shared, int nr) +{ + struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0}; + int i, j, ret, exit_code = -1; + siginfo_t siginfo[SIGNR]; + int si_code; + + if (shared == 1) { + arg.flags = PTRACE_PEEKSIGINFO_SHARED; + si_code = TEST_SICODE_SHARE; + } else { + arg.flags = 0; + si_code = TEST_SICODE_PRIV; + } + + for (i = 0; i < SIGNR; ) { + arg.off = i; + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo); + if (ret == -1) { + err("ptrace() failed: %m\n"); + goto out; + } + + if (ret == 0) + break; + + for (j = 0; j < ret; j++, i++) { + if (siginfo[j].si_code == si_code && + siginfo[j].si_int == i) + continue; + + err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n", + shared, i, siginfo[j].si_code, siginfo[j].si_int); + goto out; + } + } + + if (i != SIGNR) { + err("Only %d signals were read\n", i); + goto out; + } + + exit_code = 0; +out: + return exit_code; +} + +int main(int argc, char *argv[]) +{ + siginfo_t siginfo[SIGNR]; + int i, exit_code = 1; + sigset_t blockmask; + pid_t child; + + sigemptyset(&blockmask); + sigaddset(&blockmask, SIGRTMIN); + sigprocmask(SIG_BLOCK, &blockmask, NULL); + + child = fork(); + if (child == -1) { + err("fork() failed: %m"); + return 1; + } else if (child == 0) { + pid_t ppid = getppid(); + while (1) { + if (ppid != getppid()) + break; + sleep(1); + } + return 1; + } + + /* Send signals in process-wide and per-thread queues */ + for (i = 0; i < SIGNR; i++) { + siginfo->si_code = TEST_SICODE_SHARE; + siginfo->si_int = i; + sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo); + + siginfo->si_code = TEST_SICODE_PRIV; + siginfo->si_int = i; + sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo); + } + + if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1) + return 1; + + waitpid(child, NULL, 0); + + /* Dump signals one by one*/ + if (check_direct_path(child, 0, 1)) + goto out; + /* Dump all signals for one call */ + if (check_direct_path(child, 0, SIGNR)) + goto out; + + /* + * Dump signal from the process-wide queue. + * The number of signals is not multible to the buffer size + */ + if (check_direct_path(child, 1, 3)) + goto out; + + if (check_error_paths(child)) + goto out; + + printf("PASS\n"); + exit_code = 0; +out: + if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1) + return 1; + + waitpid(child, NULL, 0); + + return exit_code; +} diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile new file mode 100644 index 000000000..436d2e818 --- /dev/null +++ b/tools/testing/selftests/vm/Makefile @@ -0,0 +1,14 @@ +# Makefile for vm selftests + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall + +all: hugepage-mmap hugepage-shm map_hugetlb thuge-gen +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +run_tests: all + @/bin/sh ./run_vmtests || echo "vmtests: [FAIL]" + +clean: + $(RM) hugepage-mmap hugepage-shm map_hugetlb diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c new file mode 100644 index 000000000..a10f310d2 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-mmap.c @@ -0,0 +1,92 @@ +/* + * hugepage-mmap: + * + * Example of using huge page memory in a user application using the mmap + * system call. Before running this application, make sure that the + * administrator has mounted the hugetlbfs filesystem (on some directory + * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this + * example, the app is requesting memory of size 256MB that is backed by + * huge pages. + * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> + +#define FILE_NAME "huge/hugepagefile" +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_SHARED | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_SHARED) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int fd, ret; + + fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + perror("Open failed"); + exit(1); + } + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + unlink(FILE_NAME); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + close(fd); + unlink(FILE_NAME); + + return ret; +} diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c new file mode 100644 index 000000000..0d0ef4fc0 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-shm.c @@ -0,0 +1,100 @@ +/* + * hugepage-shm: + * + * Example of using huge page memory in a user application using Sys V shared + * memory system calls. In this example the app is requesting 256MB of + * memory that is backed by huge pages. The application uses the flag + * SHM_HUGETLB in the shmget system call to inform the kernel that it is + * requesting huge pages. + * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + * + * Note: The default shared memory limit is quite low on many kernels, + * you may need to increase it via: + * + * echo 268435456 > /proc/sys/kernel/shmmax + * + * This will increase the maximum size per shared memory segment to 256MB. + * The other limit that you will hit eventually is shmall which is the + * total amount of shared memory in pages. To set it to 16GB on a system + * with a 4kB pagesize do: + * + * echo 4194304 > /proc/sys/kernel/shmall + */ + +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/mman.h> + +#ifndef SHM_HUGETLB +#define SHM_HUGETLB 04000 +#endif + +#define LENGTH (256UL*1024*1024) + +#define dprintf(x) printf(x) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define SHMAT_FLAGS (SHM_RND) +#else +#define ADDR (void *)(0x0UL) +#define SHMAT_FLAGS (0) +#endif + +int main(void) +{ + int shmid; + unsigned long i; + char *shmaddr; + + shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (shmid < 0) { + perror("shmget"); + exit(1); + } + printf("shmid: 0x%x\n", shmid); + + shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); + if (shmaddr == (char *)-1) { + perror("Shared memory attach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(2); + } + printf("shmaddr: %p\n", shmaddr); + + dprintf("Starting the writes:\n"); + for (i = 0; i < LENGTH; i++) { + shmaddr[i] = (char)(i); + if (!(i % (1024 * 1024))) + dprintf("."); + } + dprintf("\n"); + + dprintf("Starting the Check..."); + for (i = 0; i < LENGTH; i++) + if (shmaddr[i] != (char)i) { + printf("\nIndex %lu mismatched\n", i); + exit(3); + } + dprintf("Done.\n"); + + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + + shmctl(shmid, IPC_RMID, NULL); + + return 0; +} diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c new file mode 100644 index 000000000..ac56639dd --- /dev/null +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -0,0 +1,79 @@ +/* + * Example of using hugepage memory in a user application using the mmap + * system call with MAP_HUGETLB flag. Before running this program make + * sure the administrator has allocated enough default sized huge pages + * to cover the 256 MB allocation. + * + * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. + * That means the addresses starting with 0x800000... will need to be + * specified. Specifying a fixed address is not required on ppc64, i386 + * or x86_64. + */ +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> + +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int ret; + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + + return ret; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests new file mode 100644 index 000000000..4c53cae6c --- /dev/null +++ b/tools/testing/selftests/vm/run_vmtests @@ -0,0 +1,77 @@ +#!/bin/bash +#please run as root + +#we need 256M, below is the size in kB +needmem=262144 +mnt=./huge + +#get pagesize and freepages from /proc/meminfo +while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + if [ "$name" = "Hugepagesize:" ]; then + pgsize=$size + fi +done < /proc/meminfo + +#set proper nr_hugepages +if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then + nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + needpgs=`expr $needmem / $pgsize` + if [ $freepgs -lt $needpgs ]; then + lackpgs=$(( $needpgs - $freepgs )) + echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages + if [ $? -ne 0 ]; then + echo "Please run this test as root" + exit 1 + fi + fi +else + echo "no hugetlbfs support in kernel?" + exit 1 +fi + +mkdir $mnt +mount -t hugetlbfs none $mnt + +echo "--------------------" +echo "running hugepage-mmap" +echo "--------------------" +./hugepage-mmap +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +shmmax=`cat /proc/sys/kernel/shmmax` +shmall=`cat /proc/sys/kernel/shmall` +echo 268435456 > /proc/sys/kernel/shmmax +echo 4194304 > /proc/sys/kernel/shmall +echo "--------------------" +echo "running hugepage-shm" +echo "--------------------" +./hugepage-shm +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi +echo $shmmax > /proc/sys/kernel/shmmax +echo $shmall > /proc/sys/kernel/shmall + +echo "--------------------" +echo "running map_hugetlb" +echo "--------------------" +./map_hugetlb +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +#cleanup +umount $mnt +rm -rf $mnt +echo $nr_hugepgs > /proc/sys/vm/nr_hugepages diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c new file mode 100644 index 000000000..c87957295 --- /dev/null +++ b/tools/testing/selftests/vm/thuge-gen.c @@ -0,0 +1,254 @@ +/* Test selecting other page sizes for mmap/shmget. + + Before running this huge pages for each huge page size must have been + reserved. + For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. + Also shmmax must be increased. + And you need to run as root to work around some weird permissions in shm. + And nothing using huge pages should run in parallel. + When the program aborts you may need to clean up the shm segments with + ipcrm -m by hand, like this + sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m + (warning this will remove all if someone else uses them) */ + +#define _GNU_SOURCE 1 +#include <sys/mman.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <glob.h> +#include <assert.h> +#include <unistd.h> +#include <stdarg.h> +#include <string.h> + +#define err(x) perror(x), exit(1) + +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f +#define MAP_HUGETLB 0x40000 + +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#define SHM_HUGE_SHIFT 26 +#define SHM_HUGE_MASK 0x3f +#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) + +#define NUM_PAGESIZES 5 + +#define NUM_PAGES 4 + +#define Dprintf(fmt...) // printf(fmt) + +unsigned long page_sizes[NUM_PAGESIZES]; +int num_page_sizes; + +int ilog2(unsigned long v) +{ + int l = 0; + while ((1UL << l) < v) + l++; + return l; +} + +void find_pagesizes(void) +{ + glob_t g; + int i; + glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g); + assert(g.gl_pathc <= NUM_PAGESIZES); + for (i = 0; i < g.gl_pathc; i++) { + sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB", + &page_sizes[i]); + page_sizes[i] <<= 10; + printf("Found %luMB\n", page_sizes[i] >> 20); + } + num_page_sizes = g.gl_pathc; + globfree(&g); +} + +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + free(line); + return hps; +} + +void show(unsigned long ps) +{ + char buf[100]; + if (ps == getpagesize()) + return; + printf("%luMB: ", ps >> 20); + fflush(stdout); + snprintf(buf, sizeof buf, + "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + system(buf); +} + +unsigned long read_sysfs(int warn, char *fmt, ...) +{ + char *line = NULL; + size_t linelen = 0; + char buf[100]; + FILE *f; + va_list ap; + unsigned long val = 0; + + va_start(ap, fmt); + vsnprintf(buf, sizeof buf, fmt, ap); + va_end(ap); + + f = fopen(buf, "r"); + if (!f) { + if (warn) + printf("missing %s\n", buf); + return 0; + } + if (getline(&line, &linelen, f) > 0) { + sscanf(line, "%lu", &val); + } + fclose(f); + free(line); + return val; +} + +unsigned long read_free(unsigned long ps) +{ + return read_sysfs(ps != getpagesize(), + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); +} + +void test_mmap(unsigned long size, unsigned flags) +{ + char *map; + unsigned long before, after; + int err; + + before = read_free(size); + map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, 0, 0); + + if (map == (char *)-1) err("mmap"); + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = munmap(map, size); + assert(!err); +} + +void test_shmget(unsigned long size, unsigned flags) +{ + int id; + unsigned long before, after; + int err; + + before = read_free(size); + id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags); + if (id < 0) err("shmget"); + + struct shm_info i; + if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl"); + Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss); + + + Dprintf("id %d\n", id); + char *map = shmat(id, NULL, 0600); + if (map == (char*)-1) err("shmat"); + + shmctl(id, IPC_RMID, NULL); + + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = shmdt(map); + assert(!err); +} + +void sanity_checks(void) +{ + int i; + unsigned long largest = getpagesize(); + + for (i = 0; i < num_page_sizes; i++) { + if (page_sizes[i] > largest) + largest = page_sizes[i]; + + if (read_free(page_sizes[i]) < NUM_PAGES) { + printf("Not enough huge pages for page size %lu MB, need %u\n", + page_sizes[i] >> 20, + NUM_PAGES); + exit(0); + } + } + + if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) { + printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); + exit(0); + } + +#if defined(__x86_64__) + if (largest != 1U<<30) { + printf("No GB pages available on x86-64\n" + "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); + exit(0); + } +#endif +} + +int main(void) +{ + int i; + unsigned default_hps = default_huge_page_size(); + + find_pagesizes(); + + sanity_checks(); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << MAP_HUGE_SHIFT; + printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg); + test_mmap(ps, MAP_HUGETLB | arg); + } + printf("Testing default huge mmap\n"); + test_mmap(default_hps, SHM_HUGETLB); + + puts("Testing non-huge shmget"); + test_shmget(getpagesize(), 0); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << SHM_HUGE_SHIFT; + printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg); + test_shmget(ps, SHM_HUGETLB | arg); + } + puts("default huge shmget"); + test_shmget(default_hps, SHM_HUGETLB); + + return 0; +} |
