12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725 |
- /*
- * Part of Very Secure FTPd
- * Licence: GPL v2
- * Author: Chris Evans
- * seccompsandbox.c
- *
- * Code to lock down the accessible kernel API in a Linux seccomp filter
- * sandbox. Works in Ubuntu 11.10 and newer.
- */
- #include "seccompsandbox.h"
- #if defined(__linux__) && defined(__x86_64__)
- #include "session.h"
- #include "sysutil.h"
- #include "tunables.h"
- #include "utility.h"
- #include <errno.h>
- #include <netinet/in.h>
- #include <netinet/tcp.h>
- #include <sys/fcntl.h>
- #include <sys/mman.h>
- #include <sys/prctl.h>
- #include <sys/socket.h>
- #include <sys/types.h>
- #include <linux/filter.h>
- #include <asm/unistd.h>
- /* #define DEBUG_SIGSYS 1 */
- #ifndef PR_SET_SECCOMP
- #define PR_SET_SECCOMP 22
- #endif
- #ifndef PR_SET_NO_NEW_PRIVS
- #define PR_SET_NO_NEW_PRIVS 38
- #endif
- #ifndef __NR_openat
- #define __NR_openat 257
- #endif
- #ifndef O_LARGEFILE
- #define O_LARGEFILE 00100000
- #endif
- #ifndef O_DIRECTORY
- #define O_DIRECTORY 00200000
- #endif
- #ifndef O_CLOEXEC
- #define O_CLOEXEC 002000000
- #endif
- #define kMaxSyscalls 100
- #ifdef DEBUG_SIGSYS
- #include <signal.h>
- #include <string.h>
/* SIGSYS handler used only in DEBUG_SIGSYS builds, where
 * seccomp_sandbox_lockdown() installs it. It does nothing with the signal.
 */
void
handle_sigsys(int sig)
{
  /* The signal number is deliberately unused. */
  (void) sig;
}
- #endif
- static const int kOpenFlags =
- O_CREAT|O_EXCL|O_APPEND|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_LARGEFILE;
- static size_t s_syscall_index;
- static size_t s_1_arg_validations;
- static size_t s_2_arg_validations;
- static size_t s_3_arg_validations;
- static int s_syscalls[kMaxSyscalls];
- static int s_errnos[kMaxSyscalls];
- static int s_args_1[kMaxSyscalls];
- static int s_vals_1[kMaxSyscalls];
- static int s_args_2[kMaxSyscalls];
- static int s_vals_2[kMaxSyscalls];
- static int s_args_3[kMaxSyscalls];
- static int s_vals_3[kMaxSyscalls];
- static void
- allow_nr(int nr)
- {
- if (s_syscall_index >= kMaxSyscalls)
- {
- bug("out of syscall space");
- }
- if (nr < 0)
- {
- bug("negative syscall");
- }
- s_errnos[s_syscall_index] = 0;
- s_syscalls[s_syscall_index++] = nr;
- }
- static void
- reject_nr(int nr, int errcode)
- {
- if (s_syscall_index >= kMaxSyscalls)
- {
- bug("out of syscall space");
- }
- if (nr < 0)
- {
- bug("negative syscall");
- }
- if (errcode < 0 || errcode > 255)
- {
- bug("bad errcode");
- }
- s_errnos[s_syscall_index] = errcode;
- s_syscalls[s_syscall_index++] = nr;
- }
- static void
- allow_nr_1_arg_match(int nr, int arg, int val)
- {
- if (s_syscall_index >= kMaxSyscalls)
- {
- bug("out of syscall space");
- }
- if (nr < 0)
- {
- bug("negative syscall");
- }
- if (arg < 1 || arg > 6)
- {
- bug("arg out of range");
- }
- s_args_1[s_syscall_index] = arg;
- s_vals_1[s_syscall_index] = val;
- s_errnos[s_syscall_index] = 0;
- s_syscalls[s_syscall_index++] = nr;
- s_1_arg_validations++;
- }
- static void
- allow_nr_1_arg_mask(int nr, int arg, int val)
- {
- if (s_syscall_index >= kMaxSyscalls)
- {
- bug("out of syscall space");
- }
- if (nr < 0)
- {
- bug("negative syscall");
- }
- if (arg < 1 || arg > 6)
- {
- bug("arg out of range");
- }
- s_args_1[s_syscall_index] = 100 + arg;
- s_vals_1[s_syscall_index] = val;
- s_errnos[s_syscall_index] = 0;
- s_syscalls[s_syscall_index++] = nr;
- s_1_arg_validations++;
- }
- static void
- allow_nr_2_arg_match(int nr, int arg1, int val1, int arg2, int val2)
- {
- if (s_syscall_index >= kMaxSyscalls)
- {
- bug("out of syscall space");
- }
- if (nr < 0)
- {
- bug("negative syscall");
- }
- if (arg1 < 1 || arg1 > 6)
- {
- bug("arg1 out of range");
- }
- if (arg2 < 1 || arg2 > 6)
- {
- bug("arg2 out of range");
- }
- s_args_1[s_syscall_index] = arg1;
- s_vals_1[s_syscall_index] = val1;
- s_args_2[s_syscall_index] = arg2;
- s_vals_2[s_syscall_index] = val2;
- s_errnos[s_syscall_index] = 0;
- s_syscalls[s_syscall_index++] = nr;
- s_2_arg_validations++;
- }
- static void
- allow_nr_2_arg_mask_match(int nr, int arg1, int val1, int arg2, int val2)
- {
- if (s_syscall_index >= kMaxSyscalls)
- {
- bug("out of syscall space");
- }
- if (nr < 0)
- {
- bug("negative syscall");
- }
- if (arg1 < 1 || arg1 > 6)
- {
- bug("arg1 out of range");
- }
- if (arg2 < 1 || arg2 > 6)
- {
- bug("arg2 out of range");
- }
- s_args_1[s_syscall_index] = 100 + arg1;
- s_vals_1[s_syscall_index] = val1;
- s_args_2[s_syscall_index] = arg2;
- s_vals_2[s_syscall_index] = val2;
- s_errnos[s_syscall_index] = 0;
- s_syscalls[s_syscall_index++] = nr;
- s_2_arg_validations++;
- }
- static void
- allow_nr_3_arg_match(int nr, int arg1, int val1, int arg2, int val2, int arg3,
- int val3)
- {
- if (s_syscall_index >= kMaxSyscalls)
- {
- bug("out of syscall space");
- }
- if (nr < 0)
- {
- bug("negative syscall");
- }
- if (arg1 < 1 || arg1 > 6)
- {
- bug("arg1 out of range");
- }
- if (arg2 < 1 || arg2 > 6)
- {
- bug("arg2 out of range");
- }
- if (arg3 < 1 || arg3 > 6)
- {
- bug("arg3 out of range");
- }
- s_args_1[s_syscall_index] = arg1;
- s_vals_1[s_syscall_index] = val1;
- s_args_2[s_syscall_index] = arg2;
- s_vals_2[s_syscall_index] = val2;
- s_args_3[s_syscall_index] = arg3;
- s_vals_3[s_syscall_index] = val3;
- s_errnos[s_syscall_index] = 0;
- s_syscalls[s_syscall_index++] = nr;
- s_3_arg_validations++;
- }
- static void
- seccomp_sandbox_setup_data_connections()
- {
- allow_nr_3_arg_match(__NR_socket, 1, PF_INET, 2, SOCK_STREAM, 3, IPPROTO_TCP);
- allow_nr_3_arg_match(__NR_socket,
- 1, PF_INET6,
- 2, SOCK_STREAM,
- 3, IPPROTO_TCP);
- allow_nr(__NR_bind);
- allow_nr(__NR_select);
- if (tunable_port_enable)
- {
- allow_nr(__NR_connect);
- allow_nr_2_arg_match(__NR_getsockopt, 2, SOL_SOCKET, 3, SO_ERROR);
- allow_nr_2_arg_match(__NR_setsockopt, 2, SOL_SOCKET, 3, SO_REUSEADDR);
- allow_nr_1_arg_match(__NR_fcntl, 2, F_GETFL);
- allow_nr_2_arg_match(__NR_fcntl, 2, F_SETFL, 3, O_RDWR|O_NONBLOCK);
- allow_nr_2_arg_match(__NR_fcntl, 2, F_SETFL, 3, O_RDWR);
- }
- if (tunable_pasv_enable)
- {
- allow_nr(__NR_listen);
- allow_nr(__NR_accept);
- }
- }
- static void
- seccomp_sandbox_setup_base()
- {
- /* Simple reads and writes on existing descriptors. */
- allow_nr(__NR_read);
- allow_nr(__NR_write);
- /* Needed for memory management. */
- allow_nr_2_arg_match(__NR_mmap,
- 3, PROT_READ|PROT_WRITE,
- 4, MAP_PRIVATE|MAP_ANON);
- allow_nr_1_arg_mask(__NR_mprotect, 3, PROT_READ);
- allow_nr(__NR_munmap);
- allow_nr(__NR_brk);
- /* glibc falls back gracefully if mremap() fails during realloc(). */
- reject_nr(__NR_mremap, ENOSYS);
- /* Misc simple low-risk calls. */
- allow_nr(__NR_gettimeofday); /* Used by logging. */
- allow_nr(__NR_rt_sigreturn); /* Used to handle SIGPIPE. */
- allow_nr(__NR_restart_syscall);
- allow_nr(__NR_close);
- /* Always need to be able to exit ! */
- allow_nr(__NR_exit_group);
- }
- void
- seccomp_sandbox_init()
- {
- if (s_syscall_index != 0)
- {
- bug("bad state in seccomp_sandbox_init");
- }
- }
- void
- seccomp_sandbox_setup_prelogin(const struct vsf_session* p_sess)
- {
- (void) p_sess;
- seccomp_sandbox_setup_base();
- /* Peeking FTP commands from the network. */
- allow_nr_1_arg_match(__NR_recvfrom, 4, MSG_PEEK);
- /* Misc simple low-risk calls */
- allow_nr(__NR_nanosleep); /* Used for bandwidth / login throttling. */
- allow_nr(__NR_getpid); /* Used by logging. */
- allow_nr(__NR_shutdown); /* Used for QUIT or a timeout. */
- allow_nr_1_arg_match(__NR_fcntl, 2, F_GETFL);
- /* It's safe to allow O_RDWR in fcntl because these flags cannot be changed.
- * Also, sockets are O_RDWR.
- */
- allow_nr_2_arg_mask_match(__NR_fcntl, 3, kOpenFlags|O_ACCMODE, 2, F_SETFL);
- /* Config-dependent items follow. */
- if (tunable_idle_session_timeout > 0)
- {
- allow_nr(__NR_rt_sigaction);
- allow_nr(__NR_alarm);
- }
- if (tunable_xferlog_enable || tunable_dual_log_enable)
- {
- /* For file locking. */
- allow_nr_1_arg_match(__NR_fcntl, 2, F_SETLKW);
- allow_nr_1_arg_match(__NR_fcntl, 2, F_SETLK);
- }
- if (tunable_ssl_enable)
- {
- allow_nr_1_arg_match(__NR_recvmsg, 3, 0);
- allow_nr_2_arg_match(__NR_setsockopt, 2, IPPROTO_TCP, 3, TCP_NODELAY);
- }
- if (tunable_syslog_enable)
- {
- reject_nr(__NR_socket, EACCES);
- }
- }
- void
- seccomp_sandbox_setup_postlogin(const struct vsf_session* p_sess)
- {
- int is_anon = p_sess->is_anonymous;
- int open_flag = kOpenFlags;
- if (tunable_write_enable)
- {
- open_flag |= O_ACCMODE;
- }
- /* Put lstat() first because it is a very hot syscall for large directory
- * listings. And the current BPF only allows a linear scan of allowed
- * syscalls.
- */
- allow_nr(__NR_lstat);
- /* Allow all the simple pre-login things and then expand upon them. */
- seccomp_sandbox_setup_prelogin(p_sess);
- /* Simple file descriptor-based operations. */
- if (tunable_xferlog_enable || tunable_dual_log_enable ||
- tunable_lock_upload_files)
- {
- allow_nr_1_arg_match(__NR_fcntl, 2, F_SETLKW);
- allow_nr_1_arg_match(__NR_fcntl, 2, F_SETLK);
- }
- if (tunable_async_abor_enable)
- {
- allow_nr_2_arg_match(__NR_fcntl, 2, F_SETOWN, 3, vsf_sysutil_getpid());
- }
- allow_nr_2_arg_match(__NR_setsockopt, 2, SOL_SOCKET, 3, SO_KEEPALIVE);
- allow_nr_2_arg_match(__NR_setsockopt, 2, SOL_SOCKET, 3, SO_LINGER);
- allow_nr_2_arg_match(__NR_setsockopt, 2, IPPROTO_IP, 3, IP_TOS);
- allow_nr(__NR_fstat);
- allow_nr(__NR_lseek);
- /* Since we use chroot() to restrict filesystem access, we can just blanket
- * allow open().
- */
- allow_nr_1_arg_mask(__NR_open, 2, open_flag);
- allow_nr_1_arg_mask(__NR_openat, 3, open_flag);
- /* Other pathname-based metadata queries. */
- allow_nr(__NR_stat);
- allow_nr(__NR_readlink);
- /* Directory handling: query, change, read. */
- allow_nr(__NR_getcwd);
- allow_nr(__NR_chdir);
- allow_nr(__NR_getdents);
- /* Misc */
- allow_nr(__NR_umask);
- /* Config-dependent items follow. */
- if (tunable_use_sendfile)
- {
- allow_nr(__NR_sendfile);
- }
- if (tunable_idle_session_timeout > 0 ||
- tunable_data_connection_timeout > 0 ||
- tunable_async_abor_enable)
- {
- allow_nr(__NR_rt_sigaction);
- }
- if (tunable_idle_session_timeout > 0 || tunable_data_connection_timeout > 0)
- {
- allow_nr(__NR_alarm);
- }
- if (tunable_one_process_model)
- {
- seccomp_sandbox_setup_data_connections();
- if (is_anon && tunable_chown_uploads)
- {
- allow_nr(__NR_fchmod);
- allow_nr(__NR_fchown);
- }
- }
- else
- {
- /* Need to receieve file descriptors from privileged broker. */
- allow_nr_1_arg_match(__NR_recvmsg, 3, 0);
- if ((is_anon && tunable_chown_uploads) || tunable_ssl_enable)
- {
- /* Need to send file descriptors to privileged broker. */
- allow_nr_1_arg_match(__NR_sendmsg, 3, 0);
- }
- }
- if (tunable_syslog_enable)
- {
- /* The ability to pass an address spec isn't needed so disable it. We ensure
- * the 6th arg (socklen) is 0. We could have checked the 5th arg (sockptr)
- * but I don't know if 64-bit compares work in the kernel filter, so we're
- * happy to check the socklen arg, which is 32 bits.
- */
- allow_nr_1_arg_match(__NR_sendto, 6, 0);
- }
- if (tunable_text_userdb_names)
- {
- reject_nr(__NR_socket, EACCES);
- allow_nr_2_arg_match(__NR_mmap, 3, PROT_READ, 4, MAP_SHARED);
- }
- if (tunable_write_enable)
- {
- if (!is_anon || tunable_anon_mkdir_write_enable)
- {
- allow_nr(__NR_mkdir);
- }
- if (!is_anon ||
- tunable_anon_other_write_enable ||
- tunable_delete_failed_uploads)
- {
- allow_nr(__NR_unlink);
- }
- if (!is_anon || tunable_anon_other_write_enable)
- {
- allow_nr(__NR_rmdir);
- allow_nr(__NR_rename);
- allow_nr(__NR_ftruncate);
- if (tunable_mdtm_write)
- {
- allow_nr(__NR_utime);
- allow_nr(__NR_utimes);
- }
- }
- if (!is_anon && tunable_chmod_enable)
- {
- allow_nr(__NR_chmod);
- }
- }
- }
/* Add the rules for the post-login broker profile: the base rules, the data
 * connection rules, and sendmsg() with its third argument equal to 0.
 */
void
seccomp_sandbox_setup_postlogin_broker()
{
  seccomp_sandbox_setup_base();
  seccomp_sandbox_setup_data_connections();
  /* sendmsg() is only allowed when its flags argument is zero. */
  allow_nr_1_arg_match(__NR_sendmsg, 3, 0);
}
- void
- seccomp_sandbox_lockdown()
- {
- size_t len = (s_syscall_index * 2) +
- (s_1_arg_validations * 3) +
- (s_2_arg_validations * 5) +
- (s_3_arg_validations * 7) +
- 5;
- struct sock_filter filters[len];
- struct sock_filter* p_filter = filters;
- struct sock_fprog prog;
- size_t i;
- int ret;
- prog.len = len;
- prog.filter = filters;
- /* Validate the syscall architecture. */
- p_filter->code = BPF_LD+BPF_W+BPF_ABS;
- p_filter->jt = 0;
- p_filter->jf = 0;
- /* Offset 4 for syscall architecture. */
- p_filter->k = 4;
- p_filter++;
- p_filter->code = BPF_JMP+BPF_JEQ+BPF_K;
- p_filter->jt = 1;
- p_filter->jf = 0;
- /* AUDIT_ARCH_X86_64 */
- p_filter->k = 0xc000003e;
- p_filter++;
- p_filter->code = BPF_RET+BPF_K;
- p_filter->jt = 0;
- p_filter->jf = 0;
- /* SECCOMP_RET_KILL */
- p_filter->k = 0;
- p_filter++;
- /* Load the syscall number. */
- p_filter->code = BPF_LD+BPF_W+BPF_ABS;
- p_filter->jt = 0;
- p_filter->jf = 0;
- /* Offset 0 for syscall number. */
- p_filter->k = 0;
- p_filter++;
- for (i = 0; i < s_syscall_index; ++i)
- {
- int block_size = 1;
- if (s_args_3[i])
- {
- block_size = 8;
- }
- else if (s_args_2[i])
- {
- block_size = 6;
- }
- else if (s_args_1[i])
- {
- block_size = 4;
- }
- /* Check for syscall number match. */
- p_filter->code = BPF_JMP+BPF_JEQ+BPF_K;
- p_filter->jt = 0;
- p_filter->jf = block_size;
- p_filter->k = s_syscalls[i];
- p_filter++;
- /* Check argument matches if necessary. */
- if (s_args_3[i])
- {
- p_filter->code = BPF_LD+BPF_W+BPF_ABS;
- p_filter->jt = 0;
- p_filter->jf = 0;
- p_filter->k = 16 + ((s_args_3[i] - 1) * 8);
- p_filter++;
- p_filter->code = BPF_JMP+BPF_JEQ+BPF_K;
- p_filter->jt = 0;
- p_filter->jf = 5;
- p_filter->k = s_vals_3[i];
- p_filter++;
- }
- if (s_args_2[i])
- {
- p_filter->code = BPF_LD+BPF_W+BPF_ABS;
- p_filter->jt = 0;
- p_filter->jf = 0;
- p_filter->k = 16 + ((s_args_2[i] - 1) * 8);
- p_filter++;
- p_filter->code = BPF_JMP+BPF_JEQ+BPF_K;
- p_filter->jt = 0;
- p_filter->jf = 3;
- p_filter->k = s_vals_2[i];
- p_filter++;
- }
- if (s_args_1[i])
- {
- int arg = s_args_1[i];
- int code = BPF_JMP+BPF_JEQ+BPF_K;
- int val = s_vals_1[i];
- int jt = 0;
- int jf = 1;
- if (arg > 100)
- {
- arg -= 100;
- code = BPF_JMP+BPF_JSET+BPF_K;
- val = ~val;
- jt = 1;
- jf = 0;
- }
- p_filter->code = BPF_LD+BPF_W+BPF_ABS;
- p_filter->jt = 0;
- p_filter->jf = 0;
- p_filter->k = 16 + ((arg - 1) * 8);
- p_filter++;
- p_filter->code = code;
- p_filter->jt = jt;
- p_filter->jf = jf;
- p_filter->k = val;
- p_filter++;
- }
- p_filter->code = BPF_RET+BPF_K;
- p_filter->jt = 0;
- p_filter->jf = 0;
- if (!s_errnos[i])
- {
- /* SECCOMP_RET_ALLOW */
- p_filter->k = 0x7fff0000;
- }
- else
- {
- /* SECCOMP_RET_ERRNO */
- p_filter->k = 0x00050000 + s_errnos[i];
- }
- p_filter++;
- if (s_args_1[i])
- {
- /* We trashed the accumulator so put it back. */
- p_filter->code = BPF_LD+BPF_W+BPF_ABS;
- p_filter->jt = 0;
- p_filter->jf = 0;
- p_filter->k = 0;
- p_filter++;
- }
- }
- /* No "allow" matches so kill. */
- p_filter->code = BPF_RET+BPF_K;
- p_filter->jt = 0;
- p_filter->jf = 0;
- #ifdef DEBUG_SIGSYS
- /* SECCOMP_RET_TRAP */
- p_filter->k = 0x00030000;
- #else
- /* SECCOMP_RET_KILL */
- p_filter->k = 0;
- #endif
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- if (ret != 0)
- {
- if (errno == EINVAL)
- {
- /* Kernel isn't good enough. */
- return;
- }
- die("prctl PR_SET_NO_NEW_PRIVS");
- }
- if (!tunable_seccomp_sandbox)
- {
- return;
- }
- #ifdef DEBUG_SIGSYS
- {
- struct sigaction sa;
- memset(&sa, '\0', sizeof(sa));
- sa.sa_handler = handle_sigsys;
- sigaction(SIGSYS, &sa, NULL);
- }
- #endif
- ret = prctl(PR_SET_SECCOMP, 2, &prog, 0, 0);
- if (ret != 0)
- {
- if (errno == EINVAL)
- {
- /* Kernel isn't good enough. */
- return;
- }
- die("prctl PR_SET_SECCOMP failed");
- }
- }
- #else /* __linux__ && __x86_64__ */
/* The seccomp sandbox is only implemented for Linux on x86_64. On any other
 * platform every entry point below is a no-op.
 */

/* No-op: there is no sandbox state to check. */
void
seccomp_sandbox_init()
{
}

/* No-op: `p_sess` is ignored. */
void
seccomp_sandbox_setup_prelogin(const struct vsf_session* p_sess)
{
  (void) p_sess;
}

/* No-op: `p_sess` is ignored. */
void
seccomp_sandbox_setup_postlogin(const struct vsf_session* p_sess)
{
  (void) p_sess;
}

/* No-op. */
void
seccomp_sandbox_setup_postlogin_broker()
{
}

/* No-op: no filter is installed. */
void
seccomp_sandbox_lockdown()
{
}
- #endif /* __linux__ && __x86_64__ */
|