Synopsis: System V Semaphore denial of service NetBSD versions: NetBSD 1.4, 1.4.1, and 1.4.2 Thanks to: Artur Grabowski, Jason Thorpe, Bill Sommerfeld Reported in NetBSD Security Advisory: SA2000-005 *** sys/sys/proc.h.orig 1999/03/25 04:45:56 1.74 --- sys/sys/proc.h 2000/04/30 20:12:04 1.74.2.2 *************** *** 230,235 **** --- 230,252 ---- #define P_NOCLDWAIT 0x20000 /* No zombies if child dies */ /* + * These flags are kept in schedflags. schedflags may be modified + * only at splstatclock(). + */ + extern int schedflags; + + #define PSCHED_SEENRR 0x0001 /* process has been in roundrobin() */ + #define PSCHED_SHOULDYIELD 0x0002 /* process should yield */ + + #define PSCHED_SWITCHCLEAR (PSCHED_SEENRR|PSCHED_SHOULDYIELD) + + /* + * Macro to compute the exit signal to be delivered. + */ + #define P_EXITSIG(p) (((p)->p_flag & (P_TRACED|P_FSTRACE)) ? SIGCHLD : \ + p->p_exitsig) + + /* * MOVE TO ucred.h? * * Shareable process credentials (always resident). This includes a reference *************** *** 330,335 **** --- 347,354 ---- void fixjobc __P((struct proc *p, struct pgrp *pgrp, int entering)); int inferior __P((struct proc *p)); int leavepgrp __P((struct proc *p)); + void yield __P((void)); + void preempt __P((struct proc *)); void mi_switch __P((void)); void pgdelete __P((struct pgrp *pgrp)); void procinit __P((void)); *** sys/kern/kern_subr.c.orig 2000/02/01 22:54:45 1.45.2.1 --- sys/kern/kern_subr.c 2000/04/30 20:12:42 1.45.2.3 *************** *** 125,135 **** u_int cnt; int error = 0; char *cp = buf; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) panic("uiomove: mode"); ! if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("uiomove proc"); #endif while (n > 0 && uio->uio_resid) { --- 125,136 ---- u_int cnt; int error = 0; char *cp = buf; + struct proc *p = uio->uio_procp; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) panic("uiomove: mode"); ! if (uio->uio_segflg == UIO_USERSPACE && p != curproc) panic("uiomove proc"); #endif while (n > 0 && uio->uio_resid) { *************** *** 145,150 **** --- 146,153 ---- switch (uio->uio_segflg) { case UIO_USERSPACE: + if (schedflags & PSCHED_SHOULDYIELD) + preempt(NULL); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else *** sys/kern/kern_synch.c.orig 1999/10/17 22:29:40 1.57.2.1 --- sys/kern/kern_synch.c 2000/04/30 20:13:11 1.57.2.3 *************** *** 1,6 **** --- 1,43 ---- /* $NetBSD: 20000527-yield,v 1.2 2000/05/28 23:52:32 sommerfeld Exp $ */ /*- + * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, + * NASA Ames Research Center. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. *************** *** 68,73 **** --- 105,111 ---- u_char curpriority; /* usrpri of curproc */ int lbolt; /* once a second sleep address */ + int schedflags; /* preemption needed? */ void roundrobin __P((void *)); void schedcpu __P((void *)); *************** *** 82,88 **** --- 120,140 ---- roundrobin(arg) void *arg; { + int s; + if (curproc != NULL) { + s = splstatclock(); + if (schedflags & PSCHED_SEENRR) { + /* + * The process has already been through a roundrobin + * without switching and may be hogging the CPU. + * Indicate that the process should yield. + */ + schedflags |= PSCHED_SHOULDYIELD; + } else + schedflags |= PSCHED_SEENRR; + splx(s); + } need_resched(); timeout(roundrobin, NULL, hz / 10); } *************** *** 569,574 **** --- 621,671 ---- } /* + * General yield call. Puts the current process back on its run queue and + * performs a voluntary context switch. + */ + void + yield() + { + struct proc *p = curproc; + int s; + + p->p_priority = p->p_usrpri; + s = splstatclock(); + setrunqueue(p); + p->p_stats->p_ru.ru_nvcsw++; + mi_switch(); + splx(s); + } + + /* + * General preemption call. Puts the current process back on its run queue + * and performs an involuntary context switch. If a process is supplied, + * we switch to that process. Otherwise, we use the normal process selection + * criteria. + */ + void + preempt(newp) + struct proc *newp; + { + struct proc *p = curproc; + int s; + + /* + * XXX Switching to a specific process is not supported yet. + */ + if (newp != NULL) + panic("preempt: cpu_preempt not yet implemented"); + + p->p_priority = p->p_usrpri; + s = splstatclock(); + setrunqueue(p); + p->p_stats->p_ru.ru_nivcsw++; + mi_switch(); + splx(s); + } + + /* * The machine independent parts of mi_switch(). * Must be called at splstatclock() or higher. */ *************** *** 625,630 **** --- 722,733 ---- p->p_nice = autoniceval + NZERO; resetpriority(p); } + + /* + * Process is about to yield the CPU; clear the appropriate + * scheduling flags. + */ + schedflags &= ~PSCHED_SWITCHCLEAR; /* * Pick a new current process and record its start time. *** sys/kern/kern_ktrace.c.orig 1998/09/11 12:50:10 1.33 --- sys/kern/kern_ktrace.c 2000/04/30 20:13:33 1.33.6.2 *************** *** 53,64 **** #include <sys/mount.h> #include <sys/syscallargs.h> ! struct ktr_header *ktrgetheader __P((int)); ! int ktrops __P((struct proc *, struct proc *, int, int, void *)); ! int ktrsetchildren __P((struct proc *, struct proc *, int, int, void *)); ! void ktrwrite __P((struct proc *, void *, struct ktr_header *)); ! int ktrcanset __P((struct proc *, struct proc *)); void ktrderef(p) struct proc *p; --- 53,67 ---- #include <sys/mount.h> #include <sys/syscallargs.h> ! #include <vm/vm.h> /* XXX for uvmexp, needed by PAGE_SIZE */ ! #include <uvm/uvm.h> /* XXX for uvmexp, needed by PAGE_SIZE */ + void ktrinitheader __P((struct ktr_header *, struct proc *, int)); + int ktrops __P((struct proc *, struct proc *, int, int, void *)); + int ktrsetchildren __P((struct proc *, struct proc *, int, int, void *)); + int ktrwrite __P((struct proc *, void *, struct ktr_header *)); + int ktrcanset __P((struct proc *, struct proc *)); + void ktrderef(p) struct proc *p; *************** *** 94,113 **** } } ! struct ktr_header * ! ktrgetheader(type) int type; { - struct ktr_header *kth; - struct proc *p = curproc; /* XXX */ ! MALLOC(kth, struct ktr_header *, sizeof(struct ktr_header), ! M_TEMP, M_WAITOK); kth->ktr_type = type; microtime(&kth->ktr_time); kth->ktr_pid = p->p_pid; memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN); - return (kth); } void --- 97,114 ---- } } ! void ! ktrinitheader(kth, p, type) ! struct ktr_header *kth; ! struct proc *p; int type; { ! memset(kth, 0, sizeof(*kth)); kth->ktr_type = type; microtime(&kth->ktr_time); kth->ktr_pid = p->p_pid; memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN); } void *************** *** 117,142 **** size_t argsize; register_t args[]; { ! struct ktr_header *kth; ! struct ktr_syscall *ktp; struct proc *p = curproc; /* XXX */ register_t *argp; ! int len = sizeof(struct ktr_syscall) + argsize; int i; p->p_traceflag |= KTRFAC_ACTIVE; ! kth = ktrgetheader(KTR_SYSCALL); ! MALLOC(ktp, struct ktr_syscall *, len, M_TEMP, M_WAITOK); ktp->ktr_code = code; ktp->ktr_argsize = argsize; argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall)); for (i = 0; i < (argsize / sizeof(*argp)); i++) *argp++ = args[i]; ! kth->ktr_buf = (caddr_t)ktp; ! kth->ktr_len = len; ! ktrwrite(p, v, kth); ! FREE(ktp, M_TEMP); ! FREE(kth, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } --- 118,142 ---- size_t argsize; register_t args[]; { ! struct ktr_header kth; ! struct ktr_syscall *ktp; struct proc *p = curproc; /* XXX */ register_t *argp; ! size_t len = sizeof(struct ktr_syscall) + argsize; int i; p->p_traceflag |= KTRFAC_ACTIVE; ! ktrinitheader(&kth, p, KTR_SYSCALL); ! ktp = malloc(len, M_TEMP, M_WAITOK); ktp->ktr_code = code; ktp->ktr_argsize = argsize; argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall)); for (i = 0; i < (argsize / sizeof(*argp)); i++) *argp++ = args[i]; ! kth.ktr_buf = (caddr_t)ktp; ! kth.ktr_len = len; ! (void) ktrwrite(p, v, &kth); ! free(ktp, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } *************** *** 147,167 **** int error; register_t retval; { ! struct ktr_header *kth; struct ktr_sysret ktp; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! kth = ktrgetheader(KTR_SYSRET); ktp.ktr_code = code; ktp.ktr_error = error; ktp.ktr_retval = retval; /* what about val2 ? */ ! kth->ktr_buf = (caddr_t)&ktp; ! kth->ktr_len = sizeof(struct ktr_sysret); ! ktrwrite(p, v, kth); ! FREE(kth, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } --- 147,166 ---- int error; register_t retval; { ! struct ktr_header kth; struct ktr_sysret ktp; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! ktrinitheader(&kth, p, KTR_SYSRET); ktp.ktr_code = code; ktp.ktr_error = error; ktp.ktr_retval = retval; /* what about val2 ? */ ! kth.ktr_buf = (caddr_t)&ktp; ! kth.ktr_len = sizeof(struct ktr_sysret); ! (void) ktrwrite(p, v, &kth); p->p_traceflag &= ~KTRFAC_ACTIVE; } *************** *** 170,185 **** void *v; char *path; { ! struct ktr_header *kth; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! kth = ktrgetheader(KTR_NAMEI); ! kth->ktr_len = strlen(path); ! kth->ktr_buf = path; ! ktrwrite(p, v, kth); ! FREE(kth, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } --- 169,183 ---- void *v; char *path; { ! struct ktr_header kth; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! ktrinitheader(&kth, p, KTR_NAMEI); ! kth.ktr_len = strlen(path); ! kth.ktr_buf = path; ! (void) ktrwrite(p, v, &kth); p->p_traceflag &= ~KTRFAC_ACTIVE; } *************** *** 189,203 **** struct proc *p; char *emul; { ! struct ktr_header *kth; p->p_traceflag |= KTRFAC_ACTIVE; ! kth = ktrgetheader(KTR_EMUL); ! kth->ktr_len = strlen(emul); ! kth->ktr_buf = emul; ! ktrwrite(p, v, kth); ! FREE(kth, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } --- 187,200 ---- struct proc *p; char *emul; { ! struct ktr_header kth; p->p_traceflag |= KTRFAC_ACTIVE; ! ktrinitheader(&kth, p, KTR_EMUL); ! kth.ktr_len = strlen(emul); ! kth.ktr_buf = emul; ! (void) ktrwrite(p, v, &kth); p->p_traceflag &= ~KTRFAC_ACTIVE; } *************** *** 209,245 **** struct iovec *iov; int len, error; { ! struct ktr_header *kth; struct ktr_genio *ktp; caddr_t cp; int resid = len, cnt; struct proc *p = curproc; /* XXX */ ! if (error) return; p->p_traceflag |= KTRFAC_ACTIVE; ! kth = ktrgetheader(KTR_GENIO); ! MALLOC(ktp, struct ktr_genio *, sizeof(struct ktr_genio) + len, ! M_TEMP, M_WAITOK); ktp->ktr_fd = fd; ktp->ktr_rw = rw; cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio)); while (resid > 0) { ! if ((cnt = iov->iov_len) > resid) cnt = resid; ! if (copyin(iov->iov_base, cp, (unsigned)cnt)) ! goto done; ! cp += cnt; resid -= cnt; - iov++; } - kth->ktr_buf = (caddr_t)ktp; - kth->ktr_len = sizeof(struct ktr_genio) + len; ! ktrwrite(p, v, kth); ! done: ! FREE(kth, M_TEMP); ! FREE(ktp, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } --- 206,260 ---- struct iovec *iov; int len, error; { ! struct ktr_header kth; struct ktr_genio *ktp; caddr_t cp; int resid = len, cnt; struct proc *p = curproc; /* XXX */ ! int buflen; ! if (error) return; + p->p_traceflag |= KTRFAC_ACTIVE; ! ! buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio)); ! ! ktrinitheader(&kth, p, KTR_GENIO); ! ktp = malloc(buflen, M_TEMP, M_WAITOK); ktp->ktr_fd = fd; ktp->ktr_rw = rw; + + kth.ktr_buf = (caddr_t)ktp; + cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio)); + buflen -= sizeof(struct ktr_genio); + while (resid > 0) { ! if (schedflags & PSCHED_SHOULDYIELD) ! preempt(NULL); ! ! cnt = min(iov->iov_len, buflen); ! if (cnt > resid) cnt = resid; ! if (copyin(iov->iov_base, cp, cnt)) ! break; ! ! kth.ktr_len = cnt + sizeof(struct ktr_genio); ! ! if (ktrwrite(p, v, &kth) != 0) ! break; ! ! iov->iov_base = (caddr_t)iov->iov_base + cnt; ! iov->iov_len -= cnt; ! ! if (iov->iov_len == 0) ! iov++; ! resid -= cnt; } ! free(ktp, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } *************** *** 251,271 **** sigset_t *mask; int code; { ! struct ktr_header *kth; struct ktr_psig kp; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! kth = ktrgetheader(KTR_PSIG); kp.signo = (char)sig; kp.action = action; kp.mask = *mask; kp.code = code; ! kth->ktr_buf = (caddr_t)&kp; ! kth->ktr_len = sizeof(struct ktr_psig); ! ktrwrite(p, v, kth); ! FREE(kth, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } --- 266,285 ---- sigset_t *mask; int code; { ! struct ktr_header kth; struct ktr_psig kp; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! ktrinitheader(&kth, p, KTR_PSIG); kp.signo = (char)sig; kp.action = action; kp.mask = *mask; kp.code = code; ! kth.ktr_buf = (caddr_t)&kp; ! kth.ktr_len = sizeof(struct ktr_psig); ! (void) ktrwrite(p, v, &kth); p->p_traceflag &= ~KTRFAC_ACTIVE; } *************** *** 274,292 **** void *v; int out, user; { ! struct ktr_header *kth; ! struct ktr_csw kc; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! kth = ktrgetheader(KTR_CSW); kc.out = out; kc.user = user; ! kth->ktr_buf = (caddr_t)&kc; ! kth->ktr_len = sizeof(struct ktr_csw); ! ktrwrite(p, v, kth); ! FREE(kth, M_TEMP); p->p_traceflag &= ~KTRFAC_ACTIVE; } --- 288,305 ---- void *v; int out, user; { ! struct ktr_header kth; ! struct ktr_csw kc; struct proc *p = curproc; /* XXX */ p->p_traceflag |= KTRFAC_ACTIVE; ! ktrinitheader(&kth, p, KTR_CSW); kc.out = out; kc.user = user; ! kth.ktr_buf = (caddr_t)&kc; ! kth.ktr_len = sizeof(struct ktr_csw); ! (void) ktrwrite(p, v, &kth); p->p_traceflag &= ~KTRFAC_ACTIVE; } *************** *** 361,371 **** error = ESRCH; goto done; } ! for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) if (descend) ret |= ktrsetchildren(curp, p, ops, facs, fp); else ret |= ktrops(curp, p, ops, facs, fp); } else { /* --- 374,386 ---- error = ESRCH; goto done; } ! for (p = LIST_FIRST(&pg->pg_members); p != NULL; ! p = LIST_NEXT(p, p_pglist)) { if (descend) ret |= ktrsetchildren(curp, p, ops, facs, fp); else ret |= ktrops(curp, p, ops, facs, fp); + } } else { /* *************** *** 463,473 **** error = ESRCH; goto done; } ! for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) if (descend) ret |= ktrsetchildren(curp, p, ops, facs, vp); else ret |= ktrops(curp, p, ops, facs, vp); } else { /* --- 478,490 ---- error = ESRCH; goto done; } ! for (p = LIST_FIRST(&pg->pg_members); p != NULL; ! p = LIST_NEXT(p, p_pglist)) { if (descend) ret |= ktrsetchildren(curp, p, ops, facs, vp); else ret |= ktrops(curp, p, ops, facs, vp); + } } else { /* *************** *** 550,562 **** * otherwise do any siblings, and if done with this level, * follow back up the tree (but not past top). */ ! if (p->p_children.lh_first) ! p = p->p_children.lh_first; else for (;;) { if (p == top) return (ret); ! if (p->p_sibling.le_next) { ! p = p->p_sibling.le_next; break; } p = p->p_pptr; --- 567,579 ---- * otherwise do any siblings, and if done with this level, * follow back up the tree (but not past top). */ ! if (LIST_FIRST(&p->p_children) != NULL) ! p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) return (ret); ! if (LIST_NEXT(p, p_sibling) != NULL) { ! p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; *************** *** 565,571 **** /*NOTREACHED*/ } ! void ktrwrite(p, v, kth) struct proc *p; void *v; --- 582,588 ---- /*NOTREACHED*/ } ! int ktrwrite(p, v, kth) struct proc *p; void *v; *************** *** 576,582 **** int error; if (v == NULL) ! return; auio.uio_iov = &aiov[0]; auio.uio_offset = 0; auio.uio_segflg = UIO_SYSSPACE; --- 593,599 ---- int error; if (v == NULL) ! return (0); auio.uio_iov = &aiov[0]; auio.uio_offset = 0; auio.uio_segflg = UIO_SYSSPACE; *************** *** 605,621 **** error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); VOP_UNLOCK(vp, 0); } ! if (!error) ! return; /* ! * If error encountered, give up tracing on this vnode. */ ! log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", ! error); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_tracep == v) ktrderef(p); } } /* --- 622,643 ---- error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); VOP_UNLOCK(vp, 0); } ! if (error == 0) ! return (0); /* ! * If error encountered, give up tracing on this vnode. Don't report ! * EPIPE as this can easily happen with fktrace()/ktruss. */ ! if (error != EPIPE) ! log(LOG_NOTICE, ! "ktrace write failed, errno %d, tracing stopped\n", ! error); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_tracep == v) ktrderef(p); } + + return (error); } /*