/*	$NetBSD: sockin.c,v 1.67 2022/09/03 02:53:18 thorpej Exp $	*/

/*
 * Copyright (c) 2008, 2009 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.67 2022/09/03 02:53:18 thorpej Exp $");

#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/domain.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/once.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>

#include <net/bpf.h>
#include <net/if.h>
#include <net/radix.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

#include "sockin_user.h"

/*
 * An inet communication domain which uses the socket interface.
 * Supports IPv4 & IPv6 UDP/TCP.
 */

DOMAIN_DEFINE(sockindomain);
DOMAIN_DEFINE(sockin6domain);

static int	sockin_do_init(void);
static void	sockin_init(void);
static int	sockin_attach(struct socket *, int);
static void	sockin_detach(struct socket *);
static int	sockin_accept(struct socket *, struct sockaddr *);
static int	sockin_connect2(struct socket *, struct socket *);
static int	sockin_bind(struct socket *, struct sockaddr *, struct lwp *);
static int	sockin_listen(struct socket *, struct lwp *);
static int	sockin_connect(struct socket *, struct sockaddr *, struct lwp *);
static int	sockin_disconnect(struct socket *);
static int	sockin_shutdown(struct socket *);
static int	sockin_abort(struct socket *);
static int	sockin_ioctl(struct socket *, u_long, void *, struct ifnet *);
static int	sockin_stat(struct socket *, struct stat *);
static int	sockin_peeraddr(struct socket *, struct sockaddr *);
static int	sockin_sockaddr(struct socket *, struct sockaddr *);
static int	sockin_rcvd(struct socket *, int, struct lwp *);
static int	sockin_recvoob(struct socket *, struct mbuf *, int);
static int	sockin_send(struct socket *, struct mbuf *, struct sockaddr *,
			    struct mbuf *, struct lwp *);
static int	sockin_sendoob(struct socket *, struct mbuf *, struct mbuf *);
static int	sockin_purgeif(struct socket *, struct ifnet *);
static int	sockin_ctloutput(int op, struct socket *, struct sockopt *);

static const struct pr_usrreqs sockin_usrreqs = {
	.pr_attach = sockin_attach,
	.pr_detach = sockin_detach,
	.pr_accept = sockin_accept,
	.pr_bind = sockin_bind,
	.pr_listen = sockin_listen,
	.pr_connect = sockin_connect,
	.pr_connect2 = sockin_connect2,
	.pr_disconnect = sockin_disconnect,
	.pr_shutdown = sockin_shutdown,
	.pr_abort = sockin_abort,
	.pr_ioctl = sockin_ioctl,
	.pr_stat = sockin_stat,
	.pr_peeraddr = sockin_peeraddr,
	.pr_sockaddr = sockin_sockaddr,
	.pr_rcvd = sockin_rcvd,
	.pr_recvoob = sockin_recvoob,
	.pr_send = sockin_send,
	.pr_sendoob = sockin_sendoob,
	.pr_purgeif = sockin_purgeif,
};

const struct protosw sockinsw[] = {
{
	.pr_type = SOCK_DGRAM,
	.pr_domain = &sockindomain,
	.pr_protocol = IPPROTO_UDP,
	.pr_flags = PR_ATOMIC|PR_ADDR,
	.pr_usrreqs = &sockin_usrreqs,
	.pr_ctloutput = sockin_ctloutput,
},
{
	.pr_type = SOCK_STREAM,
	.pr_domain = &sockindomain,
	.pr_protocol = IPPROTO_TCP,
	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
	.pr_usrreqs = &sockin_usrreqs,
	.pr_ctloutput = sockin_ctloutput,
}};
const struct protosw sockin6sw[] = {
{
	.pr_type = SOCK_DGRAM,
	.pr_domain = &sockin6domain,
	.pr_protocol = IPPROTO_UDP,
	.pr_flags = PR_ATOMIC|PR_ADDR,
	.pr_usrreqs = &sockin_usrreqs,
	.pr_ctloutput = sockin_ctloutput,
},
{
	.pr_type = SOCK_STREAM,
	.pr_domain = &sockin6domain,
	.pr_protocol = IPPROTO_TCP,
	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
	.pr_usrreqs = &sockin_usrreqs,
	.pr_ctloutput = sockin_ctloutput,
}};

struct domain sockindomain = {
	.dom_family = PF_INET,
	.dom_name = "socket_inet",
	.dom_init = sockin_init,
	.dom_externalize = NULL,
	.dom_dispose = NULL,
	.dom_protosw = sockinsw,
	.dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
	.dom_rtattach = rt_inithead,
	.dom_rtoffset = 32,
	.dom_maxrtkey = sizeof(struct sockaddr_in),
	.dom_ifattach = NULL,
	.dom_ifdetach = NULL,
	.dom_link = { NULL },
	.dom_mowner = MOWNER_INIT("",""),
	.dom_sockaddr_cmp = NULL
};
struct domain sockin6domain = {
	.dom_family = PF_INET6,
	.dom_name = "socket_inet6",
	.dom_init = sockin_init,
	.dom_externalize = NULL,
	.dom_dispose = NULL,
	.dom_protosw = sockin6sw,
	.dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
	.dom_rtattach = rt_inithead,
	.dom_rtoffset = 32,
	.dom_maxrtkey = sizeof(struct sockaddr_in6),
	.dom_ifattach = NULL,
	.dom_ifdetach = NULL,
	.dom_link = { NULL },
	.dom_mowner = MOWNER_INIT("",""),
	.dom_sockaddr_cmp = NULL
};

#define SO2S(so) ((intptr_t)(so->so_internal))
#define SOCKIN_SBSIZE 65536

struct sockin_unit {
	struct socket *su_so;

	LIST_ENTRY(sockin_unit) su_entries;
};
static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
static kmutex_t su_mtx;
static bool rebuild;
static int nsock;

/* XXX: for the bpf hack */
static struct ifnet sockin_if;
int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }

static int
registersock(struct socket *so, int news)
{
	struct sockin_unit *su;

	su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
	if (!su)
		return ENOMEM;

	so->so_internal = (void *)(intptr_t)news;
	su->su_so = so;

	mutex_enter(&su_mtx);
	LIST_INSERT_HEAD(&su_ent, su, su_entries);
	nsock++;
	rebuild = true;
	mutex_exit(&su_mtx);

	return 0;
}

static void
removesock(struct socket *so)
{
	struct sockin_unit *su_iter;

	mutex_enter(&su_mtx);
	LIST_FOREACH(su_iter, &su_ent, su_entries) {
		if (su_iter->su_so == so)
			break;
	}
	if (!su_iter)
		panic("no such socket");

	LIST_REMOVE(su_iter, su_entries);
	nsock--;
	rebuild = true;
	mutex_exit(&su_mtx);

	rumpuser_close(SO2S(su_iter->su_so));
	kmem_free(su_iter, sizeof(*su_iter));
}

static void
sockin_process(struct socket *so)
{
	struct sockaddr_in6 from;
	struct iovec io;
	struct msghdr rmsg;
	struct mbuf *m;
	size_t n, plen;
	int error;

	m = m_gethdr(M_WAIT, MT_DATA);
	if (so->so_proto->pr_type == SOCK_DGRAM) {
		plen = IP_MAXPACKET;
		MEXTMALLOC(m, plen, M_DONTWAIT);
	} else {
		plen = MCLBYTES;
		MCLGET(m, M_DONTWAIT);
	}
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return;
	}

	memset(&rmsg, 0, sizeof(rmsg));
	io.iov_base = mtod(m, void *);
	io.iov_len = plen;
	rmsg.msg_iov = &io;
	rmsg.msg_iovlen = 1;
	rmsg.msg_name = (struct sockaddr *)&from;
	rmsg.msg_namelen = sizeof(from);

	error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
	if (error || n == 0) {
		m_freem(m);

		/* Treat a TCP socket a goner */
		if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
			mutex_enter(softnet_lock);
			soisdisconnected(so);
			mutex_exit(softnet_lock);
			removesock(so);
		}
		return;
	}
	m->m_len = m->m_pkthdr.len = n;

	bpf_mtap_af(&sockin_if, AF_UNSPEC, m, BPF_D_IN);

	mutex_enter(softnet_lock);
	if (so->so_proto->pr_type == SOCK_DGRAM) {
		if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
			m_freem(m);
		}
	} else {
		sbappendstream(&so->so_rcv, m);
	}

	sorwakeup(so);
	mutex_exit(softnet_lock);
}

static void
sockin_waccept(struct socket *so)
{
	struct socket *nso;
	struct sockaddr_in6 sin;
	int news, error, slen;

	slen = sizeof(sin);
	error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
	    &slen, &news);
	if (error)
		return;

	mutex_enter(softnet_lock);
	nso = sonewconn(so, true);
	if (nso == NULL)
		goto errout;
	if (registersock(nso, news) != 0)
		goto errout;
	mutex_exit(softnet_lock);
	return;

 errout:
	rumpuser_close(news);
	if (nso)
		soclose(nso);
	mutex_exit(softnet_lock);
}

#define POLLTIMEOUT 100	/* check for new entries every 100ms */

/* XXX: doesn't handle socket (kernel) locking properly? */
static void
sockinworker(void *arg)
{
	struct pollfd *pfds = NULL, *npfds;
	struct sockin_unit *su_iter;
	struct socket *so;
	int cursock = 0, i, rv, error;

	/*
	 * Loop reading requests.  Check for new sockets periodically
	 * (could be smarter, but I'm lazy).
	 */
	for (;;) {
		if (rebuild) {
			npfds = NULL;
			mutex_enter(&su_mtx);
			if (nsock)
				npfds = kmem_alloc(nsock * sizeof(*npfds),
				    KM_NOSLEEP);
			if (npfds || nsock == 0) {
				if (pfds)
					kmem_free(pfds, cursock*sizeof(*pfds));
				pfds = npfds;
				cursock = nsock;
				rebuild = false;

				i = 0;
				LIST_FOREACH(su_iter, &su_ent, su_entries) {
					pfds[i].fd = SO2S(su_iter->su_so);
					pfds[i].events = POLLIN;
					pfds[i].revents = 0;
					i++;
				}
				KASSERT(i == nsock);
			}
			mutex_exit(&su_mtx);
		}

		/* find affected sockets & process */
		error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
		for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
			if (pfds[i].revents & POLLIN) {
				mutex_enter(&su_mtx);
				LIST_FOREACH(su_iter, &su_ent, su_entries) {
					if (SO2S(su_iter->su_so)==pfds[i].fd) {
						so = su_iter->su_so;
						mutex_exit(&su_mtx);
						if(so->so_options&SO_ACCEPTCONN)
							sockin_waccept(so);
						else
							sockin_process(so);
						mutex_enter(&su_mtx);
						break;
					}
				}
				/* if we can't find it, just wing it */
				KASSERT(rebuild || su_iter);
				mutex_exit(&su_mtx);
				pfds[i].revents = 0;
				rv--;
				i = -1;
				continue;
			}

			/* something else?  ignore */
			if (pfds[i].revents) {
				pfds[i].revents = 0;
				rv--;
			}
		}
		KASSERT(rv <= 0);
	}

}

static int
sockin_do_init(void)
{
	int rv;

	if (rump_threads) {
		if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
		    NULL, NULL, "sockwork")) != 0)
			panic("sockin_init: could not create worker thread\n");
	} else {
		printf("sockin_init: no threads => no worker thread\n");
	}
	mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
	strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
	bpf_attach(&sockin_if, DLT_NULL, 0);
	return 0;
}

static void
sockin_init(void)
{
	static ONCE_DECL(init);

	RUN_ONCE(&init, sockin_do_init);
}

static int
sockin_attach(struct socket *so, int proto)
{
	const int type = so->so_proto->pr_type;
	int error, news, family;

	sosetlock(so);
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
		if (error)
			return error;
	}

	family = so->so_proto->pr_domain->dom_family;
	KASSERT(family == PF_INET || family == PF_INET6);
	error = rumpcomp_sockin_socket(family, type, 0, &news);
	if (error)
		return error;

	/* For UDP sockets, make sure we can send/recv maximum. */
	if (type == SOCK_DGRAM) {
		int sbsize = SOCKIN_SBSIZE;
		error = rumpcomp_sockin_setsockopt(news,
		    SOL_SOCKET, SO_SNDBUF,
		    &sbsize, sizeof(sbsize));
		sbsize = SOCKIN_SBSIZE;
		error = rumpcomp_sockin_setsockopt(news,
		    SOL_SOCKET, SO_RCVBUF,
		    &sbsize, sizeof(sbsize));
	}

	if ((error = registersock(so, news)) != 0)
		rumpuser_close(news);

	return error;
}

static void
sockin_detach(struct socket *so)
{
	panic("sockin_detach: IMPLEMENT ME\n");
}

static int
sockin_accept(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	/* we do all the work in the worker thread */
	return 0;
}

static int
sockin_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	KASSERT(solocked(so));
	KASSERT(nam != NULL);

	return rumpcomp_sockin_bind(SO2S(so), nam, nam->sa_len);
}

static int
sockin_listen(struct socket *so, struct lwp *l)
{
	KASSERT(solocked(so));

	return rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
}

static int
sockin_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	int error = 0;

	KASSERT(solocked(so));
	KASSERT(nam != NULL);

	error = rumpcomp_sockin_connect(SO2S(so), nam, nam->sa_len);
	if (error == 0)
		soisconnected(so);

	return error;
}

static int
sockin_connect2(struct socket *so, struct socket *so2)
{
	KASSERT(solocked(so));

	panic("sockin_connect2: IMPLEMENT ME, connect2 not supported");
}

static int
sockin_disconnect(struct socket *so)
{
	KASSERT(solocked(so));

	panic("sockin_disconnect: IMPLEMENT ME, disconnect not supported");
}

static int
sockin_shutdown(struct socket *so)
{
	KASSERT(solocked(so));

	removesock(so);
	return 0;
}

static int
sockin_abort(struct socket *so)
{
	KASSERT(solocked(so));

	panic("sockin_abort: IMPLEMENT ME, abort not supported");
}

static int
sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
{
	return ENOTTY;
}

static int
sockin_stat(struct socket *so, struct stat *ub)
{
	KASSERT(solocked(so));

	return 0;
}

static int
sockin_peeraddr(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	int error = 0;
	int slen = nam->sa_len;

	error = rumpcomp_sockin_getname(SO2S(so),
	    nam, &slen, RUMPCOMP_SOCKIN_PEERNAME);
	if (error == 0)
		nam->sa_len = slen;
	return error;
}

static int
sockin_sockaddr(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	int error = 0;
	int slen = nam->sa_len;

	error = rumpcomp_sockin_getname(SO2S(so),
	    nam, &slen, RUMPCOMP_SOCKIN_SOCKNAME);
	if (error == 0)
		nam->sa_len = slen;
	return error;
}

static int
sockin_rcvd(struct socket *so, int flags, struct lwp *l)
{
	KASSERT(solocked(so));

	panic("sockin_rcvd: IMPLEMENT ME, rcvd not supported");
}

static int
sockin_recvoob(struct socket *so, struct mbuf *m, int flags)
{
	KASSERT(solocked(so));

	panic("sockin_recvoob: IMPLEMENT ME, recvoob not supported");
}

static int
sockin_send(struct socket *so, struct mbuf *m, struct sockaddr *saddr,
    struct mbuf *control, struct lwp *l)
{
	struct msghdr mhdr;
	size_t iov_max, i;
	struct iovec iov_buf[32], *iov;
	struct mbuf *m2;
	size_t tot, n;
	int error = 0;
	int s;

	bpf_mtap_af(&sockin_if, AF_UNSPEC, m, BPF_D_OUT);

	memset(&mhdr, 0, sizeof(mhdr));

	iov_max = 0;
	for (m2 = m; m2 != NULL; m2 = m2->m_next) {
		iov_max++;
	}

	if (iov_max <= __arraycount(iov_buf)) {
		iov = iov_buf;
	} else {
		iov = kmem_alloc(sizeof(struct iovec) * iov_max,
		    KM_SLEEP);
	}

	tot = 0;
	for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
		iov[i].iov_base = m2->m_data;
		iov[i].iov_len = m2->m_len;
		tot += m2->m_len;
	}
	mhdr.msg_iov = iov;
	mhdr.msg_iovlen = i;
	s = SO2S(so);

	if (saddr != NULL) {
		mhdr.msg_name = saddr;
		mhdr.msg_namelen = saddr->sa_len;
	}

	rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);

	if (iov != iov_buf)
		kmem_free(iov, sizeof(struct iovec) * iov_max);

	m_freem(m);
	m_freem(control);

	/* this assumes too many things to list.. buthey, testing */
	if (!rump_threads)
		sockin_process(so);

	return error;
}

static int
sockin_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{
	KASSERT(solocked(so));

	panic("sockin_sendoob: IMPLEMENT ME, sendoob not supported");
}

static int
sockin_purgeif(struct socket *so, struct ifnet *ifp)
{

	panic("sockin_purgeif: IMPLEMENT ME, purgeif not supported");
}

static int
sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
{

	return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
	    sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
}

int sockin_unavailable(void);
int
sockin_unavailable(void)
{

        panic("interface not available in with sockin");
}
__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
__strong_alias(rt_delete_matched_entries,sockin_unavailable);