/*	$NetBSD: nfs_boot.c,v 1.90 2024/07/05 04:31:54 rin Exp $	*/

/*-
 * Copyright (c) 1995, 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Adam Glass and Gordon W. Ross.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Support for NFS diskless booting, specifically getting information
 * about where to mount root from, what pathnames, etc.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_boot.c,v 1.90 2024/07/05 04:31:54 rin Exp $");

#ifdef _KERNEL_OPT
#include "opt_nfs.h"
#include "opt_tftproot.h"
#include "opt_nfs_boot.h"
#endif

#ifdef NFS_BOOT_TCP
#undef NFS_BOOT_UDP
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/mbuf.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <net/if.h>
#include <net/route.h>
#include <net/if_ether.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/if_inarp.h>

#include <nfs/rpcv2.h>
#include <nfs/krpc.h>
#include <nfs/xdr_subs.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsdiskless.h>

/*
 * There are three implementations of NFS diskless boot.
 * One implementation uses BOOTP (RFC951, RFC1048),
 * Sun RPC/bootparams or static configuration.  See the
 * files:
 *    nfs_bootdhcp.c:   BOOTP (RFC951, RFC1048)
 *    nfs_bootparam.c:  Sun RPC/bootparams
 *    nfs_bootstatic.c: honour config(1) description
 */
#if defined(NFS_BOOT_BOOTP) || defined(NFS_BOOT_DHCP)
int nfs_boot_rfc951 = 1; /* BOOTP enabled (default) */
#endif
#ifdef NFS_BOOT_BOOTPARAM
int nfs_boot_bootparam = 1; /* BOOTPARAM enabled (default) */
#endif
#ifdef NFS_BOOT_BOOTSTATIC
int nfs_boot_bootstatic = 1; /* BOOTSTATIC enabled (default) */
#endif

#define IP_MIN_MTU 576

/* mountd RPC */
static int md_mount(struct sockaddr_in *mdsin, char *path,
	struct nfs_args *argp, struct lwp *l);

static int nfs_boot_delroute_matcher(struct rtentry *, void *);
static void nfs_boot_defrt(struct in_addr *);
static  int nfs_boot_getfh(struct nfs_dlmount *ndm, struct lwp *);


/*
 * Called with an empty nfs_diskless struct to be filled in.
 * Find an interface, determine its ip address (etc.) and
 * save all the boot parameters in the nfs_diskless struct.
 */
int
nfs_boot_init(struct nfs_diskless *nd, struct lwp *lwp)
{
	struct ifnet *ifp;
	int error = 0;
	int flags __unused;

	/* Explicitly necessary or build fails
	 * due to unused variable, otherwise.
	 */
	flags = 0;

	/*
	 * Find the network interface.
	 */
	ifp = ifunit(device_xname(root_device));
	if (ifp == NULL) {
		printf("nfs_boot: '%s' not found\n",
		       device_xname(root_device));
		return (ENXIO);
	}
	nd->nd_ifp = ifp;

	error = EADDRNOTAVAIL; /* ??? */
#if defined(NFS_BOOT_BOOTSTATIC)
	if (error && nfs_boot_bootstatic) {
		printf("nfs_boot: trying static\n");
		error = nfs_bootstatic(nd, lwp, &flags);
	}
#endif
#if defined(NFS_BOOT_BOOTP) || defined(NFS_BOOT_DHCP)
	if (error && nfs_boot_rfc951) {
#if defined(NFS_BOOT_DHCP)
		printf("nfs_boot: trying DHCP/BOOTP\n");
#else
		printf("nfs_boot: trying BOOTP\n");
#endif
		error = nfs_bootdhcp(nd, lwp, &flags);
	}
#endif
#ifdef NFS_BOOT_BOOTPARAM
	if (error && nfs_boot_bootparam) {
		printf("nfs_boot: trying RARP (and RPC/bootparam)\n");
		error = nfs_bootparam(nd, lwp, &flags);
	}
#endif
	if (error)
		return (error);

	/*
	 * Set MTU if passed
	 */
	if (nd->nd_mtu >= IP_MIN_MTU )
		nfs_boot_setmtu(nd->nd_ifp, nd->nd_mtu, lwp);
	
	/*
	 * If the gateway address is set, add a default route.
	 * (The mountd RPCs may go across a gateway.)
	 */
	if (nd->nd_gwip.s_addr)
		nfs_boot_defrt(&nd->nd_gwip);

#ifdef TFTPROOT
	if (nd->nd_nomount)
		goto out;
#endif
	/*
	 * Now fetch the NFS file handles as appropriate.
	 */
	error = nfs_boot_getfh(&nd->nd_root, lwp);

	if (error)
		nfs_boot_cleanup(nd, lwp);

#ifdef TFTPROOT
out:
#endif
	return (error);
}

void
nfs_boot_cleanup(struct nfs_diskless *nd, struct lwp *lwp)
{

	nfs_boot_deladdress(nd->nd_ifp, lwp, nd->nd_myip.s_addr);
	nfs_boot_ifupdown(nd->nd_ifp, lwp, 0);
	nfs_boot_flushrt(nd->nd_ifp);
}

int
nfs_boot_ifupdown(struct ifnet *ifp, struct lwp *lwp, int up)
{
	struct socket *so;
	struct ifreq ireq;
	int error;

	memset(&ireq, 0, sizeof(ireq));
	memcpy(ireq.ifr_name, ifp->if_xname, IFNAMSIZ);

	/*
	 * Get a socket to use for various things in here.
	 * After this, use "goto out" to cleanup and return.
	 */
	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
	if (error) {
		printf("ifupdown: socreate, error=%d\n", error);
		return (error);
	}

	/*
	 * Bring up the interface. (just set the "up" flag)
	 * Get the old interface flags and or IFF_UP into them so
	 * things like media selection flags are not clobbered.
	 */
	error = ifioctl(so, SIOCGIFFLAGS, (void *)&ireq, lwp);
	if (error) {
		printf("ifupdown: GIFFLAGS, error=%d\n", error);
		goto out;
	}
	if (up)
		ireq.ifr_flags |= IFF_UP;
	else
		ireq.ifr_flags &= ~IFF_UP;
	error = ifioctl(so, SIOCSIFFLAGS, &ireq, lwp);
	if (error) {
		printf("ifupdown: SIFFLAGS, error=%d\n", error);
		goto out;
	}

	if (up)
		/* give the link some time to get up */
		tsleep(nfs_boot_ifupdown, PZERO, "nfsbif", 3 * hz);
out:
	soclose(so);
	return (error);
}

void
nfs_boot_setmtu(struct ifnet *ifp, int mtu, struct lwp *lwp)
{
	struct socket *so;
	struct ifreq ireq;
	int error;

	memset(&ireq, 0, sizeof(ireq));
	memcpy(ireq.ifr_name, ifp->if_xname, IFNAMSIZ);

	/*
	 * Get a socket to use for various things in here.
	 * After this, use "goto out" to cleanup and return.
	 */
	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
	if (error) {
		printf("setmtu: socreate, error=%d\n", error);
		return;
	}

	/*
	 * Get structure, set the new MTU, push structure.
	 */
	error = ifioctl(so, SIOCGIFMTU, (void *)&ireq, lwp);
	if (error) {
		printf("setmtu: GIFMTU, error=%d\n", error);
		goto out;
	}

	ireq.ifr_mtu = mtu;

	error = ifioctl(so, SIOCSIFMTU, &ireq, lwp);
	if (error) {
		printf("setmtu: SIFMTU, error=%d\n", error);
		goto out;
	}

out:
	soclose(so);
	return;
}

int
nfs_boot_setaddress(struct ifnet *ifp, struct lwp *lwp,
		uint32_t addr, uint32_t netmask, uint32_t braddr)
{
	struct socket *so;
	struct ifaliasreq iareq;
	struct sockaddr_in *sin;
	int error;

	/*
	 * Get a socket to use for various things in here.
	 * After this, use "goto out" to cleanup and return.
	 */
	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
	if (error) {
		printf("setaddress: socreate, error=%d\n", error);
		return (error);
	}

	memset(&iareq, 0, sizeof(iareq));
	memcpy(iareq.ifra_name, ifp->if_xname, IFNAMSIZ);

	/* Set the I/F address */
	sin = (struct sockaddr_in *)&iareq.ifra_addr;
	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = addr;

	/* Set the netmask */
	if (netmask != INADDR_ANY) {
		sin = (struct sockaddr_in *)&iareq.ifra_mask;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = netmask;
	} /* else leave subnetmask unspecified (len=0) */

	/* Set the broadcast addr. */
	if (braddr != INADDR_ANY) {
		sin = (struct sockaddr_in *)&iareq.ifra_broadaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = braddr;
	} /* else leave broadcast addr unspecified (len=0) */

	error = ifioctl(so, SIOCAIFADDR, (void *)&iareq, lwp);
	if (error) {
		printf("setaddress, error=%d\n", error);
		goto out;
	}

	/* give the link some time to get up */
	tsleep(nfs_boot_setaddress, PZERO, "nfsbtd", 3 * hz);
out:
	soclose(so);
	return (error);
}

int
nfs_boot_deladdress(struct ifnet *ifp, struct lwp *lwp, uint32_t addr)
{
	struct socket *so;
	struct ifreq ifr;
	struct sockaddr_in sin;
	struct in_addr ia = {.s_addr = addr};
	int error;

	/*
	 * Get a socket to use for various things in here.
	 * After this, use "goto out" to cleanup and return.
	 */
	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
	if (error) {
		printf("deladdress: socreate, error=%d\n", error);
		return (error);
	}

	memset(&ifr, 0, sizeof(ifr));
	memcpy(ifr.ifr_name, ifp->if_xname, IFNAMSIZ);

	sockaddr_in_init(&sin, &ia, 0);
	ifreq_setaddr(SIOCDIFADDR, &ifr, sintocsa(&sin));

	error = ifioctl(so, SIOCDIFADDR, &ifr, lwp);
	if (error) {
		printf("deladdress, error=%d\n", error);
		goto out;
	}

out:
	soclose(so);
	return (error);
}

int
nfs_boot_setrecvtimo(struct socket *so)
{
	struct timeval tv;

	tv.tv_sec = 1;
	tv.tv_usec = 0;

	return (so_setsockopt(NULL, so, SOL_SOCKET, SO_RCVTIMEO, &tv,
	    sizeof(tv)));
}

int
nfs_boot_enbroadcast(struct socket *so)
{
	int32_t on;

	on = 1;
	return (so_setsockopt(NULL, so, SOL_SOCKET, SO_BROADCAST, &on,
	    sizeof(on)));
}

int
nfs_boot_sobind_ipport(struct socket *so, uint16_t port, struct lwp *l)
{
	struct sockaddr_in sin;
	int error;

	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = INADDR_ANY;
	sin.sin_port = htons(port);
	error = sobind(so, (struct sockaddr *)&sin, l);
	return (error);
}

/*
 * What is the longest we will wait before re-sending a request?
 * Note this is also the frequency of "timeout" messages.
 * The re-send loop counts up linearly to this maximum, so the
 * first complaint will happen after (1+2+3+4+5)=15 seconds.
 */
#define	MAX_RESEND_DELAY 5	/* seconds */
#define TOTAL_TIMEOUT   30	/* seconds */

int
nfs_boot_sendrecv(struct socket *so, struct sockaddr_in *nam,
		int (*sndproc)(struct mbuf *, void *, int),
		struct mbuf *snd,
		int (*rcvproc)(struct mbuf **, void *),
		struct mbuf **rcv, struct mbuf **from_p,
		void *context, struct lwp *lwp)
{
	int error, rcvflg, timo, secs, waited;
	struct mbuf *m, *from;
	struct uio uio;

	/* Free at end if not null. */
	from = NULL;

	/*
	 * Send it, repeatedly, until a reply is received,
	 * but delay each re-send by an increasing amount.
	 * If the delay hits the maximum, start complaining.
	 */
	waited = timo = 0;
send_again:
	waited += timo;
	if (waited >= TOTAL_TIMEOUT)
		return (ETIMEDOUT);

	/* Determine new timeout. */
	if (timo < MAX_RESEND_DELAY)
		timo++;
	else
		printf("nfs_boot: timeout...\n");

	if (sndproc) {
		error = (*sndproc)(snd, context, waited);
		if (error)
			goto out;
	}

	/* Send request (or re-send). */
	m = m_copypacket(snd, M_WAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto out;
	}
	error = (*so->so_send)(so, (struct sockaddr *)nam, NULL,
	    m, NULL, 0, lwp);
	if (error) {
		printf("nfs_boot: sosend: %d\n", error);
		goto out;
	}
	m = NULL;

	/*
	 * Wait for up to timo seconds for a reply.
	 * The socket receive timeout was set to 1 second.
	 */

	secs = timo;
	for (;;) {
		m_freem(from);
		from = NULL;
		m_freem(m);
		m = NULL;
		uio.uio_resid = 1 << 16; /* ??? */
		rcvflg = 0;
		error = (*so->so_receive)(so, &from, &uio, &m, NULL, &rcvflg);
		if (error == EWOULDBLOCK) {
			if (--secs <= 0)
				goto send_again;
			continue;
		}
		if (error)
			goto out;
#ifdef DIAGNOSTIC
		if (!m || !(m->m_flags & M_PKTHDR)
		    || (1 << 16) - uio.uio_resid != m->m_pkthdr.len)
			panic("nfs_boot_sendrecv: return size");
#endif

		if ((*rcvproc)(&m, context))
			continue;

		if (rcv)
			*rcv = m;
		else
			m_freem(m);
		if (from_p) {
			*from_p = from;
			from = NULL;
		}
		break;
	}
out:
	m_freem(from);
	return (error);
}

/*
 * Install a default route to the passed IP address.
 */
static void
nfs_boot_defrt(struct in_addr *gw_ip)
{
	struct sockaddr dst, gw, mask;
	struct sockaddr_in *sin;
	int error;

	/* Destination: (default) */
	memset((void *)&dst, 0, sizeof(dst));
	dst.sa_len = sizeof(dst);
	dst.sa_family = AF_INET;
	/* Gateway: */
	memset((void *)&gw, 0, sizeof(gw));
	sin = (struct sockaddr_in *)&gw;
	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = gw_ip->s_addr;
	/* Mask: (zero length) */
	/* XXX - Just pass a null pointer? */
	memset(&mask, 0, sizeof(mask));

	/* add, dest, gw, mask, flags, 0 */
	error = rtrequest(RTM_ADD, &dst, &gw, &mask,
			  (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL);
	if (error) {
		printf("nfs_boot: add route, error=%d\n", error);
		error = 0;
	}
}

static int
nfs_boot_delroute_matcher(struct rtentry *rt, void *w)
{

	if ((void *)rt->rt_ifp != w)
		return 0;

	return 1;
}

void
nfs_boot_flushrt(struct ifnet *ifp)
{

	rt_delete_matched_entries(AF_INET, nfs_boot_delroute_matcher, ifp, false);
}

/*
 * Get an initial NFS file handle using Sun RPC/mountd.
 * Separate function because we used to call it twice.
 * (once for root and once for swap)
 *
 * ndm  output
 */
static int
nfs_boot_getfh(struct nfs_dlmount *ndm, struct lwp *l)
{
	struct nfs_args *args;
	struct sockaddr_in *sin;
	char *pathname;
	int error;
	u_int16_t port;

	args = &ndm->ndm_args;

	/* Initialize mount args. */
	memset((void *) args, 0, sizeof(*args));
	args->addr     = &ndm->ndm_saddr;
	args->addrlen  = args->addr->sa_len;
#ifdef NFS_BOOT_UDP
	args->sotype   = SOCK_DGRAM;
#else
	args->sotype   = SOCK_STREAM;
#endif
	args->fh       = ndm->ndm_fh;
	args->hostname = ndm->ndm_host;
	args->flags    = NFSMNT_NOCONN | NFSMNT_RESVPORT;

#ifndef NFS_V2_ONLY
	args->flags    |= NFSMNT_NFSV3;
#endif
#ifdef	NFS_BOOT_OPTIONS
	args->flags    |= NFS_BOOT_OPTIONS;
#endif
#ifdef	NFS_BOOT_RWSIZE
	/*
	 * Reduce rsize,wsize for interfaces that consistently
	 * drop fragments of long UDP messages.  (i.e. wd8003).
	 * You can always change these later via remount.
	 */
	args->flags   |= NFSMNT_WSIZE | NFSMNT_RSIZE;
	args->wsize    = NFS_BOOT_RWSIZE;
	args->rsize    = NFS_BOOT_RWSIZE;
#endif

	/*
	 * Find the pathname part of the "server:pathname"
	 * string left in ndm->ndm_host by nfs_boot_init.
	 */
	pathname = strchr(ndm->ndm_host, ':');
	if (pathname == 0) {
		printf("nfs_boot: getfh - no pathname\n");
		return (EIO);
	}
	pathname++;

	/*
	 * Get file handle using RPC to mountd/mount
	 */
	sin = (struct sockaddr_in *)&ndm->ndm_saddr;
	error = md_mount(sin, pathname, args, l);
	if (error) {
		printf("nfs_boot: mountd `%s', error=%d\n",
		       ndm->ndm_host, error);
		return (error);
	}

	/* Set port number for NFS use. */
	/* XXX: NFS port is always 2049, right? */
retry:
	error = krpc_portmap(sin, NFS_PROG,
		    (args->flags & NFSMNT_NFSV3) ? NFS_VER3 : NFS_VER2,
		    (args->sotype == SOCK_STREAM) ? IPPROTO_TCP : IPPROTO_UDP,
		    &port, l);
	if (port == htons(0))
		error = EIO;
	if (error) {
		if (args->sotype == SOCK_STREAM) {
			args->sotype = SOCK_DGRAM;
			goto retry;
		}
		printf("nfs_boot: portmap NFS, error=%d\n", error);
		return (error);
	}
	sin->sin_port = port;
	return (0);
}


/*
 * RPC: mountd/mount
 * Given a server pathname, get an NFS file handle.
 * Also, sets sin->sin_port to the NFS service port.
 *
 * mdsin   mountd server address
 */
static int
md_mount(struct sockaddr_in *mdsin, char *path,
	 struct nfs_args *argp, struct lwp *lwp)
{
	/* The RPC structures */
	struct rdata {
		u_int32_t errno;
		union {
			u_int8_t  v2fh[NFSX_V2FH];
			struct {
				u_int32_t fhlen;
				u_int8_t  fh[1];
			} v3fh;
		} fh;
	} *rdata;
	struct mbuf *m;
	u_int8_t *fh;
	int minlen, error;
	int mntver;

	mntver = (argp->flags & NFSMNT_NFSV3) ? 3 : 2;
	do {
		/*
		 * Get port number for MOUNTD.
		 */
		error = krpc_portmap(mdsin, RPCPROG_MNT, mntver,
		                    IPPROTO_UDP, &mdsin->sin_port, lwp);
		if (error)
			continue;

		/* This mbuf is consumed by krpc_call. */
		m = xdr_string_encode(path, strlen(path));
		if (m == NULL)
			return ENOMEM;

		/* Do RPC to mountd. */
		error = krpc_call(mdsin, RPCPROG_MNT, mntver,
		                  RPCMNT_MOUNT, &m, NULL, lwp);
		if (error != EPROGMISMATCH)
			break;
		/* Try lower version of mountd. */
	} while (--mntver >= 1);
	if (error) {
		printf("nfs_boot: mountd error=%d\n", error);
		return error;
	}
	if (mntver != 3)
		argp->flags &= ~NFSMNT_NFSV3;

	/* The reply might have only the errno. */
	if (m->m_len < 4)
		goto bad;
	/* Have at least errno, so check that. */
	rdata = mtod(m, struct rdata *);
	error = fxdr_unsigned(u_int32_t, rdata->errno);
	if (error)
		goto out;

	/* Have errno==0, so the fh must be there. */
	if (mntver == 3) {
		argp->fhsize   = fxdr_unsigned(u_int32_t, rdata->fh.v3fh.fhlen);
		if (argp->fhsize > NFSX_V3FHMAX)
			goto bad;
		minlen = 2 * sizeof(u_int32_t) + argp->fhsize;
	} else {
		argp->fhsize   = NFSX_V2FH;
		minlen = sizeof(u_int32_t) + argp->fhsize;
	}

	if (m->m_len < minlen) {
		m = m_pullup(m, minlen);
		if (m == NULL)
			return(EBADRPC);
		rdata = mtod(m, struct rdata *);
	}

	fh = (mntver == 3) ?
		rdata->fh.v3fh.fh : rdata->fh.v2fh;
	memcpy(argp->fh, fh, argp->fhsize);

	goto out;

bad:
	error = EBADRPC;

out:
	m_freem(m);
	return error;
}