Source to kern/netport_tcp.c
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
* Reserved. This file contains Original Code and/or Modifications of
* Original Code as defined in and that are subject to the Apple Public
* Source License Version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. Please obtain a copy of the
* License at http://www.apple.com/publicsource and read it before using
* this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
* License for the specific language governing rights and limitations
* under the License."
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
* Mach Operating System
* Copyright (c) 1989 Carnegie-Mellon University
* Copyright (c) 1988 Carnegie-Mellon University
* Copyright (c) 1987 Carnegie-Mellon University
* All rights reserved. The CMU software License Agreement specifies
* the terms and conditions for use and redistribution.
*/
/*
* File: netport_tcp.c
* Purpose:
* Front-end to the TCP system for the netport system
* implementing network IPC in the kernel.
*/
/*
* This module is copied from the TCP transport module of the user-state
* network server, with only a minimum of changes.
*/
#import <sys/types.h>
#import <sys/socket.h>
#import <sys/socketvar.h>
#import <netinet/in.h>
#import <mach/kern_return.h>
#import <mach/port.h>
#import <kern/queue.h>
#import <kern/lock.h>
#import <kern/thread.h>
#import <kern/task.h>
#import <kern/ipc_netport.h>
#import <kern/kern_msg.h>
#import <kern/zalloc.h>
#import <sys/param.h>
#import <kern/xpr.h>
#import <mach/vm_param.h>
#ifndef NULL
#define NULL 0
#endif /* NULL */
/*
 * Debugger(s) is the hook invoked by ERROR() when NP_DEBUG is set.
 * On NeXT it expands to panic(s) and queue_enter_first is mapped onto
 * queue_enter_head; otherwise it expands to kdb_kintr().
 */
#if NeXT
#define Debugger(s) panic(s)
#define queue_enter_first queue_enter_head
#else /* NeXT */
#define Debugger(s) kdb_kintr()
#endif /* NeXT */
/*
 * Definitions for compatibility with the network server coding conventions
 * and debugging mechanism.
 *
 * PRIVATE/PUBLIC/EXPORT expand to nothing.  BEGIN/END open and close a
 * function body (the name given to BEGIN is discarded).  RETURN and RET
 * are plain return statements.
 */
#define PRIVATE /**/
#define PUBLIC /**/
#define EXPORT /**/
#define BEGIN(name) {
#define END }
#define RETURN(val) return (val)
#define RET return
/*
 * DEBUGn: the first two arguments are ignored; the third argument and the
 * parameters that follow are handed to XPR under XPR_NPTCP, padded with
 * zeros to five parameters.  DEBUG6 forwards only p1..p5; p6 is dropped.
 */
#define DEBUG0(a,b,c) XPR(XPR_NPTCP,(c,0,0,0,0,0))
#define DEBUG1(a,b,c,p1) XPR(XPR_NPTCP,(c,p1,0,0,0,0))
#define DEBUG2(a,b,c,p1,p2) XPR(XPR_NPTCP,(c,p1,p2,0,0,0))
#define DEBUG3(a,b,c,p1,p2,p3) XPR(XPR_NPTCP,(c,p1,p2,p3,0,0))
#define DEBUG4(a,b,c,p1,p2,p3,p4) XPR(XPR_NPTCP,(c,p1,p2,p3,p4,0))
#define DEBUG5(a,b,c,p1,p2,p3,p4,p5) XPR(XPR_NPTCP,(c,p1,p2,p3,p4,p5))
#define DEBUG6(a,b,c,p1,p2,p3,p4,p5,p6) XPR(XPR_NPTCP,(c,p1,p2,p3,p4,p5))
/* Statistics counters are compiled out in this module. */
#define INCSTAT(s) /**/
/*
 * The network server code refers to "msg" and "errno"; here they are
 * redirected to the file-local np_msg buffer and np_errno variable.
 */
#define msg np_msg
#define errno np_errno
/*
 * ERROR(fmt): fmt is a parenthesized np_printf argument list.  The message
 * is printed and, when NP_DEBUG is set in np_flags, Debugger("NP") is
 * invoked.
 */
#define ERROR(fmt) { \
np_printf fmt; \
if (np_flags & NP_DEBUG) \
Debugger("NP"); \
}
static char np_msg[200];
static int np_errno;
/*
 * The network server's mutex and condition primitives are mapped onto
 * kernel simple locks and sleep/wakeup.
 */
#define mutex slock
#define mutex_lock(l) simple_lock(l)
#define mutex_unlock(l) simple_unlock(l)
#define mutex_init(l) simple_lock_init(l)
typedef int kern_cond_t;
#define condition_init(c) /**/
/*
 * condition_wait releases lock l, sleeps on the address c at priority
 * PZERO+1, and re-acquires l before continuing.
 */
#define condition_wait(c,l) { \
simple_unlock(l); \
sleep((caddr_t)c,PZERO+1); \
simple_lock(l); \
}
#define condition_signal(c) wakeup((caddr_t)c)
/* Task handed to kernel_thread() when not building for NeXT. */
#if !NeXT
extern task_t first_task;
#endif /* !NeXT */
/*
 * Macros to derive a TCP connection ID from a trid obtained from a client
 * and vice-versa.
 *
 * SET_TCPID copies the v1 member of the trid into tcpid; SET_TRID stores
 * tcpid into the v1 member of the trid.  Both expand to a braced block.
 */
#define SET_TCPID(tcpid,trid) { (tcpid) = (trid).v1; }
#define SET_TRID(trid,tcpid) { (trid).v1 = (tcpid); }
/*
 * Transaction records.
 *
 * One record is allocated from tcp_trans_zone for every request sent by
 * netport_tcp_sendrequest and is linked on the "trans" queue of the
 * connection it was issued on.  It is released when the matching reply
 * arrives or when the connection is closed.
 */
typedef struct tcp_trans {
int state; /* TCP_TR_* value, see defines below */
unsigned long trid; /* client-side ID assigned with TRID_SET_CLIENT */
int client_id; /* caller's identifier, handed back to netport_handle_rp */
kern_msg_t kmsg; /* message kept while the transaction is TCP_TR_WAITING */
int len; /* length of the data in kmsg (TCP_TR_WAITING only) */
int crypt_level; /* encryption level requested (TCP_TR_WAITING only) */
/* int (*reply_proc)(); */
queue_chain_t transq; /* list of pending/waiting transactions */
} tcp_trans_t, *tcp_trans_ptr_t;
#define TCP_TR_INVALID 0
#define TCP_TR_PENDING 1 /* awaiting a reply */
#define TCP_TR_WAITING 2 /* awaiting transmission */
/* Zone for tcp_trans_t records; created by zinit() in netport_tcp_init. */
zone_t tcp_trans_zone;
/*
 * Forward declarations.
 *
 * np_tcp_conn_handler is the entry point of the per-connection service
 * thread; it is passed to kernel_thread() before its definition.
 */
void np_tcp_conn_handler();
/*
 * TCP port to be used by the Mach netport service.
 * Both the listener (bind) and outgoing connections (connect) use it.
 */
#define TCP_NETMSG_PORT 2454
/*
 * Debugging flags.
 *
 * These are passed as the first argument of the DEBUGn macros, which
 * ignore that argument in this module.
 */
#define TCP_DBG_MAJOR (0x1) /* major events */
#define TCP_DBG_CRASH (0x2) /* host crashes */
#define TCP_DBG_VERBOSE (0x4) /* verbose output */
/*
 * Connection records.
 *
 * Records live in the conn_vec array.  Each initialized record is served
 * by its own thread running np_tcp_conn_handler, and sits either on the
 * conn_lru list (in use) or on the conn_free list (idle).
 */
typedef struct tcp_conn {
int state; /* TCP_* value, see defines below */
struct socket *sock; /* socket structure */
thread_t th; /* service thread */
struct mutex lock; /* lock for this record */
kern_cond_t cond; /* to wake up the service thread */
netaddr_t dest; /* peer for current connection */
queue_head_t trans; /* list of pending/waiting transactions */
int count; /* number of pending/waiting trans */
queue_chain_t connq; /* list of records */
unsigned long incarn; /* incarnation number */
tcp_ctl_t ctlbuf; /* for xmit control header */
} tcp_conn_t, *tcp_conn_ptr_t;
/*
 * Connection states:
 *   TCP_INVALID    slot in conn_vec not yet set up by np_tcp_init_conn
 *   TCP_FREE       record set up but idle; its thread waits on cond
 *   TCP_CONNECTED  socket established, traffic may flow
 *   TCP_OPENING    local open requested by netport_tcp_sendrequest
 *   TCP_CLOSING    a CLOSEREQ has been sent by np_tcp_close_conn
 *   TCP_CLOSED     a CLOSEREQ was received and answered with CLOSEREP
 */
#define TCP_INVALID 0
#define TCP_FREE 1
#define TCP_CONNECTED 2
#define TCP_OPENING 3
#define TCP_CLOSING 4
#define TCP_CLOSED 5
/*
 * Static declarations.
 *
 * conn_num counts the records on conn_lru; conn_closing counts those of
 * them in state TCP_CLOSING.  conn_cond is signalled by
 * np_tcp_conn_handler_close so that the listener resumes accepting once
 * conn_num drops below TCP_CONN_MAX.
 */
PRIVATE tcp_conn_t conn_vec[32]; /* connection records */
PRIVATE queue_head_t conn_lru; /* LRU list of active conn */
PRIVATE int conn_num; /* number of active conn */
PRIVATE queue_head_t conn_free; /* list of free conn */
PRIVATE kern_cond_t conn_cond; /* to wake up listener */
PRIVATE int conn_closing; /* number of conn in TCP_CLOSING */
PRIVATE struct mutex conn_lock; /* lock for conn_lru & conn_free */
/*
 * Transport IDs are composed of 16 bits for the client side and 16 bits
 * for the server side. The client side is just a counter, to be matched
 * between the message and the transaction record. The server side is
 * composed of 8 bits of index of the connection record in the conn_vec
 * array and 8 bits of incarnation number for this connection record.
 *
 * We can afford not to protect the counter for client-side IDs with a lock,
 * because transaction records for one connection are protected by the lock
 * of that connection, and they never move from one connection to another.
 *
 * XXX This is not completely foolproof if there is A LOT of traffic,
 * but it's cheap.
 */
PRIVATE unsigned long trid_counter;
/*
 * cptoix(cp)  -- index of connection record cp within conn_vec.
 *                Subtracting two tcp_conn_t pointers already yields an
 *                element count, so the result must NOT be divided by the
 *                record size (doing so collapsed every index to 0).
 * ixtocp(id)  -- connection record at index id of conn_vec.
 *
 * TRID_SET_CLIENT(trid)    -- trid = next client counter value (16 bits).
 * TRID_GET_CLIENT(trid,cl) -- cl = low 16 bits of trid.
 * TRID_SET_SERVER(trid,sv) -- OR into trid the index of sv (bits 24 and
 *                             up) and its incarnation number (from bit 16).
 * TRID_GET_SERVER(trid,sv) -- sv = record named by bits 24 and up of trid,
 *                             or NULL when bits 16-23 do not match that
 *                             record's current incarnation number.
 */
#define cptoix(cp) ((unsigned long)((cp) - conn_vec))
#define ixtocp(id) ((tcp_conn_ptr_t)&(conn_vec[(id)]))
#define TRID_SET_CLIENT(trid) { (trid) = (trid_counter++) & 0xffff; }
#define TRID_GET_CLIENT(trid,cl) { (cl) = (trid) & 0xffff; }
#define TRID_SET_SERVER(trid,sv) { (trid) |= \
(cptoix(sv) << 24) | ((sv)->incarn << 16);}
#define TRID_GET_SERVER(trid,sv) { (sv) = ixtocp((trid) >> 24); \
if ((((trid) >> 16) & 0xff) != (sv)->incarn) (sv) = NULL; }
/*
 * Limits on connected sockets.
 *
 * TCP_CONN_STEADY:  when (conn_num - conn_closing) exceeds this value,
 *                   np_tcp_close_conn is called to retire an idle
 *                   connection.
 * TCP_CONN_OPENING: netport_tcp_sendrequest opens a new connection only
 *                   while conn_num is below this value; otherwise it
 *                   returns TR_OVERLOAD.
 * TCP_CONN_MAX:     np_tcp_listener stops accepting while conn_num is at
 *                   or above this value.
 */
#define TCP_CONN_STEADY 6 /* steady-state max [6] */
#define TCP_CONN_OPENING 8 /* max open/opening [8] */
#define TCP_CONN_MAX 10 /* absolute maximum [10] */
/*
 * Zone for kmsg's to be used for incoming messages.
 *
 * DATA_SIZE_MAX is the number of bytes the receive loop allows itself to
 * store into a kmsg starting at its tcp_ctl member.
 */
extern zone_t netport_kmsg_zone;
#define DATA_SIZE_MAX \
(NETPORT_MSG_SIZE_MAX \
- sizeof(struct KMsg) \
+ sizeof(tcp_ctl_t) \
+ sizeof(ipc_network_hdr_t) \
+ sizeof(msg_header_t))
/*
 * Macro for transmission of a simple control message.
 *
 * Fills in cp->ctlbuf (ctl, trid and code in network byte order; size and
 * crypt_level zero) and sends sizeof(tcp_ctl_t) bytes of it on cp->sock
 * with mach_tcp_send.  The value returned by mach_tcp_send is stored in
 * ret.  The macro arguments may be evaluated more than once.
 *
 * cp->lock must be held throughout.
 */
#define tcp_xmit_control(cp,ctlcode,a_trid,a_code,ret) { \
int b_len; \
\
(cp)->ctlbuf.ctl = htonl(ctlcode); \
(cp)->ctlbuf.trid = htonl(a_trid); \
(cp)->ctlbuf.code = htonl(a_code); \
(cp)->ctlbuf.size = 0; \
(cp)->ctlbuf.crypt_level = 0; \
b_len = sizeof(tcp_ctl_t); \
ret = mach_tcp_send(PORT_NULL,(cp)->sock, \
&((cp)->ctlbuf),&b_len,0); \
INCSTAT(tcp_send); \
DEBUG6(TCP_DBG_VERBOSE,0,2803,cp,ctlcode,a_trid, \
a_code,ret,errno); \
}
/*
 * Macro for transmission of data.
 *
 * When a_kmsg is non-null, its tcp_ctl header is filled in (all fields in
 * network byte order) and sizeof(tcp_ctl_t) + a_len bytes starting at that
 * header are sent on cp->sock with a single mach_tcp_send call.  When
 * a_kmsg is null, the request degenerates to tcp_xmit_control.  In both
 * cases the value returned by mach_tcp_send is stored in ret.
 *
 * NOTE(review): sending a_len bytes from &a_kmsg->tcp_ctl presumes the
 * message data directly follows tcp_ctl in the kmsg -- confirm against the
 * kern_msg layout.
 *
 * cp->lock must be held throughout.
 */
#define tcp_xmit_data(cp,ctlcode,a_trid,a_code,a_kmsg,a_len,a_crypt,ret) { \
int b_len; \
\
if (a_kmsg) { \
(a_kmsg)->tcp_ctl.ctl = htonl(ctlcode); \
(a_kmsg)->tcp_ctl.trid = htonl(a_trid); \
(a_kmsg)->tcp_ctl.code = htonl(a_code); \
(a_kmsg)->tcp_ctl.size = htonl(a_len); \
(a_kmsg)->tcp_ctl.crypt_level = htonl(a_crypt); \
\
DEBUG6(TCP_DBG_VERBOSE,0,2800,cp,ctlcode,a_trid, \
a_code,&a_kmsg,a_crypt); \
\
/* \
* XXX Worry about data encryption. \
*/ \
\
/* \
* Send everything in one pass. \
*/ \
b_len = sizeof(tcp_ctl_t) + (a_len); \
ret = mach_tcp_send(PORT_NULL,(cp)->sock, \
&((a_kmsg)->tcp_ctl),&b_len,0); \
INCSTAT(tcp_send); \
DEBUG3(TCP_DBG_VERBOSE,0,2801,b_len,ret,errno); \
} else { \
tcp_xmit_control((cp),(ctlcode),(a_trid),(a_code),(ret)); \
} \
}
/*
 * np_printf --
 *
 * Special version of printf to avoid using sprintf in ERROR.
 *
 * Parameters:
 *
 * msg		: unused; present so that ERROR can be written in the
 *		  network server's (buffer,format,args...) form
 * fmt		: printf format string
 * p1 .. p6	: up to six integer-sized arguments for the format
 *
 * Results:
 *
 * Always 0.  The return type is spelled out (it used to be implicit
 * int with no value returned).
 *
 * Side effects:
 *
 * Prints the formatted message followed by a newline.
 */
int np_printf(msg,fmt,p1,p2,p3,p4,p5,p6)
char	*msg;
char	*fmt;
int	p1;
int	p2;
int	p3;
int	p4;
int	p5;
int	p6;
{
	printf(fmt,p1,p2,p3,p4,p5,p6);
	printf("\n");
	return 0;
}
/*
 * np_tcp_init_conn --
 *
 * Allocate and initialize a new TCP connection record.
 *
 * Parameters:
 *
 * none.
 *
 * Results:
 *
 * pointer to the new record, in state TCP_FREE.  Panics when every
 * slot of conn_vec is already in use.
 *
 * Side effects:
 *
 * Starts a new thread (np_tcp_conn_handler) to handle the connection.
 * The thread is not told which record it serves; it claims the record
 * whose "th" field is still NULL once it obtains conn_lock.
 *
 * Note:
 *
 * conn_lock must be acquired before calling this routine.
 * It is held throughout its execution.
 */
PRIVATE tcp_conn_ptr_t np_tcp_init_conn()
BEGIN("np_tcp_init_conn")
	tcp_conn_ptr_t	cp;
	int		i;

	/*
	 * Find an unused connection record in the conn_vec array.
	 * We could have used the global memory allocator for that,
	 * but since there are few connection records, why bother...
	 *
	 * conn_lock guarantees mutual exclusion.
	 */
	cp = NULL;
	for (i = 0; i < (int)(sizeof(conn_vec) / sizeof(conn_vec[0])); i++) {
		if (conn_vec[i].state == TCP_INVALID) {
			cp = &conn_vec[i];
			break;
		}
	}
	if (cp == NULL) {
		panic("The TCP module cannot allocate a new connection record");
	}

	cp->state = TCP_FREE;
	cp->sock = 0;
	cp->count = 0;
	cp->dest = 0;
	mutex_init(&cp->lock);
	mutex_lock(&cp->lock);
	condition_init(&cp->cond);
	queue_init(&cp->trans);
	/* Left NULL so that the new handler thread can recognize its record. */
	cp->th = NULL;
#if NeXT
	(void) kernel_thread(kernel_task,np_tcp_conn_handler);
#else /* NeXT */
	(void) kernel_thread(first_task,np_tcp_conn_handler);
#endif /* NeXT */
	DEBUG2(TCP_DBG_MAJOR,0,2805,cp,cp->th);
	mutex_unlock(&cp->lock);

	RETURN(cp);
END
/*
* np_tcp_close_conn --
*
* Arrange to close down one TCP connection as soon as possible.
*
* Parameters:
*
* Results:
*
* Side effects:
*
* Note:
*
* conn_lock must be acquired before calling this routine.
* It is held throughout its execution.
*/
PRIVATE void np_tcp_close_conn()
BEGIN("np_tcp_close_conn")
tcp_conn_ptr_t first;
tcp_conn_ptr_t cp;
kern_return_t ret;
/*
* Look for an old connection to recycle.
*/
first = (tcp_conn_ptr_t)queue_first(&conn_lru);
cp = (tcp_conn_ptr_t)queue_last(&conn_lru);
while (cp != first) {
if (cp->count == 0) {
mutex_lock(&cp->lock);
if ((cp->count == 0) && (cp->state == TCP_CONNECTED)) {
break;
} else {
mutex_unlock(&cp->lock);
}
}
cp = (tcp_conn_ptr_t)queue_prev(&cp->connq);
}
if (cp == first) {
/*
* We are over-committed. We will try again
* to close something at the next request or
* reply.
*
* XXX We could also set a timer to kill someone at
* random, to give new clients a chance.
*/
DEBUG2(TCP_DBG_MAJOR,0,2838,conn_num,conn_closing);
} else {
/*
* Close this unused connection.
*/
DEBUG4(TCP_DBG_MAJOR,0,2839,cp,cp->dest,conn_num,conn_closing);
cp->state = TCP_CLOSING;
conn_closing++;
tcp_xmit_control(cp,TCP_CTL_CLOSEREQ,0,0,ret);
mutex_unlock(&cp->lock);
}
RET;
END
/*
 * netport_tcp_sendrequest --
 *
 * Send a request through the TCP interface.
 *
 * Parameters:
 *
 * client_id : an identifier assigned by the client to this transaction
 * kmsg : the data to be sent
 * len : the length of the data in kmsg
 * to : the destination of the request
 * crypt_level : whether the data should be encrypted
 *
 * Results:
 *
 * TR_SUCCESS when the request was sent or queued for transmission,
 * TR_OVERLOAD when no connection to the destination exists and too many
 * connections are already open, TR_FAILURE when the send failed.
 *
 * Side effects:
 *
 * May take a connection record from the free list (or initialize a new
 * one) and wake its service thread to open a connection.  Allocates a
 * transaction record and links it on the connection.  May ask for an
 * idle connection to be closed.
 *
 * Design:
 *
 * The connection record is looked up under conn_lock.  From the moment
 * the record is chosen until the transaction is linked on it, cp->lock
 * is held on every path; this includes the path that starts a new
 * connection, which previously dropped the lock too early and then
 * unlocked it a second time on exit.
 *
 * Note:
 *
 */
EXPORT int netport_tcp_sendrequest(client_id,kmsg,len,to,crypt_level)
int		client_id;
kern_msg_t	kmsg;
int		len;
netaddr_t	to;
int		crypt_level;
BEGIN("netport_tcp_sendrequest")
	tcp_conn_ptr_t	first;
	tcp_conn_ptr_t	cp;
	tcp_trans_ptr_t	tp;
	kern_return_t	ret;

	mutex_lock(&conn_lock);
	DEBUG4(TCP_DBG_VERBOSE,0,2837,to,client_id,conn_num,conn_closing);
	INCSTAT(tcp_requests_sent);

	/*
	 * Find an open connection to the destination.
	 */
	first = (tcp_conn_ptr_t)queue_first(&conn_lru);
	cp = first;
	while (!queue_end(&conn_lru,(queue_entry_t)cp)) {
		if (cp->dest == to) {
			break;
		}
		cp = (tcp_conn_ptr_t)queue_next(&cp->connq);
	}

	if (queue_end(&conn_lru,(queue_entry_t)cp)) {
		/*
		 * Could not find an open connection.
		 */
		if (conn_num < TCP_CONN_OPENING) {
			/*
			 * Immediately start a new connection.
			 */
			if (queue_empty(&conn_free)) {
				/*
				 * Initialize a new connection record.
				 */
				cp = np_tcp_init_conn();
			} else {
				cp = (tcp_conn_ptr_t)queue_first(&conn_free);
				queue_remove(&conn_free,cp,
						tcp_conn_ptr_t,connq);
			}
			mutex_lock(&cp->lock);
			DEBUG2(TCP_DBG_MAJOR,0,2840,cp,to);
			queue_enter_first(&conn_lru,cp,tcp_conn_ptr_t,connq);
			conn_num++;
			cp->dest = to;
			cp->state = TCP_OPENING;
			cp->count = 1;
			/*
			 * cp->trans was re-initialized when the record was
			 * placed on the free list.
			 */
			condition_signal(&cp->cond);
			/*
			 * cp->lock stays held: the transaction is linked on
			 * the record below, and the service thread must not
			 * look at the queue before that.  np_tcp_close_conn
			 * never touches the record at the head of conn_lru,
			 * which is cp, so it cannot try to take this lock.
			 */
			if ((conn_num - conn_closing) > TCP_CONN_STEADY) {
				np_tcp_close_conn();
			}
			mutex_unlock(&conn_lock);
		} else {
			/*
			 * We are over-committed. Tell the caller to wait.
			 */
			DEBUG0(TCP_DBG_MAJOR,0,2841);
			if ((conn_num - conn_closing) > TCP_CONN_STEADY) {
				np_tcp_close_conn();
			}
			mutex_unlock(&conn_lock);
			RETURN(TR_OVERLOAD);
		}
	} else {
		/*
		 * Found an open connection. Use it!
		 */
		DEBUG2(TCP_DBG_VERBOSE,0,2842,cp,cp->dest);
		if (cp != first) {
			/*
			 * Place the record at the head of the queue.
			 */
			queue_remove(&conn_lru,cp,tcp_conn_ptr_t,connq);
			queue_enter_first(&conn_lru,cp,tcp_conn_ptr_t,connq);
		}
		if ((conn_num - conn_closing) > TCP_CONN_STEADY) {
			np_tcp_close_conn();
		}
		mutex_lock(&cp->lock);
		cp->count++;
		mutex_unlock(&conn_lock);
	}

	/*
	 * At this point, we have a lock on a connection record for the
	 * right destination. See if we can transmit the data.
	 */

	/*
	 * Link the transaction record in the connection record.
	 */
	ZALLOC(tcp_trans_zone,tp,tcp_trans_ptr_t);
	if (tp == NULL) {
		panic("netport_tcp_sendrequest: cannot get a transaction record");
	}
	tp->client_id = client_id;
	TRID_SET_CLIENT(tp->trid);
	DEBUG4(TCP_DBG_VERBOSE,0,2843,cp,cp->state,tp,tp->trid);

	if (cp->state == TCP_FREE) {
		panic("TCP module trying to transmit on a free connection");
	}

	if (cp->state == TCP_CONNECTED) {
		/*
		 * Send all the data on the socket.
		 */
		tp->state = TCP_TR_PENDING;
		tcp_xmit_data(cp,TCP_CTL_REQUEST,tp->trid,0,kmsg,len,crypt_level,ret);
		if (ret != KERN_SUCCESS) {
			/*
			 * Something went wrong. Most probably, the client is dead.
			 */
			DEBUG2(TCP_DBG_CRASH,0,2844,cp->dest,errno);
			cp->count--;
			mutex_unlock(&cp->lock);
			ZFREE(tcp_trans_zone,tp);
			RETURN(TR_FAILURE);
		}
	} else {
		/*
		 * Not connected yet: remember what to send.  The service
		 * thread transmits it once the connection is open.
		 */
		tp->state = TCP_TR_WAITING;
		tp->kmsg = kmsg;
		tp->len = len;
		tp->crypt_level = crypt_level;
	}
	queue_enter(&cp->trans,tp,tcp_trans_ptr_t,transq);
	mutex_unlock(&cp->lock);

	RETURN(TR_SUCCESS);
END
/*
* netport_tcp_sendreply --
*
* Send a response through the TCP interface.
*
* Parameters:
*
* trid : transport-level ID for a previous operation on this
* transaction
* code : a return code to be passed to the client.
* kmsg : the data to be sent
* len : the length of the data in kmsg
* crypt_level : whether the data should be encrypted
*
* Results:
*
* TR_SUCCESS or a specific failure code.
*
* Side effects:
*
* Design:
*
* Note:
*
*/
EXPORT int netport_tcp_sendreply(trid,code,kmsg,len,crypt_level)
trid_t trid;
int code;
kern_msg_t kmsg;
int len;
int crypt_level;
BEGIN("netport_tcp_sendreply")
tcp_conn_ptr_t cp;
kern_return_t ret;
int tcpid;
SET_TCPID(tcpid,trid);
TRID_GET_SERVER(tcpid,cp);
/*
* If the client has died, the connection record may
* already have been reused, and we may be sending this reply
* to the wrong machine. This should be detected by the
* incarnation number in the trid.
*/
if (cp == NULL) {
DEBUG1(TCP_DBG_CRASH,0,2847,tcpid);
RETURN(TR_FAILURE);
}
mutex_lock(&cp->lock);
DEBUG4(TCP_DBG_VERBOSE,0,2845,tcpid,cp,cp->dest,cp->state);
INCSTAT(tcp_replies_sent);
if (cp->state != TCP_CONNECTED) {
/*
* The client has died or the connection has just
* been dropped. Drop the reply.
*/
mutex_unlock(&cp->lock);
RETURN(TR_FAILURE);
}
cp->count--;
tcp_xmit_data(cp,TCP_CTL_REPLY,tcpid,code,kmsg,len,crypt_level,ret);
if (ret != KERN_SUCCESS) {
/*
* Something went wrong. Most probably, the client is dead.
*/
DEBUG2(TCP_DBG_CRASH,0,2846,cp->dest,errno);
mutex_unlock(&cp->lock);
RETURN(TR_FAILURE);
}
mutex_unlock(&cp->lock);
/*
* Update the LRU list of active connections and check for
* excess connections.
*/
mutex_lock(&conn_lock);
if (cp != (tcp_conn_ptr_t)queue_first(&conn_lru)) {
/*
* Place the record at the head of the queue.
*/
queue_remove(&conn_lru,cp,tcp_conn_ptr_t,connq);
queue_enter_first(&conn_lru,cp,tcp_conn_ptr_t,connq);
}
if ((conn_num - conn_closing) > TCP_CONN_STEADY) {
np_tcp_close_conn();
}
mutex_unlock(&conn_lock);
RETURN(TR_SUCCESS);
END
/*
 * np_tcp_conn_handler_open --
 *
 * Handler for one connection - opening phase.
 *
 * Parameters:
 *
 * cp: pointer to the connection record.
 *
 * Results:
 *
 * TRUE if the connection was successfully opened, FALSE otherwise.
 *
 * Side effects:
 *
 * Transactions waiting in the connection record are initiated.
 * cp->sock is set to the newly created socket, also when the connect
 * fails, so that the closing phase releases that socket instead of
 * whatever cp->sock held before.
 *
 * Note:
 *
 * cp->lock must be locked on entry. It is also locked on exit, but
 * it may be unlocked during the execution of this procedure.
 */
PRIVATE boolean_t np_tcp_conn_handler_open(cp)
tcp_conn_ptr_t	cp;
BEGIN("np_tcp_conn_handler_open")
	tcp_trans_ptr_t		tp;
	struct socket		*cs;
	struct sockaddr_in	sname;
	kern_return_t		ret;

	sname.sin_family = AF_INET;
	sname.sin_port = htons(TCP_NETMSG_PORT);
	sname.sin_addr.s_addr = (u_long)(cp->dest);

	/*
	 * Unlock the record while we are waiting for the connection
	 * to be established.
	 */
	mutex_unlock(&cp->lock);

	mutex_lock(&conn_lock);
	ret = mach_tcp_socket(PORT_NULL,&cs);
	mutex_unlock(&conn_lock);
	if (ret != KERN_SUCCESS) {
		ERROR((msg,"np_tcp_conn_handler.socket failed: errno=%d",errno));
		panic("tcp");
	}
	if (np_flags & NP_SODEBUG) {
		cs->so_options |= SO_DEBUG;
	}

	ret = mach_tcp_connect(PORT_NULL,cs,&sname,sizeof(struct sockaddr_in));
	if (ret != KERN_SUCCESS) {
		DEBUG2(TCP_DBG_CRASH,0,2815,0,errno);
		mutex_lock(&cp->lock);
		/*
		 * Hand the unconnected socket to the record: the caller
		 * runs the closing phase next, which closes cp->sock.
		 * Without this the socket would be lost and a stale
		 * cp->sock would be closed in its place.
		 */
		cp->sock = cs;
		RETURN(FALSE);
	}
	INCSTAT(tcp_connect);

	mutex_lock(&cp->lock);
	cp->sock = cs;
	cp->state = TCP_CONNECTED;
	DEBUG3(TCP_DBG_VERBOSE,0,2816,cp,cs,0);

	/*
	 * Look for transactions waiting to be transmitted.
	 */
	tp = (tcp_trans_ptr_t)queue_first(&cp->trans);
	while (!queue_end(&cp->trans,(queue_entry_t)tp)) {
		DEBUG2(TCP_DBG_VERBOSE,0,2817,tp,tp->state);
		if (tp->state == TCP_TR_WAITING) {
			tp->state = TCP_TR_PENDING;
			tcp_xmit_data(cp,TCP_CTL_REQUEST,tp->trid,0,
					tp->kmsg,tp->len,tp->crypt_level,ret);
			if (ret != KERN_SUCCESS) {
				RETURN(FALSE);
			}
		}
		tp = (tcp_trans_ptr_t)queue_next(&tp->transq);
	}

	RETURN(TRUE);
END
/*
 * np_tcp_conn_handler_active --
 *
 * Handler for one connection - active phase.
 *
 * Parameters:
 *
 * cp: pointer to the connection record.
 *
 * Results:
 *
 * Exits when the connection should be closed: on a receive error or
 * end of stream, on a message whose announced size is negative or does
 * not fit the buffer, after answering a CLOSEREQ with CLOSEREP, or on
 * receipt of a CLOSEREP.
 *
 * Design:
 *
 * Each message is received into a kmsg taken from netport_kmsg_zone,
 * starting at its tcp_ctl header.  A receive may return the beginning
 * of the next message as well; that excess is copied into a second
 * kmsg (new_kmsg) which becomes the buffer for the next pass.
 *
 * The size field comes from the network and is validated before use: a
 * negative value used to slip through the upper-bound test and reach
 * bcopy with a negative offset and an oversized length.
 *
 * Note:
 *
 * For now, the data received on the connection is only kept until the
 * higher-level handler procedure (disp_in_request or reply_proc) returns.
 * This allows the use of a data buffer on the stack.
 *
 */
PRIVATE void np_tcp_conn_handler_active(cp)
tcp_conn_ptr_t	cp;
BEGIN("np_tcp_conn_handler_active")
	struct socket	*cs;
	netaddr_t	peeraddr;
	tcp_trans_ptr_t	tp;
	kern_return_t	ret;
	kern_msg_t	kmsg;
	kern_msg_t	new_kmsg;
	int		len;
	caddr_t		bufp;		/* current location in data */
	int		buf_count;	/* data available in buf */
	int		buf_free;	/* free space in buf */
	int		data_size;
	unsigned long	trid;
	trid_t		trid_cl;

	peeraddr = cp->dest;	/* OK not to lock at this point */
	cs = cp->sock;
	new_kmsg = NULL;

	/*
	 * Enter the recv loop.
	 */
	for (;;) {
		/*
		 * Get a fresh kmsg for a receive buffer.
		 */
		if (new_kmsg == NULL) {
			ZALLOC(netport_kmsg_zone,kmsg,kern_msg_t);
			if (kmsg == NULL) {
				panic("netport out of kmsgs");
			}
			kmsg->home_zone = netport_kmsg_zone;
			bufp = (caddr_t)&(kmsg->tcp_ctl);
			buf_count = 0;
			buf_free = DATA_SIZE_MAX;
		} else {
			/*
			 * There is already some data obtained
			 * in the previous pass.  bufp, buf_count and
			 * buf_free were set up when it was copied.
			 */
			kmsg = new_kmsg;
		}

		/*
		 * Get at least a tcp control header in the
		 * buffer.
		 */
		while (buf_count < sizeof(tcp_ctl_t)) {
			len = buf_free;
			ret = mach_tcp_recv(PORT_NULL,cs,bufp,&len,0);
			if ((ret != KERN_SUCCESS) || (len <= 0)) {
				ZFREE(netport_kmsg_zone,kmsg);
				RET;
			}
			INCSTAT(tcp_recv);
			DEBUG2(TCP_DBG_VERBOSE,0,2820,ret,peeraddr);
			buf_count += len;
			buf_free -= len;
			bufp += len;
		}

		/*
		 * Do all the required byte-swapping (Sigh!).
		 */
		kmsg->tcp_ctl.ctl = ntohl(kmsg->tcp_ctl.ctl);
		kmsg->tcp_ctl.trid = ntohl(kmsg->tcp_ctl.trid);
		kmsg->tcp_ctl.code = ntohl(kmsg->tcp_ctl.code);
		kmsg->tcp_ctl.size = ntohl(kmsg->tcp_ctl.size);
		kmsg->tcp_ctl.crypt_level = ntohl(kmsg->tcp_ctl.crypt_level);

		/*
		 * Read any user data.
		 * Advance the current data pointer.
		 *
		 * From here on buf_count counts data bytes only.
		 */
		buf_count -= sizeof(tcp_ctl_t);
		data_size = kmsg->tcp_ctl.size;
		if ((data_size < 0) || (data_size > (buf_count + buf_free))) {
			ERROR((msg,"Netport: size too big from 0x%x\n", peeraddr));
			ZFREE(netport_kmsg_zone,kmsg);
			RET;
		}
		while (buf_count < data_size) {
			len = buf_free;
			ret = mach_tcp_recv(PORT_NULL,cs,bufp,&len,0);
			if ((ret != KERN_SUCCESS) || (len <= 0)) {
				ZFREE(netport_kmsg_zone,kmsg);
				RET;
			}
			INCSTAT(tcp_recv);
			buf_count += len;
			buf_free -= len;
			bufp += len;
		}

		/*
		 * If we received more data than we asked for,
		 * transfer the excess in a new kmsg.
		 */
		if (buf_count > data_size) {
			ZALLOC(netport_kmsg_zone,new_kmsg,kern_msg_t);
			if (new_kmsg == NULL) {
				panic("netport out of kmsgs");
			}
			new_kmsg->home_zone = netport_kmsg_zone;
			bcopy(((caddr_t)&(kmsg->netmsg_hdr)) + data_size,
				(caddr_t)&(new_kmsg->tcp_ctl),
				buf_count - data_size);
			buf_count -= data_size;
			bufp = (caddr_t)(&(new_kmsg->tcp_ctl)) + buf_count;
			buf_free = DATA_SIZE_MAX - buf_count;
		} else {
			new_kmsg = NULL;
		}

		/*
		 * XXX Worry about encryption.
		 */

		/*
		 * Now process the message.
		 */
		DEBUG1(TCP_DBG_VERBOSE,0,2826,kmsg->tcp_ctl.ctl);
		switch(kmsg->tcp_ctl.ctl) {
		case TCP_CTL_REQUEST:
			INCSTAT(tcp_requests_rcvd);
			mutex_lock(&cp->lock);
			cp->count++;
			if (cp->state == TCP_CLOSING) {
				/*
				 * Traffic arrived after we asked to
				 * close: the connection is live again.
				 */
				cp->state = TCP_CONNECTED;
				mutex_unlock(&cp->lock);
				mutex_lock(&conn_lock);
				conn_closing--;
				mutex_unlock(&conn_lock);
			} else {
				mutex_unlock(&cp->lock);
			}
			trid = kmsg->tcp_ctl.trid;
			TRID_SET_SERVER(trid,cp);
			SET_TRID(trid_cl,trid);
			(void) netport_handle_rq(TR_TCP_ENTRY,trid_cl,
					kmsg,data_size,peeraddr,
					kmsg->tcp_ctl.crypt_level,FALSE);
			/*
			 * The kmsg will be destroyed by netmsg_input_rq.
			 */
			break;

		case TCP_CTL_REPLY:
			INCSTAT(tcp_replies_rcvd);
			mutex_lock(&cp->lock);
			if (cp->state == TCP_CLOSING) {
				cp->state = TCP_CONNECTED;
				mutex_unlock(&cp->lock);
				mutex_lock(&conn_lock);
				conn_closing--;
				mutex_unlock(&conn_lock);
				mutex_lock(&cp->lock);
			}
			/*
			 * Find the transaction record.
			 */
			TRID_GET_CLIENT(kmsg->tcp_ctl.trid,trid);
			tp = (tcp_trans_ptr_t)queue_first(&cp->trans);
			while (!queue_end(&cp->trans,(queue_entry_t)tp)) {
				if (tp->trid == trid) {
					break;
				}
				tp = (tcp_trans_ptr_t)queue_next(&tp->transq);
			}
			if (queue_end(&cp->trans,(queue_entry_t)tp)) {
				ERROR((msg,
	"np_tcp_conn_handler_active: cannot find the transaction record for a reply"));
				mutex_unlock(&cp->lock);
				ZFREE(netport_kmsg_zone,kmsg);
			} else {
				queue_remove(&cp->trans,tp,
						tcp_trans_ptr_t,transq);
				cp->count--;
				mutex_unlock(&cp->lock);
				DEBUG1(TCP_DBG_VERBOSE,0,2828,tp);
				netport_handle_rp(tp->client_id,
					kmsg->tcp_ctl.code,kmsg,data_size);
				/*
				 * The kmsg will be destroyed by
				 * the reply_proc.
				 */
				ZFREE(tcp_trans_zone,tp);
			}
			break;

		case TCP_CTL_CLOSEREQ:
			mutex_lock(&cp->lock);
			if (cp->count == 0) {
				/*
				 * Send CLOSEREP.
				 */
				DEBUG1(TCP_DBG_MAJOR,0,2829,cp->dest);
				tcp_xmit_control(cp,TCP_CTL_CLOSEREP,
							0,0,ret);
				if (cp->state != TCP_CLOSING) {
					cp->state = TCP_CLOSED;
				}
				mutex_unlock(&cp->lock);
				ZFREE(netport_kmsg_zone,kmsg);
				/*
				 * Do not lose the look-ahead buffer
				 * when leaving for good.
				 */
				if (new_kmsg != NULL) {
					ZFREE(netport_kmsg_zone,new_kmsg);
				}
				RET;
			} else {
				/*
				 * We have some data in
				 * transit. Nothing more
				 * should be needed.
				 *
				 * NOTE(review): if the state was
				 * TCP_CLOSING here, conn_closing is not
				 * decremented as it is in the REQUEST
				 * and REPLY cases -- confirm whether
				 * that combination can occur.
				 */
				DEBUG2(TCP_DBG_MAJOR,0,2830,cp->dest,
								cp->count);
				cp->state = TCP_CONNECTED;
				mutex_unlock(&cp->lock);
				ZFREE(netport_kmsg_zone,kmsg);
			}
			break;

		case TCP_CTL_CLOSEREP:
			mutex_lock(&cp->lock);
			DEBUG1(TCP_DBG_MAJOR,0,2831,cp->dest);
			/*
			 * cp->state can only be TCP_CLOSING:
			 *
			 * We have sent a CLOSEREQ, and set the
			 * state to TCP_CLOSING then. If the state
			 * has changed since then, it must be because
			 * we have received data. But this data can only
			 * be a request, because we had nothing going on
			 * when we sent the CLOSEREQ. This CLOSEREQ must
			 * arrive at the other end before our reply
			 * because TCP does not reorder messages. But
			 * then the CLOSEREQ will be rejected because
			 * of the pending transaction.
			 */
			mutex_unlock(&cp->lock);
			ZFREE(netport_kmsg_zone,kmsg);
			if (new_kmsg != NULL) {
				ZFREE(netport_kmsg_zone,new_kmsg);
			}
			RET;

		default:
			ERROR((msg,
	"np_tcp_conn_handler_active: received an unknown ctl code: %d",
						kmsg->tcp_ctl.ctl));
			ZFREE(netport_kmsg_zone,kmsg);
			break;
		}
	}
END
/*
 * np_tcp_conn_handler_close --
 *
 * Handler for one connection - closing phase.
 *
 * Parameters:
 *
 * cp: pointer to the connection record.
 *
 * Results:
 *
 * none.
 *
 * Side effects:
 *
 * Closes the socket of the record (if it has one) and clears cp->sock,
 * aborts every transaction still linked on the record, moves the record
 * from the LRU list to the free list in state TCP_FREE, and wakes the
 * listener when the number of connections drops just below
 * TCP_CONN_MAX.
 *
 * Note:
 *
 * Takes conn_lock and then cp->lock; both are released on return.
 */
PRIVATE void np_tcp_conn_handler_close(cp)
tcp_conn_ptr_t	cp;
BEGIN("np_tcp_conn_handler_close")
	tcp_trans_ptr_t	tp;

	/*
	 * Some transactions might be initiated after the active phase exits
	 * and before this phase starts. Hopefully, they will be stopped by
	 * the TCP_CLOSING or TCP_CLOSED states, or the send will fail.
	 */
	mutex_lock(&conn_lock);
	mutex_lock(&cp->lock);

	/*
	 * A record whose open failed may have no socket at all, and a
	 * recycled record must never close the socket of its previous
	 * incarnation a second time: close only what is there, then
	 * forget it.
	 */
	if (cp->sock != 0) {
		mach_tcp_close(PORT_NULL,cp->sock);
		INCSTAT(tcp_close);
		cp->sock = 0;
	}
	if (cp->state == TCP_CLOSING) {
		conn_closing--;
	}
	cp->state = TCP_FREE;

	/*
	 * Go down the list of waiting/pending transactions
	 * and abort them.
	 * The client is of course free to retry them later.
	 */
	while (!queue_empty(&cp->trans)) {
		tp = (tcp_trans_ptr_t)queue_first(&cp->trans);
		DEBUG3(TCP_DBG_VERBOSE,0,2834,tp,tp->state,tp->client_id);
		if (tp->state == TCP_TR_WAITING) {
			/* Never transmitted. */
			netport_handle_rp(tp->client_id,TR_SEND_FAILURE,0,0);
		} else {
			netport_handle_rp(tp->client_id,TR_FAILURE,0,0);
		}
		queue_remove(&cp->trans,tp,tcp_trans_ptr_t,transq);
		ZFREE(tcp_trans_zone,tp);
	}
	queue_init(&cp->trans);
	cp->count = 0;

	queue_remove(&conn_lru,cp,tcp_conn_ptr_t,connq);
	queue_enter(&conn_free,cp,tcp_conn_ptr_t,connq);
	mutex_unlock(&cp->lock);

	conn_num--;
	DEBUG1(TCP_DBG_MAJOR,0,2835,conn_num);
	if (conn_num == (TCP_CONN_MAX - 1)) {
		/*
		 * OK to start accepting connections again.
		 */
		DEBUG0(TCP_DBG_MAJOR,0,2836);
		condition_signal(&conn_cond);
	}
	mutex_unlock(&conn_lock);

	RET;
END
/*
 * np_tcp_conn_handler --
 *
 * Handler for one connection.
 *
 * Parameters:
 *
 * none.
 *
 * Results:
 *
 * Should never exit.
 *
 * Note:
 *
 * The first thing the thread must do is locate the connection record which
 * it is to service. This is guaranteed to succeed because there are exactly
 * as many threads as there are valid connection records.
 *
 * For clarity, this code is split into three different procedures handling
 * the opening, active and closing phases of the life of the connection.
 *
 * The incarnation number of the record is advanced (modulo 256) once per
 * activation.  It is computed as (incarn + 1) & 0xff; the previous form,
 * incarn = (incarn++) & 0xff, modified the same object twice without an
 * intervening sequence point and so had undefined behaviour.
 *
 */
PRIVATE void np_tcp_conn_handler()
BEGIN("np_tcp_conn_handler")
	tcp_conn_ptr_t	cp;
	int		i;
	boolean_t	active;

	/*
	 * Find the connection record: the first initialized record that
	 * no thread has claimed yet.
	 */
	mutex_lock(&conn_lock);
	cp = NULL;
	for (i = 0; i < (int)(sizeof(conn_vec) / sizeof(conn_vec[0])); i++) {
		if (conn_vec[i].state != TCP_INVALID) {
			cp = &conn_vec[i];
			if (cp->th == NULL) {
				cp->th = current_thread();
				break;
			} else {
				cp = NULL;
			}
		}
	}
	mutex_unlock(&conn_lock);
	if (cp == NULL) {
		panic("TCP connection handler cannot find a connection record");
	}

	/*
	 * Service loop.
	 */
	for (;;) {
		/*
		 * First wait to be activated.
		 */
		mutex_lock(&cp->lock);
		while(cp->state == TCP_FREE) {
			DEBUG0(TCP_DBG_VERBOSE,0,2811);
			condition_wait(&cp->cond,&cp->lock);
		}

		/*
		 * At this point, the state is either TCP_OPENING (local open)
		 * or TCP_CONNECTED (remote open).
		 */
		DEBUG3(TCP_DBG_VERBOSE,0,2812,cp,cp->state,cp->dest);
		if (cp->state == TCP_OPENING) {
			/*
			 * Open a new connection.
			 */
			active = np_tcp_conn_handler_open(cp);
		} else {
			active = TRUE;
		}
		cp->incarn = (cp->incarn + 1) & 0xff;
		mutex_unlock(&cp->lock);

		if (active) {
			DEBUG3(TCP_DBG_MAJOR,0,2813,cp,cp->sock,cp->dest);
			np_tcp_conn_handler_active(cp);
			DEBUG3(TCP_DBG_MAJOR,0,2814,cp,cp->sock,cp->dest);
		}

		/*
		 * Close the connection.
		 */
		np_tcp_conn_handler_close(cp);
	}
END
/*
* np_tcp_listener --
*
* Handler for the listener socket.
*
* Parameters:
*
* Results:
*
* Should never exit.
*
* Note:
*
*/
PRIVATE void np_tcp_listener()
BEGIN("np_tcp_listener")
struct socket *s;
struct socket *newsock;
kern_return_t ret;
struct sockaddr_in sname;
int snamelen;
tcp_conn_ptr_t cp;
/*
* First create the listener socket.
*/
mutex_lock(&conn_lock);
ret = mach_tcp_socket(PORT_NULL,&s);
mutex_unlock(&conn_lock);
if (ret != KERN_SUCCESS) {
ERROR((msg,"np_tcp_listener.socket failed: errno=%d",errno));
panic("tcp");
}
sname.sin_family = AF_INET;
sname.sin_port = htons(TCP_NETMSG_PORT);
sname.sin_addr.s_addr = INADDR_ANY;
ret = mach_tcp_bind(PORT_NULL,s,&sname,sizeof(struct sockaddr_in));
if (ret != KERN_SUCCESS) {
ERROR((msg,"np_tcp_listener.bind failed: errno=%d",errno));
panic("tcp");
}
ret = mach_tcp_listen(PORT_NULL,s,2);
if (ret != KERN_SUCCESS) {
ERROR((msg,"np_tcp_listener.listen failed: errno=%d",errno));
panic("tcp");
}
DEBUG1(TCP_DBG_VERBOSE,0,2806,s);
/*
* Loop forever accepting connections.
*/
for (;;) {
mutex_lock(&conn_lock);
while (conn_num >= TCP_CONN_MAX) {
DEBUG1(TCP_DBG_VERBOSE,0,2810,conn_num);
condition_wait(&conn_cond,&conn_lock);
}
mutex_unlock(&conn_lock);
DEBUG0(TCP_DBG_VERBOSE,0,2807);
snamelen = sizeof(struct sockaddr_in);
ret = mach_tcp_accept(PORT_NULL,s,&sname,&snamelen,&newsock);
if (ret != KERN_SUCCESS) {
ERROR((msg,
"np_tcp_listener.accept failed: errno=%d",errno));
continue;
}
INCSTAT(tcp_accept);
DEBUG0(TCP_DBG_VERBOSE,0,2808);
if (np_flags & NP_SODEBUG) {
newsock->so_options |= SO_DEBUG;
}
mutex_lock(&conn_lock);
if (queue_empty(&conn_free)) {
/*
* Initialize a new connection record.
*/
cp = np_tcp_init_conn();
} else {
cp = (tcp_conn_ptr_t)queue_first(&conn_free);
queue_remove(&conn_free,cp,tcp_conn_ptr_t,connq);
}
mutex_lock(&cp->lock);
DEBUG4(TCP_DBG_MAJOR,0,2809,ret,cp,
sname.sin_addr.s_addr,sname.sin_port);
queue_enter_first(&conn_lru,cp,tcp_conn_ptr_t,connq);
conn_num++;
cp->sock = newsock;
cp->dest = (netaddr_t)(sname.sin_addr.s_addr);
cp->state = TCP_CONNECTED;
cp->count = 0;
#ifdef notdef
/*
* This is done when placing cp on the free list.
*/
queue_init(&cp->trans);
#endif notdef
condition_signal(&cp->cond);
mutex_unlock(&cp->lock);
if ((conn_num - conn_closing) > TCP_CONN_STEADY) {
np_tcp_close_conn();
}
mutex_unlock(&conn_lock);
}
END
/*
 * netport_tcp_init --
 *
 * Initialises the TCP transport protocol.
 *
 * Parameters:
 *
 * none.
 *
 * Results:
 *
 * TRUE.  (FALSE is the documented failure value, but no path in this
 * routine returns it.)
 *
 * Side effects:
 *
 * Marks every slot of conn_vec invalid and resets the lists and counters.
 * Creates one connection record (and its service thread) and places it on
 * the free list.
 * Initialises the TCP protocol entry point in the switch array.
 * Creates the zone for transaction records.
 * Creates a thread to listen to the network.
 *
 * Note:
 *
 * conn_lock is held from its initialization until the end of the
 * routine.
 */
EXPORT boolean_t netport_tcp_init()
BEGIN("netport_tcp_init")
	int		i;
	tcp_conn_ptr_t	cp;

	/*
	 * Initialize the set of connection records and the lists.
	 */
	for (i = 0; i < (int)(sizeof(conn_vec) / sizeof(conn_vec[0])); i++) {
		conn_vec[i].state = TCP_INVALID;
		conn_vec[i].incarn = 0;
	}
	mutex_init(&conn_lock);
	mutex_lock(&conn_lock);
	condition_init(&conn_cond);
	queue_init(&conn_lru);
	queue_init(&conn_free);
	conn_num = 0;
	conn_closing = 0;
	trid_counter = 10;

	/*
	 * Create a first connection record (just a test).
	 */
	cp = np_tcp_init_conn();
	queue_enter(&conn_free,cp,tcp_conn_ptr_t,connq);

	/*
	 * Set up the entry in the transport switch.
	 */
	transport_switch[TR_TCP_ENTRY].sendrequest = netport_tcp_sendrequest;
	transport_switch[TR_TCP_ENTRY].sendreply = netport_tcp_sendreply;

	/*
	 * Initialize the zone for transaction records.
	 */
	tcp_trans_zone = zinit(sizeof(tcp_trans_t), 64 * 1024, page_size, FALSE,
			"netport TCP transaction records");

	/*
	 * Initialize the TCP interface.
	 */
	mach_tcp_init(PORT_NULL,NULL);

	/*
	 * Start the listener.
	 */
#if NeXT
	(void) kernel_thread(kernel_task,np_tcp_listener);
#else /* NeXT */
	(void) kernel_thread(first_task,np_tcp_listener);
#endif /* NeXT */

	/*
	 * Get the show on the road...
	 */
	DEBUG0(TCP_DBG_MAJOR,0,2804);
	mutex_unlock(&conn_lock);

	RETURN(TRUE);
END