Annotation of qemu/roms/ipxe/src/net/tcp.c, revision 1.1.1.1

1.1       root        1: #include <string.h>
                      2: #include <stdlib.h>
                      3: #include <stdio.h>
                      4: #include <assert.h>
                      5: #include <errno.h>
                      6: #include <byteswap.h>
                      7: #include <ipxe/timer.h>
                      8: #include <ipxe/iobuf.h>
                      9: #include <ipxe/malloc.h>
                     10: #include <ipxe/retry.h>
                     11: #include <ipxe/refcnt.h>
                     12: #include <ipxe/xfer.h>
                     13: #include <ipxe/open.h>
                     14: #include <ipxe/uri.h>
                     15: #include <ipxe/netdevice.h>
                     16: #include <ipxe/tcpip.h>
                     17: #include <ipxe/tcp.h>
                     18: 
                     19: /** @file
                     20:  *
                     21:  * TCP protocol
                     22:  *
                     23:  */
                     24: 
                     25: FILE_LICENCE ( GPL2_OR_LATER );
                     26: 
                     27: /** A TCP connection */
                     28: struct tcp_connection {
                     29:        /** Reference counter */
                     30:        struct refcnt refcnt;
                     31:        /** List of TCP connections */
                     32:        struct list_head list;
                     33: 
                     34:        /** Flags */
                     35:        unsigned int flags;
                     36: 
                     37:        /** Data transfer interface */
                     38:        struct interface xfer;
                     39: 
                     40:        /** Remote socket address */
                     41:        struct sockaddr_tcpip peer;
                     42:        /** Local port */
                     43:        unsigned int local_port;
                     44: 
                     45:        /** Current TCP state */
                     46:        unsigned int tcp_state;
                     47:        /** Previous TCP state
                     48:         *
                     49:         * Maintained only for debug messages
                     50:         */
                     51:        unsigned int prev_tcp_state;
                     52:        /** Current sequence number
                     53:         *
                     54:         * Equivalent to SND.UNA in RFC 793 terminology.
                     55:         */
                     56:        uint32_t snd_seq;
                     57:        /** Unacknowledged sequence count
                     58:         *
                     59:         * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology.
                     60:         */
                     61:        uint32_t snd_sent;
                     62:        /** Send window
                     63:         *
                     64:         * Equivalent to SND.WND in RFC 793 terminology
                     65:         */
                     66:        uint32_t snd_win;
                     67:        /** Current acknowledgement number
                     68:         *
                     69:         * Equivalent to RCV.NXT in RFC 793 terminology.
                     70:         */
                     71:        uint32_t rcv_ack;
                     72:        /** Receive window
                     73:         *
                     74:         * Equivalent to RCV.WND in RFC 793 terminology.
                     75:         */
                     76:        uint32_t rcv_win;
                     77:        /** Received timestamp value
                     78:         *
                     79:         * Updated when a packet is received; copied to ts_recent when
                     80:         * the window is advanced.
                     81:         */
                     82:        uint32_t ts_val;
                     83:        /** Most recent received timestamp that advanced the window
                     84:         *
                     85:         * Equivalent to TS.Recent in RFC 1323 terminology.
                     86:         */
                     87:        uint32_t ts_recent;
                     88: 
                     89:        /** Transmit queue */
                     90:        struct list_head tx_queue;
                     91:        /** Receive queue */
                     92:        struct list_head rx_queue;
                     93:        /** Retransmission timer */
                     94:        struct retry_timer timer;
                     95:        /** Shutdown (TIME_WAIT) timer */
                     96:        struct retry_timer wait;
                     97: };
                     98: 
                     /** Bit flags stored in a TCP connection's @c flags field */
                     enum tcp_flags {
                             /** The data transfer interface has been shut down */
                             TCP_XFER_CLOSED = ( 1 << 0 ),
                             /** Timestamps are in use on this connection */
                             TCP_TS_ENABLED = ( 1 << 1 ),
                             /** An acknowledgement is waiting to be sent */
                             TCP_ACK_PENDING = ( 1 << 2 ),
                     };
                    108: 
                    /** Header used for packets held on the receive queue
                     *
                     * Once a packet is placed on the receive queue, its TCP header
                     * is replaced by this structure.
                     */
                    struct tcp_rx_queued_header {
                            /** Sequence number (host byte order)
                             *
                             * Holds the SEQ value as it stood when the packet was
                             * queued; any SYN has therefore already been excluded.
                             */
                            uint32_t seq;
                            /** Remaining flags
                             *
                             * FIN is the only flag that carries meaning here, since
                             * every other flag is handled before the packet is
                             * queued.
                             */
                            uint8_t flags;
                            /** Unused padding */
                            uint8_t reserved[3];
                    };
                    131: 
                    132: /**
                    133:  * List of registered TCP connections
                    134:  */
                    135: static LIST_HEAD ( tcp_conns );
                    136: 
                    137: /* Forward declarations */
                    138: static struct interface_descriptor tcp_xfer_desc;
                    139: static void tcp_expired ( struct retry_timer *timer, int over );
                    140: static void tcp_wait_expired ( struct retry_timer *timer, int over );
                    141: static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
                    142:                        uint32_t win );
                    143: 
                    144: /**
                    145:  * Name TCP state
                    146:  *
                    147:  * @v state            TCP state
                    148:  * @ret name           Name of TCP state
                    149:  */
                    150: static inline __attribute__ (( always_inline )) const char *
                    151: tcp_state ( int state ) {
                    152:        switch ( state ) {
                    153:        case TCP_CLOSED:                return "CLOSED";
                    154:        case TCP_LISTEN:                return "LISTEN";
                    155:        case TCP_SYN_SENT:              return "SYN_SENT";
                    156:        case TCP_SYN_RCVD:              return "SYN_RCVD";
                    157:        case TCP_ESTABLISHED:           return "ESTABLISHED";
                    158:        case TCP_FIN_WAIT_1:            return "FIN_WAIT_1";
                    159:        case TCP_FIN_WAIT_2:            return "FIN_WAIT_2";
                    160:        case TCP_CLOSING_OR_LAST_ACK:   return "CLOSING/LAST_ACK";
                    161:        case TCP_TIME_WAIT:             return "TIME_WAIT";
                    162:        case TCP_CLOSE_WAIT:            return "CLOSE_WAIT";
                    163:        default:                        return "INVALID";
                    164:        }
                    165: }
                    166: 
                    167: /**
                    168:  * Dump TCP state transition
                    169:  *
                    170:  * @v tcp              TCP connection
                    171:  */
                    172: static inline __attribute__ (( always_inline )) void
                    173: tcp_dump_state ( struct tcp_connection *tcp ) {
                    174: 
                    175:        if ( tcp->tcp_state != tcp->prev_tcp_state ) {
                    176:                DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp,
                    177:                       tcp_state ( tcp->prev_tcp_state ),
                    178:                       tcp_state ( tcp->tcp_state ) );
                    179:        }
                    180:        tcp->prev_tcp_state = tcp->tcp_state;
                    181: }
                    182: 
                    183: /**
                    184:  * Dump TCP flags
                    185:  *
                    186:  * @v flags            TCP flags
                    187:  */
                    188: static inline __attribute__ (( always_inline )) void
                    189: tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) {
                    190:        if ( flags & TCP_RST )
                    191:                DBGC2 ( tcp, " RST" );
                    192:        if ( flags & TCP_SYN )
                    193:                DBGC2 ( tcp, " SYN" );
                    194:        if ( flags & TCP_PSH )
                    195:                DBGC2 ( tcp, " PSH" );
                    196:        if ( flags & TCP_FIN )
                    197:                DBGC2 ( tcp, " FIN" );
                    198:        if ( flags & TCP_ACK )
                    199:                DBGC2 ( tcp, " ACK" );
                    200: }
                    201: 
                    202: /***************************************************************************
                    203:  *
                    204:  * Open and close
                    205:  *
                    206:  ***************************************************************************
                    207:  */
                    208: 
                    209: /**
                    210:  * Bind TCP connection to local port
                    211:  *
                    212:  * @v tcp              TCP connection
                    213:  * @v port             Local port number
                    214:  * @ret rc             Return status code
                    215:  *
                    216:  * If the port is 0, the connection is assigned an available port
                    217:  * between 1024 and 65535.
                    218:  */
                    219: static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) {
                    220:        struct tcp_connection *existing;
                    221:        uint16_t try_port;
                    222:        unsigned int i;
                    223: 
                    224:        /* If no port is specified, find an available port */
                    225:        if ( ! port ) {
                    226:                try_port = random();
                    227:                for ( i = 0 ; i < 65536 ; i++ ) {
                    228:                        try_port++;
                    229:                        if ( try_port < 1024 )
                    230:                                continue;
                    231:                        if ( tcp_bind ( tcp, try_port ) == 0 )
                    232:                                return 0;
                    233:                }
                    234:                DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp );
                    235:                return -EADDRINUSE;
                    236:        }
                    237: 
                    238:        /* Attempt bind to local port */
                    239:        list_for_each_entry ( existing, &tcp_conns, list ) {
                    240:                if ( existing->local_port == port ) {
                    241:                        DBGC ( tcp, "TCP %p could not bind: port %d in use\n",
                    242:                               tcp, port );
                    243:                        return -EADDRINUSE;
                    244:                }
                    245:        }
                    246:        tcp->local_port = port;
                    247: 
                    248:        DBGC ( tcp, "TCP %p bound to port %d\n", tcp, port );
                    249:        return 0;
                    250: }
                    251: 
                    252: /**
                    253:  * Open a TCP connection
                    254:  *
                    255:  * @v xfer             Data transfer interface
                    256:  * @v peer             Peer socket address
                    257:  * @v local            Local socket address, or NULL
                    258:  * @ret rc             Return status code
                    259:  */
                    260: static int tcp_open ( struct interface *xfer, struct sockaddr *peer,
                    261:                      struct sockaddr *local ) {
                    262:        struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer;
                    263:        struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local;
                    264:        struct tcp_connection *tcp;
                    265:        unsigned int bind_port;
                    266:        int rc;
                    267: 
                    268:        /* Allocate and initialise structure */
                    269:        tcp = zalloc ( sizeof ( *tcp ) );
                    270:        if ( ! tcp )
                    271:                return -ENOMEM;
                    272:        DBGC ( tcp, "TCP %p allocated\n", tcp );
                    273:        ref_init ( &tcp->refcnt, NULL );
                    274:        intf_init ( &tcp->xfer, &tcp_xfer_desc, &tcp->refcnt );
                    275:        timer_init ( &tcp->timer, tcp_expired, &tcp->refcnt );
                    276:        timer_init ( &tcp->wait, tcp_wait_expired, &tcp->refcnt );
                    277:        tcp->prev_tcp_state = TCP_CLOSED;
                    278:        tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN );
                    279:        tcp_dump_state ( tcp );
                    280:        tcp->snd_seq = random();
                    281:        INIT_LIST_HEAD ( &tcp->tx_queue );
                    282:        INIT_LIST_HEAD ( &tcp->rx_queue );
                    283:        memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) );
                    284: 
                    285:        /* Bind to local port */
                    286:        bind_port = ( st_local ? ntohs ( st_local->st_port ) : 0 );
                    287:        if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 )
                    288:                goto err;
                    289: 
                    290:        /* Start timer to initiate SYN */
                    291:        start_timer_nodelay ( &tcp->timer );
                    292: 
                    293:        /* Attach parent interface, transfer reference to connection
                    294:         * list and return
                    295:         */
                    296:        intf_plug_plug ( &tcp->xfer, xfer );
                    297:        list_add ( &tcp->list, &tcp_conns );
                    298:        return 0;
                    299: 
                    300:  err:
                    301:        ref_put ( &tcp->refcnt );
                    302:        return rc;
                    303: }
                    304: 
                    305: /**
                    306:  * Close TCP connection
                    307:  *
                    308:  * @v tcp              TCP connection
                    309:  * @v rc               Reason for close
                    310:  *
                    311:  * Closes the data transfer interface.  If the TCP state machine is in
                    312:  * a suitable state, the connection will be deleted.
                    313:  */
                    314: static void tcp_close ( struct tcp_connection *tcp, int rc ) {
                    315:        struct io_buffer *iobuf;
                    316:        struct io_buffer *tmp;
                    317: 
                    318:        /* Close data transfer interface */
                    319:        intf_shutdown ( &tcp->xfer, rc );
                    320:        tcp->flags |= TCP_XFER_CLOSED;
                    321: 
                    322:        /* If we are in CLOSED, or have otherwise not yet received a
                    323:         * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the
                    324:         * connection.
                    325:         */
                    326:        if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
                    327: 
                    328:                /* Transition to CLOSED for the sake of debugging messages */
                    329:                tcp->tcp_state = TCP_CLOSED;
                    330:                tcp_dump_state ( tcp );
                    331: 
                    332:                /* Free any unprocessed I/O buffers */
                    333:                list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) {
                    334:                        list_del ( &iobuf->list );
                    335:                        free_iob ( iobuf );
                    336:                }
                    337: 
                    338:                /* Free any unsent I/O buffers */
                    339:                list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) {
                    340:                        list_del ( &iobuf->list );
                    341:                        free_iob ( iobuf );
                    342:                }
                    343: 
                    344:                /* Remove from list and drop reference */
                    345:                stop_timer ( &tcp->timer );
                    346:                list_del ( &tcp->list );
                    347:                ref_put ( &tcp->refcnt );
                    348:                DBGC ( tcp, "TCP %p connection deleted\n", tcp );
                    349:                return;
                    350:        }
                    351: 
                    352:        /* If we have not had our SYN acknowledged (i.e. we are in
                    353:         * SYN_RCVD), pretend that it has been acknowledged so that we
                    354:         * can send a FIN without breaking things.
                    355:         */
                    356:        if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
                    357:                tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 );
                    358: 
                    359:        /* If we have no data remaining to send, start sending FIN */
                    360:        if ( list_empty ( &tcp->tx_queue ) ) {
                    361:                tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
                    362:                tcp_dump_state ( tcp );
                    363:        }
                    364: }
                    365: 
                    366: /***************************************************************************
                    367:  *
                    368:  * Transmit data path
                    369:  *
                    370:  ***************************************************************************
                    371:  */
                    372: 
                    373: /**
                    374:  * Calculate transmission window
                    375:  *
                    376:  * @v tcp              TCP connection
                    377:  * @ret len            Maximum length that can be sent in a single packet
                    378:  */
                    379: static size_t tcp_xmit_win ( struct tcp_connection *tcp ) {
                    380:        size_t len;
                    381: 
                    382:        /* Not ready if we're not in a suitable connection state */
                    383:        if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) )
                    384:                return 0;
                    385: 
                    386:        /* Length is the minimum of the receiver's window and the path MTU */
                    387:        len = tcp->snd_win;
                    388:        if ( len > TCP_PATH_MTU )
                    389:                len = TCP_PATH_MTU;
                    390: 
                    391:        return len;
                    392: }
                    393: 
                    394: /**
                    395:  * Process TCP transmit queue
                    396:  *
                    397:  * @v tcp              TCP connection
                    398:  * @v max_len          Maximum length to process
                    399:  * @v dest             I/O buffer to fill with data, or NULL
                    400:  * @v remove           Remove data from queue
                    401:  * @ret len            Length of data processed
                    402:  *
                    403:  * This processes at most @c max_len bytes from the TCP connection's
                    404:  * transmit queue.  Data will be copied into the @c dest I/O buffer
                    405:  * (if provided) and, if @c remove is true, removed from the transmit
                    406:  * queue.
                    407:  */
                    408: static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
                    409:                                     struct io_buffer *dest, int remove ) {
                    410:        struct io_buffer *iobuf;
                    411:        struct io_buffer *tmp;
                    412:        size_t frag_len;
                    413:        size_t len = 0;
                    414: 
                    415:        list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) {
                    416:                frag_len = iob_len ( iobuf );
                    417:                if ( frag_len > max_len )
                    418:                        frag_len = max_len;
                    419:                if ( dest ) {
                    420:                        memcpy ( iob_put ( dest, frag_len ), iobuf->data,
                    421:                                 frag_len );
                    422:                }
                    423:                if ( remove ) {
                    424:                        iob_pull ( iobuf, frag_len );
                    425:                        if ( ! iob_len ( iobuf ) ) {
                    426:                                list_del ( &iobuf->list );
                    427:                                free_iob ( iobuf );
                    428:                        }
                    429:                }
                    430:                len += frag_len;
                    431:                max_len -= frag_len;
                    432:        }
                    433:        return len;
                    434: }
                    435: 
                    436: /**
                    437:  * Transmit any outstanding data
                    438:  *
                    439:  * @v tcp              TCP connection
                    440:  * 
                    441:  * Transmits any outstanding data on the connection.
                    442:  *
                    443:  * Note that even if an error is returned, the retransmission timer
                    444:  * will have been started if necessary, and so the stack will
                    445:  * eventually attempt to retransmit the failed packet.
                    446:  */
                    447: static int tcp_xmit ( struct tcp_connection *tcp ) {
                    448:        struct io_buffer *iobuf;
                    449:        struct tcp_header *tcphdr;
                    450:        struct tcp_mss_option *mssopt;
                    451:        struct tcp_timestamp_padded_option *tsopt;
                    452:        void *payload;
                    453:        unsigned int flags;
                    454:        size_t len = 0;
                    455:        uint32_t seq_len;
                    456:        uint32_t app_win;
                    457:        uint32_t max_rcv_win;
                    458:        int rc;
                    459: 
                    460:        /* If retransmission timer is already running, do nothing */
                    461:        if ( timer_running ( &tcp->timer ) )
                    462:                return 0;
                    463: 
                    464:        /* Calculate both the actual (payload) and sequence space
                    465:         * lengths that we wish to transmit.
                    466:         */
                    467:        if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) {
                    468:                len = tcp_process_tx_queue ( tcp, tcp_xmit_win ( tcp ),
                    469:                                             NULL, 0 );
                    470:        }
                    471:        seq_len = len;
                    472:        flags = TCP_FLAGS_SENDING ( tcp->tcp_state );
                    473:        if ( flags & ( TCP_SYN | TCP_FIN ) ) {
                    474:                /* SYN or FIN consume one byte, and we can never send both */
                    475:                assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) );
                    476:                seq_len++;
                    477:        }
                    478:        tcp->snd_sent = seq_len;
                    479: 
                    480:        /* If we have nothing to transmit, stop now */
                    481:        if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
                    482:                return 0;
                    483: 
                    484:        /* If we are transmitting anything that requires
                    485:         * acknowledgement (i.e. consumes sequence space), start the
                    486:         * retransmission timer.  Do this before attempting to
                    487:         * allocate the I/O buffer, in case allocation itself fails.
                    488:         */
                    489:        if ( seq_len )
                    490:                start_timer ( &tcp->timer );
                    491: 
                    492:        /* Allocate I/O buffer */
                    493:        iobuf = alloc_iob ( len + MAX_LL_NET_HEADER_LEN );
                    494:        if ( ! iobuf ) {
                    495:                DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x "
                    496:                       "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ),
                    497:                       tcp->rcv_ack );
                    498:                return -ENOMEM;
                    499:        }
                    500:        iob_reserve ( iobuf, MAX_LL_NET_HEADER_LEN );
                    501: 
                    502:        /* Fill data payload from transmit queue */
                    503:        tcp_process_tx_queue ( tcp, len, iobuf, 0 );
                    504: 
                    505:        /* Expand receive window if possible */
                    506:        max_rcv_win = ( ( freemem * 3 ) / 4 );
                    507:        if ( max_rcv_win > TCP_MAX_WINDOW_SIZE )
                    508:                max_rcv_win = TCP_MAX_WINDOW_SIZE;
                    509:        app_win = xfer_window ( &tcp->xfer );
                    510:        if ( max_rcv_win > app_win )
                    511:                max_rcv_win = app_win;
                    512:        max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
                    513:        if ( tcp->rcv_win < max_rcv_win )
                    514:                tcp->rcv_win = max_rcv_win;
                    515: 
                    516:        /* Fill up the TCP header */
                    517:        payload = iobuf->data;
                    518:        if ( flags & TCP_SYN ) {
                    519:                mssopt = iob_push ( iobuf, sizeof ( *mssopt ) );
                    520:                mssopt->kind = TCP_OPTION_MSS;
                    521:                mssopt->length = sizeof ( *mssopt );
                    522:                mssopt->mss = htons ( TCP_MSS );
                    523:        }
                    524:        if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
                    525:                tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
                    526:                memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) );
                    527:                tsopt->tsopt.kind = TCP_OPTION_TS;
                    528:                tsopt->tsopt.length = sizeof ( tsopt->tsopt );
                    529:                tsopt->tsopt.tsval = htonl ( currticks() );
                    530:                tsopt->tsopt.tsecr = htonl ( tcp->ts_recent );
                    531:        }
                    532:        if ( len != 0 )
                    533:                flags |= TCP_PSH;
                    534:        tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
                    535:        memset ( tcphdr, 0, sizeof ( *tcphdr ) );
                    536:        tcphdr->src = htons ( tcp->local_port );
                    537:        tcphdr->dest = tcp->peer.st_port;
                    538:        tcphdr->seq = htonl ( tcp->snd_seq );
                    539:        tcphdr->ack = htonl ( tcp->rcv_ack );
                    540:        tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
                    541:        tcphdr->flags = flags;
                    542:        tcphdr->win = htons ( tcp->rcv_win );
                    543:        tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
                    544: 
                    545:        /* Dump header */
                    546:        DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x           %08x %4zd",
                    547:                tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
                    548:                ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ),
                    549:                ntohl ( tcphdr->ack ), len );
                    550:        tcp_dump_flags ( tcp, tcphdr->flags );
                    551:        DBGC2 ( tcp, "\n" );
                    552: 
                    553:        /* Transmit packet */
                    554:        if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL,
                    555:                               &tcphdr->csum ) ) != 0 ) {
                    556:                DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n",
                    557:                       tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ),
                    558:                       tcp->rcv_ack, strerror ( rc ) );
                    559:                return rc;
                    560:        }
                    561: 
                    562:        /* Clear ACK-pending flag */
                    563:        tcp->flags &= ~TCP_ACK_PENDING;
                    564: 
                    565:        return 0;
                    566: }
                    567: 
                    568: /**
                    569:  * Retransmission timer expired
                    570:  *
                    571:  * @v timer            Retransmission timer
                    572:  * @v over             Failure indicator
                    573:  */
                    574: static void tcp_expired ( struct retry_timer *timer, int over ) {
                    575:        struct tcp_connection *tcp =
                    576:                container_of ( timer, struct tcp_connection, timer );
                    577: 
                    578:        DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp,
                    579:               ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ),
                    580:               tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
                    581: 
                    582:        assert ( ( tcp->tcp_state == TCP_SYN_SENT ) ||
                    583:                 ( tcp->tcp_state == TCP_SYN_RCVD ) ||
                    584:                 ( tcp->tcp_state == TCP_ESTABLISHED ) ||
                    585:                 ( tcp->tcp_state == TCP_FIN_WAIT_1 ) ||
                    586:                 ( tcp->tcp_state == TCP_CLOSE_WAIT ) ||
                    587:                 ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) );
                    588: 
                    589:        if ( over ) {
                    590:                /* If we have finally timed out and given up,
                    591:                 * terminate the connection
                    592:                 */
                    593:                tcp->tcp_state = TCP_CLOSED;
                    594:                tcp_dump_state ( tcp );
                    595:                tcp_close ( tcp, -ETIMEDOUT );
                    596:        } else {
                    597:                /* Otherwise, retransmit the packet */
                    598:                tcp_xmit ( tcp );
                    599:        }
                    600: }
                    601: 
                    602: /**
                    603:  * Shutdown timer expired
                    604:  *
                    605:  * @v timer            Shutdown timer
                    606:  * @v over             Failure indicator
                    607:  */
                    608: static void tcp_wait_expired ( struct retry_timer *timer, int over __unused ) {
                    609:        struct tcp_connection *tcp =
                    610:                container_of ( timer, struct tcp_connection, wait );
                    611: 
                    612:        assert ( tcp->tcp_state == TCP_TIME_WAIT );
                    613: 
                    614:        DBGC ( tcp, "TCP %p wait complete in %s for %08x..%08x %08x\n", tcp,
                    615:               tcp_state ( tcp->tcp_state ), tcp->snd_seq,
                    616:               ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
                    617: 
                    618:        tcp->tcp_state = TCP_CLOSED;
                    619:        tcp_dump_state ( tcp );
                    620:        tcp_close ( tcp, 0 );
                    621: }
                    622: 
                    623: /**
                    624:  * Send RST response to incoming packet
                    625:  *
                    626:  * @v in_tcphdr                TCP header of incoming packet
                    627:  * @ret rc             Return status code
                    628:  */
                    629: static int tcp_xmit_reset ( struct tcp_connection *tcp,
                    630:                            struct sockaddr_tcpip *st_dest,
                    631:                            struct tcp_header *in_tcphdr ) {
                    632:        struct io_buffer *iobuf;
                    633:        struct tcp_header *tcphdr;
                    634:        int rc;
                    635: 
                    636:        /* Allocate space for dataless TX buffer */
                    637:        iobuf = alloc_iob ( MAX_LL_NET_HEADER_LEN );
                    638:        if ( ! iobuf ) {
                    639:                DBGC ( tcp, "TCP %p could not allocate iobuf for RST "
                    640:                       "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ),
                    641:                       ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) );
                    642:                return -ENOMEM;
                    643:        }
                    644:        iob_reserve ( iobuf, MAX_LL_NET_HEADER_LEN );
                    645: 
                    646:        /* Construct RST response */
                    647:        tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
                    648:        memset ( tcphdr, 0, sizeof ( *tcphdr ) );
                    649:        tcphdr->src = in_tcphdr->dest;
                    650:        tcphdr->dest = in_tcphdr->src;
                    651:        tcphdr->seq = in_tcphdr->ack;
                    652:        tcphdr->ack = in_tcphdr->seq;
                    653:        tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
                    654:        tcphdr->flags = ( TCP_RST | TCP_ACK );
                    655:        tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE );
                    656:        tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
                    657: 
                    658:        /* Dump header */
                    659:        DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x           %08x %4d",
                    660:                tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
                    661:                ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ),
                    662:                ntohl ( tcphdr->ack ), 0 );
                    663:        tcp_dump_flags ( tcp, tcphdr->flags );
                    664:        DBGC2 ( tcp, "\n" );
                    665: 
                    666:        /* Transmit packet */
                    667:        if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest,
                    668:                               NULL, &tcphdr->csum ) ) != 0 ) {
                    669:                DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: "
                    670:                       "%s\n", tcp, ntohl ( in_tcphdr->ack ),
                    671:                       ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ),
                    672:                       strerror ( rc ) );
                    673:                return rc;
                    674:        }
                    675: 
                    676:        return 0;
                    677: }
                    678: 
                    679: /***************************************************************************
                    680:  *
                    681:  * Receive data path
                    682:  *
                    683:  ***************************************************************************
                    684:  */
                    685: 
                    686: /**
                    687:  * Identify TCP connection by local port number
                    688:  *
                    689:  * @v local_port       Local port
                    690:  * @ret tcp            TCP connection, or NULL
                    691:  */
                    692: static struct tcp_connection * tcp_demux ( unsigned int local_port ) {
                    693:        struct tcp_connection *tcp;
                    694: 
                    695:        list_for_each_entry ( tcp, &tcp_conns, list ) {
                    696:                if ( tcp->local_port == local_port )
                    697:                        return tcp;
                    698:        }
                    699:        return NULL;
                    700: }
                    701: 
                    702: /**
                    703:  * Parse TCP received options
                    704:  *
                    705:  * @v tcp              TCP connection
                    706:  * @v data             Raw options data
                    707:  * @v len              Raw options length
                    708:  * @v options          Options structure to fill in
                    709:  */
                    710: static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
                    711:                          size_t len, struct tcp_options *options ) {
                    712:        const void *end = ( data + len );
                    713:        const struct tcp_option *option;
                    714:        unsigned int kind;
                    715: 
                    716:        memset ( options, 0, sizeof ( *options ) );
                    717:        while ( data < end ) {
                    718:                option = data;
                    719:                kind = option->kind;
                    720:                if ( kind == TCP_OPTION_END )
                    721:                        return;
                    722:                if ( kind == TCP_OPTION_NOP ) {
                    723:                        data++;
                    724:                        continue;
                    725:                }
                    726:                switch ( kind ) {
                    727:                case TCP_OPTION_MSS:
                    728:                        options->mssopt = data;
                    729:                        break;
                    730:                case TCP_OPTION_TS:
                    731:                        options->tsopt = data;
                    732:                        break;
                    733:                default:
                    734:                        DBGC ( tcp, "TCP %p received unknown option %d\n",
                    735:                               tcp, kind );
                    736:                        break;
                    737:                }
                    738:                data += option->length;
                    739:        }
                    740: }
                    741: 
                    742: /**
                    743:  * Consume received sequence space
                    744:  *
                    745:  * @v tcp              TCP connection
                    746:  * @v seq_len          Sequence space length to consume
                    747:  */
                    748: static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
                    749: 
                    750:        /* Sanity check */
                    751:        assert ( seq_len > 0 );
                    752: 
                    753:        /* Update acknowledgement number */
                    754:        tcp->rcv_ack += seq_len;
                    755: 
                    756:        /* Update window */
                    757:        if ( tcp->rcv_win > seq_len ) {
                    758:                tcp->rcv_win -= seq_len;
                    759:        } else {
                    760:                tcp->rcv_win = 0;
                    761:        }
                    762: 
                    763:        /* Update timestamp */
                    764:        tcp->ts_recent = tcp->ts_val;
                    765: 
                    766:        /* Mark ACK as pending */
                    767:        tcp->flags |= TCP_ACK_PENDING;
                    768: }
                    769: 
                    770: /**
                    771:  * Handle TCP received SYN
                    772:  *
                    773:  * @v tcp              TCP connection
                    774:  * @v seq              SEQ value (in host-endian order)
                    775:  * @v options          TCP options
                    776:  * @ret rc             Return status code
                    777:  */
                    778: static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
                    779:                        struct tcp_options *options ) {
                    780: 
                    781:        /* Synchronise sequence numbers on first SYN */
                    782:        if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
                    783:                tcp->rcv_ack = seq;
                    784:                if ( options->tsopt )
                    785:                        tcp->flags |= TCP_TS_ENABLED;
                    786:        }
                    787: 
                    788:        /* Ignore duplicate SYN */
                    789:        if ( seq != tcp->rcv_ack )
                    790:                return 0;
                    791: 
                    792:        /* Acknowledge SYN */
                    793:        tcp_rx_seq ( tcp, 1 );
                    794: 
                    795:        /* Mark SYN as received and start sending ACKs with each packet */
                    796:        tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) |
                    797:                            TCP_STATE_RCVD ( TCP_SYN ) );
                    798: 
                    799:        return 0;
                    800: }
                    801: 
                    802: /**
                    803:  * Handle TCP received ACK
                    804:  *
                    805:  * @v tcp              TCP connection
                    806:  * @v ack              ACK value (in host-endian order)
                    807:  * @v win              WIN value (in host-endian order)
                    808:  * @ret rc             Return status code
                    809:  */
                    810: static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
                    811:                        uint32_t win ) {
                    812:        uint32_t ack_len = ( ack - tcp->snd_seq );
                    813:        size_t len;
                    814:        unsigned int acked_flags;
                    815: 
                    816:        /* Check for out-of-range or old duplicate ACKs */
                    817:        if ( ack_len > tcp->snd_sent ) {
                    818:                DBGC ( tcp, "TCP %p received ACK for %08x..%08x, "
                    819:                       "sent only %08x..%08x\n", tcp, tcp->snd_seq,
                    820:                       ( tcp->snd_seq + ack_len ), tcp->snd_seq,
                    821:                       ( tcp->snd_seq + tcp->snd_sent ) );
                    822: 
                    823:                if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) {
                    824:                        /* Just ignore what might be old duplicate ACKs */
                    825:                        return 0;
                    826:                } else {
                    827:                        /* Send RST if an out-of-range ACK is received
                    828:                         * on a not-yet-established connection, as per
                    829:                         * RFC 793.
                    830:                         */
                    831:                        return -EINVAL;
                    832:                }
                    833:        }
                    834: 
                    835:        /* Ignore ACKs that don't actually acknowledge any new data.
                    836:         * (In particular, do not stop the retransmission timer; this
                    837:         * avoids creating a sorceror's apprentice syndrome when a
                    838:         * duplicate ACK is received and we still have data in our
                    839:         * transmit queue.)
                    840:         */
                    841:        if ( ack_len == 0 )
                    842:                return 0;
                    843: 
                    844:        /* Stop the retransmission timer */
                    845:        stop_timer ( &tcp->timer );
                    846: 
                    847:        /* Determine acknowledged flags and data length */
                    848:        len = ack_len;
                    849:        acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) &
                    850:                        ( TCP_SYN | TCP_FIN ) );
                    851:        if ( acked_flags )
                    852:                len--;
                    853: 
                    854:        /* Update SEQ and sent counters, and window size */
                    855:        tcp->snd_seq = ack;
                    856:        tcp->snd_sent = 0;
                    857:        tcp->snd_win = win;
                    858: 
                    859:        /* Remove any acknowledged data from transmit queue */
                    860:        tcp_process_tx_queue ( tcp, len, NULL, 1 );
                    861:                
                    862:        /* Mark SYN/FIN as acknowledged if applicable. */
                    863:        if ( acked_flags )
                    864:                tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags );
                    865: 
                    866:        /* Start sending FIN if we've had all possible data ACKed */
                    867:        if ( list_empty ( &tcp->tx_queue ) && ( tcp->flags & TCP_XFER_CLOSED ) )
                    868:                tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
                    869: 
                    870:        return 0;
                    871: }
                    872: 
                    873: /**
                    874:  * Handle TCP received data
                    875:  *
                    876:  * @v tcp              TCP connection
                    877:  * @v seq              SEQ value (in host-endian order)
                    878:  * @v iobuf            I/O buffer
                    879:  * @ret rc             Return status code
                    880:  *
                    881:  * This function takes ownership of the I/O buffer.
                    882:  */
                    883: static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
                    884:                         struct io_buffer *iobuf ) {
                    885:        uint32_t already_rcvd;
                    886:        uint32_t len;
                    887:        int rc;
                    888: 
                    889:        /* Ignore duplicate or out-of-order data */
                    890:        already_rcvd = ( tcp->rcv_ack - seq );
                    891:        len = iob_len ( iobuf );
                    892:        if ( already_rcvd >= len ) {
                    893:                free_iob ( iobuf );
                    894:                return 0;
                    895:        }
                    896:        iob_pull ( iobuf, already_rcvd );
                    897:        len -= already_rcvd;
                    898: 
                    899:        /* Acknowledge new data */
                    900:        tcp_rx_seq ( tcp, len );
                    901: 
                    902:        /* Deliver data to application */
                    903:        if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) {
                    904:                DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n",
                    905:                       tcp, seq, ( seq + len ), strerror ( rc ) );
                    906:                return rc;
                    907:        }
                    908: 
                    909:        return 0;
                    910: }
                    911: 
                    912: /**
                    913:  * Handle TCP received FIN
                    914:  *
                    915:  * @v tcp              TCP connection
                    916:  * @v seq              SEQ value (in host-endian order)
                    917:  * @ret rc             Return status code
                    918:  */
                    919: static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
                    920: 
                    921:        /* Ignore duplicate or out-of-order FIN */
                    922:        if ( seq != tcp->rcv_ack )
                    923:                return 0;
                    924: 
                    925:        /* Acknowledge FIN */
                    926:        tcp_rx_seq ( tcp, 1 );
                    927: 
                    928:        /* Mark FIN as received */
                    929:        tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
                    930: 
                    931:        /* Close connection */
                    932:        tcp_close ( tcp, 0 );
                    933: 
                    934:        return 0;
                    935: }
                    936: 
                    937: /**
                    938:  * Handle TCP received RST
                    939:  *
                    940:  * @v tcp              TCP connection
                    941:  * @v seq              SEQ value (in host-endian order)
                    942:  * @ret rc             Return status code
                    943:  */
                    944: static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
                    945: 
                    946:        /* Accept RST only if it falls within the window.  If we have
                    947:         * not yet received a SYN, then we have no window to test
                    948:         * against, so fall back to checking that our SYN has been
                    949:         * ACKed.
                    950:         */
                    951:        if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
                    952:                if ( ! tcp_in_window ( seq, tcp->rcv_ack, tcp->rcv_win ) )
                    953:                        return 0;
                    954:        } else {
                    955:                if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
                    956:                        return 0;
                    957:        }
                    958: 
                    959:        /* Abort connection */
                    960:        tcp->tcp_state = TCP_CLOSED;
                    961:        tcp_dump_state ( tcp );
                    962:        tcp_close ( tcp, -ECONNRESET );
                    963: 
                    964:        DBGC ( tcp, "TCP %p connection reset by peer\n", tcp );
                    965:        return -ECONNRESET;
                    966: }
                    967: 
                    968: /**
                    969:  * Enqueue received TCP packet
                    970:  *
                    971:  * @v tcp              TCP connection
                    972:  * @v seq              SEQ value (in host-endian order)
                    973:  * @v flags            TCP flags
                    974:  * @v iobuf            I/O buffer
                    975:  */
                    976: static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
                    977:                             uint8_t flags, struct io_buffer *iobuf ) {
                    978:        struct tcp_rx_queued_header *tcpqhdr;
                    979:        struct io_buffer *queued;
                    980:        size_t len;
                    981:        uint32_t seq_len;
                    982: 
                    983:        /* Calculate remaining flags and sequence length.  Note that
                    984:         * SYN, if present, has already been processed by this point.
                    985:         */
                    986:        flags &= TCP_FIN;
                    987:        len = iob_len ( iobuf );
                    988:        seq_len = ( len + ( flags ? 1 : 0 ) );
                    989: 
                    990:        /* Discard immediately (to save memory) if:
                    991:         *
                    992:         * a) we have not yet received a SYN (and so have no defined
                    993:         *    receive window), or
                    994:         * b) the packet lies entirely outside the receive window, or
                    995:         * c) there is no further content to process.
                    996:         */
                    997:        if ( ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) ||
                    998:             ( tcp_cmp ( seq, tcp->rcv_ack + tcp->rcv_win ) >= 0 ) ||
                    999:             ( tcp_cmp ( seq + seq_len, tcp->rcv_ack ) < 0 ) ||
                   1000:             ( seq_len == 0 ) ) {
                   1001:                free_iob ( iobuf );
                   1002:                return;
                   1003:        }
                   1004: 
                   1005:        /* Add internal header */
                   1006:        tcpqhdr = iob_push ( iobuf, sizeof ( *tcpqhdr ) );
                   1007:        tcpqhdr->seq = seq;
                   1008:        tcpqhdr->flags = flags;
                   1009: 
                   1010:        /* Add to RX queue */
                   1011:        list_for_each_entry ( queued, &tcp->rx_queue, list ) {
                   1012:                tcpqhdr = queued->data;
                   1013:                if ( tcp_cmp ( seq, tcpqhdr->seq ) < 0 )
                   1014:                        break;
                   1015:        }
                   1016:        list_add_tail ( &iobuf->list, &queued->list );
                   1017: }
                   1018: 
                   1019: /**
                   1020:  * Process receive queue
                   1021:  *
                   1022:  * @v tcp              TCP connection
                   1023:  */
                   1024: static void tcp_process_rx_queue ( struct tcp_connection *tcp ) {
                   1025:        struct io_buffer *iobuf;
                   1026:        struct tcp_rx_queued_header *tcpqhdr;
                   1027:        uint32_t seq;
                   1028:        unsigned int flags;
                   1029:        size_t len;
                   1030: 
                   1031:        /* Process all applicable received buffers.  Note that we
                   1032:         * cannot use list_for_each_entry() to iterate over the RX
                   1033:         * queue, since tcp_discard() may remove packets from the RX
                   1034:         * queue while we are processing.
                   1035:         */
                   1036:        while ( ( iobuf = list_first_entry ( &tcp->rx_queue, struct io_buffer,
                   1037:                                             list ) ) ) {
                   1038: 
                   1039:                /* Stop processing when we hit the first gap */
                   1040:                tcpqhdr = iobuf->data;
                   1041:                if ( tcp_cmp ( tcpqhdr->seq, tcp->rcv_ack ) > 0 )
                   1042:                        break;
                   1043: 
                   1044:                /* Strip internal header and remove from RX queue */
                   1045:                list_del ( &iobuf->list );
                   1046:                seq = tcpqhdr->seq;
                   1047:                flags = tcpqhdr->flags;
                   1048:                iob_pull ( iobuf, sizeof ( *tcpqhdr ) );
                   1049:                len = iob_len ( iobuf );
                   1050: 
                   1051:                /* Handle new data, if any */
                   1052:                tcp_rx_data ( tcp, seq, iob_disown ( iobuf ) );
                   1053:                seq += len;
                   1054: 
                   1055:                /* Handle FIN, if present */
                   1056:                if ( flags & TCP_FIN ) {
                   1057:                        tcp_rx_fin ( tcp, seq );
                   1058:                        seq++;
                   1059:                }
                   1060:        }
                   1061: }
                   1062: 
                   1063: /**
                   1064:  * Process received packet
                   1065:  *
                   1066:  * @v iobuf            I/O buffer
                   1067:  * @v st_src           Partially-filled source address
                   1068:  * @v st_dest          Partially-filled destination address
                   1069:  * @v pshdr_csum       Pseudo-header checksum
                   1070:  * @ret rc             Return status code
                   1071:   */
                   1072: static int tcp_rx ( struct io_buffer *iobuf,
                   1073:                    struct sockaddr_tcpip *st_src,
                   1074:                    struct sockaddr_tcpip *st_dest __unused,
                   1075:                    uint16_t pshdr_csum ) {
                   1076:        struct tcp_header *tcphdr = iobuf->data;
                   1077:        struct tcp_connection *tcp;
                   1078:        struct tcp_options options;
                   1079:        size_t hlen;
                   1080:        uint16_t csum;
                   1081:        uint32_t seq;
                   1082:        uint32_t ack;
                   1083:        uint32_t win;
                   1084:        unsigned int flags;
                   1085:        size_t len;
                   1086:        uint32_t seq_len;
                   1087:        int rc;
                   1088: 
                   1089:        /* Sanity check packet */
                   1090:        if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) {
                   1091:                DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n",
                   1092:                      iob_len ( iobuf ), sizeof ( *tcphdr ) );
                   1093:                rc = -EINVAL;
                   1094:                goto discard;
                   1095:        }
                   1096:        hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4;
                   1097:        if ( hlen < sizeof ( *tcphdr ) ) {
                   1098:                DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n",
                   1099:                      hlen, sizeof ( *tcphdr ) );
                   1100:                rc = -EINVAL;
                   1101:                goto discard;
                   1102:        }
                   1103:        if ( hlen > iob_len ( iobuf ) ) {
                   1104:                DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n",
                   1105:                      hlen, iob_len ( iobuf ) );
                   1106:                rc = -EINVAL;
                   1107:                goto discard;
                   1108:        }
                   1109:        csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data,
                   1110:                                       iob_len ( iobuf ) );
                   1111:        if ( csum != 0 ) {
                   1112:                DBG ( "TCP checksum incorrect (is %04x including checksum "
                   1113:                      "field, should be 0000)\n", csum );
                   1114:                rc = -EINVAL;
                   1115:                goto discard;
                   1116:        }
                   1117:        
                   1118:        /* Parse parameters from header and strip header */
                   1119:        tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
                   1120:        seq = ntohl ( tcphdr->seq );
                   1121:        ack = ntohl ( tcphdr->ack );
                   1122:        win = ntohs ( tcphdr->win );
                   1123:        flags = tcphdr->flags;
                   1124:        tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
                   1125:                      ( hlen - sizeof ( *tcphdr ) ), &options );
                   1126:        if ( options.tsopt )
                   1127:                tcp->ts_val = ntohl ( options.tsopt->tsval );
                   1128:        iob_pull ( iobuf, hlen );
                   1129:        len = iob_len ( iobuf );
                   1130:        seq_len = ( len + ( ( flags & TCP_SYN ) ? 1 : 0 ) +
                   1131:                    ( ( flags & TCP_FIN ) ? 1 : 0 ) );
                   1132: 
                   1133:        /* Dump header */
                   1134:        DBGC2 ( tcp, "TCP %p RX %d<-%d           %08x %08x..%08x %4zd",
                   1135:                tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
                   1136:                ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
                   1137:                ( ntohl ( tcphdr->seq ) + seq_len ), len );
                   1138:        tcp_dump_flags ( tcp, tcphdr->flags );
                   1139:        DBGC2 ( tcp, "\n" );
                   1140: 
                   1141:        /* If no connection was found, send RST */
                   1142:        if ( ! tcp ) {
                   1143:                tcp_xmit_reset ( tcp, st_src, tcphdr );
                   1144:                rc = -ENOTCONN;
                   1145:                goto discard;
                   1146:        }
                   1147: 
                   1148:        /* Handle ACK, if present */
                   1149:        if ( flags & TCP_ACK ) {
                   1150:                if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) {
                   1151:                        tcp_xmit_reset ( tcp, st_src, tcphdr );
                   1152:                        goto discard;
                   1153:                }
                   1154:        }
                   1155: 
                   1156:        /* Force an ACK if this packet is out of order */
                   1157:        if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
                   1158:             ( seq != tcp->rcv_ack ) ) {
                   1159:                tcp->flags |= TCP_ACK_PENDING;
                   1160:        }
                   1161: 
                   1162:        /* Handle SYN, if present */
                   1163:        if ( flags & TCP_SYN ) {
                   1164:                tcp_rx_syn ( tcp, seq, &options );
                   1165:                seq++;
                   1166:        }
                   1167: 
                   1168:        /* Handle RST, if present */
                   1169:        if ( flags & TCP_RST ) {
                   1170:                if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 )
                   1171:                        goto discard;
                   1172:        }
                   1173: 
                   1174:        /* Enqueue received data */
                   1175:        tcp_rx_enqueue ( tcp, seq, flags, iob_disown ( iobuf ) );
                   1176: 
                   1177:        /* Process receive queue */
                   1178:        tcp_process_rx_queue ( tcp );
                   1179: 
                   1180:        /* Dump out any state change as a result of the received packet */
                   1181:        tcp_dump_state ( tcp );
                   1182: 
                   1183:        /* Send out any pending data */
                   1184:        tcp_xmit ( tcp );
                   1185: 
                   1186:        /* If this packet was the last we expect to receive, set up
                   1187:         * timer to expire and cause the connection to be freed.
                   1188:         */
                   1189:        if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) {
                   1190:                stop_timer ( &tcp->wait );
                   1191:                start_timer_fixed ( &tcp->wait, ( 2 * TCP_MSL ) );
                   1192:        }
                   1193: 
                   1194:        return 0;
                   1195: 
                   1196:  discard:
                   1197:        /* Free received packet */
                   1198:        free_iob ( iobuf );
                   1199:        return rc;
                   1200: }
                   1201: 
                   1202: /** TCP protocol */
                   1203: struct tcpip_protocol tcp_protocol __tcpip_protocol = {
                   1204:        .name = "TCP",
                   1205:        .rx = tcp_rx,
                   1206:        .tcpip_proto = IP_TCP,
                   1207: };
                   1208: 
                   1209: /**
                   1210:  * Discard some cached TCP data
                   1211:  *
                   1212:  * @ret discarded      Number of cached items discarded
                   1213:  */
                   1214: static unsigned int tcp_discard ( void ) {
                   1215:        struct tcp_connection *tcp;
                   1216:        struct io_buffer *iobuf;
                   1217:        unsigned int discarded = 0;
                   1218: 
                   1219:        /* Try to drop one queued RX packet from each connection */
                   1220:        list_for_each_entry ( tcp, &tcp_conns, list ) {
                   1221:                list_for_each_entry_reverse ( iobuf, &tcp->rx_queue, list ) {
                   1222:                        list_del ( &iobuf->list );
                   1223:                        free_iob ( iobuf );
                   1224:                        discarded++;
                   1225:                        break;
                   1226:                }
                   1227:        }
                   1228: 
                   1229:        return discarded;
                   1230: }
                   1231: 
                   1232: /** TCP cache discarder */
                   1233: struct cache_discarder tcp_cache_discarder __cache_discarder = {
                   1234:        .discard = tcp_discard,
                   1235: };
                   1236: 
                   1237: /***************************************************************************
                   1238:  *
                   1239:  * Data transfer interface
                   1240:  *
                   1241:  ***************************************************************************
                   1242:  */
                   1243: 
                   1244: /**
                   1245:  * Close interface
                   1246:  *
                   1247:  * @v tcp              TCP connection
                   1248:  * @v rc               Reason for close
                   1249:  */
                   1250: static void tcp_xfer_close ( struct tcp_connection *tcp, int rc ) {
                   1251: 
                   1252:        /* Close data transfer interface */
                   1253:        tcp_close ( tcp, rc );
                   1254: 
                   1255:        /* Transmit FIN, if possible */
                   1256:        tcp_xmit ( tcp );
                   1257: }
                   1258: 
                   1259: /**
                   1260:  * Check flow control window
                   1261:  *
                   1262:  * @v tcp              TCP connection
                   1263:  * @ret len            Length of window
                   1264:  */
                   1265: static size_t tcp_xfer_window ( struct tcp_connection *tcp ) {
                   1266: 
                   1267:        /* Not ready if data queue is non-empty.  This imposes a limit
                   1268:         * of only one unACKed packet in the TX queue at any time; we
                   1269:         * do this to conserve memory usage.
                   1270:         */
                   1271:        if ( ! list_empty ( &tcp->tx_queue ) )
                   1272:                return 0;
                   1273: 
                   1274:        /* Return TCP window length */
                   1275:        return tcp_xmit_win ( tcp );
                   1276: }
                   1277: 
                   1278: /**
                   1279:  * Deliver datagram as I/O buffer
                   1280:  *
                   1281:  * @v tcp              TCP connection
                   1282:  * @v iobuf            Datagram I/O buffer
                   1283:  * @v meta             Data transfer metadata
                   1284:  * @ret rc             Return status code
                   1285:  */
                   1286: static int tcp_xfer_deliver ( struct tcp_connection *tcp,
                   1287:                              struct io_buffer *iobuf,
                   1288:                              struct xfer_metadata *meta __unused ) {
                   1289: 
                   1290:        /* Enqueue packet */
                   1291:        list_add_tail ( &iobuf->list, &tcp->tx_queue );
                   1292: 
                   1293:        /* Transmit data, if possible */
                   1294:        tcp_xmit ( tcp );
                   1295: 
                   1296:        return 0;
                   1297: }
                   1298: 
                   1299: /** TCP data transfer interface operations */
                   1300: static struct interface_operation tcp_xfer_operations[] = {
                   1301:        INTF_OP ( xfer_deliver, struct tcp_connection *, tcp_xfer_deliver ),
                   1302:        INTF_OP ( xfer_window, struct tcp_connection *, tcp_xfer_window ),
                   1303:        INTF_OP ( intf_close, struct tcp_connection *, tcp_xfer_close ),
                   1304: };
                   1305: 
                   1306: /** TCP data transfer interface descriptor */
                   1307: static struct interface_descriptor tcp_xfer_desc =
                   1308:        INTF_DESC ( struct tcp_connection, xfer, tcp_xfer_operations );
                   1309: 
                   1310: /***************************************************************************
                   1311:  *
                   1312:  * Openers
                   1313:  *
                   1314:  ***************************************************************************
                   1315:  */
                   1316: 
                   1317: /** TCP socket opener */
                   1318: struct socket_opener tcp_socket_opener __socket_opener = {
                   1319:        .semantics      = TCP_SOCK_STREAM,
                   1320:        .family         = AF_INET,
                   1321:        .open           = tcp_open,
                   1322: };
                   1323: 
                   1324: /** Linkage hack */
                   1325: int tcp_sock_stream = TCP_SOCK_STREAM;
                   1326: 
                   1327: /**
                   1328:  * Open TCP URI
                   1329:  *
                   1330:  * @v xfer             Data transfer interface
                   1331:  * @v uri              URI
                   1332:  * @ret rc             Return status code
                   1333:  */
                   1334: static int tcp_open_uri ( struct interface *xfer, struct uri *uri ) {
                   1335:        struct sockaddr_tcpip peer;
                   1336: 
                   1337:        /* Sanity check */
                   1338:        if ( ! uri->host )
                   1339:                return -EINVAL;
                   1340: 
                   1341:        memset ( &peer, 0, sizeof ( peer ) );
                   1342:        peer.st_port = htons ( uri_port ( uri, 0 ) );
                   1343:        return xfer_open_named_socket ( xfer, SOCK_STREAM,
                   1344:                                        ( struct sockaddr * ) &peer,
                   1345:                                        uri->host, NULL );
                   1346: }
                   1347: 
                   1348: /** TCP URI opener */
                   1349: struct uri_opener tcp_uri_opener __uri_opener = {
                   1350:        .scheme         = "tcp",
                   1351:        .open           = tcp_open_uri,
                   1352: };
                   1353: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.